@genai-fi/nanogpt 0.17.5 → 0.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +2 -15
- package/dist/Generator.js +45 -34
- package/dist/{RealDiv-CGwv0liw.js → RealDiv-ioj6Z-ox.js} +9 -9
- package/dist/{Reshape-BW__R4mZ.js → Reshape-BZC-ebeR.js} +7 -7
- package/dist/{Reshape-CPBkTIH2.js → Reshape-pwprEaej.js} +1 -1
- package/dist/TeachableLLM.d.ts +3 -8
- package/dist/TeachableLLM.js +61 -44
- package/dist/Trainer.d.ts +6 -4
- package/dist/Trainer.js +107 -92
- package/dist/{axis_util-GTVlo58H.js → axis_util-QWWgLjut.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-GaFarB78.js → backend_util-qwSFfxYx.js} +21 -21
- package/dist/{backend_webgpu-BqASlsbV.js → backend_webgpu-DI2wXEC2.js} +8 -8
- package/dist/{broadcast_to-eS93CCN_.js → broadcast_to-C_EJTVTZ.js} +2 -2
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +5 -5
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +2 -2
- package/dist/checks/normRMS.js +6 -6
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +6 -6
- package/dist/checks/qkv.js +2 -2
- package/dist/checks/rope.js +2 -2
- package/dist/{clip_by_value-DDA7rrcT.js → clip_by_value-CLAD4h_I.js} +1 -1
- package/dist/complex-3DpPEG9B.js +11 -0
- package/dist/{concat-CAQpCret.js → concat-Dqk7Xk7h.js} +5 -5
- package/dist/{concat_util-D18dJ4fD.js → concat_util-C1Mxe27t.js} +1 -1
- package/dist/{dataset-CGGp1z9P.js → dataset-DlqAN81i.js} +3 -3
- package/dist/{dropout_util--NxWuYg2.js → dropout_util-N0z8Os-K.js} +1 -1
- package/dist/{expand_dims-Bkd1YD5x.js → expand_dims-D0rBtgT1.js} +4 -4
- package/dist/{exports_initializers-CYzKLjN7.js → exports_initializers-DIOZQt_L.js} +1 -1
- package/dist/{floor-BQtb-Azg.js → floor-CymuCmTO.js} +1 -1
- package/dist/{gather-qIqEqaGn.js → gather-DEyjXNb1.js} +1 -1
- package/dist/{gelu-B220X1Go.js → gelu-DpTCC3eB.js} +1 -1
- package/dist/{gpgpu_math-BwvV12df.js → gpgpu_math-3bCb5ooU.js} +25 -25
- package/dist/{index-CjOWnMXP.js → index-BQvB7LCC.js} +15 -15
- package/dist/{index-CUXkjxiT.js → index-DSGwv2Yx.js} +33 -33
- package/dist/inference/types.d.ts +16 -0
- package/dist/inference/types.js +1 -0
- package/dist/{kernel_funcs_utils-pq0CK9co.js → kernel_funcs_utils-DGqzNlHT.js} +6 -6
- package/dist/layers/BaseLayer.js +4 -4
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/LoRA.js +4 -4
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/layers/WeightStore.js +2 -2
- package/dist/loader/load.d.ts +2 -8
- package/dist/loader/loadTransformers.d.ts +2 -8
- package/dist/loader/loadTransformers.js +14 -12
- package/dist/loader/newZipLoad.d.ts +2 -8
- package/dist/loader/newZipLoad.js +25 -10
- package/dist/loader/oldZipLoad.js +13 -13
- package/dist/loader/save.d.ts +9 -2
- package/dist/loader/save.js +65 -55
- package/dist/loader/types.d.ts +30 -1
- package/dist/main.d.ts +2 -0
- package/dist/main.js +45 -43
- package/dist/{matMul16-BcVC_E62.js → matMul16-BIT70Vya.js} +3 -3
- package/dist/{matMulGelu-JNLZqKQp.js → matMulGelu-CsZnh18H.js} +18 -18
- package/dist/mat_mul-DP86qZtZ.js +11 -0
- package/dist/mod-BXjLYwvM.js +11 -0
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/NanoGPTV2.js +2 -2
- package/dist/models/model.d.ts +3 -2
- package/dist/models/model.js +13 -13
- package/dist/{not_equal-hurPF26l.js → not_equal-CkQKkKZy.js} +15 -15
- package/dist/{ones-BytntneX.js → ones-DbVB5N58.js} +3 -3
- package/dist/ops/adamAdjust.js +3 -3
- package/dist/ops/adamMoments.js +3 -3
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +6 -6
- package/dist/ops/attentionMask.js +3 -3
- package/dist/ops/concat16.js +3 -3
- package/dist/ops/cpu/adamAdjust.js +9 -9
- package/dist/ops/cpu/adamMoments.js +5 -5
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +6 -6
- package/dist/ops/cpu/fusedSoftmax.js +4 -4
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +4 -4
- package/dist/ops/cpu/matMul16.js +2 -2
- package/dist/ops/cpu/matMulGelu.js +7 -7
- package/dist/ops/cpu/matMulMul.js +2 -2
- package/dist/ops/cpu/mulDropout.js +5 -5
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +5 -5
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/dropout.js +6 -6
- package/dist/ops/dropout16.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/globalNorm.js +7 -7
- package/dist/ops/grads/add16.js +1 -1
- package/dist/ops/grads/attentionMask.js +2 -2
- package/dist/ops/grads/dropout16.js +1 -1
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMul16.js +3 -3
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/mul16.js +1 -1
- package/dist/ops/grads/normRMS.js +7 -7
- package/dist/ops/grads/pack16.js +3 -3
- package/dist/ops/grads/qkv.js +11 -11
- package/dist/ops/grads/rope.js +2 -2
- package/dist/ops/grads/softmax16.js +1 -1
- package/dist/ops/grads/unpack16.js +2 -2
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +6 -6
- package/dist/ops/matMulMul.js +3 -3
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +3 -3
- package/dist/ops/normRMS.js +4 -4
- package/dist/ops/pack16.js +2 -2
- package/dist/ops/qkv.js +3 -3
- package/dist/ops/reshape16.js +6 -6
- package/dist/ops/rope.js +2 -2
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +1 -1
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +6 -6
- package/dist/ops/transpose16.js +3 -3
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/dropout16.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +7 -7
- package/dist/ops/webgl/gatherSub.js +3 -3
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMul16.js +13 -13
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +2 -2
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +2 -2
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/add16.js +6 -6
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +2 -2
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/clipScale.js +7 -7
- package/dist/ops/webgpu/concat16.js +5 -5
- package/dist/ops/webgpu/dropout16.js +6 -6
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +8 -8
- package/dist/ops/webgpu/matMul16.js +16 -16
- package/dist/ops/webgpu/matMul16_program.js +2 -2
- package/dist/ops/webgpu/mul16.js +5 -5
- package/dist/ops/webgpu/norm2.js +1 -1
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +4 -4
- package/dist/ops/webgpu/pack16.js +4 -4
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +2 -2
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/slice16.js +4 -4
- package/dist/ops/webgpu/softmax16.js +4 -4
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +4 -4
- package/dist/ops/webgpu/sub16.js +6 -6
- package/dist/ops/webgpu/sum16.js +3 -3
- package/dist/ops/webgpu/transpose16.js +8 -8
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
- package/dist/ops/webgpu/unpack16.js +3 -3
- package/dist/ops/webgpu/utils/binary_op.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +5 -5
- package/dist/{ops-CsXeTq1P.js → ops-CURIZSVt.js} +100 -100
- package/dist/{pack16-bqltoUlR.js → pack16-WlOSOuZA.js} +2 -2
- package/dist/patches/webgpu_backend.js +6 -6
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +2 -2
- package/dist/{random_normal-IBRrha8a.js → random_normal-CIm8lk2-.js} +1 -1
- package/dist/{random_width-DN5ZtQkM.js → random_width-B_fVXhGx.js} +131 -131
- package/dist/{range-C-CjF-LI.js → range-BDxO73mk.js} +1 -1
- package/dist/{readers-iz5u3HBo.js → readers-17HLdxVM.js} +2 -2
- package/dist/relu-DTvZKBsZ.js +9 -0
- package/dist/{reshape-BDOuCSNW.js → reshape-BIN71H3p.js} +1 -1
- package/dist/{resize_nearest_neighbor-BojqlfRe.js → resize_nearest_neighbor-C6_0dAnK.js} +41 -41
- package/dist/{rope-0j_f1TPm.js → rope-CC5RjmKU.js} +4 -4
- package/dist/{scatter_nd_util-ByNJaL6I.js → scatter_nd_util-C-x73Cj6.js} +1 -1
- package/dist/{segment_util-Dasb2Zaf.js → segment_util-4zuHV5IG.js} +2 -2
- package/dist/{selu_util-BLhIqRkw.js → selu_util-BXdhy_W6.js} +5 -5
- package/dist/{shared-CagdqkLh.js → shared-DRWDyk9w.js} +6 -6
- package/dist/{shared-3agzAqQ_.js → shared-zTaJ5siv.js} +1 -1
- package/dist/slice-BvItlgXu.js +12 -0
- package/dist/{slice_util-CC35pLmT.js → slice_util-DPY56GzQ.js} +5 -5
- package/dist/{softmax-D4q1LJN7.js → softmax-BLGJqdwx.js} +1 -1
- package/dist/split-BN9LkEgS.js +9 -0
- package/dist/{squeeze-ho4wLUek.js → squeeze-O_YWJpw_.js} +2 -2
- package/dist/{stack-DudVrtmG.js → stack-z6QE7kmP.js} +1 -1
- package/dist/{step-BTxPtq1r.js → step-DQY6_ABw.js} +4 -4
- package/dist/{sum-BpiwSWvg.js → sum-D39FeU5h.js} +3 -3
- package/dist/{tensor-BWFldCso.js → tensor-D8e0Gd7c.js} +1 -1
- package/dist/{tensor1d-LMGMIUlr.js → tensor1d-BMl0eZYV.js} +1 -1
- package/dist/{tensor2d-BnXMKScO.js → tensor2d-DTtQ1QcT.js} +1 -1
- package/dist/{tensor4d-C6UCG_u8.js → tensor4d-Dj4rDssL.js} +1 -1
- package/dist/{tfjs_backend-BGnG-ppu.js → tfjs_backend-Bk3PmK91.js} +65 -65
- package/dist/{tile-CFy-xTO6.js → tile-CsWlVKKz.js} +1 -1
- package/dist/tokeniser/BaseTokeniser.d.ts +4 -1
- package/dist/tokeniser/BaseTokeniser.js +35 -15
- package/dist/tokeniser/CharTokeniser.d.ts +1 -1
- package/dist/tokeniser/CharTokeniser.js +29 -28
- package/dist/tokeniser/bpe.d.ts +1 -1
- package/dist/tokeniser/bpe.js +23 -22
- package/dist/tokeniser/type.d.ts +3 -1
- package/dist/training/AdamW.d.ts +3 -0
- package/dist/training/AdamW.js +59 -30
- package/dist/training/BasicTrainer.d.ts +1 -0
- package/dist/training/BasicTrainer.js +112 -92
- package/dist/training/DatasetBuilder.js +3 -3
- package/dist/training/Evaluator.js +2 -2
- package/dist/training/LRScheduler.d.ts +1 -0
- package/dist/training/LRScheduler.js +18 -12
- package/dist/training/PreTrainer.js +3 -3
- package/dist/training/SFTDatasetBuilder.js +3 -3
- package/dist/training/SFTTrainer.js +1 -1
- package/dist/training/orthoGrad.js +1 -1
- package/dist/training/sparseCrossEntropy.js +30 -30
- package/dist/training/types.d.ts +5 -3
- package/dist/training/validation.js +13 -13
- package/dist/{transpose-9kRxIXWR.js → transpose-Qxz-4os3.js} +7 -7
- package/dist/{unsorted_segment_sum-DJvk5xnh.js → unsorted_segment_sum-BfFVV9Zm.js} +20 -20
- package/dist/utilities/datasetID.d.ts +2 -0
- package/dist/utilities/datasetID.js +21 -0
- package/dist/utilities/dummy.js +6 -6
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.js +1 -1
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-Ck482e3n.js → variable-SSATClyt.js} +1 -1
- package/dist/{webgpu_program-B4HmApL1.js → webgpu_program-CbjdYLYk.js} +1 -1
- package/dist/{webgpu_util-DYlGSwOJ.js → webgpu_util-DuofJBMo.js} +7 -7
- package/dist/{zeros-DvZpK8s6.js → zeros-Bw0puq_w.js} +2 -2
- package/dist/{zeros_like-CWjDdwr-.js → zeros_like-rOHr54NY.js} +69 -69
- package/package.json +3 -3
- package/dist/complex-DI35Q-gW.js +0 -11
- package/dist/mat_mul-DhG0Newp.js +0 -11
- package/dist/mod-CSdCpRjf.js +0 -11
- package/dist/relu-J_X6MUzx.js +0 -9
- package/dist/slice-BzS11Qh0.js +0 -12
- package/dist/split-C2Sj255c.js +0 -9
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import "../random_width-
|
|
2
|
-
import "../index-
|
|
3
|
-
import { r as s } from "../exports_initializers-
|
|
1
|
+
import "../random_width-B_fVXhGx.js";
|
|
2
|
+
import "../index-DSGwv2Yx.js";
|
|
3
|
+
import { r as s } from "../exports_initializers-DIOZQt_L.js";
|
|
4
4
|
import a from "./BaseLayer.js";
|
|
5
5
|
import { dot16 as o } from "../ops/dot16.js";
|
|
6
6
|
import { isPackedTensor as r } from "../utilities/packed.js";
|
|
7
|
-
import { p as m } from "../pack16-
|
|
7
|
+
import { p as m } from "../pack16-WlOSOuZA.js";
|
|
8
8
|
import { transpose16 as d } from "../ops/transpose16.js";
|
|
9
|
-
import { v as p } from "../variable-
|
|
10
|
-
import { g as h } from "../gather-
|
|
9
|
+
import { v as p } from "../variable-SSATClyt.js";
|
|
10
|
+
import { g as h } from "../gather-DEyjXNb1.js";
|
|
11
11
|
class g extends a {
|
|
12
12
|
vocabSize;
|
|
13
13
|
embedDim;
|
|
@@ -2,7 +2,7 @@ import p from "./CausalSelfAttention.js";
|
|
|
2
2
|
import h from "./MLP.js";
|
|
3
3
|
import l from "./RMSNorm.js";
|
|
4
4
|
import m from "./BaseLayer.js";
|
|
5
|
-
import { k as n, t as u } from "../index-
|
|
5
|
+
import { k as n, t as u } from "../index-DSGwv2Yx.js";
|
|
6
6
|
import { add16 as d } from "../ops/add16.js";
|
|
7
7
|
class _ extends m {
|
|
8
8
|
ln1;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { u as o } from "../index-DSGwv2Yx.js";
|
|
2
2
|
import { p as h } from "../index-DmeWGGmS.js";
|
|
3
|
-
import { v as b } from "../variable-
|
|
3
|
+
import { v as b } from "../variable-SSATClyt.js";
|
|
4
4
|
class d {
|
|
5
5
|
_variables = /* @__PURE__ */ new Map();
|
|
6
6
|
touchedVariables = /* @__PURE__ */ new Set();
|
package/dist/loader/load.d.ts
CHANGED
|
@@ -1,12 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { default as Model, ModelForwardAttributes } from '../models/model';
|
|
3
|
-
import { TransformersMetadata } from './types';
|
|
1
|
+
import { LoadResult } from './types';
|
|
4
2
|
export declare const VERSION = 2;
|
|
5
3
|
export interface LoadModelOptions {
|
|
6
4
|
sourceURL?: string;
|
|
7
5
|
}
|
|
8
|
-
export declare function loadModel(data: Blob | Buffer | string, options?: LoadModelOptions): Promise<
|
|
9
|
-
model: Model<ModelForwardAttributes>;
|
|
10
|
-
tokeniser: ITokeniser;
|
|
11
|
-
metaData: TransformersMetadata;
|
|
12
|
-
}>;
|
|
6
|
+
export declare function loadModel(data: Blob | Buffer | string, options?: LoadModelOptions): Promise<LoadResult>;
|
|
@@ -1,10 +1,4 @@
|
|
|
1
1
|
import { GPTConfig } from '../models/config';
|
|
2
|
-
import {
|
|
3
|
-
import { default as Model, ModelForwardAttributes } from '../models/model';
|
|
4
|
-
import { TransformersConfig, TransformersMetadata, TransformersTokeniser } from './types';
|
|
2
|
+
import { LoadResult, TransformersConfig, TransformersMetadata, TransformersTokeniser } from './types';
|
|
5
3
|
export declare function mapTransformersConfigToGPTConfig(config: TransformersConfig): GPTConfig;
|
|
6
|
-
export default function loadTransformers(config: TransformersConfig, tokeniser: TransformersTokeniser, metadata: TransformersMetadata, weightData: ArrayBuffer): Promise<
|
|
7
|
-
model: Model<ModelForwardAttributes, GPTConfig>;
|
|
8
|
-
tokeniser: ITokeniser;
|
|
9
|
-
metaData: TransformersMetadata;
|
|
10
|
-
}>;
|
|
4
|
+
export default function loadTransformers(config: TransformersConfig, tokeniser: TransformersTokeniser, metadata: TransformersMetadata, weightData: ArrayBuffer): Promise<LoadResult>;
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import p from "../tokeniser/CharTokeniser.js";
|
|
2
2
|
import _ from "../tokeniser/bpe.js";
|
|
3
3
|
import { load_safetensors as b } from "../utilities/safetensors.js";
|
|
4
|
-
import { aa as c } from "../index-
|
|
4
|
+
import { aa as c } from "../index-DSGwv2Yx.js";
|
|
5
5
|
import { dummyPassAsync as u } from "../utilities/dummy.js";
|
|
6
6
|
import y from "../models/factory.js";
|
|
7
7
|
function h(e) {
|
|
8
|
-
let
|
|
9
|
-
return e.model_type === "GenAI_NanoGPT_v1" ?
|
|
8
|
+
let a;
|
|
9
|
+
return e.model_type === "GenAI_NanoGPT_v1" ? a = {
|
|
10
10
|
modelType: "GenAI_NanoGPT_v1",
|
|
11
11
|
vocabSize: e.vocab_size,
|
|
12
12
|
blockSize: e.block_size,
|
|
@@ -15,7 +15,7 @@ function h(e) {
|
|
|
15
15
|
nEmbed: e.hidden_size,
|
|
16
16
|
mlpFactor: e.mlpFactor,
|
|
17
17
|
useRope: e.useRope
|
|
18
|
-
} :
|
|
18
|
+
} : a = {
|
|
19
19
|
modelType: "GenAI_NanoGPT_v2",
|
|
20
20
|
vocabSize: e.vocab_size,
|
|
21
21
|
blockSize: e.block_size,
|
|
@@ -26,17 +26,19 @@ function h(e) {
|
|
|
26
26
|
loraConfig: e.loraConfig ? new Map(Object.entries(e.loraConfig)) : void 0,
|
|
27
27
|
loraName: e.loraName,
|
|
28
28
|
windowSize: e.windowSize
|
|
29
|
-
},
|
|
29
|
+
}, a;
|
|
30
30
|
}
|
|
31
|
-
async function
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
async function G(e, a, r, n) {
|
|
32
|
+
const m = h(e), o = (a.type ?? "char") === "char" ? new p(a.vocab) : new _(a.vocab, a.merges);
|
|
33
|
+
a.datasetID && (o.datasetID = a.datasetID), a.id ? o.id = a.id : o.generateID();
|
|
34
|
+
const d = await b(n), s = /* @__PURE__ */ new Map();
|
|
35
|
+
for (const [i, l] of Object.entries(d))
|
|
36
|
+
s.set(i, [l]);
|
|
35
37
|
c();
|
|
36
|
-
const
|
|
37
|
-
return
|
|
38
|
+
const t = y(m);
|
|
39
|
+
return t.metaData = r, await u(t), t.weightStore.loadWeights(s, !!r.url), { model: t, tokeniser: o, metaData: r };
|
|
38
40
|
}
|
|
39
41
|
export {
|
|
40
|
-
|
|
42
|
+
G as default,
|
|
41
43
|
h as mapTransformersConfigToGPTConfig
|
|
42
44
|
};
|
|
@@ -1,9 +1,3 @@
|
|
|
1
|
-
import { ITokeniser } from '../main';
|
|
2
1
|
import { default as zip } from 'jszip';
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
export default function loadZipFile(zipFile: zip, metaData: TransformersMetadata): Promise<{
|
|
6
|
-
model: Model<ModelForwardAttributes>;
|
|
7
|
-
tokeniser: ITokeniser;
|
|
8
|
-
metaData: TransformersMetadata;
|
|
9
|
-
}>;
|
|
2
|
+
import { LoadResult, TransformersMetadata } from './types';
|
|
3
|
+
export default function loadZipFile(zipFile: zip, metaData: TransformersMetadata): Promise<LoadResult>;
|
|
@@ -1,16 +1,31 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import h from "./loadTransformers.js";
|
|
2
|
+
import { AdamWOptimizer as m } from "../training/AdamW.js";
|
|
3
|
+
async function u(i, c) {
|
|
4
|
+
const t = await i.file("config.json")?.async("string");
|
|
5
|
+
if (!t)
|
|
5
6
|
throw new Error("Config file not found in the zip archive");
|
|
6
|
-
const
|
|
7
|
-
if (!
|
|
7
|
+
const g = JSON.parse(t), n = await i.file("tokeniser.json")?.async("string");
|
|
8
|
+
if (!n)
|
|
8
9
|
throw new Error("Tokeniser file not found in the zip archive");
|
|
9
|
-
const
|
|
10
|
-
if (!
|
|
10
|
+
const w = JSON.parse(n), e = await i.file("model.safetensors")?.async("arraybuffer");
|
|
11
|
+
if (!e)
|
|
11
12
|
throw new Error("Model weights not found in the zip archive");
|
|
12
|
-
|
|
13
|
+
const r = await i.file("optimizer_config.json")?.async("string");
|
|
14
|
+
let o;
|
|
15
|
+
if (r) {
|
|
16
|
+
const l = JSON.parse(r), f = await i.file("optimizer.safetensors")?.async("arraybuffer");
|
|
17
|
+
if (!f)
|
|
18
|
+
throw new Error("Optimizer weights not found in the zip archive");
|
|
19
|
+
o = new m(l), await o.loadMoments(f);
|
|
20
|
+
}
|
|
21
|
+
const a = await i.file("training_log.json")?.async("string");
|
|
22
|
+
let s;
|
|
23
|
+
return a && (s = JSON.parse(a)), {
|
|
24
|
+
...await h(g, w, c, e),
|
|
25
|
+
optimizer: o,
|
|
26
|
+
log: s
|
|
27
|
+
};
|
|
13
28
|
}
|
|
14
29
|
export {
|
|
15
|
-
|
|
30
|
+
u as default
|
|
16
31
|
};
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
import { aa as y } from "../index-
|
|
2
|
-
import "../random_width-
|
|
3
|
-
import "../zeros_like-
|
|
1
|
+
import { aa as y } from "../index-DSGwv2Yx.js";
|
|
2
|
+
import "../random_width-B_fVXhGx.js";
|
|
3
|
+
import "../zeros_like-rOHr54NY.js";
|
|
4
4
|
import "../Generator.js";
|
|
5
5
|
import "../index-Cp39cXWe.js";
|
|
6
|
-
import "../dataset-
|
|
6
|
+
import "../dataset-DlqAN81i.js";
|
|
7
7
|
import "../ops/cpu/attentionMask.js";
|
|
8
8
|
import "../ops/webgl/attentionMask.js";
|
|
9
9
|
import "../ops/grads/attentionMask.js";
|
|
10
10
|
import "../ops/cpu/rope.js";
|
|
11
11
|
import "../ops/webgl/rope.js";
|
|
12
|
-
import "../rope-
|
|
12
|
+
import "../rope-CC5RjmKU.js";
|
|
13
13
|
import "../ops/cpu/appendCache.js";
|
|
14
14
|
import "../ops/webgl/appendCache.js";
|
|
15
15
|
import "../ops/grads/softmax16.js";
|
|
16
|
-
import "../matMul16-
|
|
16
|
+
import "../matMul16-BIT70Vya.js";
|
|
17
17
|
import "../ops/webgl/matMul16.js";
|
|
18
18
|
import "../ops/cpu/matMul16.js";
|
|
19
|
-
import "../pack16-
|
|
19
|
+
import "../pack16-WlOSOuZA.js";
|
|
20
20
|
import "../ops/transpose16.js";
|
|
21
21
|
import "../ops/reshape16.js";
|
|
22
22
|
import "../ops/cpu/qkv.js";
|
|
@@ -33,22 +33,22 @@ import g from "../tokeniser/CharTokeniser.js";
|
|
|
33
33
|
import k from "../tokeniser/bpe.js";
|
|
34
34
|
import { dummyPassAsync as u } from "../utilities/dummy.js";
|
|
35
35
|
import b from "../models/factory.js";
|
|
36
|
+
import "../ops/cpu/adamAdjust.js";
|
|
37
|
+
import "../ops/webgl/adamAdjust.js";
|
|
38
|
+
import "../ops/cpu/adamMoments.js";
|
|
39
|
+
import "../ops/webgl/adamMoments.js";
|
|
36
40
|
import "../index-DvYrXKkX.js";
|
|
37
41
|
import "../ops/cpu/gatherSub.js";
|
|
38
42
|
import "../ops/webgl/gatherSub.js";
|
|
39
43
|
import "../ops/cpu/scatterSub.js";
|
|
40
44
|
import "../ops/webgl/scatterSub.js";
|
|
41
|
-
import "../ops/cpu/adamAdjust.js";
|
|
42
|
-
import "../ops/webgl/adamAdjust.js";
|
|
43
|
-
import "../ops/cpu/adamMoments.js";
|
|
44
|
-
import "../ops/webgl/adamMoments.js";
|
|
45
45
|
import "../papaparse.min-C0cScC2i.js";
|
|
46
46
|
import "../ops/cpu/matMulGelu.js";
|
|
47
|
-
import "../matMulGelu-
|
|
47
|
+
import "../matMulGelu-CsZnh18H.js";
|
|
48
48
|
import "../ops/grads/matMulGelu.js";
|
|
49
49
|
import "../ops/cpu/gelu.js";
|
|
50
50
|
import "../ops/webgl/gelu.js";
|
|
51
|
-
import "../gelu-
|
|
51
|
+
import "../gelu-DpTCC3eB.js";
|
|
52
52
|
import "../ops/webgl/log.js";
|
|
53
53
|
import "../checks/normRMS.js";
|
|
54
54
|
import "../checks/normRMSGrad.js";
|
package/dist/loader/save.d.ts
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
import { ITokeniser } from '../tokeniser/type';
|
|
2
2
|
import { default as Model, ModelForwardAttributes } from '../models/model';
|
|
3
|
-
import {
|
|
3
|
+
import { AdamWOptimizer } from '../training/AdamW';
|
|
4
|
+
import { TrainingLogEntry } from '../training/types';
|
|
5
|
+
import { GPTConfig } from '../models/config';
|
|
4
6
|
export interface SaveOptions {
|
|
5
7
|
name?: string;
|
|
6
8
|
metadata?: Record<string, unknown>;
|
|
7
9
|
files?: Record<string, unknown>;
|
|
10
|
+
includeOptimizer?: boolean;
|
|
8
11
|
}
|
|
9
|
-
export
|
|
12
|
+
export interface ExtraSaveItems {
|
|
13
|
+
optimizer?: AdamWOptimizer;
|
|
14
|
+
trainingLog?: TrainingLogEntry[];
|
|
15
|
+
}
|
|
16
|
+
export declare function saveModel(model: Model<ModelForwardAttributes, GPTConfig>, tokeniser: ITokeniser, options?: SaveOptions, extraItems?: ExtraSaveItems): Promise<Blob>;
|
package/dist/loader/save.js
CHANGED
|
@@ -1,73 +1,83 @@
|
|
|
1
|
-
import { z as
|
|
2
|
-
import
|
|
1
|
+
import { z as y } from "../jszip.min-BZhlzntC.js";
|
|
2
|
+
import b from "../tokeniser/CharTokeniser.js";
|
|
3
3
|
import { save_safetensors as _ } from "../utilities/safetensors.js";
|
|
4
|
-
import { VERSION as
|
|
5
|
-
async function
|
|
6
|
-
const
|
|
7
|
-
|
|
8
|
-
const n = new
|
|
9
|
-
|
|
10
|
-
|
|
4
|
+
import { VERSION as m } from "./load.js";
|
|
5
|
+
async function d(i, a, o, t) {
|
|
6
|
+
const g = /* @__PURE__ */ new Map();
|
|
7
|
+
i.weightStore.saveWeights(g);
|
|
8
|
+
const n = new y();
|
|
9
|
+
if (t?.optimizer) {
|
|
10
|
+
const e = await t.optimizer.saveMoments();
|
|
11
|
+
n.file("optimizer.safetensors", e, { binary: !0 }), n.file("optimizer_config.json", JSON.stringify(t.optimizer.serializeConfig()), {
|
|
12
|
+
binary: !1
|
|
13
|
+
});
|
|
14
|
+
}
|
|
15
|
+
t?.trainingLog && n.file("training_log.json", JSON.stringify(t.trainingLog, void 0, 4), {
|
|
16
|
+
binary: !1
|
|
17
|
+
});
|
|
18
|
+
const s = {};
|
|
19
|
+
g.forEach((e, f) => {
|
|
20
|
+
e.length === 1 && (s[f] = e[0]);
|
|
11
21
|
});
|
|
12
22
|
const c = await _(s);
|
|
13
23
|
n.file("model.safetensors", c, { binary: !0 });
|
|
14
|
-
const
|
|
15
|
-
let
|
|
16
|
-
|
|
24
|
+
const p = i.config.modelType;
|
|
25
|
+
let r;
|
|
26
|
+
p === "GenAI_NanoGPT_v1" ? r = {
|
|
17
27
|
model_type: "GenAI_NanoGPT_v1",
|
|
18
|
-
vocab_size:
|
|
19
|
-
hidden_size:
|
|
20
|
-
num_hidden_layers:
|
|
21
|
-
num_attention_heads:
|
|
22
|
-
block_size:
|
|
23
|
-
mlpFactor:
|
|
24
|
-
useRope:
|
|
25
|
-
} :
|
|
28
|
+
vocab_size: a.getVocab().length,
|
|
29
|
+
hidden_size: i.config.nEmbed,
|
|
30
|
+
num_hidden_layers: i.config.nLayer,
|
|
31
|
+
num_attention_heads: i.config.nHead,
|
|
32
|
+
block_size: i.config.blockSize,
|
|
33
|
+
mlpFactor: i.config.mlpFactor,
|
|
34
|
+
useRope: i.config.useRope
|
|
35
|
+
} : r = {
|
|
26
36
|
model_type: "GenAI_NanoGPT_v2",
|
|
27
|
-
vocab_size:
|
|
28
|
-
hidden_size:
|
|
29
|
-
num_hidden_layers:
|
|
30
|
-
num_attention_heads:
|
|
31
|
-
block_size:
|
|
32
|
-
mlpFactor:
|
|
33
|
-
loraConfig:
|
|
34
|
-
loraName:
|
|
35
|
-
windowSize:
|
|
36
|
-
}, n.file("config.json", JSON.stringify(
|
|
37
|
+
vocab_size: a.getVocab().length,
|
|
38
|
+
hidden_size: i.config.nEmbed,
|
|
39
|
+
num_hidden_layers: i.config.nLayer,
|
|
40
|
+
num_attention_heads: i.config.nHead,
|
|
41
|
+
block_size: i.config.blockSize,
|
|
42
|
+
mlpFactor: i.config.mlpFactor,
|
|
43
|
+
loraConfig: i.config.loraConfig ? Object.fromEntries(i.config.loraConfig) : void 0,
|
|
44
|
+
loraName: i.config.loraName,
|
|
45
|
+
windowSize: i.config.windowSize
|
|
46
|
+
}, n.file("config.json", JSON.stringify(r, void 0, 4), {
|
|
47
|
+
binary: !1
|
|
48
|
+
});
|
|
49
|
+
const l = {
|
|
50
|
+
version: m,
|
|
51
|
+
application: "@genai-fi/nanogpt",
|
|
52
|
+
meta: o?.metadata,
|
|
53
|
+
name: o?.name,
|
|
54
|
+
training: i.metaData?.training || void 0,
|
|
55
|
+
reference: i.metaData?.url || void 0,
|
|
56
|
+
phase: i.metaData?.phase || void 0,
|
|
57
|
+
pretrainingData: i.metaData?.pretrainingData || void 0,
|
|
58
|
+
pretrainingSettings: i.metaData?.pretrainingSettings || void 0,
|
|
59
|
+
generationSettings: i.metaData?.generationSettings || void 0,
|
|
60
|
+
actionLog: i.metaData?.actionLog || void 0
|
|
61
|
+
};
|
|
62
|
+
if (n.file("meta.json", JSON.stringify(l, void 0, 4), {
|
|
37
63
|
binary: !1
|
|
38
64
|
}), n.file(
|
|
39
|
-
"meta.json",
|
|
40
|
-
JSON.stringify(
|
|
41
|
-
{
|
|
42
|
-
version: b,
|
|
43
|
-
application: "@genai-fi/nanogpt",
|
|
44
|
-
meta: a?.metadata,
|
|
45
|
-
name: a?.name,
|
|
46
|
-
training: e.trainingState || void 0,
|
|
47
|
-
reference: e.metaData?.url || void 0,
|
|
48
|
-
phase: e.metaData?.phase || void 0
|
|
49
|
-
},
|
|
50
|
-
void 0,
|
|
51
|
-
4
|
|
52
|
-
),
|
|
53
|
-
{
|
|
54
|
-
binary: !1
|
|
55
|
-
}
|
|
56
|
-
), n.file(
|
|
57
65
|
"tokeniser.json",
|
|
58
66
|
JSON.stringify({
|
|
59
|
-
type:
|
|
60
|
-
vocab:
|
|
61
|
-
merges:
|
|
67
|
+
type: a instanceof b ? "char" : "bpe",
|
|
68
|
+
vocab: a.getVocab(),
|
|
69
|
+
merges: a.getMerges(),
|
|
70
|
+
datasetID: a.datasetID,
|
|
71
|
+
id: a.id
|
|
62
72
|
}),
|
|
63
73
|
{
|
|
64
74
|
binary: !1
|
|
65
75
|
}
|
|
66
|
-
),
|
|
67
|
-
for (const [
|
|
68
|
-
n.file(
|
|
76
|
+
), o?.files)
|
|
77
|
+
for (const [e, f] of Object.entries(o.files))
|
|
78
|
+
n.file(e, JSON.stringify(f), { binary: !1 });
|
|
69
79
|
return n.generateAsync({ type: "blob" });
|
|
70
80
|
}
|
|
71
81
|
export {
|
|
72
|
-
|
|
82
|
+
d as saveModel
|
|
73
83
|
};
|
package/dist/loader/types.d.ts
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
|
+
import { GenerateOptions } from '../inference/types';
|
|
1
2
|
import { LoRAConfig } from '../models/config';
|
|
2
|
-
import { TrainingState } from '../
|
|
3
|
+
import { default as Model, ModelForwardAttributes, TrainingState } from '../models/model';
|
|
4
|
+
import { ITokeniser } from '../tokeniser/type';
|
|
5
|
+
import { AdamWOptimizer } from '../training/AdamW';
|
|
6
|
+
import { TrainingLogEntry, TrainingOptions } from '../training/types';
|
|
3
7
|
export interface TransformersConfigBase {
|
|
4
8
|
model_type: 'GenAI_NanoGPT_v1' | 'GenAI_NanoGPT_v2';
|
|
5
9
|
vocab_size: number;
|
|
@@ -24,15 +28,40 @@ export interface TransformersTokeniser {
|
|
|
24
28
|
type: 'char' | 'bpe';
|
|
25
29
|
vocab: string[];
|
|
26
30
|
merges: [string, string][];
|
|
31
|
+
datasetID?: string;
|
|
32
|
+
id?: string;
|
|
27
33
|
}
|
|
28
34
|
export type ModelPhase = 'untrained' | 'pretrained' | 'finetuned';
|
|
35
|
+
export interface DatasetMetadata {
|
|
36
|
+
id: string;
|
|
37
|
+
name: string;
|
|
38
|
+
}
|
|
39
|
+
export interface ActionLogEntry {
|
|
40
|
+
action: 'pretrain' | 'generate' | 'finetune';
|
|
41
|
+
timestamp: number;
|
|
42
|
+
duration: number;
|
|
43
|
+
tokensProcessed: number;
|
|
44
|
+
options: TrainingOptions | GenerateOptions;
|
|
45
|
+
}
|
|
29
46
|
export interface TransformersMetadata {
|
|
30
47
|
name?: string;
|
|
31
48
|
version: number;
|
|
32
49
|
application: string;
|
|
33
50
|
training?: TrainingState;
|
|
34
51
|
reference?: string;
|
|
52
|
+
id?: string;
|
|
35
53
|
url?: string;
|
|
36
54
|
phase?: ModelPhase;
|
|
55
|
+
pretrainingData?: DatasetMetadata[];
|
|
56
|
+
pretrainingSettings?: TrainingOptions;
|
|
57
|
+
generationSettings?: GenerateOptions;
|
|
58
|
+
actionLog?: ActionLogEntry[];
|
|
37
59
|
[key: string]: unknown;
|
|
38
60
|
}
|
|
61
|
+
export interface LoadResult {
|
|
62
|
+
model: Model<ModelForwardAttributes>;
|
|
63
|
+
tokeniser: ITokeniser;
|
|
64
|
+
metaData: TransformersMetadata;
|
|
65
|
+
optimizer?: AdamWOptimizer;
|
|
66
|
+
log?: TrainingLogEntry[];
|
|
67
|
+
}
|
package/dist/main.d.ts
CHANGED
|
@@ -12,7 +12,9 @@ export { default as TeachableLLM } from './TeachableLLM';
|
|
|
12
12
|
export { default as CharTokeniser } from './tokeniser/CharTokeniser';
|
|
13
13
|
export { default as BPETokeniser } from './tokeniser/bpe';
|
|
14
14
|
export { default as waitForModel } from './utilities/waitForModel';
|
|
15
|
+
export { default as generateDatasetID } from './utilities/datasetID';
|
|
15
16
|
export { default as loadTextData } from './data/textLoader';
|
|
17
|
+
export type { DatasetMetadata } from './loader/types';
|
|
16
18
|
export { default as Generator, type IGenerator } from './Generator';
|
|
17
19
|
export { default as Evaluator } from './training/Evaluator';
|
|
18
20
|
export { default as Trainer } from './Trainer';
|
package/dist/main.js
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
|
-
import "./index-
|
|
2
|
-
import "./random_width-
|
|
3
|
-
import "./zeros_like-
|
|
1
|
+
import "./index-DSGwv2Yx.js";
|
|
2
|
+
import "./random_width-B_fVXhGx.js";
|
|
3
|
+
import "./zeros_like-rOHr54NY.js";
|
|
4
4
|
import { default as io } from "./Generator.js";
|
|
5
5
|
import "./index-Cp39cXWe.js";
|
|
6
|
-
import "./dataset-
|
|
6
|
+
import "./dataset-DlqAN81i.js";
|
|
7
7
|
import { default as fo } from "./models/NanoGPTV1.js";
|
|
8
8
|
import { default as lo } from "./TeachableLLM.js";
|
|
9
9
|
import { default as uo } from "./tokeniser/CharTokeniser.js";
|
|
10
10
|
import { default as ko } from "./tokeniser/bpe.js";
|
|
11
11
|
import { default as go } from "./utilities/waitForModel.js";
|
|
12
|
-
import { default as Co } from "./
|
|
13
|
-
import { default as Eo } from "./
|
|
14
|
-
import { default as Bo } from "./
|
|
15
|
-
import { default as vo } from "./
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
12
|
+
import { default as Co } from "./utilities/datasetID.js";
|
|
13
|
+
import { default as Eo } from "./data/textLoader.js";
|
|
14
|
+
import { default as Bo } from "./training/Evaluator.js";
|
|
15
|
+
import { default as vo } from "./Trainer.js";
|
|
16
|
+
import { default as Do } from "./models/model.js";
|
|
17
|
+
import { estimateMemoryUsage as So, estimateParameterCount as Ao, estimateResources as Fo, estimateTrainingMemoryUsage as Go, validateConfig as Ro } from "./utilities/parameters.js";
|
|
18
|
+
import { default as wo } from "./utilities/topP.js";
|
|
19
|
+
import { Task as Io, tokensFromTasks as No } from "./training/tasks/Task.js";
|
|
19
20
|
import o from "./training/tasks/PretrainingTask.js";
|
|
20
21
|
import r from "./training/tasks/StartSentenceTask.js";
|
|
21
22
|
import t from "./training/tasks/ConversationTask.js";
|
|
@@ -31,15 +32,15 @@ import "./ops/webgl/qkv.js";
|
|
|
31
32
|
import "./ops/grads/qkv.js";
|
|
32
33
|
import "./ops/cpu/rope.js";
|
|
33
34
|
import "./ops/webgl/rope.js";
|
|
34
|
-
import "./rope-
|
|
35
|
+
import "./rope-CC5RjmKU.js";
|
|
35
36
|
import "./ops/cpu/appendCache.js";
|
|
36
37
|
import "./ops/webgl/appendCache.js";
|
|
37
38
|
import "./ops/cpu/matMulGelu.js";
|
|
38
|
-
import "./matMulGelu-
|
|
39
|
+
import "./matMulGelu-CsZnh18H.js";
|
|
39
40
|
import "./ops/grads/matMulGelu.js";
|
|
40
41
|
import "./ops/cpu/gelu.js";
|
|
41
42
|
import "./ops/webgl/gelu.js";
|
|
42
|
-
import "./gelu-
|
|
43
|
+
import "./gelu-DpTCC3eB.js";
|
|
43
44
|
import "./ops/cpu/normRMS.js";
|
|
44
45
|
import "./ops/webgl/normRMS.js";
|
|
45
46
|
import "./ops/grads/normRMS.js";
|
|
@@ -48,21 +49,21 @@ import "./ops/cpu/adamMoments.js";
|
|
|
48
49
|
import "./ops/webgl/adamMoments.js";
|
|
49
50
|
import "./ops/cpu/adamAdjust.js";
|
|
50
51
|
import "./ops/webgl/adamAdjust.js";
|
|
51
|
-
import { u as e, p as m } from "./pack16-
|
|
52
|
+
import { u as e, p as m } from "./pack16-WlOSOuZA.js";
|
|
52
53
|
import "./ops/grads/softmax16.js";
|
|
53
|
-
import "./matMul16-
|
|
54
|
+
import "./matMul16-BIT70Vya.js";
|
|
54
55
|
import "./ops/webgl/matMul16.js";
|
|
55
56
|
import "./ops/cpu/matMul16.js";
|
|
56
57
|
import "./ops/transpose16.js";
|
|
57
|
-
import { selectBackend as
|
|
58
|
-
import { default as
|
|
59
|
-
import
|
|
60
|
-
import
|
|
58
|
+
import { selectBackend as Wo } from "./backend.js";
|
|
59
|
+
import { default as qo } from "./utilities/performance.js";
|
|
60
|
+
import a from "./layers/CausalSelfAttention.js";
|
|
61
|
+
import p from "./layers/MLP.js";
|
|
61
62
|
import i from "./layers/TransformerBlock.js";
|
|
62
63
|
import s from "./layers/RoPECache.js";
|
|
63
|
-
import { AdamWOptimizer as
|
|
64
|
-
import { default as
|
|
65
|
-
import { sentenceEmbeddings as
|
|
64
|
+
import { AdamWOptimizer as Jo } from "./training/AdamW.js";
|
|
65
|
+
import { default as Qo } from "./checks/index.js";
|
|
66
|
+
import { sentenceEmbeddings as Xo, sentenceEmbeddingsTensor as Yo } from "./utilities/sentences.js";
|
|
66
67
|
const to = {
|
|
67
68
|
PretrainingTask: o,
|
|
68
69
|
StartSentenceTask: r,
|
|
@@ -71,37 +72,38 @@ const to = {
|
|
|
71
72
|
pack16: m,
|
|
72
73
|
unpack16: e
|
|
73
74
|
}, mo = {
|
|
74
|
-
CausalSelfAttention:
|
|
75
|
-
MLP:
|
|
75
|
+
CausalSelfAttention: a,
|
|
76
|
+
MLP: p,
|
|
76
77
|
TransformerBlock: i,
|
|
77
78
|
RoPECache: s
|
|
78
79
|
};
|
|
79
80
|
export {
|
|
80
|
-
|
|
81
|
+
Jo as AdamWOptimizer,
|
|
81
82
|
ko as BPETokeniser,
|
|
82
83
|
uo as CharTokeniser,
|
|
83
|
-
|
|
84
|
+
Bo as Evaluator,
|
|
84
85
|
io as Generator,
|
|
85
|
-
|
|
86
|
+
Do as Model,
|
|
86
87
|
fo as NanoGPT,
|
|
87
|
-
|
|
88
|
+
Io as Task,
|
|
88
89
|
lo as TeachableLLM,
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
90
|
+
vo as Trainer,
|
|
91
|
+
Qo as checks,
|
|
92
|
+
So as estimateMemoryUsage,
|
|
93
|
+
Ao as estimateParameterCount,
|
|
94
|
+
Fo as estimateResources,
|
|
95
|
+
Go as estimateTrainingMemoryUsage,
|
|
96
|
+
Co as generateDatasetID,
|
|
95
97
|
mo as layers,
|
|
96
|
-
|
|
98
|
+
Eo as loadTextData,
|
|
97
99
|
eo as ops,
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
qo as performanceTest,
|
|
101
|
+
Wo as selectBackend,
|
|
102
|
+
Xo as sentenceEmbeddings,
|
|
103
|
+
Yo as sentenceEmbeddingsTensor,
|
|
102
104
|
to as tasks,
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
105
|
+
No as tokensFromTasks,
|
|
106
|
+
wo as topP,
|
|
107
|
+
Ro as validateConfig,
|
|
106
108
|
go as waitForModel
|
|
107
109
|
};
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { c as y, e as h } from "./index-DSGwv2Yx.js";
|
|
2
2
|
import "./ops/webgl/matMul16.js";
|
|
3
3
|
import "./ops/cpu/matMul16.js";
|
|
4
4
|
import { isPackedTensor as v } from "./utilities/packed.js";
|
|
5
|
-
import { p as g } from "./pack16-
|
|
6
|
-
import { d as k } from "./gelu-
|
|
5
|
+
import { p as g } from "./pack16-WlOSOuZA.js";
|
|
6
|
+
import { d as k } from "./gelu-DpTCC3eB.js";
|
|
7
7
|
import { transpose16 as S } from "./ops/transpose16.js";
|
|
8
8
|
import { reshape16 as w } from "./ops/reshape16.js";
|
|
9
9
|
import { mul16 as D } from "./ops/mul16.js";
|