@genai-fi/nanogpt 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +11 -11
- package/dist/NanoGPTModel.d.ts +2 -2
- package/dist/NanoGPTModel.js +104 -136
- package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} +26 -25
- package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} +5 -5
- package/dist/TeachableLLM.d.ts +3 -0
- package/dist/TeachableLLM.js +50 -47
- package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} +4 -4
- package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} +3 -3
- package/dist/backend.d.ts +1 -0
- package/dist/backend.js +7 -0
- package/dist/backend_util-DWiwsi2N.js +749 -0
- package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-C4v-j9yA.js} +2 -2
- package/dist/{concat-5aPGqw3Z.js → concat-CsHeR4zV.js} +8 -8
- package/dist/{dataset-pgqp-YfL.js → dataset-JDyjG3QR.js} +3 -3
- package/dist/{dropout-Bciw46HT.js → dropout-hpDwECTe.js} +7 -7
- package/dist/{gather-DjyCjmOD.js → gather-D0_gPiBz.js} +4 -4
- package/dist/gelu-uyHP1x1f.js +26 -0
- package/dist/gpgpu_math-DJm3ZTAf.js +2371 -0
- package/dist/index-BPPzKVdR.js +12099 -0
- package/dist/{index-BAzbokzv.js → index-C0dhsYom.js} +405 -389
- package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CwRTFqrc.js} +31 -30
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +5 -5
- package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-D086OgZJ.js} +15 -15
- package/dist/main.d.ts +2 -0
- package/dist/main.js +9 -5
- package/dist/{mat_mul-7121rsJk.js → mat_mul-1nwdPkQ_.js} +4 -4
- package/dist/{max-DtlIuVeW.js → max-BQc2Aj-I.js} +4 -4
- package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-Gzf3I9UV.js} +1 -1
- package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
- package/dist/{ones-BBlSRqn1.js → ones-D63HpSF_.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +8 -8
- package/dist/ops/cpu/attentionMask.js +9 -9
- package/dist/ops/cpu/fusedSoftmax.js +17 -11
- package/dist/ops/cpu/gatherSub.js +7 -7
- package/dist/ops/cpu/gelu.js +13 -13
- package/dist/ops/cpu/matMulGelu.js +36 -24
- package/dist/ops/cpu/matMulMul.js +14 -8
- package/dist/ops/cpu/mulDropout.js +9 -3
- package/dist/ops/cpu/normRMS.js +5 -5
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +11 -11
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +3 -24
- package/dist/ops/grads/matMulGelu.js +5 -5
- package/dist/ops/grads/normRMS.js +6 -6
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +3 -3
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +5 -5
- package/dist/ops/webgl/matMulGelu.js +17 -17
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +4 -4
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/appendCache.js +56 -0
- package/dist/ops/webgpu/attentionMask.d.ts +1 -0
- package/dist/ops/webgpu/attentionMask.js +64 -0
- package/dist/ops/webgpu/gatherSub.d.ts +1 -0
- package/dist/ops/webgpu/gatherSub.js +37 -0
- package/dist/ops/webgpu/gelu.d.ts +14 -0
- package/dist/ops/webgpu/gelu.js +86 -0
- package/dist/ops/webgpu/index.d.ts +0 -0
- package/dist/ops/webgpu/index.js +8 -0
- package/dist/ops/webgpu/normRMS.d.ts +1 -0
- package/dist/ops/webgpu/normRMS.js +115 -0
- package/dist/ops/webgpu/qkv.d.ts +1 -0
- package/dist/ops/webgpu/qkv.js +56 -0
- package/dist/ops/webgpu/rope.d.ts +1 -0
- package/dist/ops/webgpu/rope.js +68 -0
- package/dist/ops/webgpu/scatterSub.d.ts +1 -0
- package/dist/ops/webgpu/scatterSub.js +37 -0
- package/dist/{ops-C0sQEcPw.js → ops-CIQLNshk.js} +452 -503
- package/dist/{random_width-DWzaOgrn.js → random_width-DkYP8W8N.js} +143 -144
- package/dist/{range-DYsrnfiy.js → range-CYzpQY53.js} +1 -1
- package/dist/{reciprocal-CJQeasVa.js → reciprocal-_A9yv27J.js} +1 -1
- package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-guvSxp7M.js} +202 -200
- package/dist/{reshape-krWGKraP.js → reshape-BMUzc1UY.js} +3 -3
- package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-IRBqKz_b.js} +3 -3
- package/dist/{selu_util-sntGesxr.js → selu_util-Dt_iuXaq.js} +6 -6
- package/dist/shared-BNa2q6jD.js +69 -0
- package/dist/{shared-Ca6iDobD.js → shared-CDu9S76h.js} +541 -606
- package/dist/{sin-D_h-qCSx.js → sin-Cocju-BY.js} +6 -6
- package/dist/{softmax-fsdtf6JC.js → softmax-GPNK3o-U.js} +3 -3
- package/dist/{split-eiktj-6L.js → split-CHzJjxDv.js} +4 -4
- package/dist/{stack-dfEEz2OY.js → stack-Dpgg_1W1.js} +2 -2
- package/dist/{sum-BE_Irnim.js → sum-B8wEpKsg.js} +5 -5
- package/dist/{tensor-Xyi595sG.js → tensor-RvZVNmg0.js} +1 -1
- package/dist/{tensor2d-CPEkynbH.js → tensor2d-B_kyod7_.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/Evaluator.js +1 -1
- package/dist/training/FullTrainer.js +20 -20
- package/dist/training/Trainer.d.ts +5 -6
- package/dist/training/Trainer.js +59 -60
- package/dist/training/sparseCrossEntropy.js +19 -26
- package/dist/utilities/dummy.js +19 -19
- package/dist/utilities/generate.js +15 -16
- package/dist/utilities/multinomialCPU.d.ts +2 -0
- package/dist/utilities/multinomialCPU.js +13 -0
- package/dist/utilities/performance.d.ts +2 -0
- package/dist/utilities/performance.js +16 -0
- package/dist/utilities/profile.d.ts +1 -0
- package/dist/utilities/profile.js +9 -6
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-wSS22xj5.js → variable-DXEUOwew.js} +1 -1
- package/dist/webgpu_util-g13LvDIv.js +625 -0
- package/dist/{zeros-YJDE7oRb.js → zeros-DCPCdFGq.js} +8 -8
- package/package.json +2 -1
- package/dist/gpgpu_math-CNslybmD.js +0 -3115
- package/dist/norm-CzltS9Fz.js +0 -86
- package/dist/ops/node/sparseCrossEntropy.js +0 -11
- /package/dist/ops/{node/sparseCrossEntropy.d.ts → webgpu/appendCache.d.ts} +0 -0
package/dist/TeachableLLM.js
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
import { defaultConfig as
|
|
2
|
-
import
|
|
3
|
-
import { saveModel as
|
|
4
|
-
import { loadModel as
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import { E as
|
|
1
|
+
import { defaultConfig as _ } from "./config.js";
|
|
2
|
+
import f from "./NanoGPTModel.js";
|
|
3
|
+
import { saveModel as u } from "./utilities/save.js";
|
|
4
|
+
import { loadModel as d } from "./loader/load.js";
|
|
5
|
+
import l from "./Generator.js";
|
|
6
|
+
import p from "./Trainer.js";
|
|
7
|
+
import { E as g } from "./index-Dwqa6Zy2.js";
|
|
8
8
|
import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
|
|
9
|
-
import
|
|
10
|
-
import
|
|
9
|
+
import c from "./tokeniser/CharTokeniser.js";
|
|
10
|
+
import k from "./tokeniser/bpe.js";
|
|
11
11
|
import "./papaparse.min-C8l2Kvo1.js";
|
|
12
12
|
import "./index-Tf7vU29b.js";
|
|
13
13
|
import "./jszip.min-CjP2V1VV.js";
|
|
14
|
-
import "./index-
|
|
14
|
+
import "./index-C0dhsYom.js";
|
|
15
15
|
import "./ops/cpu/scatterSub.js";
|
|
16
16
|
import "./ops/webgl/scatterSub.js";
|
|
17
17
|
import "./ops/cpu/gatherSub.js";
|
|
@@ -22,9 +22,9 @@ import "./ops/grads/attentionMask.js";
|
|
|
22
22
|
import "./ops/cpu/qkv.js";
|
|
23
23
|
import "./ops/webgl/qkv.js";
|
|
24
24
|
import "./ops/grads/qkv.js";
|
|
25
|
-
import "./random_width-
|
|
26
|
-
import "./register_all_kernels-
|
|
27
|
-
import "./dataset-
|
|
25
|
+
import "./random_width-DkYP8W8N.js";
|
|
26
|
+
import "./register_all_kernels-guvSxp7M.js";
|
|
27
|
+
import "./dataset-JDyjG3QR.js";
|
|
28
28
|
import "./ops/cpu/rope.js";
|
|
29
29
|
import "./ops/webgl/rope.js";
|
|
30
30
|
import "./ops/grads/rope.js";
|
|
@@ -38,14 +38,14 @@ import "./ops/webgl/matMulGelu.js";
|
|
|
38
38
|
import "./ops/grads/matMulGelu.js";
|
|
39
39
|
import "./ops/cpu/gelu.js";
|
|
40
40
|
import "./ops/webgl/gelu.js";
|
|
41
|
-
import "./
|
|
41
|
+
import "./gelu-uyHP1x1f.js";
|
|
42
42
|
import "./ops/cpu/normRMS.js";
|
|
43
43
|
import "./ops/webgl/normRMS.js";
|
|
44
44
|
import "./ops/grads/normRMS.js";
|
|
45
45
|
import "./ops/webgl/log.js";
|
|
46
|
-
import
|
|
46
|
+
import w from "./utilities/profile.js";
|
|
47
47
|
class a {
|
|
48
|
-
ee = new
|
|
48
|
+
ee = new g();
|
|
49
49
|
_config;
|
|
50
50
|
_model;
|
|
51
51
|
_tokeniser;
|
|
@@ -58,66 +58,71 @@ class a {
|
|
|
58
58
|
get vocab() {
|
|
59
59
|
return this._tokeniser?.getVocab() || [];
|
|
60
60
|
}
|
|
61
|
+
/** Model is fully loaded */
|
|
61
62
|
get loaded() {
|
|
62
63
|
return !!this._model && !!this._tokeniser && !!this._config;
|
|
63
64
|
}
|
|
64
65
|
get config() {
|
|
65
66
|
if (!this._config)
|
|
66
|
-
throw new Error("
|
|
67
|
+
throw new Error("configuration_not_initialized.");
|
|
67
68
|
return this._config.gpt;
|
|
68
69
|
}
|
|
69
70
|
get model() {
|
|
70
71
|
if (!this._model)
|
|
71
|
-
throw new Error("
|
|
72
|
+
throw new Error("model_not_initialized.");
|
|
72
73
|
return this._model;
|
|
73
74
|
}
|
|
74
75
|
get tokeniser() {
|
|
75
76
|
if (!this._tokeniser)
|
|
76
|
-
throw new Error("
|
|
77
|
+
throw new Error("tokeniser_not_initialized.");
|
|
77
78
|
return this._tokeniser;
|
|
78
79
|
}
|
|
79
80
|
get status() {
|
|
80
81
|
return this._status;
|
|
81
82
|
}
|
|
83
|
+
/** Model is both ready and not busy */
|
|
82
84
|
get ready() {
|
|
83
|
-
return this._status === "ready" && !!this._model && !!this._tokeniser
|
|
85
|
+
return this._status === "ready" && !!this._model && !!this._tokeniser;
|
|
86
|
+
}
|
|
87
|
+
get busy() {
|
|
88
|
+
return this._status === "busy" || this._status === "training";
|
|
84
89
|
}
|
|
85
90
|
estimateTrainingMemoryUsage(t) {
|
|
86
|
-
const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 },
|
|
87
|
-
return
|
|
91
|
+
const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, i = e.perBatch * t, o = e.gradients;
|
|
92
|
+
return i * 0.66 + o * 4;
|
|
88
93
|
}
|
|
89
94
|
setStatus(t) {
|
|
90
95
|
this._status !== t && (this._status = t, this.ee.emit("status", t));
|
|
91
96
|
}
|
|
92
97
|
saveModel(t) {
|
|
93
98
|
if (!this._model || !this._tokeniser)
|
|
94
|
-
throw new Error("
|
|
95
|
-
return
|
|
99
|
+
throw new Error("model_or_tokeniser_not_initialized.");
|
|
100
|
+
return u(this._model, this._tokeniser, {
|
|
96
101
|
...t,
|
|
97
102
|
name: t?.name || this.meta.name
|
|
98
103
|
});
|
|
99
104
|
}
|
|
100
105
|
static loadModel(t) {
|
|
101
106
|
const e = new a();
|
|
102
|
-
return
|
|
103
|
-
e._model =
|
|
104
|
-
e._memoryRequirements =
|
|
105
|
-
}).catch((
|
|
106
|
-
e.setStatus("error"), e.ee.emit("error",
|
|
107
|
+
return d(t).then(({ model: i, tokeniser: o, name: s }) => {
|
|
108
|
+
e._model = i, e._tokeniser = o, e._config = i.config, s && (e.meta.name = s), e.setStatus("warmup"), m(i).then((r) => {
|
|
109
|
+
e._memoryRequirements = r, e.setStatus("ready"), e.ee.emit("loaded");
|
|
110
|
+
}).catch((r) => {
|
|
111
|
+
e.setStatus("error"), e.ee.emit("error", r);
|
|
107
112
|
});
|
|
108
|
-
}).catch((
|
|
109
|
-
e.setStatus("error"), e.ee.emit("error",
|
|
113
|
+
}).catch((i) => {
|
|
114
|
+
e.setStatus("error"), e.ee.emit("error", i);
|
|
110
115
|
}), e;
|
|
111
116
|
}
|
|
112
117
|
static create(t, e = {}) {
|
|
113
|
-
const
|
|
114
|
-
return
|
|
115
|
-
|
|
116
|
-
h === "trained" &&
|
|
118
|
+
const i = { ..._, ...e }, o = t === "char" ? new c(i.vocabSize) : new k(i.vocabSize), s = new f(i), r = new a(o, s);
|
|
119
|
+
return r.setStatus("warmup"), m(s).then((n) => {
|
|
120
|
+
r._memoryRequirements = n, r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (h) => {
|
|
121
|
+
h === "trained" && r.setStatus("ready");
|
|
117
122
|
}));
|
|
118
123
|
}).catch((n) => {
|
|
119
|
-
|
|
120
|
-
}),
|
|
124
|
+
r.setStatus("error"), r.ee.emit("error", n);
|
|
125
|
+
}), r;
|
|
121
126
|
}
|
|
122
127
|
getProfiler() {
|
|
123
128
|
return this._model?.getProfiler();
|
|
@@ -128,24 +133,22 @@ class a {
|
|
|
128
133
|
set enableProfiler(t) {
|
|
129
134
|
if (t) {
|
|
130
135
|
if (!this._config)
|
|
131
|
-
|
|
132
|
-
this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new
|
|
136
|
+
return;
|
|
137
|
+
this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
|
|
133
138
|
} else
|
|
134
139
|
this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
|
|
135
140
|
}
|
|
136
141
|
getNumParams() {
|
|
137
|
-
|
|
138
|
-
throw new Error("Model is not initialized.");
|
|
139
|
-
return this._model.getNumParams();
|
|
142
|
+
return this._model ? this._model.getNumParams() : 0;
|
|
140
143
|
}
|
|
141
144
|
trainer() {
|
|
142
145
|
if (!this._model || !this._tokeniser)
|
|
143
|
-
throw new Error("
|
|
144
|
-
const t = new
|
|
145
|
-
return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e,
|
|
146
|
+
throw new Error("model_or_tokeniser_not_initialized.");
|
|
147
|
+
const t = new p(this._model, this._tokeniser);
|
|
148
|
+
return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, i) => {
|
|
146
149
|
const o = this.ee.listeners("trainStep");
|
|
147
150
|
for (const s of o)
|
|
148
|
-
await s(e,
|
|
151
|
+
await s(e, i);
|
|
149
152
|
}), t;
|
|
150
153
|
}
|
|
151
154
|
train(t, e) {
|
|
@@ -160,7 +163,7 @@ class a {
|
|
|
160
163
|
generator() {
|
|
161
164
|
if (!this._model || !this._tokeniser)
|
|
162
165
|
throw new Error("model_or_tokeniser_not_initialized.");
|
|
163
|
-
const t = new
|
|
166
|
+
const t = new l(this._model, this._tokeniser);
|
|
164
167
|
return t.on("start", () => {
|
|
165
168
|
this.status === "ready" && this.setStatus("busy");
|
|
166
169
|
}), t.on("stop", () => {
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { R as a, d as s } from "./random_width-
|
|
2
|
-
import "./index-
|
|
1
|
+
import { R as a, d as s } from "./random_width-DkYP8W8N.js";
|
|
2
|
+
import "./index-C0dhsYom.js";
|
|
3
3
|
import o from "./layers/BaseLayer.js";
|
|
4
|
-
import { v as m } from "./variable-
|
|
5
|
-
import { g as d } from "./gather-
|
|
4
|
+
import { v as m } from "./variable-DXEUOwew.js";
|
|
5
|
+
import { g as d } from "./gather-D0_gPiBz.js";
|
|
6
6
|
/**
|
|
7
7
|
* @license
|
|
8
8
|
* Copyright 2018 Google LLC
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { l as c } from "./index-C0dhsYom.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2017 Google LLC. All Rights Reserved.
|
|
@@ -28,7 +28,7 @@ function a(e, n, t) {
|
|
|
28
28
|
t.indexOf(u) === -1 ? s.push(e[o++]) : s.push(n[f++]);
|
|
29
29
|
return s;
|
|
30
30
|
}
|
|
31
|
-
function
|
|
31
|
+
function p(e, n) {
|
|
32
32
|
const t = [], r = e.length;
|
|
33
33
|
for (let o = 0; o < r; o++)
|
|
34
34
|
n.indexOf(o) === -1 && t.push(e[o]);
|
|
@@ -62,7 +62,7 @@ function x(e, n) {
|
|
|
62
62
|
export {
|
|
63
63
|
x as a,
|
|
64
64
|
m as b,
|
|
65
|
-
|
|
65
|
+
p as c,
|
|
66
66
|
i as d,
|
|
67
67
|
h as e,
|
|
68
68
|
a as f,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function selectBackend(backendName: 'cpu' | 'webgl' | 'webgpu'): Promise<void>;
|
package/dist/backend.js
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { g as a, s as i, r as o } from "./index-C0dhsYom.js";
|
|
2
|
+
async function e(t) {
|
|
3
|
+
a() !== t && (t === "webgpu" && (await import("./index-BPPzKVdR.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
|
|
4
|
+
}
|
|
5
|
+
export {
|
|
6
|
+
e as selectBackend
|
|
7
|
+
};
|