@genai-fi/nanogpt 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/NanoGPTModel.js +78 -88
- package/dist/TeachableLLM.d.ts +13 -2
- package/dist/TeachableLLM.js +48 -23
- package/dist/{exports_layers-7idKoYqh.js → exports_layers-tbTBcwMM.js} +1 -1
- package/dist/layers/CausalSelfAttention.js +2 -2
- package/dist/layers/MLP.js +2 -2
- package/dist/layers/TiedEmbedding.js +1 -1
- package/dist/main.d.ts +1 -0
- package/dist/{random_width-PbCt7RXv.js → random_width-oeUIlUZj.js} +0 -2
- package/dist/tokeniser/bpe.js +3 -3
- package/package.json +1 -1
package/dist/NanoGPTModel.js
CHANGED
@@ -5,14 +5,14 @@ import C from "./layers/RoPECache.js";
 import q from "./layers/RMSNorm.js";
 import { estimateParameterCount as K } from "./utilities/parameters.js";
 import { createSoftmaxCrossEntropyWithGrad as N } from "./training/sparseCrossEntropy.js";
-import
-import { r as
-import { o as
-import { e as
-import { r as
-import { r as
-import { g as
-import { s as
+import T from "./layers/BaseLayer.js";
+import { r as R, e as D, p as A } from "./random_width-oeUIlUZj.js";
+import { o as y, h as E, p as B, E as z, W as G, X as O, Y as Q, t as w, Z as X, f as _ } from "./index-pWA4_lUh.js";
+import { e as j, a as U } from "./exports_layers-tbTBcwMM.js";
+import { r as S } from "./reshape-C8CR_Bad.js";
+import { r as V } from "./range-CcDl05lo.js";
+import { g as Y } from "./gather-BPGW8RsB.js";
+import { s as Z } from "./softmax-Be_lsqUc.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -29,13 +29,13 @@ import { s as J } from "./softmax-Be_lsqUc.js";
  * limitations under the License.
  * =============================================================================
  */
-function
-let e = E(m, "a", "mod"),
-[e,
-const i = { a: e, b:
-return
+function H(m, t) {
+  let e = E(m, "a", "mod"), o = E(t, "b", "mod");
+  [e, o] = B(e, o);
+  const i = { a: e, b: o };
+  return z.runKernel(G, i);
 }
-const et = /* @__PURE__ */ v({ mod_: tt });
+const J = /* @__PURE__ */ y({ mod_: H });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -52,17 +52,17 @@ const et = /* @__PURE__ */ v({ mod_: tt });
  * limitations under the License.
  * =============================================================================
  */
-function
-const i = E(m, "logits", "multinomial"),
-if (
-throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${
-if (
-throw new Error(`Rank of probabilities must be 1 or 2, but is ${
+function tt(m, t, e, o = !1) {
+  const i = E(m, "logits", "multinomial"), s = i.size, r = i.rank;
+  if (s < 2)
+    throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
+  if (r > 2)
+    throw new Error(`Rank of probabilities must be 1 or 2, but is ${r}`);
   e = e || Math.random();
-const
-return
+  const n = { logits: r === 1 ? S(i, [1, -1]) : i }, h = { numSamples: t, seed: e, normalized: o }, a = z.runKernel(O, n, h);
+  return r === 1 ? S(a, [a.size]) : a;
 }
-const S = /* @__PURE__ */ v({ multinomial_: ot });
+const I = /* @__PURE__ */ y({ multinomial_: tt });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -79,20 +79,20 @@ const S = /* @__PURE__ */ v({ multinomial_: ot });
  * limitations under the License.
  * =============================================================================
  */
-function
-const
-if (
+function et(m, t = 1, e = !0) {
+  const o = E(m, "x", "topk");
+  if (o.rank === 0)
     throw new Error("topk() expects the input to be of rank 1 or higher");
-const i =
+  const i = o.shape[o.shape.length - 1];
   if (t < 0)
     throw new Error(`'k' passed to topk() must be >= 0 but got ${t}`);
   if (t > i)
     throw new Error(`'k' passed to topk() must be <= the last dimension (${i}) but got ${t}`);
-const
-return { values: l, indices:
+  const s = { x: o }, r = { k: t, sorted: e }, [l, n] = z.runKernel(Q, s, r);
+  return { values: l, indices: n };
 }
-const
-class Et extends R {
+const ot = /* @__PURE__ */ y({ topk_: et });
+class kt extends T {
   config;
   wte;
   // Token embeddings
@@ -111,12 +111,12 @@ class Et extends R {
       vocabSize: this.config.vocabSize,
       embedDim: this.config.nEmbed,
       name: "token_embedding"
-    }), this.config.useRope === !1 ? this.wpe =
+    }), this.config.useRope === !1 ? this.wpe = j({
       inputDim: this.config.blockSize,
       outputDim: this.config.nEmbed,
      name: "positional_embedding",
-      embeddingsInitializer:
-    }) : this.ropeCache = new C(this.config), this.drop =
+      embeddingsInitializer: R({ mean: 0, stddev: 0.02 })
+    }) : this.ropeCache = new C(this.config), this.drop = U({ rate: this.config.dropout }), this.blocks = [];
     for (let e = 0; e < this.config.nLayer; e++)
       this.blocks.push(new L(e, this.config, this.ropeCache));
     this.lnF = new q([this.config.nEmbed], 1e-8, "final_rms_norm");
@@ -142,14 +142,14 @@ class Et extends R {
       this.blocks[e].loadWeights(t);
     this.lnF.setWeights(t.get("final_rms_norm") || []);
   }
-  inputPhase(t, e,
+  inputPhase(t, e, o = !1) {
     return w(() => {
       const i = this.wte.embed(t);
       if (this.config.useRope === !1) {
-        const [,
-        return this.drop.apply(
+        const [, s] = t.shape, r = this.config.blockSize, l = V(0, s, 1, "int32"), n = J(X(l, _(e, "int32")), _(r, "int32")), h = this.wpe.apply(n), a = i.add(h);
+        return this.drop.apply(a, { training: o });
       } else
-        return this.drop.apply(i, { training:
+        return this.drop.apply(i, { training: o });
     });
   }
   setSkipMask(t) {
@@ -186,8 +186,8 @@ class Et extends R {
   calculateLoss(t, e) {
     try {
       return N()(t, e).mean();
-    } catch (
-      throw console.error("Error computing loss:",
+    } catch (o) {
+      throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
     }
   }
   // Attention rollout per Abnar & Zuidema (2020)
@@ -196,77 +196,67 @@ class Et extends R {
     return w(() => {
       if (t.length === 0)
        throw new Error("No attentions for rollout");
-      const [e,
-      for (const
-        const [
-        if (
+      const [e, o, i] = t[0].shape;
+      for (const s of t) {
+        const [r, l, n] = s.shape;
+        if (r !== e || l !== o || n !== i)
          throw new Error(
-            `Inconsistent attention shapes in rollout: expected [${e},${
+            `Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${r},${l},${n}]`
          );
      }
-      if (
-        const
-        let
+      if (o === i) {
+        const s = D(i, i).expandDims(0);
+        let r = s.tile([e, 1, 1]);
        for (const l of t) {
-          const
-
+          const n = l.add(s);
+          r = n.div(n.sum(-1, !0)).matMul(r);
        }
-        return
+        return r;
      }
-
-      let o = null;
-      const n = A([i - 1], "int32"), l = B(n, i).reshape([1, 1, i]).tile([e, 1, 1]);
-      n.dispose();
-      for (const r of t) {
-        let a = r.add(l);
-        a = a.div(a.sum(-1, !0)), o == null ? o = a : (o = o.mul(a), o = o.div(o.sum(-1, !0)));
-      }
-      return o;
-      }
-      throw new Error(`Unsupported attention shapes for rollout: [B=${e}, Q=${s}, K=${i}]`);
+      throw new Error(`Unsupported attention shapes for rollout: [B=${e}, Q=${o}, K=${i}]`);
    });
  }
-  forward(t, e,
+  forward(t, e, o = !1, i = !1, s) {
    return this.validateInput(t), w(() => {
      this.startMemory();
-      const
-      let l = this.inputPhase(t,
-      const
-      if (
-        throw console.error("Cache",
+      const r = s?.[0]?.length ?? 0;
+      let l = this.inputPhase(t, r, o);
+      const n = [];
+      if (s && s.length !== this.blocks.length)
+        throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
      for (let c = 0; c < this.blocks.length; c++) {
        const u = l, d = this.blocks[c], {
          output: b,
          attention: k,
          cache: f
-        } = d.call(l,
-        l = b, u.dispose(), i && k &&
+        } = d.call(l, o, i, s ? s[c] : void 0);
+        l = b, u.dispose(), i && k && n.push(k), s && f ? (s[c]?.k.dispose(), s[c]?.v.dispose(), s[c] = f) : f && (f.k.dispose(), f.v.dispose());
      }
-      let
-      i &&
-      const
+      let h;
+      i && n.length > 0 && (h = this.computeAttentionRollout(n)), l = this.lnF.apply(l);
+      const a = this.wte.project(l);
      let p;
-      return e && (p = this.calculateLoss(
+      return e && (p = this.calculateLoss(a, e)), this.endMemory("Forward"), { logits: a, loss: p, attention: i ? h : void 0 };
    });
  }
-  generate(t, e,
-    const i =
+  generate(t, e, o) {
+    const i = o?.temperature ?? 1, s = o?.topK, r = o?.usePadding ?? !1, l = o?.includeAttention ?? !1;
    return w(() => {
-      const
-        [0,
-        [
-      ), p =
+      const n = t, h = n.shape[1], a = h <= this.config.blockSize ? n : n.slice(
+        [0, h - this.config.blockSize],
+        [n.shape[0], this.config.blockSize]
+      ), p = r ? this.config.blockSize - a.shape[1] : 0, c = p > 0 ? A(a, [
        [0, 0],
        [0, p]
-      ]) :
+      ]) : a, { logits: u, attention: d } = this.forward(c, void 0, !1, l, e), b = u.shape[1] - 1 - p, k = u.slice([0, b, 0], [u.shape[0], 1, u.shape[2]]), f = d ? d.slice([0, b, 0], [d.shape[0], 1, d.shape[2]]) : void 0, $ = k.div(i);
      let g;
-      if (
-        const { values:
-        g =
+      if (s) {
+        const { values: M, indices: x } = ot($, s), W = I(M.squeeze([1]), 1);
+        g = Y(x.squeeze([1]), W, 1);
      } else
-        g =
-      let
-      return
+        g = I($.squeeze([1]), 1);
+      let v;
+      return o?.includeProbabilities && (v = Z($.squeeze([1]))), g = g.reshape([1, 1]), { output: g, attention: f?.squeeze([1]), probabilities: v };
    });
  }
  getNumParams() {
@@ -277,5 +267,5 @@ class Et extends R {
  }
 }
 export {
-  Et as default
+  kt as default
 };
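The rebuilt generate() above reduces to temperature scaling plus an optional top-k filter before a multinomial draw; the minified `ot`, `I` and `Y` are the inlined tfjs topk, multinomial and gather kernels. Below is a minimal sketch of the same sampling scheme against the public @tensorflow/tfjs API. The helper name `sampleNextToken` is illustrative, not part of this package.

```ts
import * as tf from '@tensorflow/tfjs';

// Temperature + optional top-k sampling over a [vocabSize] logits vector,
// mirroring the branch structure of generate() in NanoGPTModel.js.
function sampleNextToken(logits: tf.Tensor1D, temperature = 1, topK?: number): tf.Tensor {
  return tf.tidy(() => {
    const scaled = logits.div(tf.scalar(temperature));
    if (topK) {
      // Keep the k largest logits, sample among them, then map the sampled
      // slot back to a vocabulary index.
      const { values, indices } = tf.topk(scaled, topK);
      const slot = tf.multinomial(values as tf.Tensor1D, 1);
      return tf.gather(indices, slot);
    }
    return tf.multinomial(scaled as tf.Tensor1D, 1);
  });
}
```

Note that generate() samples at index `u.shape[1] - 1 - p`, so when usePadding pads the window out to blockSize, the logits of the true last token are still the ones being sampled.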
package/dist/TeachableLLM.d.ts
CHANGED
@@ -4,15 +4,17 @@ import { default as NanoGPT } from './NanoGPTModel';
 import { SaveOptions } from './utilities/save';
 import { default as Generator, IGenerateOptions } from './Generator';
 import { default as Trainer, ITrainerOptions } from './Trainer';
-import { default as EE } from 'eventemitter3';
 import { default as MemoryProfiler } from './utilities/profile';
 type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
-export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
+export default class TeachableLLM {
+    private ee;
     private _config?;
     private _model?;
     private _tokeniser?;
     private _status;
     constructor(tokeniser?: ITokeniser, model?: NanoGPT);
+    get vocab(): string[];
+    get loaded(): boolean;
     get config(): GPTConfig;
     get model(): NanoGPT;
     get tokeniser(): ITokeniser;
@@ -28,8 +30,17 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
     getNumParams(): number;
     trainer(): Trainer;
     train(text: string[], options?: ITrainerOptions): Promise<void>;
+    trainTokeniser(text: string[]): Promise<number>;
     generator(): Generator;
     generateText(prompt?: string, options?: IGenerateOptions): Promise<string>;
     dispose(): void;
+    on(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
+    on(event: 'error', listener: (error: Error) => void): void;
+    on(event: 'trainStep', listener: (step: number) => void): void;
+    on(event: 'loaded', listener: () => void): void;
+    off(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
+    off(event: 'error', listener: (error: Error) => void): void;
+    off(event: 'trainStep', listener: (step: number) => void): void;
+    off(event: 'loaded', listener: () => void): void;
 }
 export {};
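The declaration changes replace `extends EE<...>` with explicitly typed on/off overloads (including the new 'loaded' event) and add trainTokeniser, vocab and loaded. A hedged usage sketch, assuming the main entry re-exports TeachableLLM as its default and that create()'s first argument selects the tokeniser type, as the implementation below suggests:

```ts
import TeachableLLM from '@genai-fi/nanogpt';

const llm = TeachableLLM.create('bpe', { vocabSize: 512 });

llm.on('loaded', async () => {
  // New in 0.3.1: train the tokeniser explicitly if it is awaiting tokens.
  await llm.trainTokeniser(['some text for the tokeniser to learn merges from']);
  console.log('vocab size:', llm.vocab.length, 'loaded:', llm.loaded);
  // temperature and topK are the options the new generate() path reads.
  const text = await llm.generateText('the', { temperature: 0.8, topK: 40 });
  console.log(text);
});
llm.on('error', (err) => console.error('model error:', err));
```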
package/dist/TeachableLLM.js
CHANGED
@@ -1,12 +1,12 @@
-import { defaultConfig as
+import { defaultConfig as d } from "./config.js";
 import h from "./NanoGPTModel.js";
-import { saveModel as
+import { saveModel as l } from "./utilities/save.js";
 import { loadModel as f } from "./utilities/load.js";
 import u from "./Generator.js";
 import _ from "./Trainer.js";
-import { E as
+import { E as c } from "./index-Dwqa6Zy2.js";
 import { dummyPassAsync as m } from "./utilities/dummy.js";
-import
+import p from "./tokeniser/CharTokeniser.js";
 import g from "./tokeniser/bpe.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
@@ -29,13 +29,20 @@ import "./ops/grads/rope.js";
 import "./ops/cpu/appendCache.js";
 import "./ops/webgl/appendCache.js";
 import w from "./utilities/profile.js";
-class a extends p {
+class a {
+  ee = new c();
   _config;
   _model;
   _tokeniser;
   _status = "loading";
   constructor(t, e) {
-
+    this._config = e?.config, this._tokeniser = t, this._model = e;
+  }
+  get vocab() {
+    return this._tokeniser?.getVocab() || [];
+  }
+  get loaded() {
+    return !!this._model && !!this._tokeniser && !!this._config;
   }
   get config() {
     if (!this._config)
@@ -59,34 +66,34 @@ class a extends p {
     return this._status === "ready" && !!this._model && !!this._tokeniser && this.tokeniser.trained;
   }
   setStatus(t) {
-    this._status !== t && (this._status = t, this.emit("status", t));
+    this._status !== t && (this._status = t, this.ee.emit("status", t));
   }
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    return
+    return l(this._model, this._tokeniser, t);
   }
   static loadModel(t) {
     const e = new a();
-    return f(t).then(({ model:
-      e._model =
-      e.setStatus("ready");
+    return f(t).then(({ model: i, tokeniser: o }) => {
+      e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), m(i).then(() => {
+        e.setStatus("ready"), e.ee.emit("loaded");
       }).catch((s) => {
-        e.setStatus("error"), e.emit("error", s);
+        e.setStatus("error"), e.ee.emit("error", s);
       });
-    }).catch((
-      e.setStatus("error"), e.emit("error",
+    }).catch((i) => {
+      e.setStatus("error"), e.ee.emit("error", i);
     }), e;
   }
   static create(t, e = {}) {
-    const
-    return
-
-      n === "trained" &&
+    const i = { ...d, ...e }, o = t === "char" ? new p(i.vocabSize) : new g(i.vocabSize), s = new h(i), r = new a(o, s);
+    return r.setStatus("warmup"), m(s).then(() => {
+      r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (n) => {
+        n === "trained" && r.setStatus("ready");
       }));
     }).catch((n) => {
-
-    }),
+      r.setStatus("error"), r.ee.emit("error", n);
+    }), r;
   }
   getProfiler() {
     return this._model?.getProfiler();
@@ -112,17 +119,23 @@ class a extends p {
       throw new Error("Model or tokeniser is not initialized.");
     const t = new _(this._model, this._tokeniser);
     return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e) => {
-      const
-      for (const o of
+      const i = this.ee.listeners("trainStep");
+      for (const o of i)
        await o(e);
    }), t;
  }
  train(t, e) {
    return this.trainer().train(t, e);
  }
+  async trainTokeniser(t) {
+    if (!this._tokeniser)
+      throw new Error("tokeniser_not_initialized.");
+    const e = await this._tokeniser.train(t);
+    return this._status === "awaitingTokens" && this.setStatus("ready"), e;
+  }
  generator() {
    if (!this._model || !this._tokeniser)
-      throw new Error("
+      throw new Error("model_or_tokeniser_not_initialized.");
    const t = new u(this._model, this._tokeniser);
    return t.on("start", () => {
      this.status === "ready" && this.setStatus("busy");
@@ -136,6 +149,18 @@ class a extends p {
  dispose() {
    this._model?.dispose();
  }
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  on(t, e) {
+    if (t === "loaded" && this.loaded) {
+      setTimeout(() => e(), 0);
+      return;
+    }
+    this.ee.on(t, e);
+  }
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  off(t, e) {
+    this.ee.off(t, e);
+  }
 }
 export {
   a as default
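Two easy-to-miss behaviours in this rewrite: the class now wraps a private eventemitter3 instance rather than extending one, and on('loaded', ...) fires late subscribers immediately when the model is already loaded. A self-contained sketch of that pattern, with illustrative class and member names:

```ts
import EE from 'eventemitter3';

// Sketch of the composition-over-inheritance pattern 0.3.1 adopts: wrap an
// emitter, and replay 'loaded' for listeners attached after the fact.
class Loadable {
  private ee = new EE();
  private _loaded = false;

  protected markLoaded(): void {
    this._loaded = true;
    this.ee.emit('loaded');
  }

  on(event: string, listener: (...args: unknown[]) => void): void {
    if (event === 'loaded' && this._loaded) {
      // Late subscriber: invoke instead of registering, asynchronously,
      // as the real class does with setTimeout(() => e(), 0).
      setTimeout(() => listener(), 0);
      return;
    }
    this.ee.on(event, listener);
  }

  off(event: string, listener: (...args: unknown[]) => void): void {
    this.ee.off(event, listener);
  }
}
```

Replaying through setTimeout keeps the invocation asynchronous, so a late subscriber observes the same ordering as one attached before loading finished.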
package/dist/layers/CausalSelfAttention.js
CHANGED
@@ -4,8 +4,8 @@ import { qkv as y } from "../ops/qkv.js";
 import { rope as m } from "../ops/rope.js";
 import { appendCache as b } from "../ops/appendCache.js";
 import { w as j, x as f, t as z } from "../index-pWA4_lUh.js";
-import { r as w, l as E, w as D, b as T } from "../random_width-PbCt7RXv.js";
-import { d as L, a as k } from "../exports_layers-7idKoYqh.js";
+import { r as w, l as E, w as D, b as T } from "../random_width-oeUIlUZj.js";
+import { d as L, a as k } from "../exports_layers-tbTBcwMM.js";
 import { o as W } from "../ones-Cog-G2ag.js";
 import { z as M } from "../zeros-CCy9C3uU.js";
 import { v as A } from "../variable-Dl_ub3pk.js";
package/dist/layers/MLP.js
CHANGED
@@ -1,7 +1,7 @@
 import { t as n } from "../index-pWA4_lUh.js";
 import l from "./BaseLayer.js";
-import { r as s } from "../random_width-PbCt7RXv.js";
-import { d as i, a as c } from "../exports_layers-7idKoYqh.js";
+import { r as s } from "../random_width-oeUIlUZj.js";
+import { d as i, a as c } from "../exports_layers-tbTBcwMM.js";
 class u extends l {
   cFc;
   cProj;
package/dist/main.d.ts
CHANGED
@@ -7,5 +7,6 @@ export { default as loadTextData } from './data/textLoader';
 export type { ITrainerOptions } from './Trainer';
 export type { IGenerateOptions } from './Generator';
 export type { TrainingLogEntry } from './NanoGPTModel';
+export type { ITokeniser } from './tokeniser/type';
 export type { GPTConfig } from './config';
 export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
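With ITokeniser now re-exported from the main entry, consumers can write helpers against the interface rather than a concrete tokeniser class. The members used below (trained, getVocab, train) all appear elsewhere in this diff; the helper itself is hypothetical:

```ts
import type { ITokeniser } from '@genai-fi/nanogpt';

// Hypothetical helper: train a tokeniser only if it has not been trained yet,
// returning the resulting vocabulary size either way.
async function ensureTrained(tok: ITokeniser, corpus: string[]): Promise<number> {
  if (tok.trained) return tok.getVocab().length;
  return tok.train(corpus);
}
```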
package/dist/{random_width-PbCt7RXv.js → random_width-oeUIlUZj.js}
CHANGED
@@ -15481,9 +15481,7 @@ export {
   Yt as d,
   Jr as e,
   Zd as l,
-  cp as o,
   Yr as p,
   wy as r,
-  At as t,
   $t as w
 };
package/dist/tokeniser/bpe.js
CHANGED
@@ -53,7 +53,7 @@ function m(o, e) {
     o.tokens[s] = n;
   }), o.pairs.delete(u(e.a, e.b));
 }
-class w extends g {
+class S extends g {
   targetSize;
   vocab = /* @__PURE__ */ new Set();
   vocabIndex = /* @__PURE__ */ new Map();
@@ -93,7 +93,7 @@ class w extends g {
     let f = 0;
     for (const i of this.vocab.keys())
       this.vocabIndex.set(i, f++);
-    return this.vocab.size;
+    return this.emit("trainStatus", "trained"), this.vocab.size;
   }
   getVocab() {
     return Array.from(this.vocab);
@@ -126,5 +126,5 @@ class w extends g {
   }
 }
 export {
-  w as default
+  S as default
 };
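The one functional change here, emitting 'trainStatus' before returning the vocabulary size, completes the handshake that TeachableLLM.create() relies on: a model created with an untrained tokeniser parks in 'awaitingTokens' until the tokeniser reports itself trained. A sketch of the observable status flow, under the same default-export assumption as earlier:

```ts
import TeachableLLM from '@genai-fi/nanogpt';

async function main() {
  const llm = TeachableLLM.create('bpe');
  llm.on('status', (status) => console.log('status:', status)); // warmup, awaitingTokens, ready
  // BPE train() now emits 'trainStatus' -> "trained", which the once-listener
  // registered by create() turns into the 'ready' status.
  await llm.trainTokeniser(['a small corpus to learn byte-pair merges from']);
}
main().catch(console.error);
```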