@genai-fi/nanogpt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +20 -0
  3. package/dist/Generator.d.ts +14 -0
  4. package/dist/Generator.js +39 -0
  5. package/dist/NanoGPTModel.d.ts +35 -0
  6. package/dist/NanoGPTModel.js +129 -0
  7. package/dist/TeachableLLM.d.ts +21 -0
  8. package/dist/TeachableLLM.js +47 -0
  9. package/dist/Trainer.d.ts +19 -0
  10. package/dist/Trainer.js +34 -0
  11. package/dist/_commonjsHelpers-DaMA6jEr.js +8 -0
  12. package/dist/assets/worker-BYeSPNkq.js +1 -0
  13. package/dist/config.d.ts +11 -0
  14. package/dist/config.js +19 -0
  15. package/dist/index-B8nyc6IR.js +3899 -0
  16. package/dist/index-SOhdqzHq.js +113 -0
  17. package/dist/jszip.min-BLbRbbKt.js +2324 -0
  18. package/dist/layers/CausalSelfAttention.d.ts +22 -0
  19. package/dist/layers/CausalSelfAttention.js +75 -0
  20. package/dist/layers/LayerNorm.d.ts +12 -0
  21. package/dist/layers/LayerNorm.js +30 -0
  22. package/dist/layers/MLP.d.ts +17 -0
  23. package/dist/layers/MLP.js +57 -0
  24. package/dist/layers/TiedEmbedding.d.ts +22 -0
  25. package/dist/layers/TiedEmbedding.js +532 -0
  26. package/dist/layers/TransformerBlock.d.ts +19 -0
  27. package/dist/layers/TransformerBlock.js +47 -0
  28. package/dist/main.d.ts +6 -0
  29. package/dist/main.js +8 -0
  30. package/dist/tokeniser/CharTokeniser.d.ts +20 -0
  31. package/dist/tokeniser/CharTokeniser.js +52 -0
  32. package/dist/tokeniser/NodeTokeniser.d.ts +19 -0
  33. package/dist/tokeniser/NodeTokeniser.js +46 -0
  34. package/dist/tokeniser/WebTokeniser.d.ts +18 -0
  35. package/dist/tokeniser/WebTokeniser.js +96 -0
  36. package/dist/tokeniser/bpe.d.ts +14 -0
  37. package/dist/tokeniser/bpe.js +102 -0
  38. package/dist/tokeniser/messages.d.ts +61 -0
  39. package/dist/tokeniser/messages.js +1 -0
  40. package/dist/tokeniser/type.d.ts +14 -0
  41. package/dist/tokeniser/type.js +1 -0
  42. package/dist/tokeniser/worker.d.ts +1 -0
  43. package/dist/tokeniser/worker.js +53 -0
  44. package/dist/training/AdamExt.d.ts +23 -0
  45. package/dist/training/AdamExt.js +43 -0
  46. package/dist/training/DatasetBuilder.d.ts +12 -0
  47. package/dist/training/DatasetBuilder.js +27 -0
  48. package/dist/training/FullTrainer.d.ts +17 -0
  49. package/dist/training/FullTrainer.js +75 -0
  50. package/dist/training/LayerTrainer.d.ts +28 -0
  51. package/dist/training/LayerTrainer.js +108 -0
  52. package/dist/training/Trainer.d.ts +73 -0
  53. package/dist/training/Trainer.js +87 -0
  54. package/dist/training/lwSchedule.d.ts +7 -0
  55. package/dist/training/lwSchedule.js +162 -0
  56. package/dist/utilities/generate.d.ts +3 -0
  57. package/dist/utilities/generate.js +22 -0
  58. package/dist/utilities/load.d.ts +7 -0
  59. package/dist/utilities/load.js +47 -0
  60. package/dist/utilities/save.d.ts +3 -0
  61. package/dist/utilities/save.js +21 -0
  62. package/dist/utilities/textLoader.d.ts +1 -0
  63. package/dist/utilities/textLoader.js +438 -0
  64. package/dist/utilities/tokenParse.d.ts +1 -0
  65. package/dist/utilities/tokenParse.js +66 -0
  66. package/dist/utilities/weights.d.ts +12 -0
  67. package/dist/utilities/weights.js +43 -0
  68. package/package.json +59 -0
@@ -0,0 +1,532 @@
1
+ import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee, f as te, g as se, h as ne, I as re, L as ae, N as ue, P as ie, i as oe, j as le, k as ce, l as pe, n as B, t as he, T as L, m as fe, p as me, q as de, r as q, u as P, v as U, _ as H } from "../index-B8nyc6IR.js";
2
+ /**
3
+ * @license
4
+ * Copyright 2020 Google LLC. All Rights Reserved.
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ * =============================================================================
17
+ */
18
+ function ge(t, e) {
19
+ const s = a(t, "real", "complex"), n = a(e, "imag", "complex");
20
+ V(s.shape, n.shape, `real and imag shapes, ${s.shape} and ${n.shape}, must match in call to tf.complex().`);
21
+ const r = { real: s, imag: n };
22
+ return u.runKernel(X, r);
23
+ }
24
+ const $e = /* @__PURE__ */ p({ complex_: ge });
25
+ /**
26
+ * @license
27
+ * Copyright 2020 Google LLC. All Rights Reserved.
28
+ * Licensed under the Apache License, Version 2.0 (the "License");
29
+ * you may not use this file except in compliance with the License.
30
+ * You may obtain a copy of the License at
31
+ *
32
+ * http://www.apache.org/licenses/LICENSE-2.0
33
+ *
34
+ * Unless required by applicable law or agreed to in writing, software
35
+ * distributed under the License is distributed on an "AS IS" BASIS,
36
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37
+ * See the License for the specific language governing permissions and
38
+ * limitations under the License.
39
+ * =============================================================================
40
+ */
41
+ function xe(t, e) {
42
+ const n = { x: a(t, "x", "reshape", "string_or_numeric") }, r = { shape: e };
43
+ return u.runKernel(Y, n, r);
44
+ }
45
+ const f = /* @__PURE__ */ p({ reshape_: xe });
46
+ /**
47
+ * @license
48
+ * Copyright 2020 Google LLC. All Rights Reserved.
49
+ * Licensed under the Apache License, Version 2.0 (the "License");
50
+ * you may not use this file except in compliance with the License.
51
+ * You may obtain a copy of the License at
52
+ *
53
+ * http://www.apache.org/licenses/LICENSE-2.0
54
+ *
55
+ * Unless required by applicable law or agreed to in writing, software
56
+ * distributed under the License is distributed on an "AS IS" BASIS,
57
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
58
+ * See the License for the specific language governing permissions and
59
+ * limitations under the License.
60
+ * =============================================================================
61
+ */
62
+ function ke(t, e, s = !1, n = !1) {
63
+ let r = a(t, "a", "matMul"), i = a(e, "b", "matMul");
64
+ [r, i] = A(r, i);
65
+ const l = { a: r, b: i }, h = { transposeA: s, transposeB: n };
66
+ return u.runKernel(Z, l, h);
67
+ }
68
+ const d = /* @__PURE__ */ p({ matMul_: ke });
69
+ /**
70
+ * @license
71
+ * Copyright 2018 Google LLC. All Rights Reserved.
72
+ * Licensed under the Apache License, Version 2.0 (the "License");
73
+ * you may not use this file except in compliance with the License.
74
+ * You may obtain a copy of the License at
75
+ *
76
+ * http://www.apache.org/licenses/LICENSE-2.0
77
+ *
78
+ * Unless required by applicable law or agreed to in writing, software
79
+ * distributed under the License is distributed on an "AS IS" BASIS,
80
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
81
+ * See the License for the specific language governing permissions and
82
+ * limitations under the License.
83
+ * =============================================================================
84
+ */
85
+ function be(t) {
86
+ const s = { x: a(t, "x", "sigmoid", "float32") };
87
+ return u.runKernel(ee, s);
88
+ }
89
+ const De = /* @__PURE__ */ p({ sigmoid_: be });
90
+ /**
91
+ * @license
92
+ * Copyright 2020 Google LLC. All Rights Reserved.
93
+ * Licensed under the Apache License, Version 2.0 (the "License");
94
+ * you may not use this file except in compliance with the License.
95
+ * You may obtain a copy of the License at
96
+ *
97
+ * http://www.apache.org/licenses/LICENSE-2.0
98
+ *
99
+ * Unless required by applicable law or agreed to in writing, software
100
+ * distributed under the License is distributed on an "AS IS" BASIS,
101
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
102
+ * See the License for the specific language governing permissions and
103
+ * limitations under the License.
104
+ * =============================================================================
105
+ */
106
+ function Se(t) {
107
+ const s = { x: a(t, "x", "elu", "float32") };
108
+ return u.runKernel(te, s);
109
+ }
110
+ const ye = /* @__PURE__ */ p({ elu_: Se });
111
+ /**
112
+ * @license
113
+ * Copyright 2018 Google LLC. All Rights Reserved.
114
+ * Licensed under the Apache License, Version 2.0 (the "License");
115
+ * you may not use this file except in compliance with the License.
116
+ * You may obtain a copy of the License at
117
+ *
118
+ * http://www.apache.org/licenses/LICENSE-2.0
119
+ *
120
+ * Unless required by applicable law or agreed to in writing, software
121
+ * distributed under the License is distributed on an "AS IS" BASIS,
122
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
123
+ * See the License for the specific language governing permissions and
124
+ * limitations under the License.
125
+ * =============================================================================
126
+ */
127
+ function _e(t, e = null, s = !1) {
128
+ let n = a(t, "x", "sum");
129
+ n.dtype === "bool" && (n = se(n, "int32"));
130
+ const r = { x: n }, i = { axis: e, keepDims: s };
131
+ return u.runKernel(ne, r, i);
132
+ }
133
+ const Me = /* @__PURE__ */ p({ sum_: _e });
134
+ /**
135
+ * @license
136
+ * Copyright 2020 Google LLC. All Rights Reserved.
137
+ * Licensed under the Apache License, Version 2.0 (the "License");
138
+ * you may not use this file except in compliance with the License.
139
+ * You may obtain a copy of the License at
140
+ *
141
+ * http://www.apache.org/licenses/LICENSE-2.0
142
+ *
143
+ * Unless required by applicable law or agreed to in writing, software
144
+ * distributed under the License is distributed on an "AS IS" BASIS,
145
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
146
+ * See the License for the specific language governing permissions and
147
+ * limitations under the License.
148
+ * =============================================================================
149
+ */
150
+ function Ke(t) {
151
+ const s = { input: a(t, "input", "imag") };
152
+ return u.runKernel(re, s);
153
+ }
154
+ const we = /* @__PURE__ */ p({ imag_: Ke });
155
+ /**
156
+ * @license
157
+ * Copyright 2020 Google LLC. All Rights Reserved.
158
+ * Licensed under the Apache License, Version 2.0 (the "License");
159
+ * you may not use this file except in compliance with the License.
160
+ * You may obtain a copy of the License at
161
+ *
162
+ * http://www.apache.org/licenses/LICENSE-2.0
163
+ *
164
+ * Unless required by applicable law or agreed to in writing, software
165
+ * distributed under the License is distributed on an "AS IS" BASIS,
166
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
167
+ * See the License for the specific language governing permissions and
168
+ * limitations under the License.
169
+ * =============================================================================
170
+ */
171
+ function ze(t, e = 0.2) {
172
+ const n = { x: a(t, "x", "leakyRelu") }, r = { alpha: e };
173
+ return u.runKernel(ae, n, r);
174
+ }
175
+ const Ee = /* @__PURE__ */ p({ leakyRelu_: ze });
176
+ /**
177
+ * @license
178
+ * Copyright 2018 Google LLC. All Rights Reserved.
179
+ * Licensed under the Apache License, Version 2.0 (the "License");
180
+ * you may not use this file except in compliance with the License.
181
+ * You may obtain a copy of the License at
182
+ *
183
+ * http://www.apache.org/licenses/LICENSE-2.0
184
+ *
185
+ * Unless required by applicable law or agreed to in writing, software
186
+ * distributed under the License is distributed on an "AS IS" BASIS,
187
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
188
+ * See the License for the specific language governing permissions and
189
+ * limitations under the License.
190
+ * =============================================================================
191
+ */
192
+ function We(t) {
193
+ const s = { x: a(t, "x", "neg") };
194
+ return u.runKernel(ue, s);
195
+ }
196
+ const Oe = /* @__PURE__ */ p({ neg_: We });
197
+ /**
198
+ * @license
199
+ * Copyright 2020 Google LLC. All Rights Reserved.
200
+ * Licensed under the Apache License, Version 2.0 (the "License");
201
+ * you may not use this file except in compliance with the License.
202
+ * You may obtain a copy of the License at
203
+ *
204
+ * http://www.apache.org/licenses/LICENSE-2.0
205
+ *
206
+ * Unless required by applicable law or agreed to in writing, software
207
+ * distributed under the License is distributed on an "AS IS" BASIS,
208
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
209
+ * See the License for the specific language governing permissions and
210
+ * limitations under the License.
211
+ * =============================================================================
212
+ */
213
+ function Re(t, e) {
214
+ const s = a(t, "x", "prelu"), n = a(e, "alpha", "prelu"), r = { x: s, alpha: n };
215
+ return u.runKernel(ie, r);
216
+ }
217
+ const Fe = /* @__PURE__ */ p({ prelu_: Re });
218
+ /**
219
+ * @license
220
+ * Copyright 2020 Google LLC. All Rights Reserved.
221
+ * Licensed under the Apache License, Version 2.0 (the "License");
222
+ * you may not use this file except in compliance with the License.
223
+ * You may obtain a copy of the License at
224
+ *
225
+ * http://www.apache.org/licenses/LICENSE-2.0
226
+ *
227
+ * Unless required by applicable law or agreed to in writing, software
228
+ * distributed under the License is distributed on an "AS IS" BASIS,
229
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
230
+ * See the License for the specific language governing permissions and
231
+ * limitations under the License.
232
+ * =============================================================================
233
+ */
234
+ function Le(t) {
235
+ const s = { input: a(t, "input", "real") };
236
+ return u.runKernel(oe, s);
237
+ }
238
+ const Ae = /* @__PURE__ */ p({ real_: Le });
239
+ /**
240
+ * @license
241
+ * Copyright 2020 Google LLC. All Rights Reserved.
242
+ * Licensed under the Apache License, Version 2.0 (the "License");
243
+ * you may not use this file except in compliance with the License.
244
+ * You may obtain a copy of the License at
245
+ *
246
+ * http://www.apache.org/licenses/LICENSE-2.0
247
+ *
248
+ * Unless required by applicable law or agreed to in writing, software
249
+ * distributed under the License is distributed on an "AS IS" BASIS,
250
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
251
+ * See the License for the specific language governing permissions and
252
+ * limitations under the License.
253
+ * =============================================================================
254
+ */
255
+ function Be(t) {
256
+ const s = { x: a(t, "x", "relu") };
257
+ return u.runKernel(le, s);
258
+ }
259
+ const Te = /* @__PURE__ */ p({ relu_: Be });
260
+ /**
261
+ * @license
262
+ * Copyright 2020 Google LLC. All Rights Reserved.
263
+ * Licensed under the Apache License, Version 2.0 (the "License");
264
+ * you may not use this file except in compliance with the License.
265
+ * You may obtain a copy of the License at
266
+ *
267
+ * http://www.apache.org/licenses/LICENSE-2.0
268
+ *
269
+ * Unless required by applicable law or agreed to in writing, software
270
+ * distributed under the License is distributed on an "AS IS" BASIS,
271
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
272
+ * See the License for the specific language governing permissions and
273
+ * limitations under the License.
274
+ * =============================================================================
275
+ */
276
+ function Ne(t) {
277
+ const s = { x: a(t, "x", "relu6") };
278
+ return u.runKernel(ce, s);
279
+ }
280
+ const ve = /* @__PURE__ */ p({ relu6_: Ne });
281
+ /**
282
+ * @license
283
+ * Copyright 2018 Google LLC. All Rights Reserved.
284
+ * Licensed under the Apache License, Version 2.0 (the "License");
285
+ * you may not use this file except in compliance with the License.
286
+ * You may obtain a copy of the License at
287
+ *
288
+ * http://www.apache.org/licenses/LICENSE-2.0
289
+ *
290
+ * Unless required by applicable law or agreed to in writing, software
291
+ * distributed under the License is distributed on an "AS IS" BASIS,
292
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
293
+ * See the License for the specific language governing permissions and
294
+ * limitations under the License.
295
+ * =============================================================================
296
+ */
297
+ function Ce(t, e = 0) {
298
+ const n = { x: a(t, "x", "step") }, r = { alpha: e };
299
+ return u.runKernel(pe, n, r);
300
+ }
301
+ const Ge = /* @__PURE__ */ p({ step_: Ce });
302
+ /**
303
+ * @license
304
+ * Copyright 2018 Google LLC. All Rights Reserved.
305
+ * Licensed under the Apache License, Version 2.0 (the "License");
306
+ * you may not use this file except in compliance with the License.
307
+ * You may obtain a copy of the License at
308
+ *
309
+ * http://www.apache.org/licenses/LICENSE-2.0
310
+ *
311
+ * Unless required by applicable law or agreed to in writing, software
312
+ * distributed under the License is distributed on an "AS IS" BASIS,
313
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
314
+ * See the License for the specific language governing permissions and
315
+ * limitations under the License.
316
+ * =============================================================================
317
+ */
318
+ function Ie(t, e, s) {
319
+ const n = a(t, "x", "transpose");
320
+ if (e == null && (e = n.shape.map((l, h) => h).reverse()), B(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((l) => {
321
+ B(l >= 0 && l < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
322
+ }), n.rank <= 1)
323
+ return n.clone();
324
+ const r = { x: n }, i = { perm: e };
325
+ return n.dtype === "complex64" ? he(() => {
326
+ let l = Ae(n), h = we(n);
327
+ return l = u.runKernel(L, { x: l }, i), h = u.runKernel(L, { x: h }, i), s && (h = Oe(h)), $e(l, h);
328
+ }) : u.runKernel(L, r, i);
329
+ }
330
+ const je = /* @__PURE__ */ p({ transpose_: Ie });
331
+ /**
332
+ * @license
333
+ * Copyright 2019 Google LLC. All Rights Reserved.
334
+ * Licensed under the Apache License, Version 2.0 (the "License");
335
+ * you may not use this file except in compliance with the License.
336
+ * You may obtain a copy of the License at
337
+ *
338
+ * http://www.apache.org/licenses/LICENSE-2.0
339
+ *
340
+ * Unless required by applicable law or agreed to in writing, software
341
+ * distributed under the License is distributed on an "AS IS" BASIS,
342
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
343
+ * See the License for the specific language governing permissions and
344
+ * limitations under the License.
345
+ * =============================================================================
346
+ */
347
+ function qe(t, e, s) {
348
+ if (s == null || s === "linear")
349
+ return t;
350
+ if (s === "relu")
351
+ return fe(t, Ge(e));
352
+ throw new Error(`Cannot compute gradient for fused activation ${s}.`);
353
+ }
354
+ function Pe(t, e) {
355
+ let s = e;
356
+ const n = me(t.shape, e.shape);
357
+ return n.length > 0 && (s = Me(s, n)), f(s, t.shape);
358
+ }
359
+ function Ue(t, e, s, n) {
360
+ if (e === "linear")
361
+ return t;
362
+ if (e === "relu")
363
+ return Te(t);
364
+ if (e === "elu")
365
+ return ye(t);
366
+ if (e === "relu6")
367
+ return ve(t);
368
+ if (e === "prelu")
369
+ return Fe(t, s);
370
+ if (e === "leakyrelu")
371
+ return Ee(t, n);
372
+ if (e === "sigmoid")
373
+ return De(t);
374
+ throw new Error(`Unknown fused activation ${e}.`);
375
+ }
376
/**
 * Predicate used by the fused matMul to pick its code path.
 *
 * @param t Numeric depth value (the caller passes `u.state.gradientDepth`).
 * @param e Activation name.
 * @returns true when `t` is not greater than zero, or when the activation
 *          is "linear"; false otherwise.
 */
const He = (t, e) => {
  if (!(t > 0)) {
    return true;
  }
  return e === "linear";
};
377
+ /**
378
+ * @license
379
+ * Copyright 2019 Google LLC. All Rights Reserved.
380
+ * Licensed under the Apache License, Version 2.0 (the "License");
381
+ * you may not use this file except in compliance with the License.
382
+ * You may obtain a copy of the License at
383
+ *
384
+ * http://www.apache.org/licenses/LICENSE-2.0
385
+ *
386
+ * Unless required by applicable law or agreed to in writing, software
387
+ * distributed under the License is distributed on an "AS IS" BASIS,
388
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
389
+ * See the License for the specific language governing permissions and
390
+ * limitations under the License.
391
+ * =============================================================================
392
+ */
393
/**
 * Fused matrix multiply: a x b with optional bias add and activation,
 * carrying its own gradient function.
 *
 * Options (destructured): `a`, `b`, `transposeA` (default false),
 * `transposeB` (default false), `bias`, `activation` (default "linear"),
 * `preluActivationWeights`, `leakyreluAlpha` (default 0.2).
 *
 * Two paths:
 *  - When `He(u.state.gradientDepth, activation)` is false, the op is
 *    composed from the separate matMul (`d`), add (`de`) and activation
 *    (`Ue`) calls.
 *  - Otherwise both operands are flattened to rank 3, the imported kernel
 *    key `H` is dispatched, and the result is reshaped to the output shape.
 *    The call is wrapped by the imported helper `U` together with the
 *    gradient function `G` (NOTE(review): `U` looks like a custom-gradient
 *    wrapper - confirm against the bundle it is imported from).
 */
+ function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: l, leakyreluAlpha: h = 0.2 }) {
394
// Unfused fallback: plain matMul, then optional bias add, then activation.
+ if (He(u.state.gradientDepth, i) === !1) {
395
+ let x = d(t, e, s, n);
396
+ return r != null && (x = de(x, r)), Ue(x, i, l, h);
397
+ }
398
+ let o = a(t, "a", "fused matMul"), c = a(e, "b", "fused matMul");
399
+ [o, c] = A(o, c);
400
// b / D: inner (contracted) dimensions of a and b after accounting for the
// transpose flags; w / z: the outer dimensions; T / S: leading dimensions,
// which are passed to the imported helper `q` to get N / v.
+ const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], z = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
401
+ B(b === D, () => `Error in fused matMul: inner shapes (${b}) and (${D}) of Tensors with shapes ${o.shape} and ${c.shape} and transposeA=${s} and transposeB=${n} must match.`);
402
// O: final output shape (leading dims combined via `P`, plus [w, z]);
// R / F: the operands reshaped to rank 3.
+ const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, z]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, z, D]) : f(c, [v, D, z]);
403
+ let y;
404
// The P(...) call's return value is discarded here; NOTE(review): presumably
// it is invoked only so that incompatible bias shapes throw - confirm.
+ r != null && (y = a(r, "bias", "fused matMul"), [y] = A(y, o), P(O, y.shape));
405
+ let C;
406
+ l != null && (C = a(l, "prelu weights", "fused matMul"));
407
// Gradient function. K is the saved list: [a3D, b3D, kernelOutput] or, when
// a bias is present, [a3D, b3D, kernelOutput, bias].
+ const G = (x, K) => {
408
+ const [g, $, k, E] = K, m = qe(f(x, k.shape), k, i);
409
+ let _, M;
410
// One branch per (transposeA, transposeB) combination; each computes the
// gradient for a (_) and for b (M). The trailing `r != null` is the actual
// `if` condition.
+ if (!s && !n ? (_ = d(m, $, !1, !0), M = d(g, m, !0, !1)) : !s && n ? (_ = d(m, $, !1, !1), M = d(m, g, !0, !1)) : s && !n ? (_ = d($, m, !1, !0), M = d(g, m, !1, !1)) : (_ = d($, m, !0, !0), M = d(m, g, !0, !0)), r != null) {
411
+ const Q = Pe(E, m);
412
+ return [_, M, Q];
413
+ } else
414
+ return [_, M];
415
+ }, I = {
416
+ a: R,
417
+ b: F,
418
+ bias: y,
419
+ preluActivationWeights: C
420
+ }, j = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: h };
421
// Without bias the wrapped function takes (a3D, b3D, save); with bias it
// takes (a3D, b3D, bias, save). The save callback stores the list that G
// later destructures.
+ return r == null ? U((K, g, $) => {
422
+ const k = (
423
+ // tslint:disable-next-line: no-unnecessary-type-assertion
424
+ u.runKernel(H, I, j)
425
+ );
426
+ return $([K, g, k]), { value: f(k, O), gradFunc: G };
427
+ })(R, F) : U((K, g, $, k) => {
428
+ const E = (
429
+ // tslint:disable-next-line: no-unnecessary-type-assertion
430
+ u.runKernel(H, I, j)
431
+ );
432
+ return k([K, g, E, $]), { value: f(E, O), gradFunc: G };
433
+ })(R, F, y);
434
+ }
435
// Registered under the key "fusedMatMul_" through the imported wrapper `p`.
+ const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
436
+ /**
437
+ * @license
438
+ * Copyright 2018 Google LLC
439
+ *
440
+ * Use of this source code is governed by an MIT-style
441
+ * license that can be found in the LICENSE file or at
442
+ * https://opensource.org/licenses/MIT.
443
+ * =============================================================================
444
+ */
445
+ class W extends Error {
446
+ constructor(e) {
447
+ super(e), Object.setPrototypeOf(this, W.prototype);
448
+ }
449
+ }
450
+ /**
451
+ * @license
452
+ * Copyright 2018 Google LLC
453
+ *
454
+ * Use of this source code is governed by an MIT-style
455
+ * license that can be found in the LICENSE file or at
456
+ * https://opensource.org/licenses/MIT.
457
+ * =============================================================================
458
+ */
459
+ function Qe(t, e, s, n) {
460
+ if (t.rank < 2 || e.rank < 2)
461
+ throw new W(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
462
+ if (e.rank >= 3) {
463
+ const r = t.shape.slice(-1)[0], i = e.shape.slice(-2)[0];
464
+ if (r !== i)
465
+ throw new W(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
466
+ }
467
+ if (t.rank === 2 && e.rank === 2)
468
+ return J({
469
+ a: t,
470
+ b: e,
471
+ transposeA: !1,
472
+ transposeB: !1,
473
+ bias: null,
474
+ activation: s
475
+ });
476
+ {
477
+ const r = t.shape.slice(), i = r.pop();
478
+ t = f(t, [-1, i]);
479
+ const l = e.shape.slice(), h = l.pop(), o = l.pop(), c = [...l, h], b = Array.from({ length: e.rank }, (T, S) => S === 0 ? e.rank - 2 : S <= e.rank - 2 ? S - 1 : S);
480
+ e = f(je(e, b), [o, -1]);
481
+ const D = [...r, ...c];
482
+ return f(J({
483
+ a: t,
484
+ b: e,
485
+ transposeA: !1,
486
+ transposeB: !1,
487
+ bias: null,
488
+ activation: s
489
+ }), D);
490
+ }
491
+ }
492
+ class Ye {
493
+ vocabSize;
494
+ embedDim;
495
+ tf;
496
+ tiedWeights;
497
+ initializer;
498
+ constructor(e, s, n) {
499
+ this.vocabSize = s.vocabSize, this.embedDim = s.embedDim, this.tf = e, this.initializer = this.tf.initializers.randomNormal({
500
+ mean: 0,
501
+ stddev: 0.02
502
+ }), this.tiedWeights = this.tf.variable(
503
+ this.initializer.apply([this.vocabSize, this.embedDim]),
504
+ !0,
505
+ n || "tied_embedding"
506
+ );
507
+ }
508
+ get variables() {
509
+ return [this.tiedWeights];
510
+ }
511
+ embed(e) {
512
+ return this.tf.gather(this.tiedWeights, e, 0);
513
+ }
514
+ project(e) {
515
+ return Qe(e, this.tiedWeights.transpose());
516
+ }
517
+ getWeights() {
518
+ return [this.tiedWeights];
519
+ }
520
+ setWeights(e) {
521
+ this.tiedWeights.assign(e[0]);
522
+ }
523
+ getConfig() {
524
+ return {
525
+ vocabSize: this.vocabSize,
526
+ embedDim: this.embedDim
527
+ };
528
+ }
529
+ }
530
+ export {
531
+ Ye as default
532
+ };
@@ -0,0 +1,19 @@
1
+ import { default as TF } from '@tensorflow/tfjs';
2
+ import { GPTConfig } from '../config';
3
/**
 * One transformer block: two layer norms, a causal self-attention layer and
 * an MLP, combined with residual additions (see `call`).
 */
+ export default class Block {
4
+ private ln1;
5
+ private attn;
6
+ private ln2;
7
+ private mlp;
8
+ private tf;
9
+ private index;
10
+ private _trainable;
11
/** When true, `call` returns its input unchanged. */
+ skipped: boolean;
12
/**
 * @param tf TensorFlow.js namespace handed to the sub-layers.
 * @param index Block index; used in the weight names `block_<index>_ln1` / `block_<index>_ln2`.
 * @param config Model configuration; `nEmbed` sizes the layer norms.
 */
+ constructor(tf: typeof TF, index: number, config: GPTConfig);
13
/** Variables of ln1, attention, ln2 and the MLP, in that order. */
+ get variables(): TF.Variable[];
14
+ get trainable(): boolean;
15
/** Setting this also sets `trainable` on both layer norms, the attention layer and the MLP. */
+ set trainable(value: boolean);
16
/** Writes the sub-layers' weights into `map`; the layer norms are stored under `block_<index>_ln1` / `block_<index>_ln2`. */
+ saveWeights(map: Map<string, TF.Tensor[]>): void;
17
/** Restores weights written by `saveWeights`; a missing layer-norm entry is passed on as an empty array. */
+ loadWeights(weights: Map<string, TF.Tensor[]>): void;
18
/**
 * Runs the block inside `tf.tidy`: `h = x + attn(ln1(x))`, then returns `h + mlp(ln2(h))`.
 * @param training Forwarded to the attention and MLP layers; the implementation defaults it to false.
 */
+ call(x: TF.Tensor, training?: boolean): TF.Tensor;
19
+ }
@@ -0,0 +1,47 @@
1
+ import r from "./CausalSelfAttention.js";
2
+ import d from "./MLP.js";
3
+ import n from "./LayerNorm.js";
4
+ class _ {
5
+ ln1;
6
+ attn;
7
+ ln2;
8
+ mlp;
9
+ tf;
10
+ index;
11
+ _trainable = !0;
12
+ skipped = !1;
13
+ constructor(t, s, i) {
14
+ this.tf = t, this.index = s, this.ln1 = new n(t, [i.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new r(this.tf, this.index, i), this.ln2 = new n(t, [i.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new d(this.tf, this.index, i);
15
+ }
16
+ get variables() {
17
+ return [
18
+ ...this.ln1.trainableWeights.map((t) => t),
19
+ ...this.attn.variables,
20
+ ...this.ln2.trainableWeights.map((t) => t),
21
+ ...this.mlp.variables
22
+ ];
23
+ }
24
+ get trainable() {
25
+ return this._trainable;
26
+ }
27
+ set trainable(t) {
28
+ this._trainable = t, this.ln1.trainable = t, this.ln2.trainable = t, this.attn.trainable = t, this.mlp.trainable = t;
29
+ }
30
+ saveWeights(t) {
31
+ this.attn.saveWeights(t), this.mlp.saveWeights(t), t.set(`block_${this.index}_ln1`, this.ln1.getWeights()), t.set(`block_${this.index}_ln2`, this.ln2.getWeights());
32
+ }
33
+ loadWeights(t) {
34
+ this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_ln1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_ln2`) || []);
35
+ }
36
+ call(t, s = !1) {
37
+ return this.tf.tidy(() => {
38
+ if (this.skipped)
39
+ return t;
40
+ const i = this.ln1.apply(t), l = this.attn.call(i, s), e = t.add(l), a = this.ln2.apply(e), h = this.mlp.call(a, s);
41
+ return e.add(h);
42
+ });
43
+ }
44
+ }
45
+ export {
46
+ _ as default
47
+ };
package/dist/main.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ export { default as NanoGPT } from './NanoGPTModel';
2
+ export { default as TeachableLLM } from './TeachableLLM';
3
+ export { default as CharTokeniser } from './tokeniser/CharTokeniser';
4
+ export type { ITrainerOptions } from './Trainer';
5
+ export type { IGenerateOptions } from './Generator';
6
+ export type { TrainingLogEntry } from './NanoGPTModel';
package/dist/main.js ADDED
@@ -0,0 +1,8 @@
1
+ import { default as o } from "./NanoGPTModel.js";
2
+ import { default as f } from "./TeachableLLM.js";
3
+ import { default as l } from "./tokeniser/CharTokeniser.js";
4
+ export {
5
+ l as CharTokeniser,
6
+ o as NanoGPT,
7
+ f as TeachableLLM
8
+ };
@@ -0,0 +1,20 @@
1
+ import { default as EE } from 'eventemitter3';
2
+ import { ITokeniser } from './type';
3
/**
 * Character-level tokeniser: every distinct character of the training text
 * becomes one vocabulary entry, plus a trailing "<eos>" entry.
 */
+ export default class CharTokeniser extends EE<'trainStatus'> implements ITokeniser {
4
/** Number of vocabulary entries; 0 until a vocabulary is supplied or trained. */
+ vocabSize: number;
5
/** Index of the "<eos>" entry in `vocab`. */
+ eosToken: number;
6
+ vocab: string[];
7
+ private cache;
8
/** @param vocab Optional existing vocabulary; when non-empty the tokeniser starts out trained. */
+ constructor(vocab?: string[]);
9
/** True when `vocabSize` is greater than zero. */
+ get trained(): boolean;
10
/** No-op in this implementation. */
+ destroy(): void;
11
/** Builds the vocabulary from the characters of `text` (sorted by char code, "<eos>" appended) and resolves with its size. */
+ train(text: string[]): Promise<number>;
12
/** With `numeric` set, resolves with token indices; characters outside the vocabulary map to -1. Rejects when the tokeniser is not trained. */
+ tokenise(text: string[], numeric: true): Promise<number[][]>;
13
/** Without `numeric`, resolves with each text split into single characters. Rejects when the tokeniser is not trained. */
+ tokenise(text: string[]): Promise<string[][]>;
14
/** Maps each index to its vocabulary entry and joins the entries into one string per sequence. */
+ detokenise(tokens: number[][]): Promise<string[]>;
15
/** Numeric `tokenise` for a single string. */
+ encode(text: string): Promise<number[]>;
16
/** `detokenise` for a single sequence. */
+ decode(tokens: number[]): Promise<string>;
17
+ getVocab(): string[];
18
/** Always resolves with an empty array in this implementation. */
+ getMerges(): Promise<[string, string][]>;
19
/** Resolves with `[inputs, targets]`; the implementation defaults `windowSize` to 5. */
+ createTrainingData(text: string[], windowSize?: number): Promise<[number[], number[]]>;
20
+ }
@@ -0,0 +1,52 @@
1
+ import { E as r } from "../index-SOhdqzHq.js";
2
+ class h extends r {
3
+ vocabSize = 0;
4
+ eosToken = 0;
5
+ vocab = [];
6
+ cache = /* @__PURE__ */ new Map();
7
+ constructor(t) {
8
+ super(), this.vocab = t || [], this.vocab.length > 0 && (this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.vocab.forEach((a, s) => {
9
+ this.cache.set(a, s);
10
+ }));
11
+ }
12
+ get trained() {
13
+ return this.vocabSize > 0;
14
+ }
15
+ destroy() {
16
+ }
17
+ async train(t) {
18
+ const a = new Set(t.map((e) => e.split("")).flat()), s = Array.from(a);
19
+ return s.sort((e, o) => e.charCodeAt(0) - o.charCodeAt(0)), this.vocab = [...s, "<eos>"], this.eosToken = this.vocab.indexOf("<eos>"), this.vocabSize = this.vocab.length, this.vocab.forEach((e, o) => {
20
+ this.cache.set(e, o);
21
+ }), this.vocabSize;
22
+ }
23
+ async tokenise(t, a) {
24
+ if (!this.trained)
25
+ throw new Error("Tokeniser not trained");
26
+ return t.map((e) => a ? e.split("").map((o) => this.cache.get(o) ?? -1) : e.split(""));
27
+ }
28
+ async detokenise(t) {
29
+ return t.map((s) => s.map((e) => this.vocab[e]).join(""));
30
+ }
31
+ async encode(t) {
32
+ return (await this.tokenise([t], !0))[0];
33
+ }
34
+ async decode(t) {
35
+ return (await this.detokenise([t]))[0];
36
+ }
37
+ getVocab() {
38
+ return this.vocab;
39
+ }
40
+ async getMerges() {
41
+ return [];
42
+ }
43
+ async createTrainingData(t, a = 5) {
44
+ const s = await this.tokenise(t, !0), e = [], o = [];
45
+ for (let i = 0; i < s.length - a; i++)
46
+ e.push(...s[i].slice(0, a)), o.push(s[i + 1][0]);
47
+ return [e, o];
48
+ }
49
+ }
50
+ export {
51
+ h as default
52
+ };