@genai-fi/nanogpt 0.2.12 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/dist/Generator.js +30 -25
  2. package/dist/NanoGPTModel.d.ts +13 -14
  3. package/dist/NanoGPTModel.js +142 -70
  4. package/dist/TeachableLLM.d.ts +16 -7
  5. package/dist/TeachableLLM.js +81 -44
  6. package/dist/Trainer.js +8 -8
  7. package/dist/concat-BIZS_td9.js +33 -0
  8. package/dist/data/parquet.js +1 -1
  9. package/dist/exports_layers-tbTBcwMM.js +25 -0
  10. package/dist/{sum-D7fu15XL.js → gather-BPGW8RsB.js} +6 -8
  11. package/dist/index-C4L8Cm77.js +349 -0
  12. package/dist/{index-YPKosni4.js → index-pWA4_lUh.js} +1020 -782
  13. package/dist/layers/CausalSelfAttention.d.ts +11 -11
  14. package/dist/layers/CausalSelfAttention.js +71 -63
  15. package/dist/layers/MLP.d.ts +6 -7
  16. package/dist/layers/MLP.js +18 -16
  17. package/dist/layers/RMSNorm.d.ts +6 -7
  18. package/dist/layers/RMSNorm.js +15 -13
  19. package/dist/layers/RoPECache.d.ts +4 -5
  20. package/dist/layers/RoPECache.js +36 -12
  21. package/dist/layers/TiedEmbedding.d.ts +7 -8
  22. package/dist/layers/TiedEmbedding.js +16 -418
  23. package/dist/layers/TransformerBlock.d.ts +8 -9
  24. package/dist/layers/TransformerBlock.js +12 -12
  25. package/dist/main.d.ts +2 -0
  26. package/dist/main.js +35 -21
  27. package/dist/{mat_mul-Bu7bhLms.js → mat_mul-D7_a4KJn.js} +5 -5
  28. package/dist/moments-DfcpfwKi.js +132 -0
  29. package/dist/ones-Cog-G2ag.js +29 -0
  30. package/dist/ops/appendCache.d.ts +2 -0
  31. package/dist/ops/appendCache.js +9 -0
  32. package/dist/ops/attentionMask.d.ts +1 -1
  33. package/dist/ops/attentionMask.js +7 -85
  34. package/dist/ops/cpu/appendCache.d.ts +2 -0
  35. package/dist/ops/cpu/appendCache.js +28 -0
  36. package/dist/ops/cpu/attentionMask.js +18 -0
  37. package/dist/ops/cpu/gatherSub.d.ts +1 -0
  38. package/dist/ops/cpu/gatherSub.js +34 -0
  39. package/dist/ops/cpu/qkv.d.ts +5 -0
  40. package/dist/ops/cpu/qkv.js +38 -0
  41. package/dist/ops/cpu/rope.d.ts +6 -0
  42. package/dist/ops/cpu/rope.js +38 -0
  43. package/dist/ops/cpu/scatterSub.d.ts +1 -0
  44. package/dist/ops/cpu/scatterSub.js +70 -0
  45. package/dist/ops/gatherSub.d.ts +1 -1
  46. package/dist/ops/gatherSub.js +6 -63
  47. package/dist/ops/grads/attentionMask.d.ts +1 -0
  48. package/dist/ops/grads/attentionMask.js +21 -0
  49. package/dist/ops/grads/qkv.d.ts +1 -0
  50. package/dist/ops/grads/qkv.js +20 -0
  51. package/dist/ops/grads/rope.d.ts +1 -0
  52. package/dist/ops/grads/rope.js +14 -0
  53. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  54. package/dist/ops/qkv.d.ts +1 -6
  55. package/dist/ops/qkv.js +7 -124
  56. package/dist/ops/rope.d.ts +0 -5
  57. package/dist/ops/rope.js +7 -151
  58. package/dist/ops/scatterSub.d.ts +1 -1
  59. package/dist/ops/scatterSub.js +6 -147
  60. package/dist/ops/webgl/appendCache.d.ts +1 -0
  61. package/dist/ops/webgl/appendCache.js +43 -0
  62. package/dist/ops/webgl/attentionMask.d.ts +1 -0
  63. package/dist/ops/webgl/attentionMask.js +43 -0
  64. package/dist/ops/webgl/gatherSub.d.ts +1 -0
  65. package/dist/ops/webgl/gatherSub.js +27 -0
  66. package/dist/ops/webgl/qkv.d.ts +1 -0
  67. package/dist/ops/webgl/qkv.js +46 -0
  68. package/dist/ops/webgl/rope.d.ts +1 -0
  69. package/dist/ops/webgl/rope.js +56 -0
  70. package/dist/ops/webgl/scatterSub.d.ts +1 -0
  71. package/dist/ops/webgl/scatterSub.js +27 -0
  72. package/dist/{parquet-BRl5lE_I.js → parquet-C0Tlmv9c.js} +3045 -3048
  73. package/dist/random_width-oeUIlUZj.js +15487 -0
  74. package/dist/range-CcDl05lo.js +26 -0
  75. package/dist/{reshape-DmnmKT6r.js → reshape-C8CR_Bad.js} +3 -3
  76. package/dist/sin-BJIrfnj7.js +47 -0
  77. package/dist/softmax-Be_lsqUc.js +105 -0
  78. package/dist/{complex-CJ-qCcLB.js → split-DZbvruEP.js} +6 -8
  79. package/dist/stack-BMm-efee.js +27 -0
  80. package/dist/sum-C7Mgy9Bw.js +104 -0
  81. package/dist/tensor-DJVbYhh1.js +24 -0
  82. package/dist/tensor2d-ZuQSh2D-.js +30 -0
  83. package/dist/tokeniser/bpe.d.ts +17 -6
  84. package/dist/tokeniser/bpe.js +89 -61
  85. package/dist/training/AdamExt.js +1 -1
  86. package/dist/training/DatasetBuilder.d.ts +6 -6
  87. package/dist/training/DatasetBuilder.js +1262 -17
  88. package/dist/training/Evaluator.d.ts +3 -2
  89. package/dist/training/FullTrainer.d.ts +9 -8
  90. package/dist/training/FullTrainer.js +26 -25
  91. package/dist/training/LayerTrainer.d.ts +9 -8
  92. package/dist/training/LayerTrainer.js +34 -33
  93. package/dist/training/Trainer.d.ts +22 -21
  94. package/dist/training/Trainer.js +21 -18
  95. package/dist/training/sparseCrossEntropy.js +22 -166
  96. package/dist/utilities/dummy.js +10 -8
  97. package/dist/utilities/generate.js +14 -11
  98. package/dist/utilities/load.d.ts +1 -2
  99. package/dist/utilities/load.js +37 -35
  100. package/dist/utilities/profile.js +1 -1
  101. package/dist/utilities/save.js +14 -9
  102. package/dist/utilities/tokenParse.d.ts +1 -1
  103. package/dist/utilities/tokenParse.js +7 -61
  104. package/dist/utilities/weights.d.ts +3 -3
  105. package/dist/utilities/weights.js +21 -19
  106. package/dist/variable-Dl_ub3pk.js +23 -0
  107. package/dist/{stack-BtKpB0Ry.js → zeros-CCy9C3uU.js} +18 -16
  108. package/package.json +2 -1
  109. package/dist/assets/worker-BYeSPNkq.js +0 -1
  110. package/dist/tokeniser/NodeTokeniser.d.ts +0 -20
  111. package/dist/tokeniser/NodeTokeniser.js +0 -46
  112. package/dist/tokeniser/WebTokeniser.d.ts +0 -18
  113. package/dist/tokeniser/WebTokeniser.js +0 -96
  114. package/dist/tokeniser/worker.js +0 -53
  115. /package/dist/{tokeniser/worker.d.ts → ops/cpu/attentionMask.d.ts} +0 -0
@@ -1,438 +1,36 @@
1
- import { o as h, d as i, E as o, K as X, N as Y, O as Z, Q as J, T as ee, U as te, V as se, W as ne, X as re, Y as ue, l as L, I as ae, Z as A, a as ie, _ as oe, D as le, f as q, v as C, $ as P, H as U, a0 as H } from "../index-YPKosni4.js";
2
- import { r as f } from "../reshape-DmnmKT6r.js";
3
- import { s as ce } from "../sum-D7fu15XL.js";
4
- import { m } from "../mat_mul-Bu7bhLms.js";
5
- import { c as pe } from "../complex-CJ-qCcLB.js";
6
- /**
7
- * @license
8
- * Copyright 2018 Google LLC. All Rights Reserved.
9
- * Licensed under the Apache License, Version 2.0 (the "License");
10
- * you may not use this file except in compliance with the License.
11
- * You may obtain a copy of the License at
12
- *
13
- * http://www.apache.org/licenses/LICENSE-2.0
14
- *
15
- * Unless required by applicable law or agreed to in writing, software
16
- * distributed under the License is distributed on an "AS IS" BASIS,
17
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
- * See the License for the specific language governing permissions and
19
- * limitations under the License.
20
- * =============================================================================
21
- */
22
- function he(t) {
23
- const s = { x: i(t, "x", "sigmoid", "float32") };
24
- return o.runKernel(X, s);
25
- }
26
- const fe = /* @__PURE__ */ h({ sigmoid_: he });
27
- /**
28
- * @license
29
- * Copyright 2020 Google LLC. All Rights Reserved.
30
- * Licensed under the Apache License, Version 2.0 (the "License");
31
- * you may not use this file except in compliance with the License.
32
- * You may obtain a copy of the License at
33
- *
34
- * http://www.apache.org/licenses/LICENSE-2.0
35
- *
36
- * Unless required by applicable law or agreed to in writing, software
37
- * distributed under the License is distributed on an "AS IS" BASIS,
38
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
39
- * See the License for the specific language governing permissions and
40
- * limitations under the License.
41
- * =============================================================================
42
- */
43
- function de(t) {
44
- const s = { x: i(t, "x", "elu", "float32") };
45
- return o.runKernel(Y, s);
46
- }
47
- const me = /* @__PURE__ */ h({ elu_: de });
48
- /**
49
- * @license
50
- * Copyright 2020 Google LLC. All Rights Reserved.
51
- * Licensed under the Apache License, Version 2.0 (the "License");
52
- * you may not use this file except in compliance with the License.
53
- * You may obtain a copy of the License at
54
- *
55
- * http://www.apache.org/licenses/LICENSE-2.0
56
- *
57
- * Unless required by applicable law or agreed to in writing, software
58
- * distributed under the License is distributed on an "AS IS" BASIS,
59
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
60
- * See the License for the specific language governing permissions and
61
- * limitations under the License.
62
- * =============================================================================
63
- */
64
- function ge(t) {
65
- const s = { input: i(t, "input", "imag") };
66
- return o.runKernel(Z, s);
67
- }
68
- const $e = /* @__PURE__ */ h({ imag_: ge });
69
- /**
70
- * @license
71
- * Copyright 2020 Google LLC. All Rights Reserved.
72
- * Licensed under the Apache License, Version 2.0 (the "License");
73
- * you may not use this file except in compliance with the License.
74
- * You may obtain a copy of the License at
75
- *
76
- * http://www.apache.org/licenses/LICENSE-2.0
77
- *
78
- * Unless required by applicable law or agreed to in writing, software
79
- * distributed under the License is distributed on an "AS IS" BASIS,
80
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
81
- * See the License for the specific language governing permissions and
82
- * limitations under the License.
83
- * =============================================================================
84
- */
85
- function xe(t, e = 0.2) {
86
- const n = { x: i(t, "x", "leakyRelu") }, r = { alpha: e };
87
- return o.runKernel(J, n, r);
88
- }
89
- const ke = /* @__PURE__ */ h({ leakyRelu_: xe });
90
- /**
91
- * @license
92
- * Copyright 2018 Google LLC. All Rights Reserved.
93
- * Licensed under the Apache License, Version 2.0 (the "License");
94
- * you may not use this file except in compliance with the License.
95
- * You may obtain a copy of the License at
96
- *
97
- * http://www.apache.org/licenses/LICENSE-2.0
98
- *
99
- * Unless required by applicable law or agreed to in writing, software
100
- * distributed under the License is distributed on an "AS IS" BASIS,
101
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
102
- * See the License for the specific language governing permissions and
103
- * limitations under the License.
104
- * =============================================================================
105
- */
106
- function De(t) {
107
- const s = { x: i(t, "x", "neg") };
108
- return o.runKernel(ee, s);
109
- }
110
- const be = /* @__PURE__ */ h({ neg_: De });
111
- /**
112
- * @license
113
- * Copyright 2020 Google LLC. All Rights Reserved.
114
- * Licensed under the Apache License, Version 2.0 (the "License");
115
- * you may not use this file except in compliance with the License.
116
- * You may obtain a copy of the License at
117
- *
118
- * http://www.apache.org/licenses/LICENSE-2.0
119
- *
120
- * Unless required by applicable law or agreed to in writing, software
121
- * distributed under the License is distributed on an "AS IS" BASIS,
122
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
123
- * See the License for the specific language governing permissions and
124
- * limitations under the License.
125
- * =============================================================================
126
- */
127
- function ye(t, e) {
128
- const s = i(t, "x", "prelu"), n = i(e, "alpha", "prelu"), r = { x: s, alpha: n };
129
- return o.runKernel(te, r);
130
- }
131
- const Se = /* @__PURE__ */ h({ prelu_: ye });
132
- /**
133
- * @license
134
- * Copyright 2020 Google LLC. All Rights Reserved.
135
- * Licensed under the Apache License, Version 2.0 (the "License");
136
- * you may not use this file except in compliance with the License.
137
- * You may obtain a copy of the License at
138
- *
139
- * http://www.apache.org/licenses/LICENSE-2.0
140
- *
141
- * Unless required by applicable law or agreed to in writing, software
142
- * distributed under the License is distributed on an "AS IS" BASIS,
143
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
144
- * See the License for the specific language governing permissions and
145
- * limitations under the License.
146
- * =============================================================================
147
- */
148
- function Ke(t) {
149
- const s = { input: i(t, "input", "real") };
150
- return o.runKernel(se, s);
151
- }
152
- const _e = /* @__PURE__ */ h({ real_: Ke });
153
- /**
154
- * @license
155
- * Copyright 2020 Google LLC. All Rights Reserved.
156
- * Licensed under the Apache License, Version 2.0 (the "License");
157
- * you may not use this file except in compliance with the License.
158
- * You may obtain a copy of the License at
159
- *
160
- * http://www.apache.org/licenses/LICENSE-2.0
161
- *
162
- * Unless required by applicable law or agreed to in writing, software
163
- * distributed under the License is distributed on an "AS IS" BASIS,
164
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
165
- * See the License for the specific language governing permissions and
166
- * limitations under the License.
167
- * =============================================================================
168
- */
169
- function Me(t) {
170
- const s = { x: i(t, "x", "relu") };
171
- return o.runKernel(ne, s);
172
- }
173
- const We = /* @__PURE__ */ h({ relu_: Me });
174
- /**
175
- * @license
176
- * Copyright 2020 Google LLC. All Rights Reserved.
177
- * Licensed under the Apache License, Version 2.0 (the "License");
178
- * you may not use this file except in compliance with the License.
179
- * You may obtain a copy of the License at
180
- *
181
- * http://www.apache.org/licenses/LICENSE-2.0
182
- *
183
- * Unless required by applicable law or agreed to in writing, software
184
- * distributed under the License is distributed on an "AS IS" BASIS,
185
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
186
- * See the License for the specific language governing permissions and
187
- * limitations under the License.
188
- * =============================================================================
189
- */
190
- function we(t) {
191
- const s = { x: i(t, "x", "relu6") };
192
- return o.runKernel(re, s);
193
- }
194
- const ze = /* @__PURE__ */ h({ relu6_: we });
195
- /**
196
- * @license
197
- * Copyright 2018 Google LLC. All Rights Reserved.
198
- * Licensed under the Apache License, Version 2.0 (the "License");
199
- * you may not use this file except in compliance with the License.
200
- * You may obtain a copy of the License at
201
- *
202
- * http://www.apache.org/licenses/LICENSE-2.0
203
- *
204
- * Unless required by applicable law or agreed to in writing, software
205
- * distributed under the License is distributed on an "AS IS" BASIS,
206
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
207
- * See the License for the specific language governing permissions and
208
- * limitations under the License.
209
- * =============================================================================
210
- */
211
- function Ee(t, e = 0) {
212
- const n = { x: i(t, "x", "step") }, r = { alpha: e };
213
- return o.runKernel(ue, n, r);
214
- }
215
- const Oe = /* @__PURE__ */ h({ step_: Ee });
216
- /**
217
- * @license
218
- * Copyright 2018 Google LLC. All Rights Reserved.
219
- * Licensed under the Apache License, Version 2.0 (the "License");
220
- * you may not use this file except in compliance with the License.
221
- * You may obtain a copy of the License at
222
- *
223
- * http://www.apache.org/licenses/LICENSE-2.0
224
- *
225
- * Unless required by applicable law or agreed to in writing, software
226
- * distributed under the License is distributed on an "AS IS" BASIS,
227
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
228
- * See the License for the specific language governing permissions and
229
- * limitations under the License.
230
- * =============================================================================
231
- */
232
- function Fe(t, e, s) {
233
- const n = i(t, "x", "transpose");
234
- if (e == null && (e = n.shape.map((l, p) => p).reverse()), L(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((l) => {
235
- L(l >= 0 && l < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
236
- }), n.rank <= 1)
237
- return n.clone();
238
- const r = { x: n }, c = { perm: e };
239
- return n.dtype === "complex64" ? ae(() => {
240
- let l = _e(n), p = $e(n);
241
- return l = o.runKernel(A, { x: l }, c), p = o.runKernel(A, { x: p }, c), s && (p = be(p)), pe(l, p);
242
- }) : o.runKernel(A, r, c);
243
- }
244
- const Re = /* @__PURE__ */ h({ transpose_: Fe });
245
- /**
246
- * @license
247
- * Copyright 2019 Google LLC. All Rights Reserved.
248
- * Licensed under the Apache License, Version 2.0 (the "License");
249
- * you may not use this file except in compliance with the License.
250
- * You may obtain a copy of the License at
251
- *
252
- * http://www.apache.org/licenses/LICENSE-2.0
253
- *
254
- * Unless required by applicable law or agreed to in writing, software
255
- * distributed under the License is distributed on an "AS IS" BASIS,
256
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
257
- * See the License for the specific language governing permissions and
258
- * limitations under the License.
259
- * =============================================================================
260
- */
261
- function Ae(t, e, s) {
262
- if (s == null || s === "linear")
263
- return t;
264
- if (s === "relu")
265
- return ie(t, Oe(e));
266
- throw new Error(`Cannot compute gradient for fused activation ${s}.`);
267
- }
268
- function Le(t, e) {
269
- let s = e;
270
- const n = oe(t.shape, e.shape);
271
- return n.length > 0 && (s = ce(s, n)), f(s, t.shape);
272
- }
273
- function Te(t, e, s, n) {
274
- if (e === "linear")
275
- return t;
276
- if (e === "relu")
277
- return We(t);
278
- if (e === "elu")
279
- return me(t);
280
- if (e === "relu6")
281
- return ze(t);
282
- if (e === "prelu")
283
- return Se(t, s);
284
- if (e === "leakyrelu")
285
- return ke(t, n);
286
- if (e === "sigmoid")
287
- return fe(t);
288
- throw new Error(`Unknown fused activation ${e}.`);
289
- }
290
- const Be = (t, e) => !(t > 0) || e === "linear";
291
- /**
292
- * @license
293
- * Copyright 2019 Google LLC. All Rights Reserved.
294
- * Licensed under the Apache License, Version 2.0 (the "License");
295
- * you may not use this file except in compliance with the License.
296
- * You may obtain a copy of the License at
297
- *
298
- * http://www.apache.org/licenses/LICENSE-2.0
299
- *
300
- * Unless required by applicable law or agreed to in writing, software
301
- * distributed under the License is distributed on an "AS IS" BASIS,
302
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
303
- * See the License for the specific language governing permissions and
304
- * limitations under the License.
305
- * =============================================================================
306
- */
307
- function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: c = "linear", preluActivationWeights: l, leakyreluAlpha: p = 0.2 }) {
308
- if (Be(o.state.gradientDepth, c) === !1) {
309
- let x = m(t, e, s, n);
310
- return r != null && (x = le(x, r)), Te(x, c, l, p);
311
- }
312
- let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
313
- [u, a] = q(u, a);
314
- const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
315
- L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
316
- const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
317
- let S;
318
- r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
319
- let v;
320
- l != null && (v = i(l, "prelu weights", "fused matMul"));
321
- const G = (x, M) => {
322
- const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
323
- let K, _;
324
- if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
325
- const V = Le(z, d);
326
- return [K, _, V];
327
- } else
328
- return [K, _];
329
- }, I = {
330
- a: F,
331
- b: R,
332
- bias: S,
333
- preluActivationWeights: v
334
- }, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
335
- return r == null ? U((M, g, $) => {
336
- const k = (
337
- // tslint:disable-next-line: no-unnecessary-type-assertion
338
- o.runKernel(H, I, j)
339
- );
340
- return $([M, g, k]), { value: f(k, O), gradFunc: G };
341
- })(F, R) : U((M, g, $, k) => {
342
- const z = (
343
- // tslint:disable-next-line: no-unnecessary-type-assertion
344
- o.runKernel(H, I, j)
345
- );
346
- return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
347
- })(F, R, S);
348
- }
349
- const Q = /* @__PURE__ */ h({ fusedMatMul_: Ne });
350
- /**
351
- * @license
352
- * Copyright 2018 Google LLC
353
- *
354
- * Use of this source code is governed by an MIT-style
355
- * license that can be found in the LICENSE file or at
356
- * https://opensource.org/licenses/MIT.
357
- * =============================================================================
358
- */
359
- class E extends Error {
360
- constructor(e) {
361
- super(e), Object.setPrototypeOf(this, E.prototype);
362
- }
363
- }
364
- /**
365
- * @license
366
- * Copyright 2018 Google LLC
367
- *
368
- * Use of this source code is governed by an MIT-style
369
- * license that can be found in the LICENSE file or at
370
- * https://opensource.org/licenses/MIT.
371
- * =============================================================================
372
- */
373
- function ve(t, e, s, n) {
374
- if (t.rank < 2 || e.rank < 2)
375
- throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
376
- if (e.rank >= 3) {
377
- const r = t.shape.slice(-1)[0], c = e.shape.slice(-2)[0];
378
- if (r !== c)
379
- throw new E(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
380
- }
381
- if (t.rank === 2 && e.rank === 2)
382
- return Q({
383
- a: t,
384
- b: e,
385
- transposeA: !1,
386
- transposeB: !1,
387
- bias: null,
388
- activation: s
389
- });
390
- {
391
- const r = t.shape.slice(), c = r.pop();
392
- t = f(t, [-1, c]);
393
- const l = e.shape.slice(), p = l.pop(), u = l.pop(), a = [...l, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
394
- e = f(Re(e, D), [u, -1]);
395
- const b = [...r, ...a];
396
- return f(Q({
397
- a: t,
398
- b: e,
399
- transposeA: !1,
400
- transposeB: !1,
401
- bias: null,
402
- activation: s
403
- }), b);
404
- }
405
- }
406
- class Ue {
1
+ import { r as t, d as s } from "../random_width-oeUIlUZj.js";
2
+ import "../index-pWA4_lUh.js";
3
+ import { v as r } from "../variable-Dl_ub3pk.js";
4
+ import { g as d } from "../gather-BPGW8RsB.js";
5
+ class b {
407
6
  vocabSize;
408
7
  embedDim;
409
- tf;
410
8
  tiedWeights;
411
9
  initializer;
412
- constructor(e, s, n) {
413
- this.vocabSize = s.vocabSize, this.embedDim = s.embedDim, this.tf = e, this.initializer = this.tf.initializers.randomNormal({
10
+ constructor(i, e) {
11
+ this.vocabSize = i.vocabSize, this.embedDim = i.embedDim, this.initializer = t({
414
12
  mean: 0,
415
13
  stddev: 0.02
416
- }), this.tiedWeights = this.tf.variable(
14
+ }), this.tiedWeights = r(
417
15
  this.initializer.apply([this.vocabSize, this.embedDim]),
418
16
  !0,
419
- n || "tied_embedding"
17
+ e || "tied_embedding"
420
18
  );
421
19
  }
422
20
  get variables() {
423
21
  return [this.tiedWeights];
424
22
  }
425
- embed(e) {
426
- return this.tf.gather(this.tiedWeights, e, 0);
23
+ embed(i) {
24
+ return d(this.tiedWeights, i, 0);
427
25
  }
428
- project(e) {
429
- return ve(e, this.tiedWeights.transpose());
26
+ project(i) {
27
+ return s(i, this.tiedWeights.transpose());
430
28
  }
431
29
  getWeights() {
432
30
  return [this.tiedWeights];
433
31
  }
434
- setWeights(e) {
435
- this.tiedWeights.assign(e[0]);
32
+ setWeights(i) {
33
+ this.tiedWeights.assign(i[0]);
436
34
  }
437
35
  getConfig() {
438
36
  return {
@@ -445,5 +43,5 @@ class Ue {
445
43
  }
446
44
  }
447
45
  export {
448
- Ue as default
46
+ b as default
449
47
  };
@@ -1,29 +1,28 @@
1
- import { default as TF } from '@tensorflow/tfjs';
2
1
  import { GPTConfig } from '../config';
3
2
  import { KVCache } from './CausalSelfAttention';
4
3
  import { default as RoPECache } from './RoPECache';
5
4
  import { default as MemoryProfiler } from '../utilities/profile';
6
5
  import { default as BaseLayer } from './BaseLayer';
6
+ import { Tensor, Variable } from '@tensorflow/tfjs-core';
7
7
  export default class Block extends BaseLayer {
8
8
  private ln1;
9
9
  private attn;
10
10
  private ln2;
11
11
  private mlp;
12
- private tf;
13
12
  private index;
14
13
  private _trainable;
15
14
  skipped: boolean;
16
- constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache);
15
+ constructor(index: number, config: GPTConfig, ropeCache?: RoPECache);
17
16
  setProfiler(value: MemoryProfiler | undefined): void;
18
- get variables(): TF.Variable[];
17
+ get variables(): Variable[];
19
18
  get trainable(): boolean;
20
19
  set trainable(value: boolean);
21
- saveWeights(map: Map<string, TF.Tensor[]>): void;
22
- loadWeights(weights: Map<string, TF.Tensor[]>): void;
20
+ saveWeights(map: Map<string, Tensor[]>): void;
21
+ loadWeights(weights: Map<string, Tensor[]>): void;
23
22
  private getMLPOutput;
24
- call(x: TF.Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
25
- output: TF.Tensor;
26
- attention?: TF.Tensor;
23
+ call(x: Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
24
+ output: Tensor;
25
+ attention?: Tensor;
27
26
  cache?: KVCache;
28
27
  };
29
28
  dispose(): void;
@@ -1,18 +1,18 @@
1
- import a from "./CausalSelfAttention.js";
1
+ import h from "./CausalSelfAttention.js";
2
2
  import o from "./MLP.js";
3
3
  import r from "./RMSNorm.js";
4
4
  import p from "./BaseLayer.js";
5
- class f extends p {
5
+ import { t as d } from "../index-pWA4_lUh.js";
6
+ class g extends p {
6
7
  ln1;
7
8
  attn;
8
9
  ln2;
9
10
  mlp;
10
- tf;
11
11
  index;
12
12
  _trainable = !0;
13
13
  skipped = !1;
14
- constructor(t, i, s, e) {
15
- super(), this.tf = t, this.index = i, this.ln1 = new r(t, [s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new a(this.tf, this.index, s, e), this.ln2 = new r(t, [s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.tf, this.index, s);
14
+ constructor(t, s, i) {
15
+ super(), this.index = t, this.ln1 = new r([s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new h(this.index, s, i), this.ln2 = new r([s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.index, s);
16
16
  }
17
17
  setProfiler(t) {
18
18
  this._profiler = t, this.attn.setProfiler(t), this.mlp.setProfiler(t), this.ln1.setProfiler(t), this.ln2.setProfiler(t);
@@ -37,17 +37,17 @@ class f extends p {
37
37
  loadWeights(t) {
38
38
  this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_rms1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_rms2`) || []);
39
39
  }
40
- getMLPOutput(t, i) {
41
- const s = this.ln2.apply(t), e = this.mlp.call(s, i);
40
+ getMLPOutput(t, s) {
41
+ const i = this.ln2.apply(t), e = this.mlp.call(i, s);
42
42
  return t.add(e);
43
43
  }
44
- call(t, i = !1, s = !1, e) {
45
- return this.tf.tidy(() => {
44
+ call(t, s = !1, i = !1, e) {
45
+ return d(() => {
46
46
  if (this.skipped)
47
47
  return { output: t };
48
- const l = this.ln1.apply(t), n = this.attn.call(l, i, s, e), h = t.add(n.output);
48
+ const l = this.ln1.apply(t), n = this.attn.call(l, s, i, e), a = t.add(n.output);
49
49
  return {
50
- output: this.getMLPOutput(h, i),
50
+ output: this.getMLPOutput(a, s),
51
51
  attention: n.attention,
52
52
  cache: n.presentKV
53
53
  };
@@ -58,5 +58,5 @@ class f extends p {
58
58
  }
59
59
  }
60
60
  export {
61
- f as default
61
+ g as default
62
62
  };
package/dist/main.d.ts CHANGED
@@ -1,10 +1,12 @@
1
1
  export { default as NanoGPT } from './NanoGPTModel';
2
2
  export { default as TeachableLLM } from './TeachableLLM';
3
3
  export { default as CharTokeniser } from './tokeniser/CharTokeniser';
4
+ export { default as BPETokeniser } from './tokeniser/bpe';
4
5
  export { default as waitForModel } from './utilities/waitForModel';
5
6
  export { default as loadTextData } from './data/textLoader';
6
7
  export type { ITrainerOptions } from './Trainer';
7
8
  export type { IGenerateOptions } from './Generator';
8
9
  export type { TrainingLogEntry } from './NanoGPTModel';
10
+ export type { ITokeniser } from './tokeniser/type';
9
11
  export type { GPTConfig } from './config';
10
12
  export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
package/dist/main.js CHANGED
@@ -1,23 +1,37 @@
1
- import { default as s } from "./NanoGPTModel.js";
2
- import { default as p } from "./TeachableLLM.js";
3
- import { default as d } from "./tokeniser/CharTokeniser.js";
4
- import { default as x } from "./utilities/waitForModel.js";
5
- import { default as T } from "./data/textLoader.js";
6
- import { estimateMemoryUsage as M, estimateParameterCount as C, estimateResources as c, estimateTrainingMemoryUsage as h, validateConfig as y } from "./utilities/parameters.js";
7
- import "./ops/scatterSub.js";
8
- import "./ops/gatherSub.js";
9
- import "./ops/attentionMask.js";
10
- import "./ops/qkv.js";
11
- import "./ops/rope.js";
1
+ import { default as P } from "./NanoGPTModel.js";
2
+ import { default as h } from "./TeachableLLM.js";
3
+ import { default as y } from "./tokeniser/CharTokeniser.js";
4
+ import { default as U } from "./tokeniser/bpe.js";
5
+ import { default as v } from "./utilities/waitForModel.js";
6
+ import { default as B } from "./data/textLoader.js";
7
+ import { estimateMemoryUsage as E, estimateParameterCount as F, estimateResources as G, estimateTrainingMemoryUsage as N, validateConfig as R } from "./utilities/parameters.js";
8
+ import "./index-pWA4_lUh.js";
9
+ import "./ops/cpu/scatterSub.js";
10
+ import "./ops/webgl/scatterSub.js";
11
+ import "./ops/cpu/gatherSub.js";
12
+ import "./ops/webgl/gatherSub.js";
13
+ import "./ops/cpu/attentionMask.js";
14
+ import "./ops/webgl/attentionMask.js";
15
+ import "./ops/grads/attentionMask.js";
16
+ import "./ops/cpu/qkv.js";
17
+ import "./ops/webgl/qkv.js";
18
+ import "./ops/grads/qkv.js";
19
+ import "@tensorflow/tfjs";
20
+ import "./ops/cpu/rope.js";
21
+ import "./ops/webgl/rope.js";
22
+ import "./ops/grads/rope.js";
23
+ import "./ops/cpu/appendCache.js";
24
+ import "./ops/webgl/appendCache.js";
12
25
  export {
13
- d as CharTokeniser,
14
- s as NanoGPT,
15
- p as TeachableLLM,
16
- M as estimateMemoryUsage,
17
- C as estimateParameterCount,
18
- c as estimateResources,
19
- h as estimateTrainingMemoryUsage,
20
- T as loadTextData,
21
- y as validateConfig,
22
- x as waitForModel
26
+ U as BPETokeniser,
27
+ y as CharTokeniser,
28
+ P as NanoGPT,
29
+ h as TeachableLLM,
30
+ E as estimateMemoryUsage,
31
+ F as estimateParameterCount,
32
+ G as estimateResources,
33
+ N as estimateTrainingMemoryUsage,
34
+ B as loadTextData,
35
+ R as validateConfig,
36
+ v as waitForModel
23
37
  };
@@ -1,4 +1,4 @@
1
- import { o as m, d as s, f as c, E as M, B as f } from "./index-YPKosni4.js";
1
+ import { o as m, h as s, p as c, E as M, B as p } from "./index-pWA4_lUh.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as m, d as s, f as c, E as M, B as f } from "./index-YPKosni4.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function p(e, o, n = !1, l = !1) {
18
+ function f(e, o, n = !1, l = !1) {
19
19
  let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
20
20
  [a, t] = c(a, t);
21
21
  const r = { a, b: t }, u = { transposeA: n, transposeB: l };
22
- return M.runKernel(f, r, u);
22
+ return M.runKernel(p, r, u);
23
23
  }
24
- const i = /* @__PURE__ */ m({ matMul_: p });
24
+ const h = /* @__PURE__ */ m({ matMul_: f });
25
25
  export {
26
- i as m
26
+ h as m
27
27
  };