@genai-fi/nanogpt 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
- import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee, f as te, g as se, h as ne, I as re, L as ae, N as ue, P as ie, i as oe, j as le, k as ce, l as pe, n as B, t as he, T as L, m as fe, p as me, q as de, r as q, u as P, v as U, _ as H } from "../index-DcaSvB38.js";
1
+ import { o as h, c as u, x as B, E as c, B as V, y as X, D as Y, I as Z, F as ee, N as te, H as se, J as ne, K as re, O as ae, Q as ue, f as L, w as ie, T as A, m as oe, U as le, t as ce, k as C, V as P, v as U, _ as H } from "../index-D1SlunD-.js";
2
+ import { s as pe, r as f } from "../sum-02UQ5Eaq.js";
3
+ import { c as he } from "../complex-D6Bq1XDf.js";
2
4
  /**
3
5
  * @license
4
6
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,57 +17,13 @@ import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee
15
17
  * limitations under the License.
16
18
  * =============================================================================
17
19
  */
18
- function ge(t, e) {
19
- const s = a(t, "real", "complex"), n = a(e, "imag", "complex");
20
- V(s.shape, n.shape, `real and imag shapes, ${s.shape} and ${n.shape}, must match in call to tf.complex().`);
21
- const r = { real: s, imag: n };
22
- return u.runKernel(X, r);
20
+ function fe(t, e, s = !1, n = !1) {
21
+ let r = u(t, "a", "matMul"), i = u(e, "b", "matMul");
22
+ [r, i] = B(r, i);
23
+ const o = { a: r, b: i }, p = { transposeA: s, transposeB: n };
24
+ return c.runKernel(V, o, p);
23
25
  }
24
- const $e = /* @__PURE__ */ p({ complex_: ge });
25
- /**
26
- * @license
27
- * Copyright 2020 Google LLC. All Rights Reserved.
28
- * Licensed under the Apache License, Version 2.0 (the "License");
29
- * you may not use this file except in compliance with the License.
30
- * You may obtain a copy of the License at
31
- *
32
- * http://www.apache.org/licenses/LICENSE-2.0
33
- *
34
- * Unless required by applicable law or agreed to in writing, software
35
- * distributed under the License is distributed on an "AS IS" BASIS,
36
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37
- * See the License for the specific language governing permissions and
38
- * limitations under the License.
39
- * =============================================================================
40
- */
41
- function xe(t, e) {
42
- const n = { x: a(t, "x", "reshape", "string_or_numeric") }, r = { shape: e };
43
- return u.runKernel(Y, n, r);
44
- }
45
- const f = /* @__PURE__ */ p({ reshape_: xe });
46
- /**
47
- * @license
48
- * Copyright 2020 Google LLC. All Rights Reserved.
49
- * Licensed under the Apache License, Version 2.0 (the "License");
50
- * you may not use this file except in compliance with the License.
51
- * You may obtain a copy of the License at
52
- *
53
- * http://www.apache.org/licenses/LICENSE-2.0
54
- *
55
- * Unless required by applicable law or agreed to in writing, software
56
- * distributed under the License is distributed on an "AS IS" BASIS,
57
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
58
- * See the License for the specific language governing permissions and
59
- * limitations under the License.
60
- * =============================================================================
61
- */
62
- function ke(t, e, s = !1, n = !1) {
63
- let r = a(t, "a", "matMul"), i = a(e, "b", "matMul");
64
- [r, i] = A(r, i);
65
- const l = { a: r, b: i }, h = { transposeA: s, transposeB: n };
66
- return u.runKernel(Z, l, h);
67
- }
68
- const d = /* @__PURE__ */ p({ matMul_: ke });
26
+ const m = /* @__PURE__ */ h({ matMul_: fe });
69
27
  /**
70
28
  * @license
71
29
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -82,11 +40,11 @@ const d = /* @__PURE__ */ p({ matMul_: ke });
82
40
  * limitations under the License.
83
41
  * =============================================================================
84
42
  */
85
- function be(t) {
86
- const s = { x: a(t, "x", "sigmoid", "float32") };
87
- return u.runKernel(ee, s);
43
+ function de(t) {
44
+ const s = { x: u(t, "x", "sigmoid", "float32") };
45
+ return c.runKernel(X, s);
88
46
  }
89
- const De = /* @__PURE__ */ p({ sigmoid_: be });
47
+ const me = /* @__PURE__ */ h({ sigmoid_: de });
90
48
  /**
91
49
  * @license
92
50
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -103,34 +61,11 @@ const De = /* @__PURE__ */ p({ sigmoid_: be });
103
61
  * limitations under the License.
104
62
  * =============================================================================
105
63
  */
106
- function Se(t) {
107
- const s = { x: a(t, "x", "elu", "float32") };
108
- return u.runKernel(te, s);
109
- }
110
- const ye = /* @__PURE__ */ p({ elu_: Se });
111
- /**
112
- * @license
113
- * Copyright 2018 Google LLC. All Rights Reserved.
114
- * Licensed under the Apache License, Version 2.0 (the "License");
115
- * you may not use this file except in compliance with the License.
116
- * You may obtain a copy of the License at
117
- *
118
- * http://www.apache.org/licenses/LICENSE-2.0
119
- *
120
- * Unless required by applicable law or agreed to in writing, software
121
- * distributed under the License is distributed on an "AS IS" BASIS,
122
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
123
- * See the License for the specific language governing permissions and
124
- * limitations under the License.
125
- * =============================================================================
126
- */
127
- function _e(t, e = null, s = !1) {
128
- let n = a(t, "x", "sum");
129
- n.dtype === "bool" && (n = se(n, "int32"));
130
- const r = { x: n }, i = { axis: e, keepDims: s };
131
- return u.runKernel(ne, r, i);
64
+ function ge(t) {
65
+ const s = { x: u(t, "x", "elu", "float32") };
66
+ return c.runKernel(Y, s);
132
67
  }
133
- const Me = /* @__PURE__ */ p({ sum_: _e });
68
+ const $e = /* @__PURE__ */ h({ elu_: ge });
134
69
  /**
135
70
  * @license
136
71
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -147,11 +82,11 @@ const Me = /* @__PURE__ */ p({ sum_: _e });
147
82
  * limitations under the License.
148
83
  * =============================================================================
149
84
  */
150
- function Ke(t) {
151
- const s = { input: a(t, "input", "imag") };
152
- return u.runKernel(re, s);
85
+ function xe(t) {
86
+ const s = { input: u(t, "input", "imag") };
87
+ return c.runKernel(Z, s);
153
88
  }
154
- const we = /* @__PURE__ */ p({ imag_: Ke });
89
+ const ke = /* @__PURE__ */ h({ imag_: xe });
155
90
  /**
156
91
  * @license
157
92
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -168,11 +103,11 @@ const we = /* @__PURE__ */ p({ imag_: Ke });
168
103
  * limitations under the License.
169
104
  * =============================================================================
170
105
  */
171
- function We(t, e = 0.2) {
172
- const n = { x: a(t, "x", "leakyRelu") }, r = { alpha: e };
173
- return u.runKernel(ae, n, r);
106
+ function De(t, e = 0.2) {
107
+ const n = { x: u(t, "x", "leakyRelu") }, r = { alpha: e };
108
+ return c.runKernel(ee, n, r);
174
109
  }
175
- const ze = /* @__PURE__ */ p({ leakyRelu_: We });
110
+ const be = /* @__PURE__ */ h({ leakyRelu_: De });
176
111
  /**
177
112
  * @license
178
113
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -189,11 +124,11 @@ const ze = /* @__PURE__ */ p({ leakyRelu_: We });
189
124
  * limitations under the License.
190
125
  * =============================================================================
191
126
  */
192
- function Ee(t) {
193
- const s = { x: a(t, "x", "neg") };
194
- return u.runKernel(ue, s);
127
+ function ye(t) {
128
+ const s = { x: u(t, "x", "neg") };
129
+ return c.runKernel(te, s);
195
130
  }
196
- const Oe = /* @__PURE__ */ p({ neg_: Ee });
131
+ const Se = /* @__PURE__ */ h({ neg_: ye });
197
132
  /**
198
133
  * @license
199
134
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -210,11 +145,11 @@ const Oe = /* @__PURE__ */ p({ neg_: Ee });
210
145
  * limitations under the License.
211
146
  * =============================================================================
212
147
  */
213
- function Re(t, e) {
214
- const s = a(t, "x", "prelu"), n = a(e, "alpha", "prelu"), r = { x: s, alpha: n };
215
- return u.runKernel(ie, r);
148
+ function Me(t, e) {
149
+ const s = u(t, "x", "prelu"), n = u(e, "alpha", "prelu"), r = { x: s, alpha: n };
150
+ return c.runKernel(se, r);
216
151
  }
217
- const Fe = /* @__PURE__ */ p({ prelu_: Re });
152
+ const Ke = /* @__PURE__ */ h({ prelu_: Me });
218
153
  /**
219
154
  * @license
220
155
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -231,11 +166,11 @@ const Fe = /* @__PURE__ */ p({ prelu_: Re });
231
166
  * limitations under the License.
232
167
  * =============================================================================
233
168
  */
234
- function Le(t) {
235
- const s = { input: a(t, "input", "real") };
236
- return u.runKernel(oe, s);
169
+ function _e(t) {
170
+ const s = { input: u(t, "input", "real") };
171
+ return c.runKernel(ne, s);
237
172
  }
238
- const Ae = /* @__PURE__ */ p({ real_: Le });
173
+ const we = /* @__PURE__ */ h({ real_: _e });
239
174
  /**
240
175
  * @license
241
176
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -252,11 +187,11 @@ const Ae = /* @__PURE__ */ p({ real_: Le });
252
187
  * limitations under the License.
253
188
  * =============================================================================
254
189
  */
255
- function Be(t) {
256
- const s = { x: a(t, "x", "relu") };
257
- return u.runKernel(le, s);
190
+ function We(t) {
191
+ const s = { x: u(t, "x", "relu") };
192
+ return c.runKernel(re, s);
258
193
  }
259
- const Te = /* @__PURE__ */ p({ relu_: Be });
194
+ const ze = /* @__PURE__ */ h({ relu_: We });
260
195
  /**
261
196
  * @license
262
197
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -273,11 +208,11 @@ const Te = /* @__PURE__ */ p({ relu_: Be });
273
208
  * limitations under the License.
274
209
  * =============================================================================
275
210
  */
276
- function Ne(t) {
277
- const s = { x: a(t, "x", "relu6") };
278
- return u.runKernel(ce, s);
211
+ function Ee(t) {
212
+ const s = { x: u(t, "x", "relu6") };
213
+ return c.runKernel(ae, s);
279
214
  }
280
- const ve = /* @__PURE__ */ p({ relu6_: Ne });
215
+ const Oe = /* @__PURE__ */ h({ relu6_: Ee });
281
216
  /**
282
217
  * @license
283
218
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -294,11 +229,11 @@ const ve = /* @__PURE__ */ p({ relu6_: Ne });
294
229
  * limitations under the License.
295
230
  * =============================================================================
296
231
  */
297
- function Ce(t, e = 0) {
298
- const n = { x: a(t, "x", "step") }, r = { alpha: e };
299
- return u.runKernel(pe, n, r);
232
+ function Fe(t, e = 0) {
233
+ const n = { x: u(t, "x", "step") }, r = { alpha: e };
234
+ return c.runKernel(ue, n, r);
300
235
  }
301
- const Ge = /* @__PURE__ */ p({ step_: Ce });
236
+ const Re = /* @__PURE__ */ h({ step_: Fe });
302
237
  /**
303
238
  * @license
304
239
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -315,19 +250,19 @@ const Ge = /* @__PURE__ */ p({ step_: Ce });
315
250
  * limitations under the License.
316
251
  * =============================================================================
317
252
  */
318
- function Ie(t, e, s) {
319
- const n = a(t, "x", "transpose");
320
- if (e == null && (e = n.shape.map((l, h) => h).reverse()), B(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((l) => {
321
- B(l >= 0 && l < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
253
+ function Ae(t, e, s) {
254
+ const n = u(t, "x", "transpose");
255
+ if (e == null && (e = n.shape.map((o, p) => p).reverse()), L(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((o) => {
256
+ L(o >= 0 && o < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
322
257
  }), n.rank <= 1)
323
258
  return n.clone();
324
259
  const r = { x: n }, i = { perm: e };
325
- return n.dtype === "complex64" ? he(() => {
326
- let l = Ae(n), h = we(n);
327
- return l = u.runKernel(L, { x: l }, i), h = u.runKernel(L, { x: h }, i), s && (h = Oe(h)), $e(l, h);
328
- }) : u.runKernel(L, r, i);
260
+ return n.dtype === "complex64" ? ie(() => {
261
+ let o = we(n), p = ke(n);
262
+ return o = c.runKernel(A, { x: o }, i), p = c.runKernel(A, { x: p }, i), s && (p = Se(p)), he(o, p);
263
+ }) : c.runKernel(A, r, i);
329
264
  }
330
- const je = /* @__PURE__ */ p({ transpose_: Ie });
265
+ const Be = /* @__PURE__ */ h({ transpose_: Ae });
331
266
  /**
332
267
  * @license
333
268
  * Copyright 2019 Google LLC. All Rights Reserved.
@@ -344,36 +279,36 @@ const je = /* @__PURE__ */ p({ transpose_: Ie });
344
279
  * limitations under the License.
345
280
  * =============================================================================
346
281
  */
347
- function qe(t, e, s) {
282
+ function Le(t, e, s) {
348
283
  if (s == null || s === "linear")
349
284
  return t;
350
285
  if (s === "relu")
351
- return fe(t, Ge(e));
286
+ return oe(t, Re(e));
352
287
  throw new Error(`Cannot compute gradient for fused activation ${s}.`);
353
288
  }
354
- function Pe(t, e) {
289
+ function Te(t, e) {
355
290
  let s = e;
356
- const n = me(t.shape, e.shape);
357
- return n.length > 0 && (s = Me(s, n)), f(s, t.shape);
291
+ const n = le(t.shape, e.shape);
292
+ return n.length > 0 && (s = pe(s, n)), f(s, t.shape);
358
293
  }
359
- function Ue(t, e, s, n) {
294
+ function Ne(t, e, s, n) {
360
295
  if (e === "linear")
361
296
  return t;
362
297
  if (e === "relu")
363
- return Te(t);
298
+ return ze(t);
364
299
  if (e === "elu")
365
- return ye(t);
300
+ return $e(t);
366
301
  if (e === "relu6")
367
- return ve(t);
302
+ return Oe(t);
368
303
  if (e === "prelu")
369
- return Fe(t, s);
304
+ return Ke(t, s);
370
305
  if (e === "leakyrelu")
371
- return ze(t, n);
306
+ return be(t, n);
372
307
  if (e === "sigmoid")
373
- return De(t);
308
+ return me(t);
374
309
  throw new Error(`Unknown fused activation ${e}.`);
375
310
  }
376
- const He = (t, e) => !(t > 0) || e === "linear";
311
+ const ve = (t, e) => !(t > 0) || e === "linear";
377
312
  /**
378
313
  * @license
379
314
  * Copyright 2019 Google LLC. All Rights Reserved.
@@ -390,49 +325,49 @@ const He = (t, e) => !(t > 0) || e === "linear";
390
325
  * limitations under the License.
391
326
  * =============================================================================
392
327
  */
393
- function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: l, leakyreluAlpha: h = 0.2 }) {
394
- if (He(u.state.gradientDepth, i) === !1) {
395
- let x = d(t, e, s, n);
396
- return r != null && (x = de(x, r)), Ue(x, i, l, h);
328
+ function Ge({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: o, leakyreluAlpha: p = 0.2 }) {
329
+ if (ve(c.state.gradientDepth, i) === !1) {
330
+ let x = m(t, e, s, n);
331
+ return r != null && (x = ce(x, r)), Ne(x, i, o, p);
397
332
  }
398
- let o = a(t, "a", "fused matMul"), c = a(e, "b", "fused matMul");
399
- [o, c] = A(o, c);
400
- const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], W = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
401
- B(b === D, () => `Error in fused matMul: inner shapes (${b}) and (${D}) of Tensors with shapes ${o.shape} and ${c.shape} and transposeA=${s} and transposeB=${n} must match.`);
402
- const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, W]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, W, D]) : f(c, [v, D, W]);
403
- let y;
404
- r != null && (y = a(r, "bias", "fused matMul"), [y] = A(y, o), P(O, y.shape));
405
- let C;
406
- l != null && (C = a(l, "prelu weights", "fused matMul"));
407
- const G = (x, K) => {
408
- const [g, $, k, z] = K, m = qe(f(x, k.shape), k, i);
409
- let _, M;
410
- if (!s && !n ? (_ = d(m, $, !1, !0), M = d(g, m, !0, !1)) : !s && n ? (_ = d(m, $, !1, !1), M = d(m, g, !0, !1)) : s && !n ? (_ = d($, m, !1, !0), M = d(g, m, !1, !1)) : (_ = d($, m, !0, !0), M = d(m, g, !0, !0)), r != null) {
411
- const Q = Pe(z, m);
412
- return [_, M, Q];
333
+ let a = u(t, "a", "fused matMul"), l = u(e, "b", "fused matMul");
334
+ [a, l] = B(a, l);
335
+ const D = s ? a.shape[a.rank - 2] : a.shape[a.rank - 1], b = n ? l.shape[l.rank - 1] : l.shape[l.rank - 2], w = s ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = n ? l.shape[l.rank - 2] : l.shape[l.rank - 1], T = a.shape.slice(0, -2), y = l.shape.slice(0, -2), N = C(T), v = C(y);
336
+ L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${a.shape} and ${l.shape} and transposeA=${s} and transposeB=${n} must match.`);
337
+ const O = P(a.shape.slice(0, -2), l.shape.slice(0, -2)).concat([w, W]), F = s ? f(a, [N, D, w]) : f(a, [N, w, D]), R = n ? f(l, [v, W, b]) : f(l, [v, b, W]);
338
+ let S;
339
+ r != null && (S = u(r, "bias", "fused matMul"), [S] = B(S, a), P(O, S.shape));
340
+ let G;
341
+ o != null && (G = u(o, "prelu weights", "fused matMul"));
342
+ const I = (x, _) => {
343
+ const [g, $, k, z] = _, d = Le(f(x, k.shape), k, i);
344
+ let M, K;
345
+ if (!s && !n ? (M = m(d, $, !1, !0), K = m(g, d, !0, !1)) : !s && n ? (M = m(d, $, !1, !1), K = m(d, g, !0, !1)) : s && !n ? (M = m($, d, !1, !0), K = m(g, d, !1, !1)) : (M = m($, d, !0, !0), K = m(d, g, !0, !0)), r != null) {
346
+ const Q = Te(z, d);
347
+ return [M, K, Q];
413
348
  } else
414
- return [_, M];
415
- }, I = {
416
- a: R,
417
- b: F,
418
- bias: y,
419
- preluActivationWeights: C
420
- }, j = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: h };
421
- return r == null ? U((K, g, $) => {
349
+ return [M, K];
350
+ }, j = {
351
+ a: F,
352
+ b: R,
353
+ bias: S,
354
+ preluActivationWeights: G
355
+ }, q = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: p };
356
+ return r == null ? U((_, g, $) => {
422
357
  const k = (
423
358
  // tslint:disable-next-line: no-unnecessary-type-assertion
424
- u.runKernel(H, I, j)
359
+ c.runKernel(H, j, q)
425
360
  );
426
- return $([K, g, k]), { value: f(k, O), gradFunc: G };
427
- })(R, F) : U((K, g, $, k) => {
361
+ return $([_, g, k]), { value: f(k, O), gradFunc: I };
362
+ })(F, R) : U((_, g, $, k) => {
428
363
  const z = (
429
364
  // tslint:disable-next-line: no-unnecessary-type-assertion
430
- u.runKernel(H, I, j)
365
+ c.runKernel(H, j, q)
431
366
  );
432
- return k([K, g, z, $]), { value: f(z, O), gradFunc: G };
433
- })(R, F, y);
367
+ return k([_, g, z, $]), { value: f(z, O), gradFunc: I };
368
+ })(F, R, S);
434
369
  }
435
- const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
370
+ const J = /* @__PURE__ */ h({ fusedMatMul_: Ge });
436
371
  /**
437
372
  * @license
438
373
  * Copyright 2018 Google LLC
@@ -456,7 +391,7 @@ class E extends Error {
456
391
  * https://opensource.org/licenses/MIT.
457
392
  * =============================================================================
458
393
  */
459
- function Qe(t, e, s, n) {
394
+ function Ie(t, e, s, n) {
460
395
  if (t.rank < 2 || e.rank < 2)
461
396
  throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
462
397
  if (e.rank >= 3) {
@@ -476,9 +411,9 @@ function Qe(t, e, s, n) {
476
411
  {
477
412
  const r = t.shape.slice(), i = r.pop();
478
413
  t = f(t, [-1, i]);
479
- const l = e.shape.slice(), h = l.pop(), o = l.pop(), c = [...l, h], b = Array.from({ length: e.rank }, (T, S) => S === 0 ? e.rank - 2 : S <= e.rank - 2 ? S - 1 : S);
480
- e = f(je(e, b), [o, -1]);
481
- const D = [...r, ...c];
414
+ const o = e.shape.slice(), p = o.pop(), a = o.pop(), l = [...o, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
415
+ e = f(Be(e, D), [a, -1]);
416
+ const b = [...r, ...l];
482
417
  return f(J({
483
418
  a: t,
484
419
  b: e,
@@ -486,10 +421,10 @@ function Qe(t, e, s, n) {
486
421
  transposeB: !1,
487
422
  bias: null,
488
423
  activation: s
489
- }), D);
424
+ }), b);
490
425
  }
491
426
  }
492
- class Ye {
427
+ class Ue {
493
428
  vocabSize;
494
429
  embedDim;
495
430
  tf;
@@ -512,7 +447,7 @@ class Ye {
512
447
  return this.tf.gather(this.tiedWeights, e, 0);
513
448
  }
514
449
  project(e) {
515
- return Qe(e, this.tiedWeights.transpose());
450
+ return Ie(e, this.tiedWeights.transpose());
516
451
  }
517
452
  getWeights() {
518
453
  return [this.tiedWeights];
@@ -531,5 +466,5 @@ class Ye {
531
466
  }
532
467
  }
533
468
  export {
534
- Ye as default
469
+ Ue as default
535
470
  };
package/dist/main.d.ts CHANGED
@@ -6,3 +6,5 @@ export { default as loadTextData } from './data/textLoader';
6
6
  export type { ITrainerOptions } from './Trainer';
7
7
  export type { IGenerateOptions } from './Generator';
8
8
  export type { TrainingLogEntry } from './NanoGPTModel';
9
+ export type { GPTConfig } from './config';
10
+ export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
package/dist/main.js CHANGED
@@ -1,12 +1,20 @@
1
- import { default as o } from "./NanoGPTModel.js";
2
- import { default as t } from "./TeachableLLM.js";
3
- import { default as l } from "./tokeniser/CharTokeniser.js";
4
- import { default as s } from "./utilities/waitForModel.js";
5
- import { default as m } from "./data/textLoader.js";
1
+ import { default as r } from "./NanoGPTModel.js";
2
+ import { default as s } from "./TeachableLLM.js";
3
+ import { default as i } from "./tokeniser/CharTokeniser.js";
4
+ import { default as d } from "./utilities/waitForModel.js";
5
+ import { default as u } from "./data/textLoader.js";
6
+ import { estimateMemoryUsage as n, estimateParameterCount as T, estimateResources as g, estimateTrainingMemoryUsage as M, validateConfig as C } from "./utilities/parameters.js";
7
+ import "./ops/scatterSub.js";
8
+ import "./ops/gatherSub.js";
6
9
  export {
7
- l as CharTokeniser,
8
- o as NanoGPT,
9
- t as TeachableLLM,
10
- m as loadTextData,
11
- s as waitForModel
10
+ i as CharTokeniser,
11
+ r as NanoGPT,
12
+ s as TeachableLLM,
13
+ n as estimateMemoryUsage,
14
+ T as estimateParameterCount,
15
+ g as estimateResources,
16
+ M as estimateTrainingMemoryUsage,
17
+ u as loadTextData,
18
+ C as validateConfig,
19
+ d as waitForModel
12
20
  };
@@ -0,0 +1,2 @@
1
+ import { Tensor } from '@tensorflow/tfjs';
2
+ export declare function gatherSub(values: Tensor, labels: Tensor, logits: Tensor): Tensor;
@@ -0,0 +1,66 @@
1
+ import { engine as l } from "@tensorflow/tfjs";
2
+ import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-D1SlunD-.js";
3
+ import { r as p, s as f } from "../stack-DB2YLlAs.js";
4
+ /**
5
+ * @license
6
+ * Copyright 2018 Google LLC. All Rights Reserved.
7
+ * Licensed under the Apache License, Version 2.0 (the "License");
8
+ * you may not use this file except in compliance with the License.
9
+ * You may obtain a copy of the License at
10
+ *
11
+ * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ * See the License for the specific language governing permissions and
17
+ * limitations under the License.
18
+ * =============================================================================
19
+ */
20
+ function m(e, t) {
21
+ const n = i(t, "indices", "gatherND", "int32"), r = { params: i(e, "x", "gatherND", "string_or_numeric"), indices: n };
22
+ return b.runKernel(d, r);
23
+ }
24
+ const N = /* @__PURE__ */ g({ gatherND_: m });
25
+ class S {
26
+ variableNames = ["labels", "logits", "values"];
27
+ outputShape;
28
+ userCode;
29
+ constructor(t) {
30
+ this.outputShape = [t], this.userCode = `
31
+ void main() {
32
+ int coords = getOutputCoords();
33
+ int index = int(getLabelsAtOutCoords());
34
+ float val = getValuesAtOutCoords();
35
+ float logit = getLogits(coords, index);
36
+ setOutput(val - logit);
37
+ }
38
+ `;
39
+ }
40
+ }
41
+ function k(e) {
42
+ const { logits: t, labels: n, values: s } = e.inputs, r = e.backend, o = n.shape[0], a = new S(o);
43
+ return r.runWebGLProgram(a, [n, t, s], "float32");
44
+ }
45
+ const G = {
46
+ kernelName: "EfficientGatherSub",
47
+ backendName: "webgl",
48
+ kernelFunc: k
49
+ };
50
+ c(G);
51
+ function v(e) {
52
+ const { values: t, labels: n, logits: s } = e.inputs, r = n.shape[0], o = p(0, r, 1, "int32"), a = f([o, n], 1), u = N(s, a);
53
+ return h(t, u);
54
+ }
55
+ const C = {
56
+ kernelName: "EfficientGatherSub",
57
+ backendName: "cpu",
58
+ kernelFunc: v
59
+ };
60
+ c(C);
61
+ function K(e, t, n) {
62
+ return l().runKernel("EfficientGatherSub", { logits: n, labels: t, values: e }, {});
63
+ }
64
+ export {
65
+ K as gatherSub
66
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,11 @@
1
+ import { r as o } from "../../index-D1SlunD-.js";
2
+ function r(e) {
3
+ const { logits: t, labels: n } = e.inputs;
4
+ return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
5
+ }
6
+ const s = {
7
+ kernelName: "NativeSparseSoftmaxCrossEntropy",
8
+ backendName: "tensorflow",
9
+ kernelFunc: r
10
+ };
11
+ o(s);
@@ -0,0 +1,2 @@
1
+ import { Tensor } from '@tensorflow/tfjs';
2
+ export declare function scatterSub(probabilities: Tensor, labels: Tensor, scale: Tensor): Tensor;