@genai-fi/nanogpt 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/dist/Generator.js +6 -6
  2. package/dist/NanoGPTModel.js +9 -9
  3. package/dist/{RealDiv-7xu-pkZN.js → RealDiv-BYViZwhN.js} +6 -6
  4. package/dist/{Reshape-BYC1oUku.js → Reshape-t7Kcikjk.js} +2 -2
  5. package/dist/TeachableLLM.js +5 -5
  6. package/dist/{TiedEmbedding-C1HBot-5.js → TiedEmbedding-9WeDwvjO.js} +4 -4
  7. package/dist/{axis_util-CCNL7jea.js → axis_util-Bu4h7XWV.js} +1 -1
  8. package/dist/{broadcast_to-CddAF879.js → broadcast_to-DARN-DBD.js} +2 -2
  9. package/dist/{concat-XOK9ANZu.js → concat-5aPGqw3Z.js} +8 -8
  10. package/dist/{dataset-BFFipD1c.js → dataset-pgqp-YfL.js} +5 -5
  11. package/dist/{dropout-xlKRoJyU.js → dropout-Bciw46HT.js} +10 -10
  12. package/dist/{gather-DKtUaTtA.js → gather-DjyCjmOD.js} +1 -1
  13. package/dist/{gpgpu_math-B_ycgZ4W.js → gpgpu_math-CNslybmD.js} +31 -31
  14. package/dist/{index-CamYe_M8.js → index-BAzbokzv.js} +31 -31
  15. package/dist/{kernel_funcs_utils-D5MS0JFg.js → kernel_funcs_utils-CUxJCg0g.js} +5 -5
  16. package/dist/layers/BaseLayer.js +2 -2
  17. package/dist/layers/CausalSelfAttention.js +6 -6
  18. package/dist/layers/MLP.js +5 -5
  19. package/dist/layers/RMSNorm.js +3 -3
  20. package/dist/layers/RoPECache.js +4 -4
  21. package/dist/layers/TiedEmbedding.js +5 -5
  22. package/dist/layers/TransformerBlock.js +1 -1
  23. package/dist/loader/load.d.ts +13 -0
  24. package/dist/loader/load.js +27 -0
  25. package/dist/loader/loadHF.d.ts +7 -0
  26. package/dist/loader/loadHF.js +22 -0
  27. package/dist/{utilities/load.d.ts → loader/loadTransformers.d.ts} +11 -11
  28. package/dist/loader/loadTransformers.js +28 -0
  29. package/dist/loader/newZipLoad.d.ts +8 -0
  30. package/dist/loader/newZipLoad.js +21 -0
  31. package/dist/loader/oldZipLoad.d.ts +7 -0
  32. package/dist/loader/oldZipLoad.js +76 -0
  33. package/dist/{log_sum_exp-CV_5-TTu.js → log_sum_exp-YEo2h3gb.js} +16 -16
  34. package/dist/main.js +4 -4
  35. package/dist/{mat_mul-CAbRFWUj.js → mat_mul-7121rsJk.js} +4 -4
  36. package/dist/{max-JBBv7aUf.js → max-DtlIuVeW.js} +3 -3
  37. package/dist/{mulmat_packed_gpu-DW4doKL_.js → mulmat_packed_gpu-D4nKF7Je.js} +1 -1
  38. package/dist/{norm-B9dQTFYn.js → norm-CzltS9Fz.js} +10 -10
  39. package/dist/{ones-CMHNqMr6.js → ones-BBlSRqn1.js} +2 -2
  40. package/dist/ops/appendCache.js +3 -3
  41. package/dist/ops/attentionMask.js +1 -1
  42. package/dist/ops/cpu/appendCache.js +2 -2
  43. package/dist/ops/cpu/attentionMask.js +6 -6
  44. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  45. package/dist/ops/cpu/gatherSub.js +9 -9
  46. package/dist/ops/cpu/gelu.js +1 -1
  47. package/dist/ops/cpu/matMulGelu.js +1 -1
  48. package/dist/ops/cpu/matMulMul.js +1 -1
  49. package/dist/ops/cpu/mulDropout.js +1 -1
  50. package/dist/ops/cpu/normRMS.js +1 -1
  51. package/dist/ops/cpu/qkv.js +3 -3
  52. package/dist/ops/cpu/rope.js +5 -5
  53. package/dist/ops/cpu/scatterSub.js +14 -14
  54. package/dist/ops/fusedSoftmax.js +1 -1
  55. package/dist/ops/gatherSub.js +1 -1
  56. package/dist/ops/gelu.js +1 -1
  57. package/dist/ops/grads/attentionMask.js +1 -1
  58. package/dist/ops/grads/fusedSoftmax.js +4 -4
  59. package/dist/ops/grads/gelu.js +1 -1
  60. package/dist/ops/grads/matMulGelu.js +1 -1
  61. package/dist/ops/grads/normRMS.js +1 -1
  62. package/dist/ops/grads/qkv.js +1 -1
  63. package/dist/ops/grads/rope.js +1 -1
  64. package/dist/ops/matMulGelu.js +1 -1
  65. package/dist/ops/matMulMul.js +1 -1
  66. package/dist/ops/mulDrop.js +1 -1
  67. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  68. package/dist/ops/normRMS.js +1 -1
  69. package/dist/ops/qkv.js +1 -1
  70. package/dist/ops/rope.js +4 -4
  71. package/dist/ops/scatterSub.js +1 -1
  72. package/dist/ops/webgl/appendCache.js +1 -1
  73. package/dist/ops/webgl/attentionMask.js +1 -1
  74. package/dist/ops/webgl/fusedSoftmax.js +4 -4
  75. package/dist/ops/webgl/gatherSub.js +1 -1
  76. package/dist/ops/webgl/gelu.js +2 -2
  77. package/dist/ops/webgl/log.js +3 -3
  78. package/dist/ops/webgl/matMulGelu.js +17 -17
  79. package/dist/ops/webgl/matMulMul.js +1 -1
  80. package/dist/ops/webgl/mulDropout.js +1 -1
  81. package/dist/ops/webgl/normRMS.js +2 -2
  82. package/dist/ops/webgl/qkv.js +1 -1
  83. package/dist/ops/webgl/rope.js +1 -1
  84. package/dist/ops/webgl/scatterSub.js +1 -1
  85. package/dist/{ops-DqtYemmV.js → ops-C0sQEcPw.js} +78 -78
  86. package/dist/{random_width-CLMQG5Jn.js → random_width-DWzaOgrn.js} +22 -22
  87. package/dist/{range-DqYjKnuG.js → range-DYsrnfiy.js} +1 -1
  88. package/dist/{reciprocal-z49filta.js → reciprocal-CJQeasVa.js} +1 -1
  89. package/dist/{register_all_kernels-COt6wLD0.js → register_all_kernels-BfFCQAqs.js} +28 -28
  90. package/dist/{reshape-C45vIIRU.js → reshape-krWGKraP.js} +1 -1
  91. package/dist/{scatter_nd_util-qgtnviTE.js → scatter_nd_util-93ln7Hut.js} +3 -3
  92. package/dist/{selu_util-4QV_GXTB.js → selu_util-sntGesxr.js} +41 -41
  93. package/dist/{shared-ByfrGA97.js → shared-Ca6iDobD.js} +6 -6
  94. package/dist/{sin-9JBrfVaB.js → sin-D_h-qCSx.js} +1 -1
  95. package/dist/{softmax-DvMvui-_.js → softmax-fsdtf6JC.js} +1 -1
  96. package/dist/{split-DxrHrPFK.js → split-eiktj-6L.js} +4 -4
  97. package/dist/{stack-DgaoDmnF.js → stack-dfEEz2OY.js} +2 -2
  98. package/dist/{sum-BpcpxNEh.js → sum-BE_Irnim.js} +3 -3
  99. package/dist/{tensor-CDz5x1mP.js → tensor-Xyi595sG.js} +1 -1
  100. package/dist/{tensor2d-jO8JY5Jd.js → tensor2d-CPEkynbH.js} +1 -1
  101. package/dist/training/AdamExt.js +1 -1
  102. package/dist/training/DatasetBuilder.js +2 -2
  103. package/dist/training/FullTrainer.js +1 -1
  104. package/dist/training/Trainer.js +3 -3
  105. package/dist/training/sparseCrossEntropy.js +5 -5
  106. package/dist/utilities/dummy.js +2 -2
  107. package/dist/utilities/generate.js +3 -3
  108. package/dist/utilities/profile.js +1 -1
  109. package/dist/utilities/safetensors.js +2 -2
  110. package/dist/utilities/save.js +1 -1
  111. package/dist/utilities/weights.js +2 -2
  112. package/dist/{variable-CLVXjN7F.js → variable-wSS22xj5.js} +1 -1
  113. package/dist/{zeros-DUkkVccu.js → zeros-YJDE7oRb.js} +10 -10
  114. package/package.json +3 -3
  115. package/dist/utilities/load.js +0 -99
@@ -1,6 +1,6 @@
1
- import { k as N, ao as H, q as _, w as S, E as L, ap as te, aq as ne, al as se, an as re, ar as ie, as as oe, at as le, b as ue, au as fe, av as O } from "./index-CamYe_M8.js";
2
- import { r as ce } from "./reshape-C45vIIRU.js";
3
- import { s as ae } from "./sum-BpcpxNEh.js";
1
+ import { i as N, ao as H, o as _, q as S, E as L, ap as te, aq as ne, al as se, an as re, ar as ie, as as oe, at as le, a as ue, au as fe, av as O } from "./index-BAzbokzv.js";
2
+ import { r as ce } from "./reshape-krWGKraP.js";
3
+ import { s as ae } from "./sum-BE_Irnim.js";
4
4
  /**
5
5
  * @license
6
6
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -51,7 +51,7 @@ function X(e, t, n, s, r, u, i = !1, o = "channelsLast") {
51
51
  [c, g, a, p] = e;
52
52
  else
53
53
  throw new Error(`Unknown dataFormat ${o}`);
54
- const [l, h, , d] = t, [A, m] = T(n), [b, M] = T(s), f = G(l, b), w = G(h, M), { padInfo: E, outHeight: I, outWidth: x } = de(r, a, p, A, m, f, w, u, o), y = i ? d * g : d;
54
+ const [l, h, , d] = t, [A, m] = T(n), [b, k] = T(s), f = G(l, b), E = G(h, k), { padInfo: w, outHeight: I, outWidth: x } = de(r, a, p, A, m, f, E, u, o), y = i ? d * g : d;
55
55
  let $;
56
56
  return o === "channelsFirst" ? $ = [c, y, I, x] : o === "channelsLast" && ($ = [c, I, x, y]), {
57
57
  batchSize: c,
@@ -62,15 +62,15 @@ function X(e, t, n, s, r, u, i = !1, o = "channelsLast") {
62
62
  outHeight: I,
63
63
  outWidth: x,
64
64
  outChannels: y,
65
- padInfo: E,
65
+ padInfo: w,
66
66
  strideHeight: A,
67
67
  strideWidth: m,
68
68
  filterHeight: l,
69
69
  filterWidth: h,
70
70
  effectiveFilterHeight: f,
71
- effectiveFilterWidth: w,
71
+ effectiveFilterWidth: E,
72
72
  dilationHeight: b,
73
- dilationWidth: M,
73
+ dilationWidth: k,
74
74
  inShape: e,
75
75
  outShape: $,
76
76
  filterShape: t
@@ -84,22 +84,22 @@ function he(e, t, n, s, r, u = !1, i = "channelsLast", o) {
84
84
  [c, l, a, p, g] = e;
85
85
  else
86
86
  throw new Error(`Unknown dataFormat ${i}`);
87
- const [h, d, A, , m] = t, [b, M, f] = W(n), [w, E, I] = W(s), x = G(h, w), y = G(d, E), $ = G(A, I), { padInfo: C, outDepth: k, outHeight: D, outWidth: F } = me(r, a, p, g, b, M, f, x, y, $, o), V = u ? m * l : m;
87
+ const [h, d, A, , m] = t, [b, k, f] = W(n), [E, w, I] = W(s), x = G(h, E), y = G(d, w), $ = G(A, I), { padInfo: C, outDepth: M, outHeight: D, outWidth: F } = me(r, a, p, g, b, k, f, x, y, $, o), V = u ? m * l : m;
88
88
  let U;
89
- return i === "channelsFirst" ? U = [c, V, k, D, F] : i === "channelsLast" && (U = [c, k, D, F, V]), {
89
+ return i === "channelsFirst" ? U = [c, V, M, D, F] : i === "channelsLast" && (U = [c, M, D, F, V]), {
90
90
  batchSize: c,
91
91
  dataFormat: i,
92
92
  inDepth: a,
93
93
  inHeight: p,
94
94
  inWidth: g,
95
95
  inChannels: l,
96
- outDepth: k,
96
+ outDepth: M,
97
97
  outHeight: D,
98
98
  outWidth: F,
99
99
  outChannels: V,
100
100
  padInfo: C,
101
101
  strideDepth: b,
102
- strideHeight: M,
102
+ strideHeight: k,
103
103
  strideWidth: f,
104
104
  filterDepth: h,
105
105
  filterHeight: d,
@@ -107,8 +107,8 @@ function he(e, t, n, s, r, u = !1, i = "channelsLast", o) {
107
107
  effectiveFilterDepth: x,
108
108
  effectiveFilterHeight: y,
109
109
  effectiveFilterWidth: $,
110
- dilationDepth: w,
111
- dilationHeight: E,
110
+ dilationDepth: E,
111
+ dilationHeight: w,
112
112
  dilationWidth: I,
113
113
  inShape: e,
114
114
  outShape: U,
@@ -175,8 +175,8 @@ function me(e, t, n, s, r, u, i, o, c, a, p) {
175
175
  l = m[0], h = m[1], d = m[2];
176
176
  } else if (e === "same") {
177
177
  l = Math.ceil(t / r), h = Math.ceil(n / u), d = Math.ceil(s / i);
178
- const A = (l - 1) * r + o - t, m = (h - 1) * u + c - n, b = (d - 1) * i + a - s, M = Math.floor(A / 2), f = A - M, w = Math.floor(m / 2), E = m - w, I = Math.floor(b / 2), x = b - I;
179
- g = { top: w, bottom: E, left: I, right: x, front: M, back: f, type: "SAME" };
178
+ const A = (l - 1) * r + o - t, m = (h - 1) * u + c - n, b = (d - 1) * i + a - s, k = Math.floor(A / 2), f = A - k, E = Math.floor(m / 2), w = m - E, I = Math.floor(b / 2), x = b - I;
179
+ g = { top: E, bottom: w, left: I, right: x, front: k, back: f, type: "SAME" };
180
180
  } else
181
181
  throw Error(`Unknown padding parameter: ${e}`);
182
182
  return { padInfo: g, outDepth: l, outHeight: h, outWidth: d };
@@ -244,11 +244,11 @@ function Qe(e, t, n) {
244
244
  * limitations under the License.
245
245
  * =============================================================================
246
246
  */
247
- function we(e) {
247
+ function Ee(e) {
248
248
  const n = { x: S(e, "x", "sigmoid", "float32") };
249
249
  return L.runKernel(te, n);
250
250
  }
251
- const Ee = /* @__PURE__ */ _({ sigmoid_: we });
251
+ const we = /* @__PURE__ */ _({ sigmoid_: Ee });
252
252
  /**
253
253
  * @license
254
254
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -311,7 +311,7 @@ function ye(e, t) {
311
311
  const n = S(e, "x", "prelu"), s = S(t, "alpha", "prelu"), r = { x: n, alpha: s };
312
312
  return L.runKernel(re, r);
313
313
  }
314
- const ke = /* @__PURE__ */ _({ prelu_: ye });
314
+ const Me = /* @__PURE__ */ _({ prelu_: ye });
315
315
  /**
316
316
  * @license
317
317
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -328,11 +328,11 @@ const ke = /* @__PURE__ */ _({ prelu_: ye });
328
328
  * limitations under the License.
329
329
  * =============================================================================
330
330
  */
331
- function Me(e) {
331
+ function ke(e) {
332
332
  const n = { x: S(e, "x", "relu") };
333
333
  return L.runKernel(ie, n);
334
334
  }
335
- const Se = /* @__PURE__ */ _({ relu_: Me });
335
+ const Se = /* @__PURE__ */ _({ relu_: ke });
336
336
  /**
337
337
  * @license
338
338
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -413,11 +413,11 @@ function ze(e, t, n, s) {
413
413
  if (t === "relu6")
414
414
  return Ne(e);
415
415
  if (t === "prelu")
416
- return ke(e, n);
416
+ return Me(e, n);
417
417
  if (t === "leakyrelu")
418
418
  return xe(e, s);
419
419
  if (t === "sigmoid")
420
- return Ee(e);
420
+ return we(e);
421
421
  throw new Error(`Unknown fused activation ${t}.`);
422
422
  }
423
423
  const et = (e, t) => !(e > 0) || t === "linear";
@@ -600,43 +600,43 @@ function We(e, t, n, s, r, u, i, o, c) {
600
600
  for (let f = 0; f < e.length; ++f) {
601
601
  if (l.strides[f] === 0)
602
602
  throw Error(`strides[${f}] must be non-zero`);
603
- const w = !!(l.shrinkAxisMask & 1 << f), E = e[f];
604
- if (E === -1) {
605
- m.push(w ? 1 : -1);
603
+ const E = !!(l.shrinkAxisMask & 1 << f), w = e[f];
604
+ if (w === -1) {
605
+ m.push(E ? 1 : -1);
606
606
  continue;
607
607
  }
608
608
  const I = [l.beginMask & 1 << f, l.endMask & 1 << f], x = [
609
609
  l.strides[f] > 0 ? 0 : -1,
610
- l.strides[f] > 0 ? E : E - 1
610
+ l.strides[f] > 0 ? w : w - 1
611
611
  ];
612
- if (w && l.strides[f] <= 0)
612
+ if (E && l.strides[f] <= 0)
613
613
  throw Error("only stride 1 allowed on non-range indexing.");
614
614
  A = A && l.strides[f] === 1;
615
615
  const y = !!(l.beginMask & 1 << f && l.endMask & 1 << f);
616
616
  if (l.beginValid && l.endValid) {
617
- if (w) {
618
- const D = l.begin[f] < 0 ? E + l.begin[f] : l.begin[f];
619
- if (l.begin[f] = D, l.end[f] = l.begin[f] + 1, D < 0 || D >= E)
617
+ if (E) {
618
+ const D = l.begin[f] < 0 ? w + l.begin[f] : l.begin[f];
619
+ if (l.begin[f] = D, l.end[f] = l.begin[f] + 1, D < 0 || D >= w)
620
620
  throw Error(`slice index ${l.begin[f]} of dimension ${f} out of bounds.`);
621
621
  } else
622
- l.begin[f] = K(l.begin[f], 0, l.strides[f], E, I, x), l.end[f] = K(l.end[f], 1, l.strides[f], E, I, x);
623
- const k = l.strides[f] === 1 && l.begin[f] === 0 && l.end[f] === E;
624
- h = h && k, d = d && (f === 0 && l.strides[f] === 1 || k);
622
+ l.begin[f] = K(l.begin[f], 0, l.strides[f], w, I, x), l.end[f] = K(l.end[f], 1, l.strides[f], w, I, x);
623
+ const M = l.strides[f] === 1 && l.begin[f] === 0 && l.end[f] === w;
624
+ h = h && M, d = d && (f === 0 && l.strides[f] === 1 || M);
625
625
  } else
626
626
  h = h && l.strides[f] === 1 && y, d = d && (f === 0 && l.strides[f] === 1 || y);
627
627
  let $, C = !1;
628
- if (l.beginValid && l.endValid ? ($ = l.end[f] - l.begin[f], C = !0) : w ? ($ = 1, C = !0) : y && E >= 0 && (l.strides[f] < 0 ? $ = -E : $ = E, C = !0), C) {
629
- let k;
630
- $ === 0 || $ < 0 != l.strides[f] < 0 ? k = 0 : k = Math.trunc($ / l.strides[f]) + ($ % l.strides[f] !== 0 ? 1 : 0), m.push(k);
628
+ if (l.beginValid && l.endValid ? ($ = l.end[f] - l.begin[f], C = !0) : E ? ($ = 1, C = !0) : y && w >= 0 && (l.strides[f] < 0 ? $ = -w : $ = w, C = !0), C) {
629
+ let M;
630
+ $ === 0 || $ < 0 != l.strides[f] < 0 ? M = 0 : M = Math.trunc($ / l.strides[f]) + ($ % l.strides[f] !== 0 ? 1 : 0), m.push(M);
631
631
  } else
632
632
  m.push(-1);
633
633
  }
634
634
  for (let f = 0; f < l.finalShapeGatherIndices.length; ++f) {
635
- const w = l.finalShapeGatherIndices[f];
636
- w >= 0 ? b.push(m[w]) : w === v && b.push(1);
635
+ const E = l.finalShapeGatherIndices[f];
636
+ E >= 0 ? b.push(m[E]) : E === v && b.push(1);
637
637
  }
638
638
  return {
639
- finalShapeSparse: b.filter((f, w) => l.finalShapeGatherIndices[w] !== v),
639
+ finalShapeSparse: b.filter((f, E) => l.finalShapeGatherIndices[E] !== v),
640
640
  finalShape: b,
641
641
  isIdentity: h,
642
642
  sliceDim0: d,
@@ -706,7 +706,7 @@ const tt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
706
706
  */
707
707
  const nt = 1.7580993408473768, st = 1.0507009873554805;
708
708
  export {
709
- ke as A,
709
+ Me as A,
710
710
  Ne as B,
711
711
  We as C,
712
712
  Re as D,
@@ -733,7 +733,7 @@ export {
733
733
  P as t,
734
734
  Ve as u,
735
735
  Le as v,
736
- Ee as w,
736
+ we as w,
737
737
  $e as x,
738
738
  Se as y,
739
739
  xe as z
@@ -1,9 +1,9 @@
1
- import { k as qt, bc as _e, i as A, j as Ut, W as K, bd as et, aH as ht, be as gt, bf as at, _ as Ve, $ as st, ak as De, a$ as We, bg as $e, bh as ze, bi as Be, bj as Mt, ah as H, bk as Ot, bl as D, bm as Lt, bn as At, a8 as _t, bo as Vt, ag as Dt, bp as Wt, aT as $t, aU as zt, aW as Bt, aX as Gt, a9 as jt, bq as Zt, a_ as Ht, br as Kt, aY as Ge, am as je, bs as Xt, b2 as Ze, bt as He, p as Yt, u as Ke, t as Xe, bu as Jt, bv as wt, ap as Qt, aO as Ye, bw as te, bx as ee, by as ne, aG as pt, bz as se, bA as ot } from "./index-CamYe_M8.js";
2
- import { h as it, m as kt, k as Je, l as Qe, R as tn, v as en, n as nn, o as sn, p as on, q as rn, r as an, s as ln, t as cn, w as un, x as hn, y as fn, z as Nt, A as gn, B as dn, C as mn } from "./gpgpu_math-B_ycgZ4W.js";
3
- import { g as pn, a as In, e as wn, c as bn } from "./axis_util-CCNL7jea.js";
4
- import { b as xn } from "./broadcast_to-CddAF879.js";
5
- import { r as En } from "./reshape-C45vIIRU.js";
6
- import { p as Fn, q as yn, r as kn, u as Nn } from "./selu_util-4QV_GXTB.js";
1
+ import { i as qt, bc as _e, g as A, h as Ut, W as K, bd as et, aH as ht, be as gt, bf as at, _ as Ve, $ as st, ak as De, a$ as We, bg as $e, bh as ze, bi as Be, bj as Mt, ah as H, bk as Ot, bl as D, bm as Lt, bn as At, a8 as _t, bo as Vt, ag as Dt, bp as Wt, aT as $t, aU as zt, aW as Bt, aX as Gt, a9 as jt, bq as Zt, a_ as Ht, br as Kt, aY as Ge, am as je, bs as Xt, b2 as Ze, bt as He, p as Yt, u as Ke, t as Xe, bu as Jt, bv as wt, ap as Qt, aO as Ye, bw as te, bx as ee, by as ne, aG as pt, bz as se, bA as ot } from "./index-BAzbokzv.js";
2
+ import { h as it, m as kt, k as Je, l as Qe, R as tn, v as en, n as nn, o as sn, p as on, q as rn, r as an, s as ln, t as cn, w as un, x as hn, y as fn, z as Nt, A as gn, B as dn, C as mn } from "./gpgpu_math-CNslybmD.js";
3
+ import { g as pn, a as In, e as wn, c as bn } from "./axis_util-Bu4h7XWV.js";
4
+ import { b as xn } from "./broadcast_to-DARN-DBD.js";
5
+ import { r as En } from "./reshape-krWGKraP.js";
6
+ import { p as Fn, q as yn, r as kn, u as Nn } from "./selu_util-sntGesxr.js";
7
7
  import { g as Sn } from "./_commonjsHelpers-ByX85dGu.js";
8
8
  function vn(e, t) {
9
9
  for (var n = 0; n < t.length; n++) {
@@ -1,4 +1,4 @@
1
- import { q as o, w as t, E as c, a0 as a, a1 as e } from "./index-CamYe_M8.js";
1
+ import { o, q as t, E as c, a0 as a, a1 as e } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { q as r, w as f, E as e, S as i } from "./index-CamYe_M8.js";
1
+ import { o as r, q as f, E as e, S as i } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { q as p, w as i, E as a, H as c } from "./index-CamYe_M8.js";
1
+ import { o as p, q as i, E as a, F as c } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,9 +15,9 @@ import { q as p, w as i, E as a, H as c } from "./index-CamYe_M8.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function e(t, s, n = 0) {
19
- const o = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: n };
20
- return a.runKernel(c, o, r);
18
+ function e(t, s, o = 0) {
19
+ const n = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: o };
20
+ return a.runKernel(c, n, r);
21
21
  }
22
22
  const u = /* @__PURE__ */ p({ split_: e });
23
23
  export {
@@ -1,4 +1,4 @@
1
- import { q as e, x as c, k as n, E as k, P as i } from "./index-CamYe_M8.js";
1
+ import { o as e, w as c, i as n, E as i, P as k } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -19,7 +19,7 @@ function u(r, t = 0) {
19
19
  const s = c(r, "tensors", "stack", "string_or_numeric");
20
20
  n(s.length >= 1, () => "Pass at least one tensor to tf.stack"), s.length > 0 && n(t <= s[0].rank, () => "Axis must be <= rank of the tensor");
21
21
  const o = s, a = { axis: t };
22
- return k.runKernel(i, o, a);
22
+ return i.runKernel(k, o, a);
23
23
  }
24
24
  const l = /* @__PURE__ */ e({ stack_: u });
25
25
  export {
@@ -1,4 +1,4 @@
1
- import { q as e, w as u, N as c, E as l, O as m } from "./index-CamYe_M8.js";
1
+ import { o as e, q as u, L as c, E as l, N as m } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,10 +15,10 @@ import { q as e, w as u, N as c, E as l, O as m } from "./index-CamYe_M8.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function i(t, n = null, o = !1) {
18
+ function i(t, o = null, n = !1) {
19
19
  let s = u(t, "x", "sum");
20
20
  s.dtype === "bool" && (s = c(s, "int32"));
21
- const r = { x: s }, a = { axis: n, keepDims: o };
21
+ const r = { x: s }, a = { axis: o, keepDims: n };
22
22
  return l.runKernel(m, r, a);
23
23
  }
24
24
  const f = /* @__PURE__ */ e({ sum_: i });
@@ -1,4 +1,4 @@
1
- import { Q as t, U as a } from "./index-CamYe_M8.js";
1
+ import { O as t, Q as a } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { V as t, Q as s, U as a } from "./index-CamYe_M8.js";
1
+ import { V as t, O as s, Q as a } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { A as r, b as c, f as h, s as g, e as o } from "../index-CamYe_M8.js";
1
+ import { A as r, a as c, s as h, b as g, e as o } from "../index-BAzbokzv.js";
2
2
  class u extends r {
3
3
  constructor(t, e, s, a, i) {
4
4
  super(t, e, s, a), this.config = i, this.startLearningRate = t;
@@ -1,5 +1,5 @@
1
- import { t as u } from "../index-CamYe_M8.js";
2
- import { d as z, i as f } from "../dataset-BFFipD1c.js";
1
+ import { t as u } from "../index-BAzbokzv.js";
2
+ import { d as z, i as f } from "../dataset-pgqp-YfL.js";
3
3
  import "../index-Tf7vU29b.js";
4
4
  /**
5
5
  * @license
@@ -1,7 +1,7 @@
1
1
  import { generateText as T } from "../utilities/generate.js";
2
2
  import L from "./Trainer.js";
3
3
  import x from "./Evaluator.js";
4
- import { a as h } from "../index-CamYe_M8.js";
4
+ import { d as h } from "../index-BAzbokzv.js";
5
5
  import y from "../utilities/profile.js";
6
6
  const D = {
7
7
  desiredLoss: 0.01,
@@ -1,8 +1,8 @@
1
1
  import { DatasetBuilder as g, flattenTokens as m, PAGE_FACTOR as u } from "./DatasetBuilder.js";
2
2
  import f from "./AdamExt.js";
3
- import { t as y, v as z, a as c } from "../index-CamYe_M8.js";
4
- import { n as S } from "../norm-B9dQTFYn.js";
5
- import { z as p } from "../zeros-DUkkVccu.js";
3
+ import { t as y, v as z, d as c } from "../index-BAzbokzv.js";
4
+ import { n as S } from "../norm-CzltS9Fz.js";
5
+ import { z as p } from "../zeros-YJDE7oRb.js";
6
6
  class R {
7
7
  constructor(t, e, s = 1e-3) {
8
8
  this.tokenizer = e, this.model = t, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new g(e, t.config.gpt.blockSize);
@@ -1,9 +1,9 @@
1
1
  import { gatherSub as L } from "../ops/gatherSub.js";
2
2
  import { scatterSub as y } from "../ops/scatterSub.js";
3
- import { e as u, c as i, z as S, t as f, s as G } from "../index-CamYe_M8.js";
4
- import { s as v } from "../softmax-DvMvui-_.js";
5
- import { m as z } from "../max-JBBv7aUf.js";
6
- import { l as k } from "../log_sum_exp-CV_5-TTu.js";
3
+ import { e as u, l as i, z as S, t as f, b as G } from "../index-BAzbokzv.js";
4
+ import { s as v } from "../softmax-fsdtf6JC.js";
5
+ import { m as z } from "../max-DtlIuVeW.js";
6
+ import { l as k } from "../log_sum_exp-YEo2h3gb.js";
7
7
  function F(a, s) {
8
8
  return f(() => {
9
9
  const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
@@ -23,7 +23,7 @@ function j() {
23
23
  (s, e, m) => {
24
24
  const o = s.shape[s.shape.length - 1], n = s.shape.slice(0, -1).reduce((d, c) => d * c, 1), t = s.reshape([n, o]), r = e.reshape([n]).cast("int32"), h = F(t, r);
25
25
  return m([t, r]), t.dispose(), r.dispose(), { value: h, gradFunc: (d, c) => f(() => {
26
- const l = c[0], b = c[1], x = v(l), C = y(x, b, d), E = S(e);
26
+ const g = c[0], b = c[1], x = v(g), C = y(x, b, d), E = S(e);
27
27
  return [C.reshape(s.shape), E];
28
28
  }) };
29
29
  }
@@ -1,5 +1,5 @@
1
- import { m as f, v as S, e as w } from "../index-CamYe_M8.js";
2
- import { z as i } from "../zeros-DUkkVccu.js";
1
+ import { m as f, v as S, e as w } from "../index-BAzbokzv.js";
2
+ import { z as i } from "../zeros-YJDE7oRb.js";
3
3
  async function P(s) {
4
4
  const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
5
5
  await e.data(), e.dispose(), n && n.dispose(), t.dispose();
@@ -1,6 +1,6 @@
1
- import { t as y } from "../index-CamYe_M8.js";
2
- import { t as x } from "../tensor2d-jO8JY5Jd.js";
3
- import { c as f } from "../concat-XOK9ANZu.js";
1
+ import { t as y } from "../index-BAzbokzv.js";
2
+ import { t as x } from "../tensor2d-CPEkynbH.js";
3
+ import { c as f } from "../concat-5aPGqw3Z.js";
4
4
  async function A(o, r, a, c, T) {
5
5
  if (c <= 0)
6
6
  throw new Error("Length must be a positive integer");
@@ -1,4 +1,4 @@
1
- import { m as a } from "../index-CamYe_M8.js";
1
+ import { m as a } from "../index-BAzbokzv.js";
2
2
  const m = 1024 * 1024;
3
3
  class l {
4
4
  log = /* @__PURE__ */ new Map();
@@ -1,5 +1,5 @@
1
- import "../index-CamYe_M8.js";
2
- import { t as y } from "../tensor-CDz5x1mP.js";
1
+ import "../index-BAzbokzv.js";
2
+ import { t as y } from "../tensor-Xyi595sG.js";
3
3
  function l(t) {
4
4
  if (t === "float32") return "F32";
5
5
  if (t === "int32") return "I32";
@@ -1,7 +1,7 @@
1
1
  import { j as p } from "../jszip.min-CjP2V1VV.js";
2
2
  import b from "../tokeniser/CharTokeniser.js";
3
3
  import { save_safetensors as l } from "./safetensors.js";
4
- import { VERSION as y } from "./load.js";
4
+ import { VERSION as y } from "../loader/load.js";
5
5
  async function N(e, a, n) {
6
6
  const f = n?.includeLog ?? !0, s = /* @__PURE__ */ new Map();
7
7
  e.saveWeights(s);
@@ -1,5 +1,5 @@
1
- import "../index-CamYe_M8.js";
2
- import { t as p } from "../tensor-CDz5x1mP.js";
1
+ import "../index-BAzbokzv.js";
2
+ import { t as p } from "../tensor-Xyi595sG.js";
3
3
  function h(n) {
4
4
  const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
5
5
  let t = 0;
@@ -1,4 +1,4 @@
1
- import { E as i } from "./index-CamYe_M8.js";
1
+ import { E as i } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { q as m, w as r, Z as l, E as c, _ as i, y as p, $ as u, i as f } from "./index-CamYe_M8.js";
1
+ import { o as m, q as r, Z as l, E as c, _ as i, x as p, $ as u, g as x } from "./index-BAzbokzv.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { q as m, w as r, Z as l, E as c, _ as i, y as p, $ as u, i as f } from "
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function x(a, e) {
19
- const s = r(a, "real", "complex"), o = r(e, "imag", "complex");
20
- l(s.shape, o.shape, `real and imag shapes, ${s.shape} and ${o.shape}, must match in call to tf.complex().`);
21
- const n = { real: s, imag: o };
18
+ function f(a, e) {
19
+ const o = r(a, "real", "complex"), s = r(e, "imag", "complex");
20
+ l(o.shape, s.shape, `real and imag shapes, ${o.shape} and ${s.shape}, must match in call to tf.complex().`);
21
+ const n = { real: o, imag: s };
22
22
  return c.runKernel(i, n);
23
23
  }
24
- const g = /* @__PURE__ */ m({ complex_: x });
24
+ const g = /* @__PURE__ */ m({ complex_: f });
25
25
  /**
26
26
  * @license
27
27
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -40,11 +40,11 @@ const g = /* @__PURE__ */ m({ complex_: x });
40
40
  */
41
41
  function t(a, e = "float32") {
42
42
  if (p(a), e === "complex64") {
43
- const o = t(a, "float32"), n = t(a, "float32");
44
- return g(o, n);
43
+ const s = t(a, "float32"), n = t(a, "float32");
44
+ return g(s, n);
45
45
  }
46
- const s = u(f(a), e);
47
- return c.makeTensor(s, a, e);
46
+ const o = u(x(a), e);
47
+ return c.makeTensor(o, a, e);
48
48
  }
49
49
  export {
50
50
  g as c,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.6.1",
3
+ "version": "0.6.2",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",
@@ -49,10 +49,10 @@
49
49
  },
50
50
  "dependencies": {
51
51
  "@dsnp/parquetjs": "^1.8.7",
52
+ "@tensorflow/tfjs": "^4.22.0",
52
53
  "eventemitter3": "^5.0.1",
53
54
  "jszip": "^3.10.1",
54
55
  "papaparse": "^5.5.3",
55
- "pdfjs-dist": "^5.4.149",
56
- "@tensorflow/tfjs": "^4.22.0"
56
+ "pdfjs-dist": "^5.4.149"
57
57
  }
58
58
  }
@@ -1,99 +0,0 @@
1
- import { j as v } from "../jszip.min-CjP2V1VV.js";
2
- import { importWeights as F } from "./weights.js";
3
- import h from "../tokeniser/CharTokeniser.js";
4
- import b from "../NanoGPTModel.js";
5
- import { dummyPassAsync as u } from "./dummy.js";
6
- import { d as k } from "../index-CamYe_M8.js";
7
- import j from "../tokeniser/bpe.js";
8
- import { load_safetensors as N } from "./safetensors.js";
9
- const I = 2;
10
- async function O(t) {
11
- const s = await fetch(t);
12
- if (!s.ok)
13
- throw new Error(`Failed to fetch ${t}: ${s.statusText}`);
14
- return s.arrayBuffer();
15
- }
16
- async function S(t) {
17
- const s = /* @__PURE__ */ new Map(), r = await t.file("manifest.json")?.async("string");
18
- if (!r)
19
- throw new Error("Manifest file not found in the zip archive");
20
- const p = JSON.parse(r);
21
- for (const [o, a] of Object.entries(p.weightSpec))
22
- s.set(o, { spec: a, data: new Float32Array() });
23
- const e = await t.file("tokeniser.json")?.async("string");
24
- if (!e)
25
- throw new Error("Tokeniser file not found in the zip archive");
26
- const i = JSON.parse(e), c = (i.type ?? "char") === "char" ? new h(i.vocab) : new j(i.vocab, i.merges), d = /* @__PURE__ */ new Map();
27
- for (const o of Object.keys(t.files))
28
- if (o.endsWith(".bin")) {
29
- const a = o.replace(".bin", ""), w = await t.file(o).async("arraybuffer"), g = new Float32Array(w), l = s.get(a) || { spec: [], data: new Float32Array() };
30
- l.data = g, s.set(a, l);
31
- const n = await F(l);
32
- d.set(a, n);
33
- }
34
- k();
35
- const f = new b(p.config);
36
- await u(f), f.loadWeights(d);
37
- const m = await t.file("log.json")?.async("string");
38
- if (m)
39
- try {
40
- const o = JSON.parse(m);
41
- f.log = o;
42
- } catch (o) {
43
- throw console.error("Error parsing training log:", o), new Error(`Failed to parse training log: ${o}`);
44
- }
45
- return { model: f, tokeniser: c };
46
- }
47
- async function R(t) {
48
- const s = typeof t == "string" ? await O(t) : t, r = await v.loadAsync(s);
49
- if (r.file("manifest.json"))
50
- return S(r);
51
- {
52
- const p = await r.file("config.json")?.async("string");
53
- if (!p)
54
- throw new Error("Config file not found in the zip archive");
55
- const e = JSON.parse(p), i = {
56
- vocabSize: e.vocab_size,
57
- blockSize: e.block_size,
58
- nLayer: e.num_hidden_layers,
59
- nHead: e.num_attention_heads,
60
- nEmbed: e.hidden_size,
61
- dropout: e.dropout,
62
- biasInLinear: e.biasInLinear,
63
- biasInLayerNorm: e.biasInLayerNorm,
64
- mlpFactor: e.mlpFactor,
65
- useRope: e.useRope
66
- }, y = await r.file("tokeniser.json")?.async("string");
67
- if (!y)
68
- throw new Error("Tokeniser file not found in the zip archive");
69
- const c = JSON.parse(y), f = (c.type ?? "char") === "char" ? new h(c.vocab) : new j(c.vocab, c.merges), m = await N(await r.file("model.safetensors").async("arraybuffer")), o = /* @__PURE__ */ new Map();
70
- for (const [n, E] of Object.entries(m))
71
- o.set(n, [E]);
72
- k();
73
- const a = new b(i);
74
- await u(a), a.loadWeights(o);
75
- const w = await r.file("meta.json")?.async("string");
76
- let g;
77
- if (w)
78
- try {
79
- const n = JSON.parse(w);
80
- n.name && (g = n.name);
81
- } catch (n) {
82
- console.error("Error parsing meta file:", n);
83
- }
84
- const l = await r.file("log.json")?.async("string");
85
- if (l)
86
- try {
87
- const n = JSON.parse(l);
88
- a.log = n;
89
- } catch (n) {
90
- throw console.error("Error parsing training log:", n), new Error(`Failed to parse training log: ${n}`);
91
- }
92
- return { model: a, tokeniser: f, name: g };
93
- }
94
- }
95
- export {
96
- I as VERSION,
97
- R as loadModel,
98
- S as loadOldModel
99
- };