@genai-fi/nanogpt 0.6.3 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/dist/Generator.js +17 -13
  2. package/dist/NanoGPTModel.d.ts +2 -2
  3. package/dist/NanoGPTModel.js +104 -136
  4. package/dist/{RealDiv-BYViZwhN.js → RealDiv-CVYNbZxu.js} +30 -29
  5. package/dist/{Reshape-t7Kcikjk.js → Reshape-CEsEp0AI.js} +5 -5
  6. package/dist/Reshape-Do18N3gO.js +30 -0
  7. package/dist/TeachableLLM.js +9 -5
  8. package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-ccLBFiZi.js} +4 -4
  9. package/dist/{axis_util-Bu4h7XWV.js → axis_util-5DTW2tFV.js} +3 -3
  10. package/dist/backend.d.ts +1 -0
  11. package/dist/backend.js +7 -0
  12. package/dist/backend_util-C9Ut8n0Q.js +749 -0
  13. package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-Ba9h_8DO.js} +2 -2
  14. package/dist/{concat-5aPGqw3Z.js → concat-CbXTetof.js} +8 -8
  15. package/dist/{dataset-pgqp-YfL.js → dataset-U3PrjwgU.js} +7 -7
  16. package/dist/{dropout-Bciw46HT.js → dropout-DPfPgWWe.js} +8 -8
  17. package/dist/{gather-DjyCjmOD.js → gather-Bbh8DHhM.js} +1 -1
  18. package/dist/gelu-BFwVnd1r.js +26 -0
  19. package/dist/gpgpu_math-DffelNS-.js +2371 -0
  20. package/dist/index-DYD_yPa-.js +12076 -0
  21. package/dist/{index-BAzbokzv.js → index-UdZhlibC.js} +414 -398
  22. package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CXDy3EN7.js} +31 -30
  23. package/dist/layers/BaseLayer.js +2 -2
  24. package/dist/layers/CausalSelfAttention.js +8 -8
  25. package/dist/layers/MLP.js +5 -5
  26. package/dist/layers/RMSNorm.js +3 -3
  27. package/dist/layers/RoPECache.js +4 -4
  28. package/dist/layers/TiedEmbedding.js +5 -5
  29. package/dist/layers/TransformerBlock.js +1 -1
  30. package/dist/loader/loadTransformers.js +1 -1
  31. package/dist/loader/oldZipLoad.js +11 -7
  32. package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-BnmCkHWl.js} +16 -16
  33. package/dist/main.d.ts +13 -0
  34. package/dist/main.js +44 -23
  35. package/dist/{mat_mul-7121rsJk.js → mat_mul-dwmZz69e.js} +4 -4
  36. package/dist/{max-DtlIuVeW.js → max-ByjEGoFx.js} +3 -3
  37. package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-IGPBp6h9.js} +1 -1
  38. package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
  39. package/dist/{ones-BBlSRqn1.js → ones-C8Mfln6-.js} +2 -2
  40. package/dist/ops/adamAdjust.d.ts +2 -0
  41. package/dist/ops/adamAdjust.js +9 -0
  42. package/dist/ops/adamMoments.d.ts +2 -0
  43. package/dist/ops/adamMoments.js +9 -0
  44. package/dist/ops/appendCache.js +3 -3
  45. package/dist/ops/attentionMask.js +1 -1
  46. package/dist/ops/cpu/adamAdjust.d.ts +1 -0
  47. package/dist/ops/cpu/adamAdjust.js +18 -0
  48. package/dist/ops/cpu/adamMoments.d.ts +1 -0
  49. package/dist/ops/cpu/adamMoments.js +16 -0
  50. package/dist/ops/cpu/appendCache.js +8 -8
  51. package/dist/ops/cpu/attentionMask.js +9 -9
  52. package/dist/ops/cpu/fusedSoftmax.js +17 -11
  53. package/dist/ops/cpu/gatherSub.js +7 -7
  54. package/dist/ops/cpu/gelu.js +13 -13
  55. package/dist/ops/cpu/matMulGelu.js +36 -24
  56. package/dist/ops/cpu/matMulMul.js +14 -8
  57. package/dist/ops/cpu/mulDropout.js +9 -3
  58. package/dist/ops/cpu/normRMS.js +5 -5
  59. package/dist/ops/cpu/qkv.js +3 -3
  60. package/dist/ops/cpu/rope.js +5 -5
  61. package/dist/ops/cpu/scatterSub.js +11 -11
  62. package/dist/ops/fusedSoftmax.js +1 -1
  63. package/dist/ops/gatherSub.js +1 -1
  64. package/dist/ops/gelu.js +2 -2
  65. package/dist/ops/grads/attentionMask.js +1 -1
  66. package/dist/ops/grads/fusedSoftmax.js +2 -2
  67. package/dist/ops/grads/gelu.js +3 -24
  68. package/dist/ops/grads/matMulGelu.js +5 -5
  69. package/dist/ops/grads/normRMS.js +6 -6
  70. package/dist/ops/grads/qkv.js +1 -1
  71. package/dist/ops/grads/rope.js +3 -3
  72. package/dist/ops/matMulGelu.js +1 -1
  73. package/dist/ops/matMulMul.js +1 -1
  74. package/dist/ops/mulDrop.js +1 -1
  75. package/dist/ops/normRMS.js +1 -1
  76. package/dist/ops/qkv.js +1 -1
  77. package/dist/ops/rope.js +4 -4
  78. package/dist/ops/scatterSub.js +1 -1
  79. package/dist/ops/webgl/adamAdjust.d.ts +1 -0
  80. package/dist/ops/webgl/adamAdjust.js +50 -0
  81. package/dist/ops/webgl/adamMoments.d.ts +1 -0
  82. package/dist/ops/webgl/adamMoments.js +38 -0
  83. package/dist/ops/webgl/appendCache.js +1 -1
  84. package/dist/ops/webgl/attentionMask.js +1 -1
  85. package/dist/ops/webgl/fusedSoftmax.js +4 -4
  86. package/dist/ops/webgl/gatherSub.js +8 -8
  87. package/dist/ops/webgl/gelu.js +2 -2
  88. package/dist/ops/webgl/log.js +5 -5
  89. package/dist/ops/webgl/matMulGelu.js +17 -17
  90. package/dist/ops/webgl/matMulMul.js +1 -1
  91. package/dist/ops/webgl/mulDropout.js +4 -4
  92. package/dist/ops/webgl/normRMS.js +2 -2
  93. package/dist/ops/webgl/qkv.js +1 -1
  94. package/dist/ops/webgl/rope.js +1 -1
  95. package/dist/ops/webgl/scatterSub.js +1 -1
  96. package/dist/ops/webgpu/adamAdjust.d.ts +1 -0
  97. package/dist/ops/webgpu/adamAdjust.js +52 -0
  98. package/dist/ops/webgpu/adamMoments.d.ts +1 -0
  99. package/dist/ops/webgpu/adamMoments.js +51 -0
  100. package/dist/ops/webgpu/appendCache.d.ts +1 -0
  101. package/dist/ops/webgpu/appendCache.js +57 -0
  102. package/dist/ops/webgpu/attentionMask.d.ts +1 -0
  103. package/dist/ops/webgpu/attentionMask.js +65 -0
  104. package/dist/ops/webgpu/gatherSub.d.ts +1 -0
  105. package/dist/ops/webgpu/gatherSub.js +52 -0
  106. package/dist/ops/webgpu/gelu.d.ts +14 -0
  107. package/dist/ops/webgpu/gelu.js +87 -0
  108. package/dist/ops/webgpu/index.d.ts +0 -0
  109. package/dist/ops/webgpu/index.js +11 -0
  110. package/dist/ops/webgpu/normRMS.d.ts +1 -0
  111. package/dist/ops/webgpu/normRMS.js +41 -0
  112. package/dist/ops/webgpu/normRMSGrad.d.ts +1 -0
  113. package/dist/ops/webgpu/normRMSGrad.js +128 -0
  114. package/dist/ops/webgpu/qkv.d.ts +1 -0
  115. package/dist/ops/webgpu/qkv.js +57 -0
  116. package/dist/ops/webgpu/rope.d.ts +1 -0
  117. package/dist/ops/webgpu/rope.js +69 -0
  118. package/dist/ops/webgpu/scatterSub.d.ts +1 -0
  119. package/dist/ops/webgpu/scatterSub.js +38 -0
  120. package/dist/ops/webgpu/utils/reductions.d.ts +9 -0
  121. package/dist/ops/webgpu/utils/reductions.js +68 -0
  122. package/dist/{ops-C0sQEcPw.js → ops-aRTXR2Sr.js} +433 -508
  123. package/dist/{random_width-DWzaOgrn.js → random_width-DbSpgl4o.js} +144 -144
  124. package/dist/{range-DYsrnfiy.js → range-D9CZhVlR.js} +1 -1
  125. package/dist/{reciprocal-CJQeasVa.js → reciprocal-CGB48wZB.js} +1 -1
  126. package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-DnbAyBXt.js} +203 -200
  127. package/dist/{reshape-krWGKraP.js → reshape-BR0eoLYN.js} +1 -1
  128. package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-OjyAxku2.js} +3 -3
  129. package/dist/{selu_util-sntGesxr.js → selu_util-Ce6pu9IM.js} +44 -44
  130. package/dist/{shared-Ca6iDobD.js → shared-Czipaeb6.js} +541 -606
  131. package/dist/shared-DS5waSIY.js +69 -0
  132. package/dist/{sin-D_h-qCSx.js → sin-CiBxrDqX.js} +1 -1
  133. package/dist/slice-BHbDHObE.js +28 -0
  134. package/dist/{softmax-fsdtf6JC.js → softmax-JMEIUo2J.js} +1 -1
  135. package/dist/{split-eiktj-6L.js → split-CRU0PjVV.js} +4 -4
  136. package/dist/{stack-dfEEz2OY.js → stack-ikk2Y8_P.js} +2 -2
  137. package/dist/{sum-BE_Irnim.js → sum-NLYbiDag.js} +3 -3
  138. package/dist/{tensor-Xyi595sG.js → tensor-Do9PKbIE.js} +1 -1
  139. package/dist/{tensor2d-CPEkynbH.js → tensor2d-CWHxHpLh.js} +1 -1
  140. package/dist/training/Adam.d.ts +22 -0
  141. package/dist/training/Adam.js +93 -0
  142. package/dist/training/AdamExt.d.ts +1 -1
  143. package/dist/training/AdamExt.js +13 -12
  144. package/dist/training/DatasetBuilder.js +2 -2
  145. package/dist/training/Evaluator.js +1 -1
  146. package/dist/training/FullTrainer.js +27 -27
  147. package/dist/training/Trainer.d.ts +5 -6
  148. package/dist/training/Trainer.js +54 -55
  149. package/dist/training/sparseCrossEntropy.d.ts +0 -4
  150. package/dist/training/sparseCrossEntropy.js +7 -7
  151. package/dist/utilities/arrayClose.d.ts +1 -0
  152. package/dist/utilities/arrayClose.js +11 -0
  153. package/dist/utilities/dummy.js +19 -19
  154. package/dist/utilities/generate.js +15 -16
  155. package/dist/utilities/multinomialCPU.d.ts +2 -0
  156. package/dist/utilities/multinomialCPU.js +13 -0
  157. package/dist/utilities/performance.d.ts +2 -0
  158. package/dist/utilities/performance.js +16 -0
  159. package/dist/utilities/profile.d.ts +1 -0
  160. package/dist/utilities/profile.js +9 -6
  161. package/dist/utilities/safetensors.js +2 -2
  162. package/dist/utilities/weights.js +2 -2
  163. package/dist/{variable-wSS22xj5.js → variable-BTBkayv_.js} +1 -1
  164. package/dist/webgpu_program-WaoMq-WD.js +548 -0
  165. package/dist/webgpu_util-DhSeP4b6.js +80 -0
  166. package/dist/{zeros-YJDE7oRb.js → zeros-DnPT2nD4.js} +10 -10
  167. package/package.json +2 -1
  168. package/dist/gpgpu_math-CNslybmD.js +0 -3115
  169. package/dist/norm-CzltS9Fz.js +0 -86
@@ -1,5 +1,6 @@
1
- import { h as B, g as G, ak as K, _ as W, al as z, am as V, ac as N, an as F, u as S } from "./index-BAzbokzv.js";
2
- import { u as O, f as Y, h as v } from "./gpgpu_math-CNslybmD.js";
1
+ import { k as B, j as G, am as K, a6 as W, an as z, ao as V, ac as N, ap as F, u as S } from "./index-UdZhlibC.js";
2
+ import { u as O, f as Y } from "./gpgpu_math-DffelNS-.js";
3
+ import { f as v } from "./backend_util-C9Ut8n0Q.js";
3
4
  /**
4
5
  * @license
5
6
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -22,7 +23,7 @@ function H(t, e) {
22
23
  function Z(t, e) {
23
24
  return e === 1 ? [t] : H(t, e);
24
25
  }
25
- function ie(t, e) {
26
+ function pe(t, e) {
26
27
  if (t === 1)
27
28
  return "rc";
28
29
  let s = "";
@@ -62,11 +63,11 @@ class q {
62
63
  `;
63
64
  }
64
65
  }
65
- const T = "if (isnan(x)) return x;", M = "return x;", pe = "return abs(x);", j = "return (x >= 0.0) ? x : (exp(x) - 1.0);", J = T + `
66
+ const T = "if (isnan(x)) return x;", j = "return x;", de = "return abs(x);", M = "return (x >= 0.0) ? x : (exp(x) - 1.0);", J = T + `
66
67
  return (x < 0.0) ? 0.0 : x;
67
68
  `, Q = T + `
68
69
  return (x < 0.0) ? 0.0 : min(6.0, x);
69
- `, de = "return x;", X = "return 1.0 / (1.0 + exp(-1.0 * x));";
70
+ `, he = "return x;", X = "return 1.0 / (1.0 + exp(-1.0 * x));";
70
71
  /**
71
72
  * @license
72
73
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -145,7 +146,7 @@ class ne {
145
146
  * limitations under the License.
146
147
  * =============================================================================
147
148
  */
148
- const he = `
149
+ const fe = `
149
150
  if (isnan(a)) return a;
150
151
  if (isnan(b)) return b;
151
152
  `;
@@ -180,7 +181,7 @@ class b {
180
181
  * limitations under the License.
181
182
  * =============================================================================
182
183
  */
183
- const fe = `
184
+ const xe = `
184
185
  result.r = isNaN.r ? NAN : result.r;
185
186
  result.g = isNaN.g ? NAN : result.g;
186
187
  result.b = isNaN.b ? NAN : result.b;
@@ -268,7 +269,7 @@ function P(t) {
268
269
  const { inputs: e, backend: s } = t, { x: r } = e;
269
270
  return s.incRef(r.dataId), { dataId: r.dataId, shape: r.shape, dtype: r.dtype };
270
271
  }
271
- const xe = {
272
+ const ge = {
272
273
  kernelName: K,
273
274
  backendName: "webgl",
274
275
  kernelFunc: P
@@ -293,7 +294,7 @@ function L(t) {
293
294
  const { inputs: e, backend: s } = t, { real: r, imag: u } = e, n = s.makeTensorInfo(r.shape, "complex64"), o = s.texData.get(n.dataId), i = P({ inputs: { x: r }, backend: s }), a = P({ inputs: { x: u }, backend: s });
294
295
  return o.complexTensorInfos = { real: i, imag: a }, n;
295
296
  }
296
- const ge = {
297
+ const me = {
297
298
  kernelName: W,
298
299
  backendName: "webgl",
299
300
  kernelFunc: L
@@ -322,7 +323,7 @@ function oe(t) {
322
323
  const { inputs: e, backend: s, attrs: r } = t, { x: u } = e, { alpha: n } = r, o = s.makeTensorInfo([], "float32", V(n, "float32")), i = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") ? new E(k, u.shape, o.shape) : new b(w, u.shape, o.shape), a = s.runWebGLProgram(i, [u, o], "float32");
323
324
  return s.disposeIntermediateTensorInfo(o), a;
324
325
  }
325
- const me = {
326
+ const be = {
326
327
  kernelName: z,
327
328
  backendName: "webgl",
328
329
  kernelFunc: oe
@@ -351,7 +352,7 @@ function ue(t) {
351
352
  const { inputs: e, backend: s } = t, { x: r, alpha: u } = e, n = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") ? new E(U, r.shape, u.shape) : new b(R, r.shape, u.shape);
352
353
  return s.runWebGLProgram(n, [r, u], "float32");
353
354
  }
354
- const be = {
355
+ const Ne = {
355
356
  kernelName: F,
356
357
  backendName: "webgl",
357
358
  kernelFunc: ue
@@ -372,8 +373,8 @@ const be = {
372
373
  * limitations under the License.
373
374
  * =============================================================================
374
375
  */
375
- const Ne = "if (isnan(x)) return x;";
376
- function Oe({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: s, dtype: r }) {
376
+ const Oe = "if (isnan(x)) return x;";
377
+ function ye({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: s, dtype: r }) {
377
378
  return ({ inputs: u, backend: n }) => {
378
379
  const { x: o } = u, i = n, a = r || o.dtype;
379
380
  if (i.shouldExecuteOnCPU([o]) && s != null) {
@@ -385,7 +386,7 @@ function Oe({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: s, dtype: r }) {
385
386
  return c ? l = new ne(o.shape, e) : l = new q(o.shape, t), i.runWebGLProgram(l, [o], a);
386
387
  };
387
388
  }
388
- function ye({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: s = !1, supportsComplex: r = !1, cpuKernelImpl: u, dtype: n }) {
389
+ function Ie({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: s = !1, supportsComplex: r = !1, cpuKernelImpl: u, dtype: n }) {
389
390
  return ({ inputs: o, backend: i }) => {
390
391
  const { a, b: c } = o, l = i;
391
392
  if (r && a.dtype === "complex64") {
@@ -422,13 +423,13 @@ function ye({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: s = !1, suppor
422
423
  return y ? I = new E(e, a.shape, c.shape, s) : I = new b(t, a.shape, c.shape), l.runWebGLProgram(I, [a, c], d);
423
424
  };
424
425
  }
425
- function Ie(t, e = !1) {
426
+ function Ae(t, e = !1) {
426
427
  if (t === "linear")
427
- return e ? ee : M;
428
+ return e ? ee : j;
428
429
  if (t === "relu")
429
430
  return e ? se : J;
430
431
  if (t === "elu")
431
- return e ? te : j;
432
+ return e ? te : M;
432
433
  if (t === "relu6")
433
434
  return e ? ae : Q;
434
435
  if (t === "prelu")
@@ -440,26 +441,26 @@ function Ie(t, e = !1) {
440
441
  throw new Error(`Activation ${t} has not been implemented for the WebGL backend.`);
441
442
  }
442
443
  export {
443
- pe as A,
444
+ de as A,
444
445
  E as B,
445
446
  T as C,
446
447
  ne as U,
447
448
  Z as a,
448
- ye as b,
449
- ie as c,
450
- de as d,
449
+ Ie as b,
450
+ pe as c,
451
+ he as d,
451
452
  q as e,
452
453
  L as f,
453
454
  H as g,
454
455
  b as h,
455
456
  P as i,
456
- he as j,
457
- fe as k,
458
- Ne as l,
459
- Ie as m,
460
- ge as n,
461
- xe as o,
462
- me as p,
463
- be as q,
464
- Oe as u
457
+ fe as j,
458
+ xe as k,
459
+ Oe as l,
460
+ Ae as m,
461
+ me as n,
462
+ ge as o,
463
+ be as p,
464
+ Ne as q,
465
+ ye as u
465
466
  };
@@ -1,5 +1,5 @@
1
- import { T as g, l as p, e as o, n as v } from "../index-BAzbokzv.js";
2
- import { v as _ } from "../variable-wSS22xj5.js";
1
+ import { T as g, y as p, e as o, A as v } from "../index-UdZhlibC.js";
2
+ import { v as _ } from "../variable-BTBkayv_.js";
3
3
  class M {
4
4
  parent;
5
5
  config;
@@ -3,14 +3,14 @@ import O from "./BaseLayer.js";
3
3
  import { qkv as P } from "../ops/qkv.js";
4
4
  import { rope as v } from "../ops/rope.js";
5
5
  import { appendCache as V } from "../ops/appendCache.js";
6
- import { k as c, t as C } from "../index-BAzbokzv.js";
6
+ import { w as c, t as C } from "../index-UdZhlibC.js";
7
7
  import { fusedSoftmax as T } from "../ops/fusedSoftmax.js";
8
- import { d as y } from "../random_width-DWzaOgrn.js";
9
- import { v as b } from "../variable-wSS22xj5.js";
10
- import { r as k, d as L } from "../dropout-Bciw46HT.js";
11
- import { r as N } from "../reshape-krWGKraP.js";
12
- import { m as R } from "../mat_mul-7121rsJk.js";
13
- class $ extends O {
8
+ import { d as y } from "../random_width-DbSpgl4o.js";
9
+ import { v as b } from "../variable-BTBkayv_.js";
10
+ import { r as k, d as L } from "../dropout-DPfPgWWe.js";
11
+ import { r as N } from "../reshape-BR0eoLYN.js";
12
+ import { m as R } from "../mat_mul-dwmZz69e.js";
13
+ class W extends O {
14
14
  divisor;
15
15
  index;
16
16
  units;
@@ -92,5 +92,5 @@ class $ extends O {
92
92
  }
93
93
  }
94
94
  export {
95
- $ as default
95
+ W as default
96
96
  };
@@ -1,10 +1,10 @@
1
- import { t as l } from "../index-BAzbokzv.js";
1
+ import { t as l } from "../index-UdZhlibC.js";
2
2
  import u from "./BaseLayer.js";
3
3
  import { matMulGelu as M } from "../ops/matMulGelu.js";
4
- import { v as o } from "../variable-wSS22xj5.js";
5
- import { r as h, d as f } from "../dropout-Bciw46HT.js";
6
- import { r as d } from "../reshape-krWGKraP.js";
7
- import { m as c } from "../mat_mul-7121rsJk.js";
4
+ import { v as o } from "../variable-BTBkayv_.js";
5
+ import { r as h, d as f } from "../dropout-DPfPgWWe.js";
6
+ import { r as d } from "../reshape-BR0eoLYN.js";
7
+ import { m as c } from "../mat_mul-dwmZz69e.js";
8
8
  class V extends u {
9
9
  index;
10
10
  hiddenUnits;
@@ -1,8 +1,8 @@
1
- import { t as s } from "../index-BAzbokzv.js";
1
+ import { t as s } from "../index-UdZhlibC.js";
2
2
  import e from "./BaseLayer.js";
3
3
  import { normRMS as a } from "../ops/normRMS.js";
4
- import { v as i } from "../variable-wSS22xj5.js";
5
- import { o as m } from "../ones-BBlSRqn1.js";
4
+ import { v as i } from "../variable-BTBkayv_.js";
5
+ import { o as m } from "../ones-C8Mfln6-.js";
6
6
  class f extends e {
7
7
  GAMMA;
8
8
  constructor(r, t = "", o) {
@@ -1,7 +1,7 @@
1
- import { s as t, j as h, t as n, k as p } from "../index-BAzbokzv.js";
2
- import { r as c } from "../reciprocal-CJQeasVa.js";
3
- import { c as f, s as m } from "../sin-D_h-qCSx.js";
4
- import { r as a } from "../range-DYsrnfiy.js";
1
+ import { b as t, x as h, t as n, w as p } from "../index-UdZhlibC.js";
2
+ import { r as c } from "../reciprocal-CGB48wZB.js";
3
+ import { c as f, s as m } from "../sin-CiBxrDqX.js";
4
+ import { r as a } from "../range-D9CZhVlR.js";
5
5
  class D {
6
6
  constructor(o) {
7
7
  this.config = o;
@@ -1,9 +1,9 @@
1
- import "../random_width-DWzaOgrn.js";
2
- import "../index-BAzbokzv.js";
3
- import { T as e } from "../TiedEmbedding-9WeDwvjO.js";
1
+ import "../random_width-DbSpgl4o.js";
2
+ import "../index-UdZhlibC.js";
3
+ import { T as e } from "../TiedEmbedding-ccLBFiZi.js";
4
4
  import "./BaseLayer.js";
5
- import "../variable-wSS22xj5.js";
6
- import "../gather-DjyCjmOD.js";
5
+ import "../variable-BTBkayv_.js";
6
+ import "../gather-Bbh8DHhM.js";
7
7
  export {
8
8
  e as default
9
9
  };
@@ -2,7 +2,7 @@ import l from "./CausalSelfAttention.js";
2
2
  import r from "./MLP.js";
3
3
  import o from "./RMSNorm.js";
4
4
  import d from "./BaseLayer.js";
5
- import { t as p } from "../index-BAzbokzv.js";
5
+ import { t as p } from "../index-UdZhlibC.js";
6
6
  class k extends d {
7
7
  ln1;
8
8
  attn;
@@ -2,7 +2,7 @@ import b from "../NanoGPTModel.js";
2
2
  import c from "../tokeniser/CharTokeniser.js";
3
3
  import l from "../tokeniser/bpe.js";
4
4
  import { load_safetensors as u } from "../utilities/safetensors.js";
5
- import { U as y } from "../index-BAzbokzv.js";
5
+ import { a0 as y } from "../index-UdZhlibC.js";
6
6
  import { dummyPassAsync as h } from "../utilities/dummy.js";
7
7
  async function L(e, a, r, t) {
8
8
  const n = {
@@ -1,13 +1,17 @@
1
1
  import d from "../NanoGPTModel.js";
2
2
  import "../jszip.min-CjP2V1VV.js";
3
3
  import h from "../tokeniser/CharTokeniser.js";
4
- import { U as k } from "../index-BAzbokzv.js";
4
+ import { a0 as k } from "../index-UdZhlibC.js";
5
5
  import b from "../tokeniser/bpe.js";
6
6
  import { dummyPassAsync as u } from "../utilities/dummy.js";
7
7
  import "../Generator.js";
8
8
  import "../index-Dwqa6Zy2.js";
9
- import "../dataset-pgqp-YfL.js";
9
+ import "../dataset-U3PrjwgU.js";
10
10
  import "../index-Tf7vU29b.js";
11
+ import "../ops/cpu/adamAdjust.js";
12
+ import "../ops/webgl/adamAdjust.js";
13
+ import "../ops/cpu/adamMoments.js";
14
+ import "../ops/webgl/adamMoments.js";
11
15
  import "../papaparse.min-C8l2Kvo1.js";
12
16
  import "../ops/cpu/scatterSub.js";
13
17
  import "../ops/webgl/scatterSub.js";
@@ -19,8 +23,8 @@ import "../ops/grads/attentionMask.js";
19
23
  import "../ops/cpu/qkv.js";
20
24
  import "../ops/webgl/qkv.js";
21
25
  import "../ops/grads/qkv.js";
22
- import "../random_width-DWzaOgrn.js";
23
- import "../register_all_kernels-BfFCQAqs.js";
26
+ import "../random_width-DbSpgl4o.js";
27
+ import "../register_all_kernels-DnbAyBXt.js";
24
28
  import "../ops/cpu/rope.js";
25
29
  import "../ops/webgl/rope.js";
26
30
  import "../ops/grads/rope.js";
@@ -34,13 +38,13 @@ import "../ops/webgl/matMulGelu.js";
34
38
  import "../ops/grads/matMulGelu.js";
35
39
  import "../ops/cpu/gelu.js";
36
40
  import "../ops/webgl/gelu.js";
37
- import "../ops/grads/gelu.js";
41
+ import "../gelu-BFwVnd1r.js";
38
42
  import "../ops/cpu/normRMS.js";
39
43
  import "../ops/webgl/normRMS.js";
40
44
  import "../ops/grads/normRMS.js";
41
45
  import "../ops/webgl/log.js";
42
46
  import { importWeights as O } from "../utilities/weights.js";
43
- async function ft(o) {
47
+ async function yt(o) {
44
48
  const n = /* @__PURE__ */ new Map(), s = await o.file("manifest.json")?.async("string");
45
49
  if (!s)
46
50
  throw new Error("Manifest file not found in the zip archive");
@@ -72,5 +76,5 @@ async function ft(o) {
72
76
  return { model: e, tokeniser: l };
73
77
  }
74
78
  export {
75
- ft as default
79
+ yt as default
76
80
  };
@@ -1,8 +1,8 @@
1
- import { o as r, q as p, E as u, a8 as E, a9 as h, p as S, b as $, a7 as d } from "./index-BAzbokzv.js";
2
- import { e as b } from "./axis_util-Bu4h7XWV.js";
3
- import { m as K } from "./max-DtlIuVeW.js";
4
- import { r as m } from "./reshape-krWGKraP.js";
5
- import { s as T } from "./sum-BE_Irnim.js";
1
+ import { B as r, C as p, E as u, ae as E, af as h, p as S, c as $, o as d } from "./index-UdZhlibC.js";
2
+ import { e as K } from "./axis_util-5DTW2tFV.js";
3
+ import { m as T } from "./max-ByjEGoFx.js";
4
+ import { r as m } from "./reshape-BR0eoLYN.js";
5
+ import { s as _ } from "./sum-NLYbiDag.js";
6
6
  /**
7
7
  * @license
8
8
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -19,11 +19,11 @@ import { s as T } from "./sum-BE_Irnim.js";
19
19
  * limitations under the License.
20
20
  * =============================================================================
21
21
  */
22
- function _(s) {
22
+ function b(s) {
23
23
  const o = { x: p(s, "x", "exp") };
24
24
  return u.runKernel(E, o);
25
25
  }
26
- const N = /* @__PURE__ */ r({ exp_: _ });
26
+ const N = /* @__PURE__ */ r({ exp_: b });
27
27
  /**
28
28
  * @license
29
29
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -40,11 +40,11 @@ const N = /* @__PURE__ */ r({ exp_: _ });
40
40
  * limitations under the License.
41
41
  * =============================================================================
42
42
  */
43
- function q(s) {
43
+ function v(s) {
44
44
  const o = { x: p(s, "x", "log", "float32") };
45
45
  return u.runKernel(h, o);
46
46
  }
47
- const v = /* @__PURE__ */ r({ log_: q });
47
+ const w = /* @__PURE__ */ r({ log_: v });
48
48
  /**
49
49
  * @license
50
50
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -61,22 +61,22 @@ const v = /* @__PURE__ */ r({ log_: q });
61
61
  * limitations under the License.
62
62
  * =============================================================================
63
63
  */
64
- function w(s, n = null, o = !1) {
65
- const a = p(s, "x", "logSumExp"), t = S(n, a.shape), x = K(
64
+ function A(s, n = null, o = !1) {
65
+ const a = p(s, "x", "logSumExp"), t = S(n, a.shape), x = T(
66
66
  a,
67
67
  t,
68
68
  !0
69
69
  /* keepDims */
70
- ), i = $(a, x), l = N(i), f = T(l, t), c = v(f), e = d(m(x, c.shape), c);
70
+ ), i = $(a, x), l = N(i), f = _(l, t), c = w(f), e = d(m(x, c.shape), c);
71
71
  if (o) {
72
- const g = b(e.shape, t);
72
+ const g = K(e.shape, t);
73
73
  return m(e, g);
74
74
  }
75
75
  return e;
76
76
  }
77
- const M = /* @__PURE__ */ r({ logSumExp_: w });
77
+ const L = /* @__PURE__ */ r({ logSumExp_: A });
78
78
  export {
79
- v as a,
79
+ w as a,
80
80
  N as e,
81
- M as l
81
+ L as l
82
82
  };
package/dist/main.d.ts CHANGED
@@ -1,3 +1,7 @@
1
+ import { default as CausalSelfAttention } from './layers/CausalSelfAttention';
2
+ import { default as MLP } from './layers/MLP';
3
+ import { default as TransformerBlock } from './layers/TransformerBlock';
4
+ import { default as RoPECache } from './layers/RoPECache';
1
5
  export { default as NanoGPT } from './NanoGPTModel';
2
6
  export { default as TeachableLLM } from './TeachableLLM';
3
7
  export { default as CharTokeniser } from './tokeniser/CharTokeniser';
@@ -11,3 +15,12 @@ export type { ITokeniser } from './tokeniser/type';
11
15
  export type { TrainingProgress } from './training/Trainer';
12
16
  export type { GPTConfig } from './config';
13
17
  export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
18
+ export { selectBackend } from './backend';
19
+ export { default as performanceTest } from './utilities/performance';
20
+ export declare const layers: {
21
+ CausalSelfAttention: typeof CausalSelfAttention;
22
+ MLP: typeof MLP;
23
+ TransformerBlock: typeof TransformerBlock;
24
+ RoPECache: typeof RoPECache;
25
+ };
26
+ export { default as AdamExt } from './training/AdamExt';
package/dist/main.js CHANGED
@@ -1,11 +1,11 @@
1
- import { default as R } from "./NanoGPTModel.js";
2
- import { default as q } from "./TeachableLLM.js";
3
- import { default as A } from "./tokeniser/CharTokeniser.js";
4
- import { default as I } from "./tokeniser/bpe.js";
5
- import { default as K } from "./utilities/waitForModel.js";
6
- import { default as Q } from "./data/textLoader.js";
7
- import { estimateMemoryUsage as V, estimateParameterCount as W, estimateResources as X, estimateTrainingMemoryUsage as Y, validateConfig as Z } from "./utilities/parameters.js";
8
- import "./index-BAzbokzv.js";
1
+ import { default as W } from "./NanoGPTModel.js";
2
+ import { default as Y } from "./TeachableLLM.js";
3
+ import { default as _ } from "./tokeniser/CharTokeniser.js";
4
+ import { default as oo } from "./tokeniser/bpe.js";
5
+ import { default as to } from "./utilities/waitForModel.js";
6
+ import { default as eo } from "./data/textLoader.js";
7
+ import { estimateMemoryUsage as po, estimateParameterCount as ao, estimateResources as fo, estimateTrainingMemoryUsage as so, validateConfig as lo } from "./utilities/parameters.js";
8
+ import "./index-UdZhlibC.js";
9
9
  import "./ops/cpu/scatterSub.js";
10
10
  import "./ops/webgl/scatterSub.js";
11
11
  import "./ops/cpu/gatherSub.js";
@@ -16,10 +16,10 @@ import "./ops/grads/attentionMask.js";
16
16
  import "./ops/cpu/qkv.js";
17
17
  import "./ops/webgl/qkv.js";
18
18
  import "./ops/grads/qkv.js";
19
- import "./random_width-DWzaOgrn.js";
20
- import "./register_all_kernels-BfFCQAqs.js";
19
+ import "./random_width-DbSpgl4o.js";
20
+ import "./register_all_kernels-DnbAyBXt.js";
21
21
  import "./index-Tf7vU29b.js";
22
- import "./dataset-pgqp-YfL.js";
22
+ import "./dataset-U3PrjwgU.js";
23
23
  import "./ops/cpu/rope.js";
24
24
  import "./ops/webgl/rope.js";
25
25
  import "./ops/grads/rope.js";
@@ -33,21 +33,42 @@ import "./ops/webgl/matMulGelu.js";
33
33
  import "./ops/grads/matMulGelu.js";
34
34
  import "./ops/cpu/gelu.js";
35
35
  import "./ops/webgl/gelu.js";
36
- import "./ops/grads/gelu.js";
36
+ import "./gelu-BFwVnd1r.js";
37
37
  import "./ops/cpu/normRMS.js";
38
38
  import "./ops/webgl/normRMS.js";
39
39
  import "./ops/grads/normRMS.js";
40
40
  import "./ops/webgl/log.js";
41
+ import "./ops/cpu/adamMoments.js";
42
+ import "./ops/webgl/adamMoments.js";
43
+ import "./ops/cpu/adamAdjust.js";
44
+ import "./ops/webgl/adamAdjust.js";
45
+ import { selectBackend as xo } from "./backend.js";
46
+ import { default as co } from "./utilities/performance.js";
47
+ import o from "./layers/CausalSelfAttention.js";
48
+ import r from "./layers/MLP.js";
49
+ import t from "./layers/TransformerBlock.js";
50
+ import m from "./layers/RoPECache.js";
51
+ import { default as ko } from "./training/AdamExt.js";
52
+ const O = {
53
+ CausalSelfAttention: o,
54
+ MLP: r,
55
+ TransformerBlock: t,
56
+ RoPECache: m
57
+ };
41
58
  export {
42
- I as BPETokeniser,
43
- A as CharTokeniser,
44
- R as NanoGPT,
45
- q as TeachableLLM,
46
- V as estimateMemoryUsage,
47
- W as estimateParameterCount,
48
- X as estimateResources,
49
- Y as estimateTrainingMemoryUsage,
50
- Q as loadTextData,
51
- Z as validateConfig,
52
- K as waitForModel
59
+ ko as AdamExt,
60
+ oo as BPETokeniser,
61
+ _ as CharTokeniser,
62
+ W as NanoGPT,
63
+ Y as TeachableLLM,
64
+ po as estimateMemoryUsage,
65
+ ao as estimateParameterCount,
66
+ fo as estimateResources,
67
+ so as estimateTrainingMemoryUsage,
68
+ O as layers,
69
+ eo as loadTextData,
70
+ co as performanceTest,
71
+ xo as selectBackend,
72
+ lo as validateConfig,
73
+ to as waitForModel
53
74
  };
@@ -1,4 +1,4 @@
1
- import { o as m, q as s, B as c, E as M, C as p } from "./index-BAzbokzv.js";
1
+ import { B as m, C as s, I as c, E as M, J as p } from "./index-UdZhlibC.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,10 +15,10 @@ import { o as m, q as s, B as c, E as M, C as p } from "./index-BAzbokzv.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function f(e, o, n = !1, l = !1) {
19
- let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
18
+ function f(e, n, o = !1, l = !1) {
19
+ let a = s(e, "a", "matMul"), t = s(n, "b", "matMul");
20
20
  [a, t] = c(a, t);
21
- const r = { a, b: t }, u = { transposeA: n, transposeB: l };
21
+ const r = { a, b: t }, u = { transposeA: o, transposeB: l };
22
22
  return M.runKernel(p, r, u);
23
23
  }
24
24
  const i = /* @__PURE__ */ m({ matMul_: f });
@@ -1,4 +1,4 @@
1
- import { o as r, q as e, E as x, M as c } from "./index-BAzbokzv.js";
1
+ import { B as r, C as e, E as x, U as c } from "./index-UdZhlibC.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,8 +15,8 @@ import { o as r, q as e, E as x, M as c } from "./index-BAzbokzv.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function m(n, o = null, s = !1) {
19
- const t = { x: e(n, "x", "max") }, a = { reductionIndices: o, keepDims: s };
18
+ function m(n, s = null, o = !1) {
19
+ const t = { x: e(n, "x", "max") }, a = { reductionIndices: s, keepDims: o };
20
20
  return x.runKernel(c, t, a);
21
21
  }
22
22
  const l = /* @__PURE__ */ r({ max_: m });
@@ -1,4 +1,4 @@
1
- import { u as z } from "./gpgpu_math-CNslybmD.js";
1
+ import { u as z } from "./gpgpu_math-DffelNS-.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.