@genai-fi/nanogpt 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/dist/Generator.js +11 -11
  2. package/dist/NanoGPTModel.d.ts +2 -2
  3. package/dist/NanoGPTModel.js +104 -136
  4. package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} +26 -25
  5. package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} +5 -5
  6. package/dist/TeachableLLM.js +5 -5
  7. package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} +4 -4
  8. package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} +3 -3
  9. package/dist/backend.d.ts +1 -0
  10. package/dist/backend.js +7 -0
  11. package/dist/backend_util-DWiwsi2N.js +749 -0
  12. package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-C4v-j9yA.js} +2 -2
  13. package/dist/{concat-5aPGqw3Z.js → concat-CsHeR4zV.js} +8 -8
  14. package/dist/{dataset-pgqp-YfL.js → dataset-JDyjG3QR.js} +3 -3
  15. package/dist/{dropout-Bciw46HT.js → dropout-hpDwECTe.js} +7 -7
  16. package/dist/{gather-DjyCjmOD.js → gather-D0_gPiBz.js} +4 -4
  17. package/dist/gelu-uyHP1x1f.js +26 -0
  18. package/dist/gpgpu_math-DJm3ZTAf.js +2371 -0
  19. package/dist/index-BPPzKVdR.js +12099 -0
  20. package/dist/{index-BAzbokzv.js → index-C0dhsYom.js} +405 -389
  21. package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CwRTFqrc.js} +31 -30
  22. package/dist/layers/BaseLayer.js +2 -2
  23. package/dist/layers/CausalSelfAttention.js +6 -6
  24. package/dist/layers/MLP.js +5 -5
  25. package/dist/layers/RMSNorm.js +3 -3
  26. package/dist/layers/RoPECache.js +4 -4
  27. package/dist/layers/TiedEmbedding.js +5 -5
  28. package/dist/layers/TransformerBlock.js +1 -1
  29. package/dist/loader/loadTransformers.js +1 -1
  30. package/dist/loader/oldZipLoad.js +5 -5
  31. package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-D086OgZJ.js} +15 -15
  32. package/dist/main.d.ts +2 -0
  33. package/dist/main.js +9 -5
  34. package/dist/{mat_mul-7121rsJk.js → mat_mul-1nwdPkQ_.js} +4 -4
  35. package/dist/{max-DtlIuVeW.js → max-BQc2Aj-I.js} +4 -4
  36. package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-Gzf3I9UV.js} +1 -1
  37. package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
  38. package/dist/{ones-BBlSRqn1.js → ones-D63HpSF_.js} +2 -2
  39. package/dist/ops/appendCache.js +3 -3
  40. package/dist/ops/attentionMask.js +1 -1
  41. package/dist/ops/cpu/appendCache.js +8 -8
  42. package/dist/ops/cpu/attentionMask.js +9 -9
  43. package/dist/ops/cpu/fusedSoftmax.js +17 -11
  44. package/dist/ops/cpu/gatherSub.js +7 -7
  45. package/dist/ops/cpu/gelu.js +13 -13
  46. package/dist/ops/cpu/matMulGelu.js +36 -24
  47. package/dist/ops/cpu/matMulMul.js +14 -8
  48. package/dist/ops/cpu/mulDropout.js +9 -3
  49. package/dist/ops/cpu/normRMS.js +5 -5
  50. package/dist/ops/cpu/qkv.js +3 -3
  51. package/dist/ops/cpu/rope.js +5 -5
  52. package/dist/ops/cpu/scatterSub.js +11 -11
  53. package/dist/ops/fusedSoftmax.js +1 -1
  54. package/dist/ops/gatherSub.js +1 -1
  55. package/dist/ops/gelu.js +2 -2
  56. package/dist/ops/grads/attentionMask.js +1 -1
  57. package/dist/ops/grads/fusedSoftmax.js +2 -2
  58. package/dist/ops/grads/gelu.js +3 -24
  59. package/dist/ops/grads/matMulGelu.js +5 -5
  60. package/dist/ops/grads/normRMS.js +6 -6
  61. package/dist/ops/grads/qkv.js +1 -1
  62. package/dist/ops/grads/rope.js +3 -3
  63. package/dist/ops/matMulGelu.js +1 -1
  64. package/dist/ops/matMulMul.js +1 -1
  65. package/dist/ops/mulDrop.js +1 -1
  66. package/dist/ops/normRMS.js +1 -1
  67. package/dist/ops/qkv.js +1 -1
  68. package/dist/ops/rope.js +4 -4
  69. package/dist/ops/scatterSub.js +1 -1
  70. package/dist/ops/webgl/appendCache.js +1 -1
  71. package/dist/ops/webgl/attentionMask.js +1 -1
  72. package/dist/ops/webgl/fusedSoftmax.js +4 -4
  73. package/dist/ops/webgl/gatherSub.js +1 -1
  74. package/dist/ops/webgl/gelu.js +2 -2
  75. package/dist/ops/webgl/log.js +5 -5
  76. package/dist/ops/webgl/matMulGelu.js +17 -17
  77. package/dist/ops/webgl/matMulMul.js +1 -1
  78. package/dist/ops/webgl/mulDropout.js +4 -4
  79. package/dist/ops/webgl/normRMS.js +2 -2
  80. package/dist/ops/webgl/qkv.js +1 -1
  81. package/dist/ops/webgl/rope.js +1 -1
  82. package/dist/ops/webgl/scatterSub.js +1 -1
  83. package/dist/ops/webgpu/appendCache.d.ts +1 -0
  84. package/dist/ops/webgpu/appendCache.js +56 -0
  85. package/dist/ops/webgpu/attentionMask.d.ts +1 -0
  86. package/dist/ops/webgpu/attentionMask.js +64 -0
  87. package/dist/ops/webgpu/gatherSub.d.ts +1 -0
  88. package/dist/ops/webgpu/gatherSub.js +37 -0
  89. package/dist/ops/webgpu/gelu.d.ts +14 -0
  90. package/dist/ops/webgpu/gelu.js +86 -0
  91. package/dist/ops/webgpu/index.d.ts +0 -0
  92. package/dist/ops/webgpu/index.js +8 -0
  93. package/dist/ops/webgpu/normRMS.d.ts +1 -0
  94. package/dist/ops/webgpu/normRMS.js +115 -0
  95. package/dist/ops/webgpu/qkv.d.ts +1 -0
  96. package/dist/ops/webgpu/qkv.js +56 -0
  97. package/dist/ops/webgpu/rope.d.ts +1 -0
  98. package/dist/ops/webgpu/rope.js +68 -0
  99. package/dist/ops/webgpu/scatterSub.d.ts +1 -0
  100. package/dist/ops/webgpu/scatterSub.js +37 -0
  101. package/dist/{ops-C0sQEcPw.js → ops-CIQLNshk.js} +452 -503
  102. package/dist/{random_width-DWzaOgrn.js → random_width-DkYP8W8N.js} +143 -144
  103. package/dist/{range-DYsrnfiy.js → range-CYzpQY53.js} +1 -1
  104. package/dist/{reciprocal-CJQeasVa.js → reciprocal-_A9yv27J.js} +1 -1
  105. package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-guvSxp7M.js} +202 -200
  106. package/dist/{reshape-krWGKraP.js → reshape-BMUzc1UY.js} +3 -3
  107. package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-IRBqKz_b.js} +3 -3
  108. package/dist/{selu_util-sntGesxr.js → selu_util-Dt_iuXaq.js} +6 -6
  109. package/dist/shared-BNa2q6jD.js +69 -0
  110. package/dist/{shared-Ca6iDobD.js → shared-CDu9S76h.js} +541 -606
  111. package/dist/{sin-D_h-qCSx.js → sin-Cocju-BY.js} +6 -6
  112. package/dist/{softmax-fsdtf6JC.js → softmax-GPNK3o-U.js} +3 -3
  113. package/dist/{split-eiktj-6L.js → split-CHzJjxDv.js} +4 -4
  114. package/dist/{stack-dfEEz2OY.js → stack-Dpgg_1W1.js} +2 -2
  115. package/dist/{sum-BE_Irnim.js → sum-B8wEpKsg.js} +5 -5
  116. package/dist/{tensor-Xyi595sG.js → tensor-RvZVNmg0.js} +1 -1
  117. package/dist/{tensor2d-CPEkynbH.js → tensor2d-B_kyod7_.js} +1 -1
  118. package/dist/training/AdamExt.js +1 -1
  119. package/dist/training/DatasetBuilder.js +2 -2
  120. package/dist/training/Evaluator.js +1 -1
  121. package/dist/training/FullTrainer.js +20 -20
  122. package/dist/training/Trainer.d.ts +5 -6
  123. package/dist/training/Trainer.js +59 -60
  124. package/dist/training/sparseCrossEntropy.js +4 -4
  125. package/dist/utilities/dummy.js +19 -19
  126. package/dist/utilities/generate.js +15 -16
  127. package/dist/utilities/multinomialCPU.d.ts +2 -0
  128. package/dist/utilities/multinomialCPU.js +13 -0
  129. package/dist/utilities/performance.d.ts +2 -0
  130. package/dist/utilities/performance.js +16 -0
  131. package/dist/utilities/profile.d.ts +1 -0
  132. package/dist/utilities/profile.js +9 -6
  133. package/dist/utilities/safetensors.js +2 -2
  134. package/dist/utilities/weights.js +2 -2
  135. package/dist/{variable-wSS22xj5.js → variable-DXEUOwew.js} +1 -1
  136. package/dist/webgpu_util-g13LvDIv.js +625 -0
  137. package/dist/{zeros-YJDE7oRb.js → zeros-DCPCdFGq.js} +8 -8
  138. package/package.json +2 -1
  139. package/dist/gpgpu_math-CNslybmD.js +0 -3115
  140. package/dist/norm-CzltS9Fz.js +0 -86
@@ -1,4 +1,4 @@
1
- import { o, q as t, E as c, a0 as a, a1 as e } from "./index-BAzbokzv.js";
1
+ import { x as o, y as t, E as c, a3 as a, a4 as x } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,11 +15,11 @@ import { o, q as t, E as c, a0 as a, a1 as e } from "./index-BAzbokzv.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function i(n) {
18
+ function e(n) {
19
19
  const s = { x: t(n, "x", "cos", "float32") };
20
20
  return c.runKernel(a, s);
21
21
  }
22
- const f = /* @__PURE__ */ o({ cos_: i });
22
+ const f = /* @__PURE__ */ o({ cos_: e });
23
23
  /**
24
24
  * @license
25
25
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -36,11 +36,11 @@ const f = /* @__PURE__ */ o({ cos_: i });
36
36
  * limitations under the License.
37
37
  * =============================================================================
38
38
  */
39
- function x(n) {
39
+ function i(n) {
40
40
  const s = { x: t(n, "x", "sin", "float32") };
41
- return c.runKernel(e, s);
41
+ return c.runKernel(x, s);
42
42
  }
43
- const p = /* @__PURE__ */ o({ sin_: x });
43
+ const p = /* @__PURE__ */ o({ sin_: i });
44
44
  export {
45
45
  f as c,
46
46
  p as s
@@ -1,4 +1,4 @@
1
- import { o as r, q as f, E as e, S as i } from "./index-BAzbokzv.js";
1
+ import { x as r, y as f, E as e, S as i } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -22,7 +22,7 @@ function l(s, o = -1) {
22
22
  const n = { logits: t }, a = { dim: o };
23
23
  return e.runKernel(i, n, a);
24
24
  }
25
- const p = /* @__PURE__ */ r({ softmax_: l });
25
+ const c = /* @__PURE__ */ r({ softmax_: l });
26
26
  export {
27
- p as s
27
+ c as s
28
28
  };
@@ -1,4 +1,4 @@
1
- import { o as p, q as i, E as a, F as c } from "./index-BAzbokzv.js";
1
+ import { x as p, y as i, E as a, J as c } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,9 +15,9 @@ import { o as p, q as i, E as a, F as c } from "./index-BAzbokzv.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function e(t, s, o = 0) {
19
- const n = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: o };
20
- return a.runKernel(c, n, r);
18
+ function e(t, s, n = 0) {
19
+ const o = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: n };
20
+ return a.runKernel(c, o, r);
21
21
  }
22
22
  const u = /* @__PURE__ */ p({ split_: e });
23
23
  export {
@@ -1,4 +1,4 @@
1
- import { o as e, w as c, i as n, E as i, P as k } from "./index-BAzbokzv.js";
1
+ import { x as e, B as c, l as n, E as k, P as i } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -19,7 +19,7 @@ function u(r, t = 0) {
19
19
  const s = c(r, "tensors", "stack", "string_or_numeric");
20
20
  n(s.length >= 1, () => "Pass at least one tensor to tf.stack"), s.length > 0 && n(t <= s[0].rank, () => "Axis must be <= rank of the tensor");
21
21
  const o = s, a = { axis: t };
22
- return i.runKernel(k, o, a);
22
+ return k.runKernel(i, o, a);
23
23
  }
24
24
  const l = /* @__PURE__ */ e({ stack_: u });
25
25
  export {
@@ -1,4 +1,4 @@
1
- import { o as e, q as u, L as c, E as l, N as m } from "./index-BAzbokzv.js";
1
+ import { x as e, y as u, Q as c, E as l, U as m } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as e, q as u, L as c, E as l, N as m } from "./index-BAzbokzv.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function i(t, o = null, n = !1) {
18
+ function i(t, n = null, o = !1) {
19
19
  let s = u(t, "x", "sum");
20
20
  s.dtype === "bool" && (s = c(s, "int32"));
21
- const r = { x: s }, a = { axis: o, keepDims: n };
21
+ const r = { x: s }, a = { axis: n, keepDims: o };
22
22
  return l.runKernel(m, r, a);
23
23
  }
24
- const f = /* @__PURE__ */ e({ sum_: i });
24
+ const x = /* @__PURE__ */ e({ sum_: i });
25
25
  export {
26
- f as s
26
+ x as s
27
27
  };
@@ -1,4 +1,4 @@
1
- import { O as t, Q as a } from "./index-BAzbokzv.js";
1
+ import { V as t, W as a } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { V as t, O as s, Q as a } from "./index-BAzbokzv.js";
1
+ import { X as t, V as s, W as a } from "./index-C0dhsYom.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { A as r, a as c, s as h, b as g, e as o } from "../index-BAzbokzv.js";
1
+ import { A as r, a as c, b as h, c as g, e as o } from "../index-C0dhsYom.js";
2
2
  class u extends r {
3
3
  constructor(t, e, s, a, i) {
4
4
  super(t, e, s, a), this.config = i, this.startLearningRate = t;
@@ -1,5 +1,5 @@
1
- import { t as u } from "../index-BAzbokzv.js";
2
- import { d as z, i as f } from "../dataset-pgqp-YfL.js";
1
+ import { t as u } from "../index-C0dhsYom.js";
2
+ import { d as z, i as f } from "../dataset-JDyjG3QR.js";
3
3
  import "../index-Tf7vU29b.js";
4
4
  /**
5
5
  * @license
@@ -11,7 +11,7 @@ class p {
11
11
  if (e.done) break;
12
12
  const c = e.value, { xs: r, ys: i } = c, [u, l] = this.model.forward({ training: !1 }, r, i);
13
13
  u.dispose(), r.dispose(), i.dispose();
14
- const d = l.arraySync();
14
+ const d = await l.array();
15
15
  l.dispose(), t += d, o++;
16
16
  }
17
17
  return t / o;
@@ -1,7 +1,7 @@
1
1
  import { generateText as T } from "../utilities/generate.js";
2
2
  import L from "./Trainer.js";
3
3
  import x from "./Evaluator.js";
4
- import { d as h } from "../index-BAzbokzv.js";
4
+ import { d as h } from "../index-C0dhsYom.js";
5
5
  import y from "../utilities/profile.js";
6
6
  const D = {
7
7
  desiredLoss: 0.01,
@@ -9,12 +9,12 @@ const D = {
9
9
  maxSteps: 1e3
10
10
  };
11
11
  class I extends L {
12
- constructor(i, e, o = 3e-4) {
13
- super(i, e, o);
12
+ constructor(i, e, r = 3e-4) {
13
+ super(i, e, r);
14
14
  }
15
15
  // Train for multiple epochs using Dataset API - FIXED memory leaks
16
- async trainOnDataset(i, e, o) {
17
- const { desiredLoss: p, logInterval: g, onStep: l, prompt: c, maxSteps: u } = {
16
+ async trainOnDataset(i, e, r) {
17
+ const { desiredLoss: p, logInterval: g, onStep: l, prompt: m, maxSteps: u } = {
18
18
  ...D,
19
19
  ...e
20
20
  }, n = Date.now(), t = {
@@ -27,33 +27,33 @@ class I extends L {
27
27
  trainingDuration: 0,
28
28
  ...this.lastState || {}
29
29
  };
30
- this.lastState = t, this.dummyPass(), this.model.trainable = !0, e?.advancedMetrics && (this.model.getProfiler() || (this.model.config.layerConfig.profiler = new y())), this.running = !0, t.logStartTime = n;
31
- const m = o ? new x(this.model, o) : void 0, f = await i.iterator();
30
+ this.lastState = t, await this.dummyPass(), this.model.trainable = !0, e?.advancedMetrics && (this.model.getProfiler() || (this.model.config.layerConfig.profiler = new y())), this.running = !0, t.logStartTime = n;
31
+ const c = r ? new x(this.model, r) : void 0, f = await i.iterator();
32
32
  try {
33
33
  for (; this.running && !(t.lastLoss < p); ) {
34
- const r = await f.next();
35
- if (r.done) break;
36
- const d = r.value, v = this.trainBatch(t, d, e.advancedMetrics || !1), s = {
34
+ const o = await f.next();
35
+ if (o.done) break;
36
+ const d = o.value, S = this.trainBatch(t, d), s = {
37
37
  loss: t.lastLoss,
38
38
  step: t.step,
39
39
  time: Date.now() - n,
40
40
  batchSize: d.xs.shape[0],
41
- learningRate: e?.advancedMetrics ? this.optimizer.lr : void 0,
42
- gradientNorm: e?.advancedMetrics ? t.gradientNorm : void 0
41
+ learningRate: e?.advancedMetrics ? this.optimizer.lr : void 0
42
+ //gradientNorm: options?.advancedMetrics ? await state.gradientNorm : undefined,
43
43
  };
44
44
  if (this.model.log.push(s), t.step % g === 0) {
45
- await v;
46
- const S = Date.now();
47
- if (t.trainingDuration += S - t.logStartTime, m)
45
+ await S;
46
+ const v = Date.now();
47
+ if (t.trainingDuration += v - t.logStartTime, c)
48
48
  try {
49
- const a = await m.evaluate(5);
49
+ const a = await c.evaluate(5);
50
50
  t.validationLosses.push(a), s.valLoss = a;
51
51
  } catch (a) {
52
52
  console.error("Validation error:", a);
53
53
  }
54
54
  if (l) {
55
- if (c) {
56
- const w = await T(this.tokenizer, this.model, c, 100, {
55
+ if (m) {
56
+ const w = await T(this.tokenizer, this.model, m, 100, {
57
57
  temperature: 0.8
58
58
  });
59
59
  s.example = w;
@@ -70,8 +70,8 @@ class I extends L {
70
70
  }
71
71
  t.step >= u && this.stop();
72
72
  }
73
- } catch (r) {
74
- throw console.error("Training error:", r), h(), r;
73
+ } catch (o) {
74
+ throw console.error("Training error:", o), h(), o;
75
75
  }
76
76
  return h(), this.running = !1, { losses: t.losses, validationLosses: t.validationLosses };
77
77
  }
@@ -11,7 +11,7 @@ export interface TrainingState {
11
11
  totalSteps: number;
12
12
  losses: number[];
13
13
  validationLosses: number[];
14
- gradientNorm?: number;
14
+ gradientNorm?: Promise<number>;
15
15
  }
16
16
  export interface TrainingProgress {
17
17
  duration: number;
@@ -47,16 +47,15 @@ export default abstract class GPTTrainer {
47
47
  stop(): void;
48
48
  getOptimizer(): AdamExt;
49
49
  resetOptimizer(config?: AdamConfig): void;
50
- private maxGradNorm;
51
- protected trainStep(state: Partial<TrainingState>, batch: {
50
+ protected trainStep(_state: Partial<TrainingState>, batch: {
52
51
  xs: Tensor;
53
52
  ys: Tensor;
54
- }, dummy?: boolean, calcNorm?: boolean): Scalar;
55
- protected dummyPass(): void;
53
+ }, dummy?: boolean): Scalar;
54
+ protected dummyPass(): Promise<void>;
56
55
  protected trainBatch(state: TrainingState, batch: {
57
56
  xs: Tensor;
58
57
  ys: Tensor;
59
- }, calcNorm?: boolean): Promise<number>;
58
+ }): Promise<number>;
60
59
  abstract trainOnDataset(dataset: Dataset<{
61
60
  xs: Tensor;
62
61
  ys: Tensor;
@@ -1,11 +1,10 @@
1
- import { DatasetBuilder as g, flattenTokens as m, PAGE_FACTOR as u } from "./DatasetBuilder.js";
2
- import f from "./AdamExt.js";
3
- import { t as y, v as z, d as c } from "../index-BAzbokzv.js";
4
- import { n as S } from "../norm-CzltS9Fz.js";
5
- import { z as p } from "../zeros-YJDE7oRb.js";
6
- class R {
7
- constructor(t, e, s = 1e-3) {
8
- this.tokenizer = e, this.model = t, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new g(e, t.config.gpt.blockSize);
1
+ import { DatasetBuilder as m, flattenTokens as p, PAGE_FACTOR as u } from "./DatasetBuilder.js";
2
+ import g from "./AdamExt.js";
3
+ import { t as f, v as y, d as c } from "../index-C0dhsYom.js";
4
+ import { z as h } from "../zeros-DCPCdFGq.js";
5
+ class x {
6
+ constructor(t, s, e = 1e-3) {
7
+ this.tokenizer = s, this.model = t, this.learningRate = e, this.resetOptimizer(), this.datasetBuilder = new m(s, t.config.gpt.blockSize);
9
8
  }
10
9
  model;
11
10
  optimizer;
@@ -27,7 +26,7 @@ class R {
27
26
  }
28
27
  resetOptimizer(t = { learningRateFactor: 1, beta1: 0.9, beta2: 0.99, epsilon: 1e-8 }) {
29
28
  this.optimizer && this.optimizer.dispose();
30
- const e = new f(
29
+ const s = new g(
31
30
  t.learningRateFactor * this.learningRate,
32
31
  t.beta1,
33
32
  t.beta2,
@@ -39,78 +38,78 @@ class R {
39
38
  weightDecay: 0
40
39
  }
41
40
  );
42
- this.optimizer = e;
41
+ this.optimizer = s;
43
42
  }
44
- maxGradNorm(t) {
45
- let e = 0;
46
- return Object.keys(t).forEach((s) => {
47
- const a = t[s], r = S(a), i = r.dataSync()[0];
48
- r.dispose(), i > e && (e = i);
49
- }), e;
50
- }
51
- trainStep(t, e, s = !1, a = !1) {
52
- return y(() => {
43
+ /*private async maxGradNorm(grads: NamedVariableMap): Promise<number> {
44
+ let maxNorm = 0;
45
+ // Print all gradients
46
+ await Promise.all(
47
+ Object.keys(grads).map(async (varName) => {
48
+ const grad = grads[varName];
49
+ const temp = norm(grad);
50
+ const gradNorm = (await temp.data())[0];
51
+ temp.dispose();
52
+ if (gradNorm > maxNorm) {
53
+ maxNorm = gradNorm;
54
+ }
55
+ })
56
+ );
57
+ return maxNorm;
58
+ }*/
59
+ trainStep(t, s, e = !1) {
60
+ return f(() => {
53
61
  this.model.getProfiler()?.startMemory();
54
- const { xs: r, ys: i } = e, d = () => {
55
- const [n, h] = this.model.forward({ training: !0 }, r, i);
56
- return n.dispose(), h;
57
- }, { value: l, grads: o } = z(d);
58
- if (s)
59
- this.model.getProfiler()?.endMemory("Training");
60
- else {
61
- if (a) {
62
- const n = this.maxGradNorm(o);
63
- t.gradientNorm = n;
64
- }
65
- this.optimizer.applyGradients(o), this.model.getProfiler()?.endMemory("Training"), c(o);
66
- }
67
- return l;
62
+ const { xs: a, ys: i } = s, o = () => {
63
+ const [l, d] = this.model.forward({ training: !0 }, a, i);
64
+ return l.dispose(), d;
65
+ }, { value: n, grads: r } = y(o);
66
+ return e ? this.model.getProfiler()?.endMemory("Training") : (this.optimizer.applyGradients(r), this.model.getProfiler()?.endMemory("Training"), c(r)), n;
68
67
  });
69
68
  }
70
- dummyPass() {
71
- const t = p([1, this.model.config.gpt.blockSize], "int32"), e = p([1, this.model.config.gpt.blockSize], "int32");
69
+ async dummyPass() {
70
+ const t = h([1, this.model.config.gpt.blockSize], "int32"), s = h([1, this.model.config.gpt.blockSize], "int32");
72
71
  try {
73
- const s = this.trainStep({}, { xs: t, ys: e }, !0);
74
- s.dataSync(), s.dispose();
75
- } catch (s) {
76
- console.error("Error during dummy pass:", s);
72
+ const e = this.trainStep({}, { xs: t, ys: s }, !0);
73
+ await e.data(), e.dispose();
74
+ } catch (e) {
75
+ console.error("Error during dummy pass:", e);
77
76
  } finally {
78
- t.dispose(), e.dispose();
77
+ t.dispose(), s.dispose();
79
78
  }
80
79
  }
81
- async trainBatch(t, e, s = !1) {
80
+ async trainBatch(t, s) {
82
81
  try {
83
- const a = this.trainStep(t, e, !1, s);
84
- return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, a.array().then((r) => (t.lastLoss = r, t.losses.push(t.lastLoss), a.dispose(), t.lastLoss));
85
- } catch (a) {
86
- throw console.error(`Error processing batch at step ${t.step}:`, a), c(), a;
82
+ const e = this.trainStep(t, s, !1);
83
+ return s.xs.dispose(), s.ys.dispose(), t.step++, t.totalSteps++, e.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), e.dispose(), t.lastLoss));
84
+ } catch (e) {
85
+ throw console.error(`Error processing batch at step ${t.step}:`, e), c(), e;
87
86
  }
88
87
  }
89
- async createTrainValidationSplit(t, e = 32, s = 0.1) {
90
- const a = await m(t, this.tokenizer), r = /* @__PURE__ */ new Set();
91
- if (s > 0) {
92
- const l = Math.floor(a.length / (this.datasetBuilder.blockSize * u)), o = Math.max(1, Math.floor(l * s));
93
- for (; r.size < o; ) {
94
- const n = Math.floor(Math.random() * l);
95
- r.add(n);
88
+ async createTrainValidationSplit(t, s = 32, e = 0.1) {
89
+ const a = await p(t, this.tokenizer), i = /* @__PURE__ */ new Set();
90
+ if (e > 0) {
91
+ const r = Math.floor(a.length / (this.datasetBuilder.blockSize * u)), l = Math.max(1, Math.floor(r * e));
92
+ for (; i.size < l; ) {
93
+ const d = Math.floor(Math.random() * r);
94
+ i.add(d);
96
95
  }
97
96
  }
98
- const i = await this.datasetBuilder.createTextDataset(a, e, r, !1), d = await this.datasetBuilder.createTextDataset(
97
+ const o = await this.datasetBuilder.createTextDataset(a, s, i, !1), n = await this.datasetBuilder.createTextDataset(
99
98
  a,
100
- e,
101
- r,
99
+ s,
100
+ i,
102
101
  !0
103
102
  );
104
- return { trainDataset: i, validationDataset: d };
103
+ return { trainDataset: o, validationDataset: n };
105
104
  }
106
- async createDataset(t, e = 32) {
107
- const s = await m(t, this.tokenizer);
108
- return await this.datasetBuilder.createTextDataset(s, e);
105
+ async createDataset(t, s = 32) {
106
+ const e = await p(t, this.tokenizer);
107
+ return await this.datasetBuilder.createTextDataset(e, s);
109
108
  }
110
109
  dispose() {
111
110
  this.optimizer && this.optimizer.dispose();
112
111
  }
113
112
  }
114
113
  export {
115
- R as default
114
+ x as default
116
115
  };
@@ -1,9 +1,9 @@
1
1
  import { gatherSub as x } from "../ops/gatherSub.js";
2
2
  import { scatterSub as L } from "../ops/scatterSub.js";
3
- import { l as C, t as u, z as E, b as G } from "../index-BAzbokzv.js";
4
- import { s as y } from "../softmax-fsdtf6JC.js";
5
- import { m as z } from "../max-DtlIuVeW.js";
6
- import { l as v } from "../log_sum_exp-YEo2h3gb.js";
3
+ import { q as C, t as u, z as E, c as G } from "../index-C0dhsYom.js";
4
+ import { s as y } from "../softmax-GPNK3o-U.js";
5
+ import { m as z } from "../max-BQc2Aj-I.js";
6
+ import { l as v } from "../log_sum_exp-D086OgZJ.js";
7
7
  function k(t, s) {
8
8
  return u(() => {
9
9
  const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a = G(h, r), m = v(a, -1);
@@ -1,35 +1,35 @@
1
- import { m as f, v as S, e as w } from "../index-BAzbokzv.js";
2
- import { z as i } from "../zeros-YJDE7oRb.js";
3
- async function P(s) {
1
+ import { m as y, v as P, e as S } from "../index-C0dhsYom.js";
2
+ import { z as i } from "../zeros-DCPCdFGq.js";
3
+ async function w(s) {
4
4
  const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
5
5
  await e.data(), e.dispose(), n && n.dispose(), t.dispose();
6
6
  }
7
- async function v(s) {
8
- const t = f(), e = t.numBytesInGPUAllocated ?? t.numBytes;
9
- await P(s);
7
+ async function k(s) {
8
+ const t = y(), e = t.numBytesInGPUAllocated ?? t.numBytesAllocatedInGPU ?? t.numBytes;
9
+ await w(s);
10
10
  const n = i([1, s.config.gpt.blockSize], "int32"), r = i([1, s.config.gpt.blockSize], "int32"), o = {
11
11
  perBatch: 0,
12
12
  tapeSize: 0,
13
13
  gradients: s.getNumParams() * 4
14
- }, g = () => {
15
- const [a, l] = s.forward({ training: !0 }, n, r), d = w().state.activeTape;
14
+ }, f = () => {
15
+ const [c, l] = s.forward({ training: !0 }, n, r), p = S().state.activeTape;
16
16
  let u = 0;
17
- if (d)
18
- for (const z of d)
17
+ if (p)
18
+ for (const z of p)
19
19
  u += z.saved?.reduce((B, I) => B + I.size * 4, 0) || 0;
20
- return o.tapeSize = u, a.dispose(), l;
21
- }, { value: c, grads: m } = S(g), p = f(), y = p.numBytesInGPUAllocated ?? p.numBytes;
22
- o.perBatch = y - e - o.gradients, console.log("Dummy training memory requirements:", o), await c.data(), c.dispose();
23
- for (const a in m)
24
- m[a].dispose();
20
+ return o.tapeSize = u, c.dispose(), l;
21
+ }, { value: m, grads: d } = P(f), a = y(), g = a.numBytesInGPUAllocated ?? a.numBytesAllocatedInGPU ?? a.numBytes;
22
+ o.perBatch = g - e - o.gradients, console.log("Dummy training memory requirements:", o), await m.data(), m.dispose();
23
+ for (const c in d)
24
+ d[c].dispose();
25
25
  return n.dispose(), r.dispose(), o;
26
26
  }
27
- function A(s) {
27
+ function v(s) {
28
28
  const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
29
29
  e.dispose(), n && n.dispose(), t.dispose();
30
30
  }
31
31
  export {
32
- A as dummyPass,
33
- P as dummyPassAsync,
34
- v as dummyPassTrainAsync
32
+ v as dummyPass,
33
+ w as dummyPassAsync,
34
+ k as dummyPassTrainAsync
35
35
  };
@@ -1,23 +1,22 @@
1
- import { t as y } from "../index-BAzbokzv.js";
2
- import { t as x } from "../tensor2d-CPEkynbH.js";
3
- import { c as f } from "../concat-5aPGqw3Z.js";
4
- async function A(o, r, a, c, T) {
1
+ import "../index-C0dhsYom.js";
2
+ import { t as m } from "../tensor2d-B_kyod7_.js";
3
+ import { c as u } from "../concat-CsHeR4zV.js";
4
+ async function v(o, r, a, c, f) {
5
5
  if (c <= 0)
6
6
  throw new Error("Length must be a positive integer");
7
7
  if (a.length === 0)
8
8
  throw new Error("Prompt cannot be an empty string");
9
- const p = await o.tokenise([a], !0), s = r.config.gpt.useRope ? new Array(r.config.gpt.nLayer).fill(void 0) : void 0, d = y(() => {
10
- let t = x(p, [1, p[0].length], "int32"), e = t;
11
- for (let g = 0; g < c; g++) {
12
- const { output: n } = r.generate(t, s, T), l = t, m = e;
13
- e = f([e, n], 1), t = s ? n : f([t, n], 1), l.dispose(), m.dispose(), s || n.dispose();
14
- }
15
- return e;
16
- }), w = await d.array();
17
- d.dispose();
18
- const i = w[0], u = i.indexOf(o.eosToken);
19
- return u !== -1 && i.splice(u), await o.decode(i);
9
+ const p = await o.tokenise([a], !0), s = r.config.gpt.useRope ? new Array(r.config.gpt.nLayer).fill(void 0) : void 0;
10
+ let e = m(p, [1, p[0].length], "int32"), t = e;
11
+ for (let g = 0; g < c; g++) {
12
+ const { output: n } = await r.generate(e, s, f), T = e, l = t;
13
+ t = u([t, n], 1), e = s ? n : u([e, n], 1), T.dispose(), l.dispose(), s || n.dispose();
14
+ }
15
+ const w = await t.array();
16
+ t.dispose();
17
+ const i = w[0], d = i.indexOf(o.eosToken);
18
+ return d !== -1 && i.splice(d), await o.decode(i);
20
19
  }
21
20
  export {
22
- A as generateText
21
+ v as generateText
23
22
  };
@@ -0,0 +1,2 @@
1
+ import { Tensor2D } from '@tensorflow/tfjs-core';
2
+ export default function multinomialCPU(probs: number[]): Tensor2D;
@@ -0,0 +1,13 @@
1
+ import "../index-C0dhsYom.js";
2
+ import { t as e } from "../tensor2d-B_kyod7_.js";
3
+ function l(n) {
4
+ let r = 0;
5
+ const i = Math.random();
6
+ for (let t = 0; t < n.length; t++)
7
+ if (r += n[t], i < r)
8
+ return e([[t]], [1, 1], "int32");
9
+ return e([[n.length - 1]], [1, 1], "int32");
10
+ }
11
+ export {
12
+ l as default
13
+ };
@@ -0,0 +1,2 @@
1
+ import { Tensor } from '@tensorflow/tfjs-core';
2
+ export default function performanceTest(fn: () => Tensor, iterations?: number): Promise<number>;
@@ -0,0 +1,16 @@
1
+ import { t as r } from "../index-C0dhsYom.js";
2
+ async function d(s, o = 10) {
3
+ for (let e = 0; e < 10; e++) {
4
+ const t = s();
5
+ await t.data(), t.dispose();
6
+ }
7
+ const a = performance.now();
8
+ for (let e = 0; e < o; e++) {
9
+ const t = r(s);
10
+ e === o - 1 && await t.data(), t.dispose();
11
+ }
12
+ return (performance.now() - a) / o;
13
+ }
14
+ export {
15
+ d as default
16
+ };
@@ -1,6 +1,7 @@
1
1
  import { MemoryInfo } from '@tensorflow/tfjs-core';
2
2
  export interface ExtendedMemoryInfo extends MemoryInfo {
3
3
  numBytesInGPUAllocated?: number;
4
+ numBytesAllocatedInGPU?: number;
4
5
  }
5
6
  export default class MemoryProfiler {
6
7
  private log;
@@ -1,5 +1,5 @@
1
- import { m as a } from "../index-BAzbokzv.js";
2
- const m = 1024 * 1024;
1
+ import { m as a } from "../index-C0dhsYom.js";
2
+ const s = 1024 * 1024;
3
3
  class l {
4
4
  log = /* @__PURE__ */ new Map();
5
5
  maxMemory = 0;
@@ -20,14 +20,17 @@ class l {
20
20
  console.warn("MemoryProfiler: endMemory called without matching startMemory");
21
21
  return;
22
22
  }
23
- const e = a(), s = this.lastMemInfo.pop(), t = (e.numBytesInGPUAllocated ?? e.numBytes) - (s?.numBytes ?? s?.numBytesInGPUAllocated ?? 0);
24
- this.log.set(o, Math.max(this.log.get(o) || 0, t)), t > this.maxMemory && (this.maxMemory = t, this.maxLabel = o), this.peakMemory = Math.max(this.peakMemory, e.numBytesInGPUAllocated ?? e.numBytes);
23
+ const e = a(), t = this.lastMemInfo.pop(), m = (e.numBytesInGPUAllocated ?? e.numBytesAllocatedInGPU ?? e.numBytes) - (t?.numBytesInGPUAllocated ?? t?.numBytesAllocatedInGPU ?? t?.numBytes ?? 0);
24
+ this.log.set(o, Math.max(this.log.get(o) || 0, m)), m > this.maxMemory && (this.maxMemory = m, this.maxLabel = o), this.peakMemory = Math.max(
25
+ this.peakMemory,
26
+ e.numBytesInGPUAllocated ?? e.numBytesAllocatedInGPU ?? e.numBytes
27
+ );
25
28
  }
26
29
  printSummary() {
27
30
  console.log("Memory Usage Summary:");
28
31
  for (const [o, e] of this.log.entries())
29
- console.log(`- ${o}: ${(e / m).toFixed(2)} MB`);
30
- this.maxLabel && console.log(`Peak Memory Usage: ${(this.maxMemory / m).toFixed(2)} MB at "${this.maxLabel}"`), console.log(`Overall Peak Memory Usage: ${(this.peakMemory / m).toFixed(2)} MB`);
32
+ console.log(`- ${o}: ${(e / s).toFixed(2)} MB`);
33
+ this.maxLabel && console.log(`Peak Memory Usage: ${(this.maxMemory / s).toFixed(2)} MB at "${this.maxLabel}"`), console.log(`Overall Peak Memory Usage: ${(this.peakMemory / s).toFixed(2)} MB`);
31
34
  }
32
35
  }
33
36
  export {
@@ -1,5 +1,5 @@
1
- import "../index-BAzbokzv.js";
2
- import { t as y } from "../tensor-Xyi595sG.js";
1
+ import "../index-C0dhsYom.js";
2
+ import { t as y } from "../tensor-RvZVNmg0.js";
3
3
  function l(t) {
4
4
  if (t === "float32") return "F32";
5
5
  if (t === "int32") return "I32";
@@ -1,5 +1,5 @@
1
- import "../index-BAzbokzv.js";
2
- import { t as p } from "../tensor-Xyi595sG.js";
1
+ import "../index-C0dhsYom.js";
2
+ import { t as p } from "../tensor-RvZVNmg0.js";
3
3
  function h(n) {
4
4
  const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
5
5
  let t = 0;