@genai-fi/nanogpt 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. package/dist/Generator.js +13 -9
  2. package/dist/NanoGPTModel.js +10 -10
  3. package/dist/{RealDiv-C4hOvYOZ.js → RealDiv-Dy0p8Bvo.js} +11 -11
  4. package/dist/{Reshape-BLijOA8h.js → Reshape-DH5srBP0.js} +2 -2
  5. package/dist/Reshape-DvudQDvJ.js +30 -0
  6. package/dist/TeachableLLM.js +9 -5
  7. package/dist/{TiedEmbedding-BLltddza.js → TiedEmbedding-BxOerUmB.js} +4 -4
  8. package/dist/{axis_util-DaAl5MER.js → axis_util-BzbKo31C.js} +1 -1
  9. package/dist/backend.js +2 -2
  10. package/dist/{backend_util-DWiwsi2N.js → backend_util-TE7aTPhZ.js} +40 -40
  11. package/dist/{broadcast_to-C4v-j9yA.js → broadcast_to-CdbwV-Dj.js} +2 -2
  12. package/dist/{concat-CsHeR4zV.js → concat-CsxrgovM.js} +1 -1
  13. package/dist/{dataset-JDyjG3QR.js → dataset-CtdBYwjo.js} +7 -7
  14. package/dist/{dropout-hpDwECTe.js → dropout-DYs5QFGQ.js} +11 -11
  15. package/dist/{gather-D0_gPiBz.js → gather-CMMy2KEG.js} +4 -4
  16. package/dist/{gelu-uyHP1x1f.js → gelu-C-dPj6Ku.js} +1 -1
  17. package/dist/{gpgpu_math-DJm3ZTAf.js → gpgpu_math-DGNLNL4I.js} +2 -2
  18. package/dist/{index-C0dhsYom.js → index-BoWRt-10.js} +126 -126
  19. package/dist/{index-BPPzKVdR.js → index-CLthM0TO.js} +1083 -1106
  20. package/dist/{kernel_funcs_utils-CwRTFqrc.js → kernel_funcs_utils-BYKWV8Aa.js} +3 -3
  21. package/dist/layers/BaseLayer.js +2 -2
  22. package/dist/layers/CausalSelfAttention.js +8 -8
  23. package/dist/layers/MLP.js +5 -5
  24. package/dist/layers/RMSNorm.js +3 -3
  25. package/dist/layers/RoPECache.js +4 -4
  26. package/dist/layers/TiedEmbedding.js +5 -5
  27. package/dist/layers/TransformerBlock.js +1 -1
  28. package/dist/loader/loadTransformers.js +1 -1
  29. package/dist/loader/oldZipLoad.js +11 -7
  30. package/dist/{log_sum_exp-D086OgZJ.js → log_sum_exp-DbjkV734.js} +8 -8
  31. package/dist/main.d.ts +11 -0
  32. package/dist/main.js +44 -27
  33. package/dist/{mat_mul-1nwdPkQ_.js → mat_mul-8m8pfdcx.js} +1 -1
  34. package/dist/{max-BQc2Aj-I.js → max-Ddnnb5xe.js} +3 -3
  35. package/dist/{mulmat_packed_gpu-Gzf3I9UV.js → mulmat_packed_gpu-VSekgsNv.js} +1 -1
  36. package/dist/{ones-D63HpSF_.js → ones-Dj0SDhHf.js} +2 -2
  37. package/dist/ops/adamAdjust.d.ts +2 -0
  38. package/dist/ops/adamAdjust.js +9 -0
  39. package/dist/ops/adamMoments.d.ts +2 -0
  40. package/dist/ops/adamMoments.js +9 -0
  41. package/dist/ops/appendCache.js +3 -3
  42. package/dist/ops/attentionMask.js +1 -1
  43. package/dist/ops/cpu/adamAdjust.d.ts +1 -0
  44. package/dist/ops/cpu/adamAdjust.js +18 -0
  45. package/dist/ops/cpu/adamMoments.d.ts +1 -0
  46. package/dist/ops/cpu/adamMoments.js +16 -0
  47. package/dist/ops/cpu/appendCache.js +2 -2
  48. package/dist/ops/cpu/attentionMask.js +5 -5
  49. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  50. package/dist/ops/cpu/gatherSub.js +3 -3
  51. package/dist/ops/cpu/gelu.js +1 -1
  52. package/dist/ops/cpu/matMulGelu.js +2 -2
  53. package/dist/ops/cpu/matMulMul.js +1 -1
  54. package/dist/ops/cpu/mulDropout.js +1 -1
  55. package/dist/ops/cpu/normRMS.js +1 -1
  56. package/dist/ops/cpu/qkv.js +3 -3
  57. package/dist/ops/cpu/rope.js +5 -5
  58. package/dist/ops/cpu/scatterSub.js +11 -11
  59. package/dist/ops/fusedSoftmax.js +1 -1
  60. package/dist/ops/gatherSub.js +1 -1
  61. package/dist/ops/gelu.js +2 -2
  62. package/dist/ops/grads/attentionMask.js +1 -1
  63. package/dist/ops/grads/fusedSoftmax.js +2 -2
  64. package/dist/ops/grads/gelu.js +2 -2
  65. package/dist/ops/grads/matMulGelu.js +1 -1
  66. package/dist/ops/grads/normRMS.js +1 -1
  67. package/dist/ops/grads/qkv.js +1 -1
  68. package/dist/ops/grads/rope.js +1 -1
  69. package/dist/ops/matMulGelu.js +1 -1
  70. package/dist/ops/matMulMul.js +1 -1
  71. package/dist/ops/mulDrop.js +1 -1
  72. package/dist/ops/normRMS.js +1 -1
  73. package/dist/ops/qkv.js +1 -1
  74. package/dist/ops/rope.js +4 -4
  75. package/dist/ops/scatterSub.js +1 -1
  76. package/dist/ops/webgl/adamAdjust.d.ts +1 -0
  77. package/dist/ops/webgl/adamAdjust.js +50 -0
  78. package/dist/ops/webgl/adamMoments.d.ts +1 -0
  79. package/dist/ops/webgl/adamMoments.js +40 -0
  80. package/dist/ops/webgl/appendCache.js +1 -1
  81. package/dist/ops/webgl/attentionMask.js +1 -1
  82. package/dist/ops/webgl/fusedSoftmax.js +4 -4
  83. package/dist/ops/webgl/gatherSub.js +8 -8
  84. package/dist/ops/webgl/gelu.js +2 -2
  85. package/dist/ops/webgl/log.js +3 -3
  86. package/dist/ops/webgl/matMulGelu.js +4 -4
  87. package/dist/ops/webgl/matMulMul.js +1 -1
  88. package/dist/ops/webgl/mulDropout.js +1 -1
  89. package/dist/ops/webgl/normRMS.js +2 -2
  90. package/dist/ops/webgl/qkv.js +1 -1
  91. package/dist/ops/webgl/rope.js +1 -1
  92. package/dist/ops/webgl/scatterSub.js +1 -1
  93. package/dist/ops/webgpu/adamAdjust.d.ts +1 -0
  94. package/dist/ops/webgpu/adamAdjust.js +54 -0
  95. package/dist/ops/webgpu/adamMoments.d.ts +1 -0
  96. package/dist/ops/webgpu/adamMoments.js +58 -0
  97. package/dist/ops/webgpu/appendCache.js +22 -18
  98. package/dist/ops/webgpu/attentionMask.js +24 -17
  99. package/dist/ops/webgpu/gatherSub.js +17 -15
  100. package/dist/ops/webgpu/gelu.js +7 -6
  101. package/dist/ops/webgpu/index.js +3 -0
  102. package/dist/ops/webgpu/normRMS.js +35 -101
  103. package/dist/ops/webgpu/normRMSGrad.d.ts +1 -0
  104. package/dist/ops/webgpu/normRMSGrad.js +133 -0
  105. package/dist/ops/webgpu/qkv.js +21 -16
  106. package/dist/ops/webgpu/rope.js +37 -23
  107. package/dist/ops/webgpu/scatterSub.js +16 -13
  108. package/dist/ops/webgpu/utils/reductions.d.ts +9 -0
  109. package/dist/ops/webgpu/utils/reductions.js +68 -0
  110. package/dist/{ops-CIQLNshk.js → ops-BFGCx8Ri.js} +195 -219
  111. package/dist/{random_width-DkYP8W8N.js → random_width-sZORGo5k.js} +22 -21
  112. package/dist/{range-CYzpQY53.js → range-CRuAh-gd.js} +1 -1
  113. package/dist/{reciprocal-_A9yv27J.js → reciprocal-BvGAyKyu.js} +1 -1
  114. package/dist/{register_all_kernels-guvSxp7M.js → register_all_kernels-BwDSRN-f.js} +30 -29
  115. package/dist/{reshape-BMUzc1UY.js → reshape-CdBq1WJ6.js} +3 -3
  116. package/dist/{scatter_nd_util-IRBqKz_b.js → scatter_nd_util-DUstGbU1.js} +1 -1
  117. package/dist/{selu_util-Dt_iuXaq.js → selu_util-BJEXVvjX.js} +41 -41
  118. package/dist/{shared-CDu9S76h.js → shared-B8ztnyEk.js} +6 -6
  119. package/dist/{shared-BNa2q6jD.js → shared-wS99K7_n.js} +1 -1
  120. package/dist/{sin-Cocju-BY.js → sin-BeA3tsEd.js} +6 -6
  121. package/dist/slice-BiOsknYS.js +28 -0
  122. package/dist/{softmax-GPNK3o-U.js → softmax-Bv_6lyMX.js} +3 -3
  123. package/dist/{split-CHzJjxDv.js → split-B-dikLRw.js} +1 -1
  124. package/dist/{stack-Dpgg_1W1.js → stack-B17UN2nn.js} +1 -1
  125. package/dist/{sum-B8wEpKsg.js → sum-66ew2byf.js} +3 -3
  126. package/dist/{tensor-RvZVNmg0.js → tensor-JwS7ZYY6.js} +1 -1
  127. package/dist/{tensor2d-B_kyod7_.js → tensor2d-wxPAnDQy.js} +1 -1
  128. package/dist/training/Adam.d.ts +22 -0
  129. package/dist/training/Adam.js +93 -0
  130. package/dist/training/AdamExt.d.ts +1 -1
  131. package/dist/training/AdamExt.js +13 -12
  132. package/dist/training/DatasetBuilder.js +35 -32
  133. package/dist/training/FullTrainer.js +22 -22
  134. package/dist/training/Trainer.d.ts +1 -1
  135. package/dist/training/Trainer.js +32 -32
  136. package/dist/training/sparseCrossEntropy.d.ts +0 -4
  137. package/dist/training/sparseCrossEntropy.js +7 -7
  138. package/dist/utilities/arrayClose.d.ts +1 -0
  139. package/dist/utilities/arrayClose.js +11 -0
  140. package/dist/utilities/dummy.js +2 -2
  141. package/dist/utilities/generate.js +3 -3
  142. package/dist/utilities/multinomialCPU.js +2 -2
  143. package/dist/utilities/performance.d.ts +1 -1
  144. package/dist/utilities/performance.js +11 -11
  145. package/dist/utilities/profile.js +1 -1
  146. package/dist/utilities/safetensors.js +2 -2
  147. package/dist/utilities/weights.js +2 -2
  148. package/dist/{variable-DXEUOwew.js → variable-BuddVFLa.js} +1 -1
  149. package/dist/{webgpu_util-g13LvDIv.js → webgpu_program-PFzf1hAQ.js} +138 -215
  150. package/dist/webgpu_util-D____QpY.js +80 -0
  151. package/dist/{zeros-DCPCdFGq.js → zeros--BdLQ3oG.js} +4 -4
  152. package/package.json +1 -1
@@ -1,6 +1,6 @@
1
- import { k as B, j as G, ak as K, a1 as W, al as z, am as V, ac as N, an as F, u as S } from "./index-C0dhsYom.js";
2
- import { u as O, f as Y } from "./gpgpu_math-DJm3ZTAf.js";
3
- import { f as v } from "./backend_util-DWiwsi2N.js";
1
+ import { k as B, j as G, am as K, a6 as W, an as z, ao as V, ac as N, ap as F, u as S } from "./index-BoWRt-10.js";
2
+ import { u as O, f as Y } from "./gpgpu_math-DGNLNL4I.js";
3
+ import { f as v } from "./backend_util-TE7aTPhZ.js";
4
4
  /**
5
5
  * @license
6
6
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,5 +1,5 @@
1
- import { T as g, q as p, e as o, w as v } from "../index-C0dhsYom.js";
2
- import { v as _ } from "../variable-DXEUOwew.js";
1
+ import { T as g, y as p, e as o, A as v } from "../index-BoWRt-10.js";
2
+ import { v as _ } from "../variable-BuddVFLa.js";
3
3
  class M {
4
4
  parent;
5
5
  config;
@@ -3,14 +3,14 @@ import O from "./BaseLayer.js";
3
3
  import { qkv as P } from "../ops/qkv.js";
4
4
  import { rope as v } from "../ops/rope.js";
5
5
  import { appendCache as V } from "../ops/appendCache.js";
6
- import { o as c, t as C } from "../index-C0dhsYom.js";
6
+ import { w as c, t as C } from "../index-BoWRt-10.js";
7
7
  import { fusedSoftmax as T } from "../ops/fusedSoftmax.js";
8
- import { d as y } from "../random_width-DkYP8W8N.js";
9
- import { v as b } from "../variable-DXEUOwew.js";
10
- import { r as k, d as L } from "../dropout-hpDwECTe.js";
11
- import { r as N } from "../reshape-BMUzc1UY.js";
12
- import { m as R } from "../mat_mul-1nwdPkQ_.js";
13
- class $ extends O {
8
+ import { d as y } from "../random_width-sZORGo5k.js";
9
+ import { v as b } from "../variable-BuddVFLa.js";
10
+ import { r as k, d as L } from "../dropout-DYs5QFGQ.js";
11
+ import { r as N } from "../reshape-CdBq1WJ6.js";
12
+ import { m as R } from "../mat_mul-8m8pfdcx.js";
13
+ class W extends O {
14
14
  divisor;
15
15
  index;
16
16
  units;
@@ -92,5 +92,5 @@ class $ extends O {
92
92
  }
93
93
  }
94
94
  export {
95
- $ as default
95
+ W as default
96
96
  };
@@ -1,10 +1,10 @@
1
- import { t as l } from "../index-C0dhsYom.js";
1
+ import { t as l } from "../index-BoWRt-10.js";
2
2
  import u from "./BaseLayer.js";
3
3
  import { matMulGelu as M } from "../ops/matMulGelu.js";
4
- import { v as o } from "../variable-DXEUOwew.js";
5
- import { r as h, d as f } from "../dropout-hpDwECTe.js";
6
- import { r as d } from "../reshape-BMUzc1UY.js";
7
- import { m as c } from "../mat_mul-1nwdPkQ_.js";
4
+ import { v as o } from "../variable-BuddVFLa.js";
5
+ import { r as h, d as f } from "../dropout-DYs5QFGQ.js";
6
+ import { r as d } from "../reshape-CdBq1WJ6.js";
7
+ import { m as c } from "../mat_mul-8m8pfdcx.js";
8
8
  class V extends u {
9
9
  index;
10
10
  hiddenUnits;
@@ -1,8 +1,8 @@
1
- import { t as s } from "../index-C0dhsYom.js";
1
+ import { t as s } from "../index-BoWRt-10.js";
2
2
  import e from "./BaseLayer.js";
3
3
  import { normRMS as a } from "../ops/normRMS.js";
4
- import { v as i } from "../variable-DXEUOwew.js";
5
- import { o as m } from "../ones-D63HpSF_.js";
4
+ import { v as i } from "../variable-BuddVFLa.js";
5
+ import { o as m } from "../ones-Dj0SDhHf.js";
6
6
  class f extends e {
7
7
  GAMMA;
8
8
  constructor(r, t = "", o) {
@@ -1,7 +1,7 @@
1
- import { b as t, n as h, t as n, o as p } from "../index-C0dhsYom.js";
2
- import { r as c } from "../reciprocal-_A9yv27J.js";
3
- import { c as f, s as m } from "../sin-Cocju-BY.js";
4
- import { r as a } from "../range-CYzpQY53.js";
1
+ import { b as t, x as h, t as n, w as p } from "../index-BoWRt-10.js";
2
+ import { r as c } from "../reciprocal-BvGAyKyu.js";
3
+ import { c as f, s as m } from "../sin-BeA3tsEd.js";
4
+ import { r as a } from "../range-CRuAh-gd.js";
5
5
  class D {
6
6
  constructor(o) {
7
7
  this.config = o;
@@ -1,9 +1,9 @@
1
- import "../random_width-DkYP8W8N.js";
2
- import "../index-C0dhsYom.js";
3
- import { T as e } from "../TiedEmbedding-BLltddza.js";
1
+ import "../random_width-sZORGo5k.js";
2
+ import "../index-BoWRt-10.js";
3
+ import { T as e } from "../TiedEmbedding-BxOerUmB.js";
4
4
  import "./BaseLayer.js";
5
- import "../variable-DXEUOwew.js";
6
- import "../gather-D0_gPiBz.js";
5
+ import "../variable-BuddVFLa.js";
6
+ import "../gather-CMMy2KEG.js";
7
7
  export {
8
8
  e as default
9
9
  };
@@ -2,7 +2,7 @@ import l from "./CausalSelfAttention.js";
2
2
  import r from "./MLP.js";
3
3
  import o from "./RMSNorm.js";
4
4
  import d from "./BaseLayer.js";
5
- import { t as p } from "../index-C0dhsYom.js";
5
+ import { t as p } from "../index-BoWRt-10.js";
6
6
  class k extends d {
7
7
  ln1;
8
8
  attn;
@@ -2,7 +2,7 @@ import b from "../NanoGPTModel.js";
2
2
  import c from "../tokeniser/CharTokeniser.js";
3
3
  import l from "../tokeniser/bpe.js";
4
4
  import { load_safetensors as u } from "../utilities/safetensors.js";
5
- import { Y as y } from "../index-C0dhsYom.js";
5
+ import { a0 as y } from "../index-BoWRt-10.js";
6
6
  import { dummyPassAsync as h } from "../utilities/dummy.js";
7
7
  async function L(e, a, r, t) {
8
8
  const n = {
@@ -1,13 +1,17 @@
1
1
  import d from "../NanoGPTModel.js";
2
2
  import "../jszip.min-CjP2V1VV.js";
3
3
  import h from "../tokeniser/CharTokeniser.js";
4
- import { Y as k } from "../index-C0dhsYom.js";
4
+ import { a0 as k } from "../index-BoWRt-10.js";
5
5
  import b from "../tokeniser/bpe.js";
6
6
  import { dummyPassAsync as u } from "../utilities/dummy.js";
7
7
  import "../Generator.js";
8
8
  import "../index-Dwqa6Zy2.js";
9
- import "../dataset-JDyjG3QR.js";
9
+ import "../dataset-CtdBYwjo.js";
10
10
  import "../index-Tf7vU29b.js";
11
+ import "../ops/cpu/adamAdjust.js";
12
+ import "../ops/webgl/adamAdjust.js";
13
+ import "../ops/cpu/adamMoments.js";
14
+ import "../ops/webgl/adamMoments.js";
11
15
  import "../papaparse.min-C8l2Kvo1.js";
12
16
  import "../ops/cpu/scatterSub.js";
13
17
  import "../ops/webgl/scatterSub.js";
@@ -19,8 +23,8 @@ import "../ops/grads/attentionMask.js";
19
23
  import "../ops/cpu/qkv.js";
20
24
  import "../ops/webgl/qkv.js";
21
25
  import "../ops/grads/qkv.js";
22
- import "../random_width-DkYP8W8N.js";
23
- import "../register_all_kernels-guvSxp7M.js";
26
+ import "../random_width-sZORGo5k.js";
27
+ import "../register_all_kernels-BwDSRN-f.js";
24
28
  import "../ops/cpu/rope.js";
25
29
  import "../ops/webgl/rope.js";
26
30
  import "../ops/grads/rope.js";
@@ -34,13 +38,13 @@ import "../ops/webgl/matMulGelu.js";
34
38
  import "../ops/grads/matMulGelu.js";
35
39
  import "../ops/cpu/gelu.js";
36
40
  import "../ops/webgl/gelu.js";
37
- import "../gelu-uyHP1x1f.js";
41
+ import "../gelu-C-dPj6Ku.js";
38
42
  import "../ops/cpu/normRMS.js";
39
43
  import "../ops/webgl/normRMS.js";
40
44
  import "../ops/grads/normRMS.js";
41
45
  import "../ops/webgl/log.js";
42
46
  import { importWeights as O } from "../utilities/weights.js";
43
- async function ft(o) {
47
+ async function yt(o) {
44
48
  const n = /* @__PURE__ */ new Map(), s = await o.file("manifest.json")?.async("string");
45
49
  if (!s)
46
50
  throw new Error("Manifest file not found in the zip archive");
@@ -72,5 +76,5 @@ async function ft(o) {
72
76
  return { model: e, tokeniser: l };
73
77
  }
74
78
  export {
75
- ft as default
79
+ yt as default
76
80
  };
@@ -1,8 +1,8 @@
1
- import { x as r, y as p, E as u, a7 as E, a8 as h, p as S, c as $, a6 as d } from "./index-C0dhsYom.js";
2
- import { e as K } from "./axis_util-DaAl5MER.js";
3
- import { m as T } from "./max-BQc2Aj-I.js";
4
- import { r as m } from "./reshape-BMUzc1UY.js";
5
- import { s as _ } from "./sum-B8wEpKsg.js";
1
+ import { B as r, C as p, E as u, ae as E, af as h, p as S, c as $, o as d } from "./index-BoWRt-10.js";
2
+ import { e as K } from "./axis_util-BzbKo31C.js";
3
+ import { m as T } from "./max-Ddnnb5xe.js";
4
+ import { r as m } from "./reshape-CdBq1WJ6.js";
5
+ import { s as _ } from "./sum-66ew2byf.js";
6
6
  /**
7
7
  * @license
8
8
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -61,7 +61,7 @@ const w = /* @__PURE__ */ r({ log_: v });
61
61
  * limitations under the License.
62
62
  * =============================================================================
63
63
  */
64
- function y(s, n = null, o = !1) {
64
+ function A(s, n = null, o = !1) {
65
65
  const a = p(s, "x", "logSumExp"), t = S(n, a.shape), x = T(
66
66
  a,
67
67
  t,
@@ -74,9 +74,9 @@ function y(s, n = null, o = !1) {
74
74
  }
75
75
  return e;
76
76
  }
77
- const M = /* @__PURE__ */ r({ logSumExp_: y });
77
+ const L = /* @__PURE__ */ r({ logSumExp_: A });
78
78
  export {
79
79
  w as a,
80
80
  N as e,
81
- M as l
81
+ L as l
82
82
  };
package/dist/main.d.ts CHANGED
@@ -1,3 +1,7 @@
1
+ import { default as CausalSelfAttention } from './layers/CausalSelfAttention';
2
+ import { default as MLP } from './layers/MLP';
3
+ import { default as TransformerBlock } from './layers/TransformerBlock';
4
+ import { default as RoPECache } from './layers/RoPECache';
1
5
  export { default as NanoGPT } from './NanoGPTModel';
2
6
  export { default as TeachableLLM } from './TeachableLLM';
3
7
  export { default as CharTokeniser } from './tokeniser/CharTokeniser';
@@ -13,3 +17,10 @@ export type { GPTConfig } from './config';
13
17
  export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
14
18
  export { selectBackend } from './backend';
15
19
  export { default as performanceTest } from './utilities/performance';
20
+ export declare const layers: {
21
+ CausalSelfAttention: typeof CausalSelfAttention;
22
+ MLP: typeof MLP;
23
+ TransformerBlock: typeof TransformerBlock;
24
+ RoPECache: typeof RoPECache;
25
+ };
26
+ export { default as AdamExt } from './training/AdamExt';
package/dist/main.js CHANGED
@@ -1,11 +1,11 @@
1
- import { default as R } from "./NanoGPTModel.js";
2
- import { default as q } from "./TeachableLLM.js";
3
- import { default as A } from "./tokeniser/CharTokeniser.js";
4
- import { default as I } from "./tokeniser/bpe.js";
5
- import { default as K } from "./utilities/waitForModel.js";
6
- import { default as Q } from "./data/textLoader.js";
7
- import { estimateMemoryUsage as V, estimateParameterCount as W, estimateResources as X, estimateTrainingMemoryUsage as Y, validateConfig as Z } from "./utilities/parameters.js";
8
- import "./index-C0dhsYom.js";
1
+ import { default as W } from "./NanoGPTModel.js";
2
+ import { default as Y } from "./TeachableLLM.js";
3
+ import { default as _ } from "./tokeniser/CharTokeniser.js";
4
+ import { default as oo } from "./tokeniser/bpe.js";
5
+ import { default as to } from "./utilities/waitForModel.js";
6
+ import { default as eo } from "./data/textLoader.js";
7
+ import { estimateMemoryUsage as po, estimateParameterCount as ao, estimateResources as fo, estimateTrainingMemoryUsage as so, validateConfig as lo } from "./utilities/parameters.js";
8
+ import "./index-BoWRt-10.js";
9
9
  import "./ops/cpu/scatterSub.js";
10
10
  import "./ops/webgl/scatterSub.js";
11
11
  import "./ops/cpu/gatherSub.js";
@@ -16,10 +16,10 @@ import "./ops/grads/attentionMask.js";
16
16
  import "./ops/cpu/qkv.js";
17
17
  import "./ops/webgl/qkv.js";
18
18
  import "./ops/grads/qkv.js";
19
- import "./random_width-DkYP8W8N.js";
20
- import "./register_all_kernels-guvSxp7M.js";
19
+ import "./random_width-sZORGo5k.js";
20
+ import "./register_all_kernels-BwDSRN-f.js";
21
21
  import "./index-Tf7vU29b.js";
22
- import "./dataset-JDyjG3QR.js";
22
+ import "./dataset-CtdBYwjo.js";
23
23
  import "./ops/cpu/rope.js";
24
24
  import "./ops/webgl/rope.js";
25
25
  import "./ops/grads/rope.js";
@@ -33,25 +33,42 @@ import "./ops/webgl/matMulGelu.js";
33
33
  import "./ops/grads/matMulGelu.js";
34
34
  import "./ops/cpu/gelu.js";
35
35
  import "./ops/webgl/gelu.js";
36
- import "./gelu-uyHP1x1f.js";
36
+ import "./gelu-C-dPj6Ku.js";
37
37
  import "./ops/cpu/normRMS.js";
38
38
  import "./ops/webgl/normRMS.js";
39
39
  import "./ops/grads/normRMS.js";
40
40
  import "./ops/webgl/log.js";
41
- import { selectBackend as $ } from "./backend.js";
42
- import { default as ot } from "./utilities/performance.js";
41
+ import "./ops/cpu/adamMoments.js";
42
+ import "./ops/webgl/adamMoments.js";
43
+ import "./ops/cpu/adamAdjust.js";
44
+ import "./ops/webgl/adamAdjust.js";
45
+ import { selectBackend as xo } from "./backend.js";
46
+ import { default as co } from "./utilities/performance.js";
47
+ import o from "./layers/CausalSelfAttention.js";
48
+ import r from "./layers/MLP.js";
49
+ import t from "./layers/TransformerBlock.js";
50
+ import m from "./layers/RoPECache.js";
51
+ import { default as ko } from "./training/AdamExt.js";
52
+ const O = {
53
+ CausalSelfAttention: o,
54
+ MLP: r,
55
+ TransformerBlock: t,
56
+ RoPECache: m
57
+ };
43
58
  export {
44
- I as BPETokeniser,
45
- A as CharTokeniser,
46
- R as NanoGPT,
47
- q as TeachableLLM,
48
- V as estimateMemoryUsage,
49
- W as estimateParameterCount,
50
- X as estimateResources,
51
- Y as estimateTrainingMemoryUsage,
52
- Q as loadTextData,
53
- ot as performanceTest,
54
- $ as selectBackend,
55
- Z as validateConfig,
56
- K as waitForModel
59
+ ko as AdamExt,
60
+ oo as BPETokeniser,
61
+ _ as CharTokeniser,
62
+ W as NanoGPT,
63
+ Y as TeachableLLM,
64
+ po as estimateMemoryUsage,
65
+ ao as estimateParameterCount,
66
+ fo as estimateResources,
67
+ so as estimateTrainingMemoryUsage,
68
+ O as layers,
69
+ eo as loadTextData,
70
+ co as performanceTest,
71
+ xo as selectBackend,
72
+ lo as validateConfig,
73
+ to as waitForModel
57
74
  };
@@ -1,4 +1,4 @@
1
- import { x as m, y as s, F as c, E as M, H as p } from "./index-C0dhsYom.js";
1
+ import { B as m, C as s, I as c, E as M, J as p } from "./index-BoWRt-10.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { x as r, y as x, E as e, N as c } from "./index-C0dhsYom.js";
1
+ import { B as r, C as e, E as x, U as c } from "./index-BoWRt-10.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -16,8 +16,8 @@ import { x as r, y as x, E as e, N as c } from "./index-C0dhsYom.js";
16
16
  * =============================================================================
17
17
  */
18
18
  function m(n, s = null, o = !1) {
19
- const t = { x: x(n, "x", "max") }, a = { reductionIndices: s, keepDims: o };
20
- return e.runKernel(c, t, a);
19
+ const t = { x: e(n, "x", "max") }, a = { reductionIndices: s, keepDims: o };
20
+ return x.runKernel(c, t, a);
21
21
  }
22
22
  const l = /* @__PURE__ */ r({ max_: m });
23
23
  export {
@@ -1,4 +1,4 @@
1
- import { u as z } from "./gpgpu_math-DJm3ZTAf.js";
1
+ import { u as z } from "./gpgpu_math-DGNLNL4I.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,5 +1,5 @@
1
- import { C as n, D as t, j as m, E as i } from "./index-C0dhsYom.js";
2
- import { z as c, c as f } from "./zeros-DCPCdFGq.js";
1
+ import { F as n, H as t, j as m, E as i } from "./index-BoWRt-10.js";
2
+ import { z as c, c as f } from "./zeros--BdLQ3oG.js";
3
3
  /**
4
4
  * @license
5
5
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -0,0 +1,2 @@
1
+ import { Tensor } from '@tensorflow/tfjs-core';
2
+ export declare function adamAdjust(moments: Tensor, value: Tensor, beta1: number, beta2: number, epsilon: number, learningRate: number): Tensor;
@@ -0,0 +1,9 @@
1
+ import { e as i } from "../index-BoWRt-10.js";
2
+ import "./cpu/adamAdjust.js";
3
+ import "./webgl/adamAdjust.js";
4
+ function p(r, t, e, n, m, o) {
5
+ return i().runKernel("AdamAdjust", { moments: r, value: t }, { beta1: e, beta2: n, epsilon: m, learningRate: o });
6
+ }
7
+ export {
8
+ p as adamAdjust
9
+ };
@@ -0,0 +1,2 @@
1
+ import { Tensor } from '@tensorflow/tfjs-core';
2
+ export declare function adamMoments(moments: Tensor, gradient: Tensor, beta1: number, beta2: number): Tensor;
@@ -0,0 +1,9 @@
1
+ import { e as o } from "../index-BoWRt-10.js";
2
+ import "./cpu/adamMoments.js";
3
+ import "./webgl/adamMoments.js";
4
+ function p(e, n, r, m) {
5
+ return o().runKernel("AdamMoments", { moments: e, gradient: n }, { beta1: r, beta2: m });
6
+ }
7
+ export {
8
+ p as adamMoments
9
+ };
@@ -1,8 +1,8 @@
1
- import { e as a } from "../index-C0dhsYom.js";
1
+ import { e as a } from "../index-BoWRt-10.js";
2
2
  import "./cpu/appendCache.js";
3
3
  import "./webgl/appendCache.js";
4
- import { c as s } from "../concat-CsHeR4zV.js";
5
- import { z as c } from "../zeros-DCPCdFGq.js";
4
+ import { c as s } from "../concat-CsxrgovM.js";
5
+ import { z as c } from "../zeros--BdLQ3oG.js";
6
6
  function i(r, p, n, o) {
7
7
  if (!o) {
8
8
  const e = r.shape[2];
@@ -1,4 +1,4 @@
1
- import { e as o } from "../index-C0dhsYom.js";
1
+ import { e as o } from "../index-BoWRt-10.js";
2
2
  import "./cpu/attentionMask.js";
3
3
  import "./webgl/attentionMask.js";
4
4
  import "./grads/attentionMask.js";
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,18 @@
1
+ import { f as k, n as t, o as i, a as q, q as z } from "../../index-BoWRt-10.js";
2
+ function A(a) {
3
+ const { moments: n, value: r } = a.inputs, { beta1: l, beta2: u, epsilon: m, learningRate: d } = a.attrs, e = n.shape.length, c = new Array(e).fill(0), s = n.shape.slice();
4
+ s[e - 1] = 1;
5
+ const o = c.slice();
6
+ o[e - 1] = 1;
7
+ const b = s.slice(), p = n.slice(c, s).squeeze([e - 1]), M = n.slice(o, b).squeeze([e - 1]), f = t(p, l), g = t(M, u);
8
+ return i(
9
+ q(t(f, i(z(g), m ?? 1e-8)), -d),
10
+ r
11
+ );
12
+ }
13
+ const C = {
14
+ kernelName: "AdamAdjust",
15
+ backendName: "cpu",
16
+ kernelFunc: A
17
+ };
18
+ k(C);
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,16 @@
1
+ import { f as p } from "../../index-BoWRt-10.js";
2
+ import { s as f } from "../../stack-B17UN2nn.js";
3
+ function b(t) {
4
+ const { moments: n, gradient: c } = t.inputs, { beta1: o, beta2: m } = t.attrs, e = n.shape.length, a = new Array(e).fill(0), s = n.shape.slice();
5
+ s[e - 1] = 1;
6
+ const i = a.slice();
7
+ i[e - 1] = 1;
8
+ const r = s.slice(), l = n.slice(a, s).squeeze([e - 1]), u = n.slice(i, r).squeeze([e - 1]), M = l.mul(o).add(c.mul(1 - o)), d = u.mul(m).add(c.square().mul(1 - m));
9
+ return f([M, d], -1);
10
+ }
11
+ const g = {
12
+ kernelName: "AdamMoments",
13
+ backendName: "cpu",
14
+ kernelFunc: b
15
+ };
16
+ p(g);
@@ -1,5 +1,5 @@
1
- import { f as d } from "../../index-C0dhsYom.js";
2
- import { c as h } from "../../concat-CsHeR4zV.js";
1
+ import { f as d } from "../../index-BoWRt-10.js";
2
+ import { c as h } from "../../concat-CsxrgovM.js";
3
3
  function u(p) {
4
4
  const { cache: n, item: s } = p.inputs, { maxSize: i, pastLen: c } = p.attrs, t = n.shape[0], o = n.shape[1], a = n.shape[3], e = s.shape[2];
5
5
  if (c + e <= i) {
@@ -1,8 +1,8 @@
1
- import { f as a, h as p, b as u } from "../../index-C0dhsYom.js";
2
- import { l as N, w as b } from "../../ops-CIQLNshk.js";
3
- import { o as A } from "../../ones-D63HpSF_.js";
4
- import { z as I } from "../../zeros-DCPCdFGq.js";
5
- import { m as g } from "../../mat_mul-1nwdPkQ_.js";
1
+ import { f as a, h as p, b as u } from "../../index-BoWRt-10.js";
2
+ import { l as N, w as b } from "../../ops-BFGCx8Ri.js";
3
+ import { o as A } from "../../ones-Dj0SDhHf.js";
4
+ import { z as I } from "../../zeros--BdLQ3oG.js";
5
+ import { m as g } from "../../mat_mul-8m8pfdcx.js";
6
6
  function o(n) {
7
7
  const { q: s, k: e } = n.inputs, { divisor: r } = n.attrs, c = s.shape[2], t = e.shape[2], m = N.bandPart(A([t, t]), -1, 0).cast("bool"), l = I([t, t]), i = p([t, t], Number.NEGATIVE_INFINITY), f = b(m, l, i), k = g(s, e, !1, !0).mul(u(r)), d = f.slice([0, 0], [c, t]).expandDims(0).expandDims(0);
8
8
  return k.add(d);
@@ -1,5 +1,5 @@
1
- import { f as e } from "../../index-C0dhsYom.js";
2
- import { s as f } from "../../softmax-GPNK3o-U.js";
1
+ import { f as e } from "../../index-BoWRt-10.js";
2
+ import { s as f } from "../../softmax-Bv_6lyMX.js";
3
3
  function n(t) {
4
4
  const { inputs: s, attrs: a } = t, { logits: o } = s, { dim: i, dropoutRate: r } = a;
5
5
  if (!o)
@@ -1,6 +1,6 @@
1
- import { x as u, y as c, E as g, $ as h, f as m, c as p } from "../../index-C0dhsYom.js";
2
- import { r as f } from "../../range-CYzpQY53.js";
3
- import { s as l } from "../../stack-Dpgg_1W1.js";
1
+ import { B as u, C as c, E as g, a5 as h, f as m, c as p } from "../../index-BoWRt-10.js";
2
+ import { r as f } from "../../range-CRuAh-gd.js";
3
+ import { s as l } from "../../stack-B17UN2nn.js";
4
4
  /**
5
5
  * @license
6
6
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { f as t, t as d } from "../../index-C0dhsYom.js";
1
+ import { f as t, t as d } from "../../index-BoWRt-10.js";
2
2
  const o = 0.7978845608028654, c = 0.044715;
3
3
  function m(r) {
4
4
  const { inputs: u } = r, { x: n } = u, e = n;
@@ -1,5 +1,5 @@
1
- import { f as e, t as m } from "../../index-C0dhsYom.js";
2
- import { g as d, d as M } from "../../gelu-uyHP1x1f.js";
1
+ import { f as e, t as m } from "../../index-BoWRt-10.js";
2
+ import { g as d, d as M } from "../../gelu-C-dPj6Ku.js";
3
3
  function c(t) {
4
4
  const { inputs: u } = t, { x: n, kernel: l } = u, a = n, r = l;
5
5
  return m(() => {
@@ -1,4 +1,4 @@
1
- import { f as e, t as i } from "../../index-C0dhsYom.js";
1
+ import { f as e, t as i } from "../../index-BoWRt-10.js";
2
2
  function n(t) {
3
3
  const { inputs: r, attrs: l } = t, { transposeA: o, transposeB: s } = l, { x: u, kernel: a, y: c } = r, m = u, k = a, M = c;
4
4
  return i(() => m.matMul(k, o, s).mul(M));
@@ -1,4 +1,4 @@
1
- import { f as e, a as t } from "../../index-C0dhsYom.js";
1
+ import { f as e, a as t } from "../../index-BoWRt-10.js";
2
2
  function n(o) {
3
3
  const { inputs: r } = o, { a: l, b: u } = r;
4
4
  return console.warn("Using fallback mulDrop implementation without dropout."), t(l, u);
@@ -1,4 +1,4 @@
1
- import { f as o, t as d } from "../../index-C0dhsYom.js";
1
+ import { f as o, t as d } from "../../index-BoWRt-10.js";
2
2
  function i(t) {
3
3
  const { inputs: e } = t, { x: n, gamma: s } = e, r = n, a = s;
4
4
  return d(() => {
@@ -1,6 +1,6 @@
1
- import { f as q } from "../../index-C0dhsYom.js";
2
- import { r as o } from "../../reshape-BMUzc1UY.js";
3
- import { s as x } from "../../split-CHzJjxDv.js";
1
+ import { f as q } from "../../index-BoWRt-10.js";
2
+ import { r as o } from "../../reshape-CdBq1WJ6.js";
3
+ import { s as x } from "../../split-B-dikLRw.js";
4
4
  function v(p) {
5
5
  const { x: c, kernel: K } = p.inputs, { heads: n } = p.attrs, [s, e, t] = c.shape, a = o(c, [s * e, t]), i = a.dot(K);
6
6
  a.dispose();
@@ -1,8 +1,8 @@
1
- import { f as S } from "../../index-C0dhsYom.js";
2
- import { r as F } from "../../range-CYzpQY53.js";
3
- import { g as I } from "../../gather-D0_gPiBz.js";
4
- import { s as E } from "../../stack-Dpgg_1W1.js";
5
- import { c as T } from "../../concat-CsHeR4zV.js";
1
+ import { f as S } from "../../index-BoWRt-10.js";
2
+ import { r as F } from "../../range-CRuAh-gd.js";
3
+ import { g as I } from "../../gather-CMMy2KEG.js";
4
+ import { s as E } from "../../stack-B17UN2nn.js";
5
+ import { c as T } from "../../concat-CsxrgovM.js";
6
6
  function U(t, c, p, o, r) {
7
7
  const n = o.shape[3], s = p;
8
8
  if (s > n) return o;
@@ -1,8 +1,8 @@
1
- import { x as f, C as g, y as r, E as l, _ as N, f as b, c as S, a as h } from "../../index-C0dhsYom.js";
2
- import { v as D } from "../../scatter_nd_util-IRBqKz_b.js";
3
- import { r as k } from "../../range-CYzpQY53.js";
4
- import { s as v } from "../../stack-Dpgg_1W1.js";
5
- import { o as E } from "../../ones-D63HpSF_.js";
1
+ import { B as f, F as g, C as r, E as l, a4 as N, f as b, c as S, a as h } from "../../index-BoWRt-10.js";
2
+ import { v as D } from "../../scatter_nd_util-DUstGbU1.js";
3
+ import { r as k } from "../../range-CRuAh-gd.js";
4
+ import { s as v } from "../../stack-B17UN2nn.js";
5
+ import { o as E } from "../../ones-Dj0SDhHf.js";
6
6
  /**
7
7
  * @license
8
8
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -26,14 +26,14 @@ function I(a, e, s) {
26
26
  const c = { indices: n, updates: t }, o = { shape: s };
27
27
  return l.runKernel(N, c, o);
28
28
  }
29
- const y = /* @__PURE__ */ f({ scatterND_: I });
30
- function C(a) {
31
- const { logits: e, labels: s, dy: n } = a.inputs, t = s.shape[0], c = e.shape[1], o = k(0, t, 1, "int32"), i = v([o, s], 1), d = E([t]), u = y(i, d, [t, c]), p = S(e, u), m = n.reshape([t, 1]);
29
+ const C = /* @__PURE__ */ f({ scatterND_: I });
30
+ function K(a) {
31
+ const { logits: e, labels: s, dy: n } = a.inputs, t = s.shape[0], c = e.shape[1], o = k(0, t, 1, "int32"), i = v([o, s], 1), d = E([t]), u = C(i, d, [t, c]), p = S(e, u), m = n.reshape([t, 1]);
32
32
  return h(p, m);
33
33
  }
34
- const K = {
34
+ const L = {
35
35
  kernelName: "EfficientScatterSub",
36
36
  backendName: "cpu",
37
- kernelFunc: C
37
+ kernelFunc: K
38
38
  };
39
- b(K);
39
+ b(L);
@@ -1,4 +1,4 @@
1
- import { e as t } from "../index-C0dhsYom.js";
1
+ import { e as t } from "../index-BoWRt-10.js";
2
2
  import "./cpu/fusedSoftmax.js";
3
3
  import "./webgl/fusedSoftmax.js";
4
4
  import "./grads/fusedSoftmax.js";
@@ -1,4 +1,4 @@
1
- import { e as n } from "../index-C0dhsYom.js";
1
+ import { e as n } from "../index-BoWRt-10.js";
2
2
  import "./cpu/gatherSub.js";
3
3
  import "./webgl/gatherSub.js";
4
4
  function f(r, e, t) {