@genai-fi/nanogpt 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/dist/Generator.d.ts +10 -5
  2. package/dist/Generator.js +1789 -1765
  3. package/dist/{RealDiv-KAPDe8zB.js → RealDiv-Ds-jvL09.js} +22 -22
  4. package/dist/{Reshape-BYkmUnAv.js → Reshape-Cd6e-Otn.js} +1 -1
  5. package/dist/{Reshape-Zt6eb7yh.js → Reshape-Ct266DEk.js} +9 -9
  6. package/dist/TeachableLLM.d.ts +4 -3
  7. package/dist/TeachableLLM.js +14 -14
  8. package/dist/Trainer.d.ts +2 -2
  9. package/dist/Trainer.js +6 -6
  10. package/dist/{axis_util-BaG7mf5A.js → axis_util-DofAuy0p.js} +3 -3
  11. package/dist/backend.js +2 -2
  12. package/dist/{backend_util-RCe-rHaj.js → backend_util-C7NWHpv7.js} +7 -7
  13. package/dist/{backend_webgpu-DE3ACOLx.js → backend_webgpu-B0Vls736.js} +10 -10
  14. package/dist/{broadcast_to-B3eYlZm7.js → broadcast_to-DDaNMbX7.js} +2 -2
  15. package/dist/checks/appendCache.js +2 -2
  16. package/dist/checks/attentionMask.js +3 -3
  17. package/dist/checks/gelu.js +2 -2
  18. package/dist/checks/matMulGelu.js +2 -2
  19. package/dist/checks/normRMS.js +4 -4
  20. package/dist/checks/normRMSGrad.js +3 -3
  21. package/dist/checks/packUnpack.js +2 -2
  22. package/dist/checks/qkv.js +4 -4
  23. package/dist/checks/rope.js +2 -2
  24. package/dist/{clip_by_value-BnO7-a88.js → clip_by_value-Dn5tzexi.js} +4 -4
  25. package/dist/complex-DClmWqJt.js +11 -0
  26. package/dist/{concat-BV8bt5H-.js → concat-C6X3AAlQ.js} +1 -1
  27. package/dist/{concat_util-DpW8mL_l.js → concat_util-CHsJFZJJ.js} +1 -1
  28. package/dist/{dataset-BcwmTGYc.js → dataset-DcjWqUVQ.js} +7 -7
  29. package/dist/{dropout-BcvN9JYi.js → dropout-OxuaJz6z.js} +11 -11
  30. package/dist/{expand_dims-DT4tEPwA.js → expand_dims-BzfJK2uc.js} +3 -3
  31. package/dist/{exports_initializers-Hta_rEnm.js → exports_initializers-eS9QJ6ut.js} +1 -1
  32. package/dist/{floor-D5QdR_le.js → floor-DIb-lN_u.js} +1 -1
  33. package/dist/gather-BcO5UQNJ.js +9 -0
  34. package/dist/{gelu-CjNPL4OH.js → gelu-DqTbCx5x.js} +1 -1
  35. package/dist/{gpgpu_math-DAOmgtXR.js → gpgpu_math-CJcbnKPC.js} +2 -2
  36. package/dist/{index-DOvlwCh-.js → index-D0RBWjq8.js} +52 -52
  37. package/dist/{index-BwexR4lA.js → index-Dj5TkmPY.js} +89 -89
  38. package/dist/{kernel_funcs_utils-CCzYdUZg.js → kernel_funcs_utils-CSaumNDs.js} +11 -11
  39. package/dist/layers/BaseLayer.js +2 -2
  40. package/dist/layers/CausalSelfAttention.js +6 -6
  41. package/dist/layers/MLP.js +4 -4
  42. package/dist/layers/PositionEmbedding.js +5 -5
  43. package/dist/layers/RMSNorm.js +3 -3
  44. package/dist/layers/RoPECache.js +4 -4
  45. package/dist/layers/TiedEmbedding.js +6 -6
  46. package/dist/layers/TransformerBlock.js +1 -1
  47. package/dist/loader/loadTransformers.js +1 -1
  48. package/dist/loader/oldZipLoad.js +17 -17
  49. package/dist/log_sum_exp-VLZgbFAH.js +39 -0
  50. package/dist/main.d.ts +1 -1
  51. package/dist/main.js +9 -9
  52. package/dist/{matMul16-BWRSOCWB.js → matMul16-cDxwemKj.js} +7 -7
  53. package/dist/{matMulGelu-CzfgT6Wq.js → matMulGelu-B2s_80-H.js} +18 -18
  54. package/dist/{mat_mul-SjpJRLyL.js → mat_mul-DxpNTCRz.js} +3 -3
  55. package/dist/{mod-AnXEvvpo.js → mod-PrOKlFxH.js} +1 -1
  56. package/dist/models/NanoGPTV1.js +2 -2
  57. package/dist/models/model.js +9 -9
  58. package/dist/{ones-D2rT0xk2.js → ones-BX_wEgzB.js} +3 -3
  59. package/dist/ops/adamAdjust.js +1 -1
  60. package/dist/ops/adamMoments.js +1 -1
  61. package/dist/ops/add16.js +1 -1
  62. package/dist/ops/appendCache.js +3 -3
  63. package/dist/ops/attentionMask.js +1 -1
  64. package/dist/ops/concat16.js +2 -2
  65. package/dist/ops/cpu/adamAdjust.js +6 -6
  66. package/dist/ops/cpu/adamMoments.js +2 -2
  67. package/dist/ops/cpu/appendCache.js +5 -5
  68. package/dist/ops/cpu/attentionMask.js +10 -10
  69. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  70. package/dist/ops/cpu/gatherSub.js +6 -6
  71. package/dist/ops/cpu/gelu.js +9 -9
  72. package/dist/ops/cpu/matMul16.js +2 -2
  73. package/dist/ops/cpu/matMulGelu.js +3 -3
  74. package/dist/ops/cpu/matMulMul.js +1 -1
  75. package/dist/ops/cpu/mulDropout.js +1 -1
  76. package/dist/ops/cpu/normRMS.js +3 -3
  77. package/dist/ops/cpu/qkv.js +3 -3
  78. package/dist/ops/cpu/rope.js +9 -9
  79. package/dist/ops/cpu/scatterSub.js +11 -11
  80. package/dist/ops/dot16.js +2 -2
  81. package/dist/ops/gatherSub.js +1 -1
  82. package/dist/ops/gelu.js +2 -2
  83. package/dist/ops/grads/add16.js +4 -4
  84. package/dist/ops/grads/attentionMask.js +2 -2
  85. package/dist/ops/grads/gelu.js +2 -2
  86. package/dist/ops/grads/matMul16.js +3 -3
  87. package/dist/ops/grads/matMulGelu.js +3 -3
  88. package/dist/ops/grads/normRMS.js +7 -7
  89. package/dist/ops/grads/pack16.js +3 -3
  90. package/dist/ops/grads/qkv.js +6 -6
  91. package/dist/ops/grads/rope.js +2 -2
  92. package/dist/ops/grads/softmax16.js +1 -1
  93. package/dist/ops/grads/unpack16.js +2 -2
  94. package/dist/ops/matMul16.js +3 -3
  95. package/dist/ops/matMulGelu.js +2 -2
  96. package/dist/ops/matMulMul.js +1 -1
  97. package/dist/ops/mul16.js +1 -1
  98. package/dist/ops/mulDrop.js +1 -1
  99. package/dist/ops/normRMS.js +1 -1
  100. package/dist/ops/pack16.js +2 -2
  101. package/dist/ops/qkv.js +1 -1
  102. package/dist/ops/reshape16.js +6 -6
  103. package/dist/ops/rope.js +2 -2
  104. package/dist/ops/scatterSub.js +1 -1
  105. package/dist/ops/slice16.js +2 -2
  106. package/dist/ops/softmax16.js +1 -1
  107. package/dist/ops/sub16.js +1 -1
  108. package/dist/ops/sum16.js +2 -2
  109. package/dist/ops/transpose16.js +3 -3
  110. package/dist/ops/unpack16.js +2 -2
  111. package/dist/ops/webgl/adamAdjust.js +2 -2
  112. package/dist/ops/webgl/adamMoments.js +1 -1
  113. package/dist/ops/webgl/appendCache.js +1 -1
  114. package/dist/ops/webgl/attentionMask.js +4 -4
  115. package/dist/ops/webgl/fusedSoftmax.js +6 -6
  116. package/dist/ops/webgl/gatherSub.js +1 -1
  117. package/dist/ops/webgl/gelu.js +2 -2
  118. package/dist/ops/webgl/log.js +3 -3
  119. package/dist/ops/webgl/matMul16.js +11 -11
  120. package/dist/ops/webgl/matMulGelu.js +4 -4
  121. package/dist/ops/webgl/matMulMul.js +7 -7
  122. package/dist/ops/webgl/mulDropout.js +1 -1
  123. package/dist/ops/webgl/normRMS.js +7 -7
  124. package/dist/ops/webgl/qkv.js +1 -1
  125. package/dist/ops/webgl/rope.js +4 -4
  126. package/dist/ops/webgl/scatterSub.js +1 -1
  127. package/dist/ops/webgpu/adamAdjust.js +3 -3
  128. package/dist/ops/webgpu/adamMoments.js +3 -3
  129. package/dist/ops/webgpu/add16.js +1 -1
  130. package/dist/ops/webgpu/appendCache.js +3 -3
  131. package/dist/ops/webgpu/attentionMask.js +5 -5
  132. package/dist/ops/webgpu/attentionMask32_program.js +2 -2
  133. package/dist/ops/webgpu/concat16.js +5 -5
  134. package/dist/ops/webgpu/gatherSub.js +5 -5
  135. package/dist/ops/webgpu/gelu.js +3 -3
  136. package/dist/ops/webgpu/matMul16.js +18 -18
  137. package/dist/ops/webgpu/matMul16_program.js +2 -2
  138. package/dist/ops/webgpu/mul16.js +4 -4
  139. package/dist/ops/webgpu/normRMS.js +6 -6
  140. package/dist/ops/webgpu/normRMSGrad.js +4 -4
  141. package/dist/ops/webgpu/pack16.js +1 -1
  142. package/dist/ops/webgpu/pack16_program.js +2 -2
  143. package/dist/ops/webgpu/qkv.js +6 -6
  144. package/dist/ops/webgpu/rope.js +3 -3
  145. package/dist/ops/webgpu/scatterSub.js +3 -3
  146. package/dist/ops/webgpu/slice16.js +4 -4
  147. package/dist/ops/webgpu/softmax16.js +2 -2
  148. package/dist/ops/webgpu/softmax16_program.js +2 -2
  149. package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
  150. package/dist/ops/webgpu/softmax16grad.js +1 -1
  151. package/dist/ops/webgpu/sub16.js +4 -4
  152. package/dist/ops/webgpu/sum16.js +6 -6
  153. package/dist/ops/webgpu/transpose16.js +2 -2
  154. package/dist/ops/webgpu/transpose16_program.js +2 -2
  155. package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
  156. package/dist/ops/webgpu/unpack16.js +3 -3
  157. package/dist/ops/webgpu/utils/binary_op.js +3 -3
  158. package/dist/ops/webgpu/utils/reductions.js +4 -4
  159. package/dist/{ops-B5yanEdW.js → ops-FJapAPfm.js} +56 -56
  160. package/dist/{pack16-nQ6JaLo-.js → pack16-k4jq6aMX.js} +7 -7
  161. package/dist/patches/webgpu_backend.js +7 -7
  162. package/dist/patches/webgpu_base.js +1 -1
  163. package/dist/patches/webgpu_program.js +8 -8
  164. package/dist/{random_width-or-CEftb.js → random_width-UGQn4OWb.js} +33 -33
  165. package/dist/range-CuGvVN2c.js +10 -0
  166. package/dist/{relu-CP0ZcxWO.js → relu-Cf80uA2p.js} +1 -1
  167. package/dist/{reshape-ByE68wS9.js → reshape-CkjKPPqB.js} +1 -1
  168. package/dist/{resize_nearest_neighbor-B19mCEg2.js → resize_nearest_neighbor-DB8k9KN_.js} +43 -43
  169. package/dist/{rope-Ir4mTyD1.js → rope-BmZmp9uP.js} +1 -1
  170. package/dist/{scatter_nd_util-lvSiX8q4.js → scatter_nd_util-BY22Cc-C.js} +1 -1
  171. package/dist/{selu_util-kbhpTdYD.js → selu_util-BuLbmbrl.js} +5 -5
  172. package/dist/{shared-DT1TkE6w.js → shared-B7USJZgw.js} +1 -1
  173. package/dist/{shared-dntlHIDQ.js → shared-BQboIImQ.js} +86 -86
  174. package/dist/{slice-BfEGSH82.js → slice-Aqy7KbJh.js} +3 -3
  175. package/dist/{slice_util-uTKwiEpW.js → slice_util-D8CQRenR.js} +7 -7
  176. package/dist/{softmax-CA5jFsLR.js → softmax-faLoUZVT.js} +1 -1
  177. package/dist/{split-CVLc0w--.js → split-BNz5jcGc.js} +3 -3
  178. package/dist/{squeeze-C7Z2srUo.js → squeeze--YMgaAAf.js} +2 -2
  179. package/dist/{stack-Cf4n9h0N.js → stack-WJK22CFn.js} +1 -1
  180. package/dist/{step-CINUs5QB.js → step-dXR33iOg.js} +32 -32
  181. package/dist/sum-BdplSvq_.js +11 -0
  182. package/dist/tensor-BQqrDvpx.js +8 -0
  183. package/dist/tensor1d-LxP9asMm.js +11 -0
  184. package/dist/{tensor2d-Bs9wZRc7.js → tensor2d-BN1sSfQO.js} +3 -3
  185. package/dist/{tensor4d-BARPdTaS.js → tensor4d-DVwr7pLF.js} +1 -1
  186. package/dist/{tfjs_backend-y1cvNhLA.js → tfjs_backend-Vi4JfLzT.js} +28 -28
  187. package/dist/{tile-mbfagpsB.js → tile-CvN_LyVr.js} +4 -4
  188. package/dist/tokeniser/BaseTokeniser.d.ts +27 -0
  189. package/dist/tokeniser/BaseTokeniser.js +94 -0
  190. package/dist/tokeniser/CharTokeniser.d.ts +4 -3
  191. package/dist/tokeniser/CharTokeniser.js +46 -32
  192. package/dist/tokeniser/bpe.d.ts +4 -3
  193. package/dist/tokeniser/bpe.js +60 -45
  194. package/dist/tokeniser/type.d.ts +11 -0
  195. package/dist/training/Adam.js +2 -2
  196. package/dist/training/AdamExt.js +1 -1
  197. package/dist/training/DatasetBuilder.d.ts +2 -2
  198. package/dist/training/DatasetBuilder.js +32 -36
  199. package/dist/training/FullTrainer.js +1 -1
  200. package/dist/training/Trainer.d.ts +3 -3
  201. package/dist/training/Trainer.js +2 -2
  202. package/dist/training/sparseCrossEntropy.js +3 -3
  203. package/dist/{transpose-ClWiBS_b.js → transpose-JawVKyZy.js} +5 -5
  204. package/dist/{unsorted_segment_sum-BDDhB_E6.js → unsorted_segment_sum-LAbmE9G4.js} +78 -78
  205. package/dist/utilities/dummy.js +3 -3
  206. package/dist/utilities/multinomialCPU.js +2 -2
  207. package/dist/utilities/packed.js +1 -1
  208. package/dist/utilities/performance.js +1 -1
  209. package/dist/utilities/profile.js +1 -1
  210. package/dist/utilities/safetensors.js +2 -2
  211. package/dist/utilities/sentences.js +5 -5
  212. package/dist/utilities/weights.js +2 -2
  213. package/dist/{variable-WawDEaAb.js → variable-DQ9yYgEU.js} +1 -1
  214. package/dist/{webgpu_program-DuOXPQol.js → webgpu_program-CAE4RICo.js} +3 -3
  215. package/dist/{webgpu_util-RxEF33Rj.js → webgpu_util-BdovYhXr.js} +1 -1
  216. package/dist/{zeros-KnWaWf-X.js → zeros-DeiE2zTa.js} +2 -2
  217. package/dist/{zeros_like-DvE73F4e.js → zeros_like-BAz3iKru.js} +77 -77
  218. package/package.json +1 -1
  219. package/dist/complex-DjxcVmoX.js +0 -11
  220. package/dist/gather-D3JcZUaI.js +0 -9
  221. package/dist/log_sum_exp-ngO0-4pK.js +0 -39
  222. package/dist/range-BklejeeW.js +0 -10
  223. package/dist/sum-DWAtNGez.js +0 -11
  224. package/dist/tensor-DJoc7gJU.js +0 -8
  225. package/dist/tensor1d-D11P_7Dp.js +0 -11
@@ -1,7 +1,7 @@
1
- import { l as _, aG as P, A as k, B as M, E as v, aH as F, aI as J, aJ as X, aK as q, aL as Q, aM as Y } from "./index-DOvlwCh-.js";
1
+ import { y as _, aG as P, q as A, u as M, E as v, aH as q, aI as F, aJ as J, aK as X, aL as Q, aM as Y } from "./index-D0RBWjq8.js";
2
2
  function it(t, n, e, o, s = "NHWC", f) {
3
3
  const l = t[3], r = [...n, l], c = nt(s);
4
- return B(t, r, e, f, o, null, null, c);
4
+ return j(t, r, e, f, o, null, null, c);
5
5
  }
6
6
  function ft(t, n, e, o, s, f, l = "channelsLast") {
7
7
  const [r, c] = T(n);
@@ -12,7 +12,7 @@ function ft(t, n, e, o, s, f, l = "channelsLast") {
12
12
  u = [r, c, t[1], t[1]];
13
13
  else
14
14
  throw new Error(`Unknown dataFormat ${l}`);
15
- return B(t, u, e, o, s, f, !1, l);
15
+ return j(t, u, e, o, s, f, !1, l);
16
16
  }
17
17
  function ht(t, n, e, o, s, f, l = "NDHWC") {
18
18
  const [r, c, u] = K(n);
@@ -25,7 +25,7 @@ function ht(t, n, e, o, s, f, l = "NDHWC") {
25
25
  throw new Error(`Unknown dataFormat ${l}`);
26
26
  return Z(t, h, e, o, s, !1, i, f);
27
27
  }
28
- function B(t, n, e, o, s, f, l = !1, r = "channelsLast") {
28
+ function j(t, n, e, o, s, f, l = !1, r = "channelsLast") {
29
29
  let [c, u, h, i] = [-1, -1, -1, -1];
30
30
  if (r === "channelsLast")
31
31
  [c, u, h, i] = t;
@@ -33,15 +33,15 @@ function B(t, n, e, o, s, f, l = !1, r = "channelsLast") {
33
33
  [c, i, u, h] = t;
34
34
  else
35
35
  throw new Error(`Unknown dataFormat ${r}`);
36
- const [a, p, , $] = n, [w, g] = T(e), [x, y] = T(o), L = A(a, x), b = A(p, y), { padInfo: C, outHeight: E, outWidth: D } = d(s, u, h, w, g, L, b, f, r), I = l ? $ * i : $;
36
+ const [a, p, , $] = n, [w, g] = T(e), [x, E] = T(o), L = k(a, x), b = k(p, E), { padInfo: C, outHeight: y, outWidth: D } = d(s, u, h, w, g, L, b, f, r), I = l ? $ * i : $;
37
37
  let m;
38
- return r === "channelsFirst" ? m = [c, I, E, D] : r === "channelsLast" && (m = [c, E, D, I]), {
38
+ return r === "channelsFirst" ? m = [c, I, y, D] : r === "channelsLast" && (m = [c, y, D, I]), {
39
39
  batchSize: c,
40
40
  dataFormat: r,
41
41
  inHeight: u,
42
42
  inWidth: h,
43
43
  inChannels: i,
44
- outHeight: E,
44
+ outHeight: y,
45
45
  outWidth: D,
46
46
  outChannels: I,
47
47
  padInfo: C,
@@ -52,7 +52,7 @@ function B(t, n, e, o, s, f, l = !1, r = "channelsLast") {
52
52
  effectiveFilterHeight: L,
53
53
  effectiveFilterWidth: b,
54
54
  dilationHeight: x,
55
- dilationWidth: y,
55
+ dilationWidth: E,
56
56
  inShape: t,
57
57
  outShape: m,
58
58
  filterShape: n
@@ -66,7 +66,7 @@ function Z(t, n, e, o, s, f = !1, l = "channelsLast", r) {
66
66
  [c, a, u, h, i] = t;
67
67
  else
68
68
  throw new Error(`Unknown dataFormat ${l}`);
69
- const [p, $, w, , g] = n, [x, y, L] = K(e), [b, C, E] = K(o), D = A(p, b), I = A($, C), m = A(w, E), { padInfo: G, outDepth: N, outHeight: R, outWidth: W } = tt(s, u, h, i, x, y, L, D, I, m, r), H = f ? g * a : g;
69
+ const [p, $, w, , g] = n, [x, E, L] = K(e), [b, C, y] = K(o), D = k(p, b), I = k($, C), m = k(w, y), { padInfo: G, outDepth: N, outHeight: R, outWidth: W } = tt(s, u, h, i, x, E, L, D, I, m, r), H = f ? g * a : g;
70
70
  let O;
71
71
  return l === "channelsFirst" ? O = [c, H, N, R, W] : l === "channelsLast" && (O = [c, N, R, W, H]), {
72
72
  batchSize: c,
@@ -81,7 +81,7 @@ function Z(t, n, e, o, s, f = !1, l = "channelsLast", r) {
81
81
  outChannels: H,
82
82
  padInfo: G,
83
83
  strideDepth: x,
84
- strideHeight: y,
84
+ strideHeight: E,
85
85
  strideWidth: L,
86
86
  filterDepth: p,
87
87
  filterHeight: $,
@@ -91,26 +91,26 @@ function Z(t, n, e, o, s, f = !1, l = "channelsLast", r) {
91
91
  effectiveFilterWidth: m,
92
92
  dilationDepth: b,
93
93
  dilationHeight: C,
94
- dilationWidth: E,
94
+ dilationWidth: y,
95
95
  inShape: t,
96
96
  outShape: O,
97
97
  filterShape: n
98
98
  };
99
99
  }
100
100
  function z(t, n, e, o, s) {
101
- o == null && (o = j(t, n, e));
101
+ o == null && (o = B(t, n, e));
102
102
  const f = t[0], l = t[1], r = U((f - n + 2 * o) / e + 1, s), c = U((l - n + 2 * o) / e + 1, s);
103
103
  return [r, c];
104
104
  }
105
105
  function S(t, n, e, o, s, f) {
106
- s == null && (s = j(t, n[0], o[0]));
106
+ s == null && (s = B(t, n[0], o[0]));
107
107
  const l = [0, 0, 0, e];
108
108
  for (let r = 0; r < 3; r++)
109
109
  t[r] + 2 * s >= n[r] && (l[r] = U((t[r] - n[r] + 2 * s) / o[r] + 1, f));
110
110
  return l;
111
111
  }
112
- function j(t, n, e, o = 1) {
113
- const s = A(n, o);
112
+ function B(t, n, e, o = 1) {
113
+ const s = k(n, o);
114
114
  return Math.floor((t[0] * (e - 1) - e + s) / 2);
115
115
  }
116
116
  function T(t) {
@@ -119,7 +119,7 @@ function T(t) {
119
119
  function K(t) {
120
120
  return typeof t == "number" ? [t, t, t] : t;
121
121
  }
122
- function A(t, n) {
122
+ function k(t, n) {
123
123
  return n <= 1 ? t : t + (t - 1) * (n - 1);
124
124
  }
125
125
  function d(t, n, e, o, s, f, l, r, c) {
@@ -157,8 +157,8 @@ function tt(t, n, e, o, s, f, l, r, c, u, h) {
157
157
  a = g[0], p = g[1], $ = g[2];
158
158
  } else if (t === "same") {
159
159
  a = Math.ceil(n / s), p = Math.ceil(e / f), $ = Math.ceil(o / l);
160
- const w = (a - 1) * s + r - n, g = (p - 1) * f + c - e, x = ($ - 1) * l + u - o, y = Math.floor(w / 2), L = w - y, b = Math.floor(g / 2), C = g - b, E = Math.floor(x / 2), D = x - E;
161
- i = { top: b, bottom: C, left: E, right: D, front: y, back: L, type: "SAME" };
160
+ const w = (a - 1) * s + r - n, g = (p - 1) * f + c - e, x = ($ - 1) * l + u - o, E = Math.floor(w / 2), L = w - E, b = Math.floor(g / 2), C = g - b, y = Math.floor(x / 2), D = x - y;
161
+ i = { top: b, bottom: C, left: y, right: D, front: E, back: L, type: "SAME" };
162
162
  } else
163
163
  throw Error(`Unknown padding parameter: ${t}`);
164
164
  return { padInfo: i, outDepth: a, outHeight: p, outWidth: $ };
@@ -212,41 +212,41 @@ function $t(t, n, e) {
212
212
  }
213
213
  function et(t) {
214
214
  const e = { x: M(t, "x", "sigmoid", "float32") };
215
- return v.runKernel(F, e);
215
+ return v.runKernel(q, e);
216
216
  }
217
- const gt = /* @__PURE__ */ k({ sigmoid_: et });
217
+ const gt = /* @__PURE__ */ A({ sigmoid_: et });
218
218
  function ot(t) {
219
219
  const e = { x: M(t, "x", "elu", "float32") };
220
- return v.runKernel(J, e);
220
+ return v.runKernel(F, e);
221
221
  }
222
- const wt = /* @__PURE__ */ k({ elu_: ot });
222
+ const wt = /* @__PURE__ */ A({ elu_: ot });
223
223
  function st(t, n = 0.2) {
224
224
  const o = { x: M(t, "x", "leakyRelu") }, s = { alpha: n };
225
- return v.runKernel(X, o, s);
225
+ return v.runKernel(J, o, s);
226
226
  }
227
- const xt = /* @__PURE__ */ k({ leakyRelu_: st });
227
+ const xt = /* @__PURE__ */ A({ leakyRelu_: st });
228
228
  function rt(t, n) {
229
229
  const e = M(t, "x", "prelu"), o = M(n, "alpha", "prelu"), s = { x: e, alpha: o };
230
- return v.runKernel(q, s);
230
+ return v.runKernel(X, s);
231
231
  }
232
- const Et = /* @__PURE__ */ k({ prelu_: rt });
232
+ const yt = /* @__PURE__ */ A({ prelu_: rt });
233
233
  function lt(t) {
234
234
  const e = { x: M(t, "x", "relu6") };
235
235
  return v.runKernel(Q, e);
236
236
  }
237
- const yt = /* @__PURE__ */ k({ relu6_: lt });
237
+ const Et = /* @__PURE__ */ A({ relu6_: lt });
238
238
  function ct(t, n = 0) {
239
239
  const o = { x: M(t, "x", "step") }, s = { alpha: n };
240
240
  return v.runKernel(Y, o, s);
241
241
  }
242
- const bt = /* @__PURE__ */ k({ step_: ct });
242
+ const bt = /* @__PURE__ */ A({ step_: ct });
243
243
  export {
244
244
  bt as a,
245
- B as b,
245
+ j as b,
246
246
  $t as c,
247
247
  Z as d,
248
248
  wt as e,
249
- j as f,
249
+ B as f,
250
250
  it as g,
251
251
  ft as h,
252
252
  ht as i,
@@ -254,8 +254,8 @@ export {
254
254
  at as k,
255
255
  xt as l,
256
256
  pt as m,
257
- Et as p,
258
- yt as r,
257
+ yt as p,
258
+ Et as r,
259
259
  gt as s,
260
260
  V as t
261
261
  };
@@ -0,0 +1,11 @@
1
+ import { q as a, u as e, N as c, E as l, Q as m } from "./index-D0RBWjq8.js";
2
+ function i(t, n = null, o = !1) {
3
+ let s = e(t, "x", "sum");
4
+ s.dtype === "bool" && (s = c(s, "int32"));
5
+ const r = { x: s }, u = { axis: n, keepDims: o };
6
+ return l.runKernel(m, r, u);
7
+ }
8
+ const f = /* @__PURE__ */ a({ sum_: i });
9
+ export {
10
+ f as s
11
+ };
@@ -0,0 +1,8 @@
1
+ import { Y as t, Z as a } from "./index-D0RBWjq8.js";
2
+ function f(r, n, e) {
3
+ const o = t(r, e);
4
+ return a(r, n, o, e);
5
+ }
6
+ export {
7
+ f as t
8
+ };
@@ -0,0 +1,11 @@
1
+ import { X as o, Y as s, Z as t } from "./index-D0RBWjq8.js";
2
+ function h(r, e) {
3
+ o(r);
4
+ const n = s(r, e);
5
+ if (n.length !== 1)
6
+ throw new Error("tensor1d() requires values to be a flat/TypedArray");
7
+ return t(r, null, n, e);
8
+ }
9
+ export {
10
+ h as t
11
+ };
@@ -1,13 +1,13 @@
1
- import { $ as t, a0 as a, a1 as s } from "./index-DOvlwCh-.js";
1
+ import { X as t, Y as s, Z as a } from "./index-D0RBWjq8.js";
2
2
  function i(n, r, o) {
3
3
  if (t(n), r != null && r.length !== 2)
4
4
  throw new Error("tensor2d() requires shape to have two numbers");
5
- const e = a(n, o);
5
+ const e = s(n, o);
6
6
  if (e.length !== 2 && e.length !== 1)
7
7
  throw new Error("tensor2d() requires values to be number[][] or flat/TypedArray");
8
8
  if (e.length === 1 && r == null)
9
9
  throw new Error("tensor2d() requires shape to be provided when `values` are a flat/TypedArray");
10
- return s(n, r, e, o);
10
+ return a(n, r, e, o);
11
11
  }
12
12
  export {
13
13
  i as t
@@ -1,4 +1,4 @@
1
- import { $ as t, a0 as a, a1 as s } from "./index-DOvlwCh-.js";
1
+ import { X as t, Y as a, Z as s } from "./index-D0RBWjq8.js";
2
2
  function i(n, r, o) {
3
3
  if (t(n), r != null && r.length !== 4)
4
4
  throw new Error("tensor4d() requires shape to have four numbers");
@@ -1,16 +1,16 @@
1
- import { A as w, B as S, l as T, E as q, x as U, L as ie, h as ue, a3 as ae, a6 as le, aX as fe, aF as he, aY as Te, t as $, X as ge, m as ke, w as _e, aZ as Ie } from "./index-DOvlwCh-.js";
2
- import { t as Ee } from "./tensor1d-D11P_7Dp.js";
3
- import { r as Le, d as Ne } from "./dropout-BcvN9JYi.js";
4
- import { s as F } from "./slice-BfEGSH82.js";
5
- import { r as c } from "./reshape-ByE68wS9.js";
6
- import { g as be } from "./gather-D3JcZUaI.js";
7
- import { e as Fe } from "./step-CINUs5QB.js";
8
- import { c as Ce } from "./clip_by_value-BnO7-a88.js";
9
- import { t as Pe } from "./tile-mbfagpsB.js";
10
- import { s as ve, b as Me, c as je, g as Ve } from "./selu_util-kbhpTdYD.js";
11
- import { m as k } from "./mat_mul-SjpJRLyL.js";
12
- import { t as Ue } from "./transpose-ClWiBS_b.js";
13
- import { c as M } from "./concat-BV8bt5H-.js";
1
+ import { q as w, u as S, y as T, E as J, n as U, D as ie, V as ue, $ as ae, a2 as le, aX as fe, aF as he, aY as Te, t as $, N as ge, m as ke, l as _e, aZ as Ie } from "./index-D0RBWjq8.js";
2
+ import { t as Ee } from "./tensor1d-LxP9asMm.js";
3
+ import { r as Le, d as Ne } from "./dropout-OxuaJz6z.js";
4
+ import { s as F } from "./slice-Aqy7KbJh.js";
5
+ import { r as c } from "./reshape-CkjKPPqB.js";
6
+ import { g as be } from "./gather-BcO5UQNJ.js";
7
+ import { e as Fe } from "./step-dXR33iOg.js";
8
+ import { c as Ce } from "./clip_by_value-Dn5tzexi.js";
9
+ import { t as Pe } from "./tile-CvN_LyVr.js";
10
+ import { s as ve, b as Me, c as je, g as Ve } from "./selu_util-BuLbmbrl.js";
11
+ import { m as k } from "./mat_mul-DxpNTCRz.js";
12
+ import { t as Ue } from "./transpose-JawVKyZy.js";
13
+ import { c as M } from "./concat-C6X3AAlQ.js";
14
14
  function Be(e) {
15
15
  return M(
16
16
  e,
@@ -22,11 +22,11 @@ const xe = /* @__PURE__ */ w({ concat1d_: Be });
22
22
  function Ge(e, n) {
23
23
  return M(e, n);
24
24
  }
25
- const Je = /* @__PURE__ */ w({ concat2d_: Ge });
26
- function qe(e, n) {
25
+ const qe = /* @__PURE__ */ w({ concat2d_: Ge });
26
+ function Je(e, n) {
27
27
  return M(e, n);
28
28
  }
29
- const Ke = /* @__PURE__ */ w({ concat3d_: qe });
29
+ const Ke = /* @__PURE__ */ w({ concat3d_: Je });
30
30
  function Ze(e, n) {
31
31
  return M(e, n);
32
32
  }
@@ -52,7 +52,7 @@ function He(e, n, t) {
52
52
  }
53
53
  const V = /* @__PURE__ */ w({ slice4d_: He });
54
54
  function Qe({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activation: o = "linear", preluActivationWeights: a, leakyreluAlpha: f = 0.2 }) {
55
- if (ve(q.state.gradientDepth, o) === !1) {
55
+ if (ve(J.state.gradientDepth, o) === !1) {
56
56
  let D = k(e, n, t, s);
57
57
  return r != null && (D = U(D, r)), Me(D, o, a, f);
58
58
  }
@@ -60,7 +60,7 @@ function Qe({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activ
60
60
  [i, u] = ie(i, u);
61
61
  const m = t ? i.shape[i.rank - 2] : i.shape[i.rank - 1], d = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], _ = t ? i.shape[i.rank - 1] : i.shape[i.rank - 2], h = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], ee = i.shape.slice(0, -2), I = u.shape.slice(0, -2), ne = ue(ee), te = ue(I);
62
62
  T(m === d, () => `Error in fused matMul: inner shapes (${m}) and (${d}) of Tensors with shapes ${i.shape} and ${u.shape} and transposeA=${t} and transposeB=${s} must match.`);
63
- const x = ae(i.shape.slice(0, -2), u.shape.slice(0, -2)).concat([_, h]), G = t ? c(i, [ne, m, _]) : c(i, [ne, _, m]), J = s ? c(u, [te, h, d]) : c(u, [te, d, h]);
63
+ const x = ae(i.shape.slice(0, -2), u.shape.slice(0, -2)).concat([_, h]), G = t ? c(i, [ne, m, _]) : c(i, [ne, _, m]), q = s ? c(u, [te, h, d]) : c(u, [te, d, h]);
64
64
  let E;
65
65
  r != null && (E = S(r, "bias", "fused matMul"), [E] = ie(E, i), ae(x, E.shape));
66
66
  let se;
@@ -75,23 +75,23 @@ function Qe({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activ
75
75
  return [L, N];
76
76
  }, oe = {
77
77
  a: G,
78
- b: J,
78
+ b: q,
79
79
  bias: E,
80
80
  preluActivationWeights: se
81
81
  }, ce = { transposeA: t, transposeB: s, activation: o, leakyreluAlpha: f };
82
82
  return r == null ? le((C, A, O) => {
83
83
  const y = (
84
84
  // tslint:disable-next-line: no-unnecessary-type-assertion
85
- q.runKernel(fe, oe, ce)
85
+ J.runKernel(fe, oe, ce)
86
86
  );
87
87
  return O([C, A, y]), { value: c(y, x), gradFunc: re };
88
- })(G, J) : le((C, A, O, y) => {
88
+ })(G, q) : le((C, A, O, y) => {
89
89
  const j = (
90
90
  // tslint:disable-next-line: no-unnecessary-type-assertion
91
- q.runKernel(fe, oe, ce)
91
+ J.runKernel(fe, oe, ce)
92
92
  );
93
93
  return y([C, A, j, O]), { value: c(j, x), gradFunc: re };
94
- })(G, J, E);
94
+ })(G, q, E);
95
95
  }
96
96
  const pe = /* @__PURE__ */ w({ fusedMatMul_: Qe });
97
97
  class $e extends Error {
@@ -279,7 +279,7 @@ function Gn(e) {
279
279
  B(rn, "PoolMode", e);
280
280
  }
281
281
  const P = [], me = "/";
282
- function Jn(e, n) {
282
+ function qn(e, n) {
283
283
  P.push(e);
284
284
  try {
285
285
  const t = n();
@@ -291,7 +291,7 @@ function Jn(e, n) {
291
291
  function cn() {
292
292
  return P.length === 0 ? "" : P.join(me) + me;
293
293
  }
294
- function qn(e) {
294
+ function Jn(e) {
295
295
  if (!Se(e))
296
296
  throw new Error("Not a valid tensor name: '" + e + "'");
297
297
  return cn() + e;
@@ -482,7 +482,7 @@ function st(e, n) {
482
482
  case 1:
483
483
  return xe([e, n]);
484
484
  case 2:
485
- return Je([e, n], 0);
485
+ return qe([e, n], 0);
486
486
  case 3:
487
487
  return Ke([e, n], 0);
488
488
  case 4:
@@ -621,14 +621,14 @@ export {
621
621
  ot as d,
622
622
  Fn as e,
623
623
  Kn as f,
624
- qn as g,
624
+ Jn as g,
625
625
  In as h,
626
626
  En as i,
627
627
  Xn as j,
628
628
  jn as k,
629
629
  zn as l,
630
630
  it as m,
631
- Jn as n,
631
+ qn as n,
632
632
  de as o,
633
633
  Hn as p,
634
634
  Nn as q,
@@ -1,11 +1,11 @@
1
- import { A as e, B as a, l as i, E as c, T as l } from "./index-DOvlwCh-.js";
2
- function u(r, t) {
1
+ import { q as e, u as a, y as i, E as c, T as u } from "./index-D0RBWjq8.js";
2
+ function l(r, t) {
3
3
  const n = a(r, "x", "tile", "string_or_numeric");
4
4
  i(n.rank === t.length, () => `Error in transpose: rank of input ${n.rank} must match length of reps ${t}.`);
5
5
  const s = { x: n }, o = { reps: t };
6
- return c.runKernel(l, s, o);
6
+ return c.runKernel(u, s, o);
7
7
  }
8
- const p = /* @__PURE__ */ e({ tile_: u });
8
+ const p = /* @__PURE__ */ e({ tile_: l });
9
9
  export {
10
10
  p as t
11
11
  };
@@ -0,0 +1,27 @@
1
+ import { Conversation, ITokeniser } from './type';
2
+ import { default as EE } from 'eventemitter3';
3
+ export declare const SPECIALS: string[];
4
+ export default abstract class BaseTokeniser extends EE<'trainStatus'> implements ITokeniser {
5
+ protected specialTokens: Map<string, number>;
6
+ protected specialTokenSet: Set<number>;
7
+ abstract vocabSize: number;
8
+ abstract eosToken: number;
9
+ abstract bosToken: number;
10
+ abstract trained: boolean;
11
+ abstract addToken(token: string, index?: number): number;
12
+ isSpecialToken(index: number): boolean;
13
+ protected addSpecialTokens(): void;
14
+ protected addSpecialToken(token: string, index: number): void;
15
+ abstract train(text: string[]): Promise<number>;
16
+ abstract tokenise(text: string[], numeric?: boolean): Promise<string[][] | number[][]>;
17
+ abstract detokenise(tokens: string[][] | number[][]): Promise<string[]>;
18
+ abstract getVocab(): string[];
19
+ abstract getMerges(): Promise<[string, string][]>;
20
+ abstract destroy(): void;
21
+ abstract encode(text: string): Promise<number[]>;
22
+ encodeSequence(text: string): Promise<number[]>;
23
+ encodeConversation(conversation: Conversation[], completion?: boolean): Promise<number[]>;
24
+ abstract decode(tokens: number[]): Promise<string>;
25
+ decodeConversation(tokens: number[]): Promise<Conversation[]>;
26
+ getSpecialTokenIndex(token: string): number | undefined;
27
+ }
@@ -0,0 +1,94 @@
1
+ import { E as r } from "../index-DvYrXKkX.js";
2
+ const h = [
3
+ "<eos>",
4
+ "<bos>",
5
+ "",
6
+ "<|user_start|>",
7
+ "<|user_end|>",
8
+ "<|assistant_start|>",
9
+ "<|assistant_end|>",
10
+ "<|system_start|>",
11
+ "<|system_end|>"
12
+ ];
13
+ class k extends r {
14
+ specialTokens = /* @__PURE__ */ new Map();
15
+ specialTokenSet = /* @__PURE__ */ new Set();
16
+ isSpecialToken(e) {
17
+ return this.specialTokenSet.has(e);
18
+ }
19
+ addSpecialTokens() {
20
+ h.forEach((e, t) => {
21
+ this.addToken(e, t), this.specialTokens.set(e, t), this.specialTokenSet.add(t);
22
+ });
23
+ }
24
+ addSpecialToken(e, t) {
25
+ this.specialTokens.set(e, t), this.specialTokenSet.add(t);
26
+ }
27
+ async encodeSequence(e) {
28
+ const t = await this.encode(e);
29
+ return [this.bosToken, ...t, this.eosToken];
30
+ }
31
+ async encodeConversation(e, t) {
32
+ const s = [[this.bosToken]], a = [
33
+ this.getSpecialTokenIndex("<|user_start|>"),
34
+ this.getSpecialTokenIndex("<|assistant_start|>"),
35
+ this.getSpecialTokenIndex("<|system_start|>")
36
+ ], n = [
37
+ this.getSpecialTokenIndex("<|user_end|>"),
38
+ this.getSpecialTokenIndex("<|assistant_end|>"),
39
+ this.getSpecialTokenIndex("<|system_end|>")
40
+ ];
41
+ for (const i of e) {
42
+ const c = await this.encode(i.content);
43
+ switch (i.role) {
44
+ case "user":
45
+ s.push([a[0]]);
46
+ break;
47
+ case "assistant":
48
+ s.push([a[1]]);
49
+ break;
50
+ case "system":
51
+ s.push([a[2]]);
52
+ break;
53
+ }
54
+ switch (s.push(c), i.role) {
55
+ case "user":
56
+ s.push([n[0]]);
57
+ break;
58
+ case "assistant":
59
+ s.push([n[1]]);
60
+ break;
61
+ case "system":
62
+ s.push([n[2]]);
63
+ break;
64
+ }
65
+ }
66
+ const o = s.flat();
67
+ return t ? o.push(a[1]) : o.push(this.eosToken), o;
68
+ }
69
+ async decodeConversation(e) {
70
+ const t = [];
71
+ let s = 0;
72
+ for (; s < e.length; ) {
73
+ const a = e[s];
74
+ let n = null;
75
+ if (a === this.getSpecialTokenIndex("<|user_start|>") ? n = "user" : a === this.getSpecialTokenIndex("<|assistant_start|>") ? n = "assistant" : a === this.getSpecialTokenIndex("<|system_start|>") && (n = "system"), n) {
76
+ s++;
77
+ const o = [];
78
+ for (; s < e.length && e[s] !== this.getSpecialTokenIndex(`<|${n}_end|>`); )
79
+ o.push(e[s]), s++;
80
+ const i = await this.decode(o);
81
+ t.push({ role: n, content: i });
82
+ }
83
+ s++;
84
+ }
85
+ return t;
86
+ }
87
+ getSpecialTokenIndex(e) {
88
+ return this.specialTokens.get(e);
89
+ }
90
+ }
91
+ export {
92
+ h as SPECIALS,
93
+ k as default
94
+ };
@@ -1,14 +1,15 @@
1
- import { default as EE } from 'eventemitter3';
2
- import { ITokeniser } from './type';
3
- export default class CharTokeniser extends EE<'trainStatus'> implements ITokeniser {
1
+ import { default as BaseTokeniser } from './BaseTokeniser';
2
+ export default class CharTokeniser extends BaseTokeniser {
4
3
  vocabSize: number;
5
4
  eosToken: number;
5
+ bosToken: number;
6
6
  unkToken: number;
7
7
  vocab: string[];
8
8
  private cache;
9
9
  private _trained;
10
10
  constructor(vocabSize: number);
11
11
  constructor(vocab: string[]);
12
+ addToken(token: string, index?: number): number;
12
13
  get trained(): boolean;
13
14
  destroy(): void;
14
15
  train(text: string[]): Promise<number>;