@genai-fi/nanogpt 0.18.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/dist/Generator.js +30 -30
  2. package/dist/{RealDiv-ioj6Z-ox.js → RealDiv-CGwv0liw.js} +9 -9
  3. package/dist/{Reshape-BZC-ebeR.js → Reshape-BW__R4mZ.js} +7 -7
  4. package/dist/{Reshape-pwprEaej.js → Reshape-CPBkTIH2.js} +1 -1
  5. package/dist/TeachableLLM.js +17 -17
  6. package/dist/Trainer.js +97 -95
  7. package/dist/{axis_util-QWWgLjut.js → axis_util-GTVlo58H.js} +1 -1
  8. package/dist/backend.js +2 -2
  9. package/dist/{backend_util-qwSFfxYx.js → backend_util-GaFarB78.js} +21 -21
  10. package/dist/{backend_webgpu-DI2wXEC2.js → backend_webgpu-BqASlsbV.js} +8 -8
  11. package/dist/{broadcast_to-C_EJTVTZ.js → broadcast_to-eS93CCN_.js} +2 -2
  12. package/dist/checks/appendCache.js +2 -2
  13. package/dist/checks/attentionMask.js +5 -5
  14. package/dist/checks/gelu.js +2 -2
  15. package/dist/checks/matMulGelu.js +2 -2
  16. package/dist/checks/normRMS.js +6 -6
  17. package/dist/checks/normRMSGrad.js +3 -3
  18. package/dist/checks/packUnpack.js +6 -6
  19. package/dist/checks/qkv.js +2 -2
  20. package/dist/checks/rope.js +2 -2
  21. package/dist/{clip_by_value-CLAD4h_I.js → clip_by_value-DDA7rrcT.js} +1 -1
  22. package/dist/complex-DI35Q-gW.js +11 -0
  23. package/dist/{concat-Dqk7Xk7h.js → concat-CAQpCret.js} +5 -5
  24. package/dist/{concat_util-C1Mxe27t.js → concat_util-D18dJ4fD.js} +1 -1
  25. package/dist/{dataset-DlqAN81i.js → dataset-CGGp1z9P.js} +3 -3
  26. package/dist/{dropout_util-N0z8Os-K.js → dropout_util--NxWuYg2.js} +1 -1
  27. package/dist/{expand_dims-D0rBtgT1.js → expand_dims-Bkd1YD5x.js} +4 -4
  28. package/dist/{exports_initializers-DIOZQt_L.js → exports_initializers-CYzKLjN7.js} +1 -1
  29. package/dist/{floor-CymuCmTO.js → floor-BQtb-Azg.js} +1 -1
  30. package/dist/{gather-DEyjXNb1.js → gather-qIqEqaGn.js} +1 -1
  31. package/dist/{gelu-DpTCC3eB.js → gelu-B220X1Go.js} +1 -1
  32. package/dist/{gpgpu_math-3bCb5ooU.js → gpgpu_math-BwvV12df.js} +25 -25
  33. package/dist/{index-DSGwv2Yx.js → index-CUXkjxiT.js} +33 -33
  34. package/dist/{index-BQvB7LCC.js → index-CjOWnMXP.js} +15 -15
  35. package/dist/{kernel_funcs_utils-DGqzNlHT.js → kernel_funcs_utils-pq0CK9co.js} +6 -6
  36. package/dist/layers/BaseLayer.js +4 -4
  37. package/dist/layers/CausalSelfAttention.d.ts +1 -0
  38. package/dist/layers/CausalSelfAttention.js +14 -14
  39. package/dist/layers/LoRA.js +4 -4
  40. package/dist/layers/MLP.js +4 -4
  41. package/dist/layers/PositionEmbedding.js +5 -5
  42. package/dist/layers/RMSNorm.js +3 -3
  43. package/dist/layers/RoPECache.js +4 -4
  44. package/dist/layers/TiedEmbedding.js +6 -6
  45. package/dist/layers/TransformerBlock.d.ts +1 -0
  46. package/dist/layers/TransformerBlock.js +1 -1
  47. package/dist/layers/WeightStore.js +2 -2
  48. package/dist/loader/loadTransformers.js +1 -1
  49. package/dist/loader/oldZipLoad.js +9 -9
  50. package/dist/loader/save.js +38 -31
  51. package/dist/main.js +9 -9
  52. package/dist/{matMul16-BIT70Vya.js → matMul16-BcVC_E62.js} +3 -3
  53. package/dist/{matMulGelu-CsZnh18H.js → matMulGelu-JNLZqKQp.js} +18 -18
  54. package/dist/mat_mul-DhG0Newp.js +11 -0
  55. package/dist/mod-CSdCpRjf.js +11 -0
  56. package/dist/models/NanoGPTV1.js +2 -2
  57. package/dist/models/NanoGPTV2.js +2 -2
  58. package/dist/models/model.d.ts +1 -0
  59. package/dist/models/model.js +9 -9
  60. package/dist/{not_equal-CkQKkKZy.js → not_equal-hurPF26l.js} +15 -15
  61. package/dist/{ones-DbVB5N58.js → ones-BytntneX.js} +3 -3
  62. package/dist/ops/adamAdjust.js +3 -3
  63. package/dist/ops/adamMoments.js +3 -3
  64. package/dist/ops/add16.js +1 -1
  65. package/dist/ops/appendCache.js +6 -6
  66. package/dist/ops/attentionMask.js +3 -3
  67. package/dist/ops/concat16.js +3 -3
  68. package/dist/ops/cpu/adamAdjust.js +9 -9
  69. package/dist/ops/cpu/adamMoments.js +5 -5
  70. package/dist/ops/cpu/appendCache.js +2 -2
  71. package/dist/ops/cpu/attentionMask.js +6 -6
  72. package/dist/ops/cpu/fusedSoftmax.js +4 -4
  73. package/dist/ops/cpu/gatherSub.js +5 -5
  74. package/dist/ops/cpu/gelu.js +4 -4
  75. package/dist/ops/cpu/matMul16.js +2 -2
  76. package/dist/ops/cpu/matMulGelu.js +7 -7
  77. package/dist/ops/cpu/matMulMul.js +2 -2
  78. package/dist/ops/cpu/mulDropout.js +5 -5
  79. package/dist/ops/cpu/normRMS.js +1 -1
  80. package/dist/ops/cpu/qkv.js +3 -3
  81. package/dist/ops/cpu/rope.js +5 -5
  82. package/dist/ops/cpu/scatterSub.js +5 -5
  83. package/dist/ops/dot16.js +2 -2
  84. package/dist/ops/dropout.js +6 -6
  85. package/dist/ops/dropout16.js +1 -1
  86. package/dist/ops/gatherSub.js +1 -1
  87. package/dist/ops/gelu.js +2 -2
  88. package/dist/ops/globalNorm.js +7 -7
  89. package/dist/ops/grads/add16.js +1 -1
  90. package/dist/ops/grads/attentionMask.js +2 -2
  91. package/dist/ops/grads/dropout16.js +1 -1
  92. package/dist/ops/grads/gelu.js +2 -2
  93. package/dist/ops/grads/matMul16.js +3 -3
  94. package/dist/ops/grads/matMulGelu.js +1 -1
  95. package/dist/ops/grads/mul16.js +1 -1
  96. package/dist/ops/grads/normRMS.js +7 -7
  97. package/dist/ops/grads/pack16.js +3 -3
  98. package/dist/ops/grads/qkv.js +11 -11
  99. package/dist/ops/grads/rope.js +2 -2
  100. package/dist/ops/grads/softmax16.js +1 -1
  101. package/dist/ops/grads/unpack16.js +2 -2
  102. package/dist/ops/matMul16.js +3 -3
  103. package/dist/ops/matMulGelu.js +6 -6
  104. package/dist/ops/matMulMul.js +3 -3
  105. package/dist/ops/mul16.js +1 -1
  106. package/dist/ops/mulDrop.js +3 -3
  107. package/dist/ops/normRMS.js +4 -4
  108. package/dist/ops/pack16.js +2 -2
  109. package/dist/ops/qkv.js +3 -3
  110. package/dist/ops/reshape16.js +6 -6
  111. package/dist/ops/rope.js +2 -2
  112. package/dist/ops/scatterSub.js +1 -1
  113. package/dist/ops/slice16.js +2 -2
  114. package/dist/ops/softmax16.js +1 -1
  115. package/dist/ops/sub16.js +1 -1
  116. package/dist/ops/sum16.js +6 -6
  117. package/dist/ops/transpose16.js +3 -3
  118. package/dist/ops/unpack16.js +2 -2
  119. package/dist/ops/webgl/adamAdjust.js +2 -2
  120. package/dist/ops/webgl/adamMoments.js +1 -1
  121. package/dist/ops/webgl/appendCache.js +1 -1
  122. package/dist/ops/webgl/attentionMask.js +1 -1
  123. package/dist/ops/webgl/dropout16.js +1 -1
  124. package/dist/ops/webgl/fusedSoftmax.js +7 -7
  125. package/dist/ops/webgl/gatherSub.js +3 -3
  126. package/dist/ops/webgl/gelu.js +2 -2
  127. package/dist/ops/webgl/log.js +3 -3
  128. package/dist/ops/webgl/matMul16.js +13 -13
  129. package/dist/ops/webgl/matMulGelu.js +4 -4
  130. package/dist/ops/webgl/matMulMul.js +2 -2
  131. package/dist/ops/webgl/mulDropout.js +1 -1
  132. package/dist/ops/webgl/normRMS.js +2 -2
  133. package/dist/ops/webgl/qkv.js +1 -1
  134. package/dist/ops/webgl/rope.js +1 -1
  135. package/dist/ops/webgl/scatterSub.js +2 -2
  136. package/dist/ops/webgpu/adamAdjust.js +3 -3
  137. package/dist/ops/webgpu/adamMoments.js +3 -3
  138. package/dist/ops/webgpu/add16.js +6 -6
  139. package/dist/ops/webgpu/appendCache.js +3 -3
  140. package/dist/ops/webgpu/attentionMask.js +2 -2
  141. package/dist/ops/webgpu/attentionMask32_program.js +2 -2
  142. package/dist/ops/webgpu/clipScale.js +7 -7
  143. package/dist/ops/webgpu/concat16.js +5 -5
  144. package/dist/ops/webgpu/dropout16.js +6 -6
  145. package/dist/ops/webgpu/gatherSub.js +3 -3
  146. package/dist/ops/webgpu/gelu.js +8 -8
  147. package/dist/ops/webgpu/matMul16.js +16 -16
  148. package/dist/ops/webgpu/matMul16_program.js +2 -2
  149. package/dist/ops/webgpu/mul16.js +5 -5
  150. package/dist/ops/webgpu/norm2.js +1 -1
  151. package/dist/ops/webgpu/normRMS.js +2 -2
  152. package/dist/ops/webgpu/normRMSGrad.js +4 -4
  153. package/dist/ops/webgpu/pack16.js +4 -4
  154. package/dist/ops/webgpu/pack16_program.js +2 -2
  155. package/dist/ops/webgpu/qkv.js +2 -2
  156. package/dist/ops/webgpu/rope.js +3 -3
  157. package/dist/ops/webgpu/scatterSub.js +3 -3
  158. package/dist/ops/webgpu/slice16.js +4 -4
  159. package/dist/ops/webgpu/softmax16.js +4 -4
  160. package/dist/ops/webgpu/softmax16_program.js +2 -2
  161. package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
  162. package/dist/ops/webgpu/softmax16grad.js +4 -4
  163. package/dist/ops/webgpu/sub16.js +6 -6
  164. package/dist/ops/webgpu/sum16.js +3 -3
  165. package/dist/ops/webgpu/transpose16.js +8 -8
  166. package/dist/ops/webgpu/transpose16_program.js +2 -2
  167. package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
  168. package/dist/ops/webgpu/unpack16.js +3 -3
  169. package/dist/ops/webgpu/utils/binary_op.js +3 -3
  170. package/dist/ops/webgpu/utils/reductions.js +5 -5
  171. package/dist/{ops-CURIZSVt.js → ops-CsXeTq1P.js} +100 -100
  172. package/dist/{pack16-WlOSOuZA.js → pack16-bqltoUlR.js} +2 -2
  173. package/dist/patches/webgpu_backend.js +6 -6
  174. package/dist/patches/webgpu_base.js +1 -1
  175. package/dist/patches/webgpu_program.js +2 -2
  176. package/dist/{random_normal-CIm8lk2-.js → random_normal-IBRrha8a.js} +1 -1
  177. package/dist/{random_width-B_fVXhGx.js → random_width-DN5ZtQkM.js} +131 -131
  178. package/dist/{range-BDxO73mk.js → range-C-CjF-LI.js} +1 -1
  179. package/dist/relu-J_X6MUzx.js +9 -0
  180. package/dist/{reshape-BIN71H3p.js → reshape-BDOuCSNW.js} +1 -1
  181. package/dist/{resize_nearest_neighbor-C6_0dAnK.js → resize_nearest_neighbor-BojqlfRe.js} +41 -41
  182. package/dist/{rope-CC5RjmKU.js → rope-DcrZM_e6.js} +5 -5
  183. package/dist/{scatter_nd_util-C-x73Cj6.js → scatter_nd_util-ByNJaL6I.js} +1 -1
  184. package/dist/{segment_util-4zuHV5IG.js → segment_util-Dasb2Zaf.js} +2 -2
  185. package/dist/{selu_util-BXdhy_W6.js → selu_util-BLhIqRkw.js} +5 -5
  186. package/dist/{shared-zTaJ5siv.js → shared-3agzAqQ_.js} +1 -1
  187. package/dist/{shared-DRWDyk9w.js → shared-CagdqkLh.js} +6 -6
  188. package/dist/slice-BzS11Qh0.js +12 -0
  189. package/dist/{slice_util-DPY56GzQ.js → slice_util-CC35pLmT.js} +5 -5
  190. package/dist/{softmax-BLGJqdwx.js → softmax-D4q1LJN7.js} +1 -1
  191. package/dist/split-C2Sj255c.js +9 -0
  192. package/dist/{squeeze-O_YWJpw_.js → squeeze-ho4wLUek.js} +2 -2
  193. package/dist/{stack-z6QE7kmP.js → stack-DudVrtmG.js} +1 -1
  194. package/dist/{step-DQY6_ABw.js → step-BTxPtq1r.js} +4 -4
  195. package/dist/{sum-D39FeU5h.js → sum-BpiwSWvg.js} +3 -3
  196. package/dist/{tensor-D8e0Gd7c.js → tensor-BWFldCso.js} +1 -1
  197. package/dist/{tensor1d-BMl0eZYV.js → tensor1d-LMGMIUlr.js} +1 -1
  198. package/dist/{tensor2d-DTtQ1QcT.js → tensor2d-BnXMKScO.js} +1 -1
  199. package/dist/{tensor4d-Dj4rDssL.js → tensor4d-C6UCG_u8.js} +1 -1
  200. package/dist/{tfjs_backend-Bk3PmK91.js → tfjs_backend-BGnG-ppu.js} +65 -65
  201. package/dist/{tile-CsWlVKKz.js → tile-CFy-xTO6.js} +1 -1
  202. package/dist/tokeniser/BaseTokeniser.d.ts +4 -0
  203. package/dist/tokeniser/BaseTokeniser.js +61 -56
  204. package/dist/tokeniser/type.d.ts +8 -0
  205. package/dist/training/AdamW.js +2 -2
  206. package/dist/training/BasicTrainer.d.ts +1 -0
  207. package/dist/training/BasicTrainer.js +95 -79
  208. package/dist/training/DatasetBuilder.d.ts +6 -2
  209. package/dist/training/DatasetBuilder.js +60 -41
  210. package/dist/training/Evaluator.d.ts +1 -2
  211. package/dist/training/Evaluator.js +21 -31
  212. package/dist/training/SFTTrainer.d.ts +3 -2
  213. package/dist/training/SFTTrainer.js +4 -3
  214. package/dist/training/orthoGrad.js +1 -1
  215. package/dist/training/sparseCrossEntropy.js +38 -38
  216. package/dist/training/tasks/ConversationTask.d.ts +4 -0
  217. package/dist/training/tasks/ConversationTask.js +7 -7
  218. package/dist/training/tasks/PretrainingTask.d.ts +4 -0
  219. package/dist/training/tasks/PretrainingTask.js +11 -7
  220. package/dist/training/tasks/StartSentenceTask.d.ts +4 -0
  221. package/dist/training/tasks/StartSentenceTask.js +5 -5
  222. package/dist/training/tasks/Task.d.ts +12 -0
  223. package/dist/training/tasks/Task.js +55 -31
  224. package/dist/training/types.d.ts +1 -0
  225. package/dist/training/validation.d.ts +1 -1
  226. package/dist/training/validation.js +33 -32
  227. package/dist/{transpose-Qxz-4os3.js → transpose-9kRxIXWR.js} +7 -7
  228. package/dist/{unsorted_segment_sum-BfFVV9Zm.js → unsorted_segment_sum-DJvk5xnh.js} +20 -20
  229. package/dist/utilities/dummy.js +6 -6
  230. package/dist/utilities/multinomialCPU.js +2 -2
  231. package/dist/utilities/packed.js +1 -1
  232. package/dist/utilities/performance.js +1 -1
  233. package/dist/utilities/profile.js +1 -1
  234. package/dist/utilities/safetensors.js +2 -2
  235. package/dist/utilities/sentences.js +5 -5
  236. package/dist/utilities/weights.js +2 -2
  237. package/dist/{variable-SSATClyt.js → variable-Ck482e3n.js} +1 -1
  238. package/dist/{webgpu_program-CbjdYLYk.js → webgpu_program-B4HmApL1.js} +1 -1
  239. package/dist/{webgpu_util-DuofJBMo.js → webgpu_util-DYlGSwOJ.js} +7 -7
  240. package/dist/{zeros-Bw0puq_w.js → zeros-DvZpK8s6.js} +2 -2
  241. package/dist/{zeros_like-rOHr54NY.js → zeros_like-CWjDdwr-.js} +69 -69
  242. package/package.json +1 -1
  243. package/dist/complex-3DpPEG9B.js +0 -11
  244. package/dist/mat_mul-DP86qZtZ.js +0 -11
  245. package/dist/mod-BXjLYwvM.js +0 -11
  246. package/dist/readers-17HLdxVM.js +0 -12
  247. package/dist/relu-DTvZKBsZ.js +0 -9
  248. package/dist/slice-BvItlgXu.js +0 -12
  249. package/dist/split-BN9LkEgS.js +0 -9
  250. package/dist/training/SFTDatasetBuilder.d.ts +0 -23
  251. package/dist/training/SFTDatasetBuilder.js +0 -85
@@ -3226,34 +3226,34 @@ function vs() {
3226
3226
  vs();
3227
3227
  export {
3228
3228
  di as $,
3229
- Ja as A,
3230
- ur as B,
3229
+ Kr as A,
3230
+ K as B,
3231
3231
  yr as C,
3232
- So as D,
3232
+ ur as D,
3233
3233
  f as E,
3234
3234
  Yr as F,
3235
3235
  Qr as G,
3236
- wo as H,
3237
- Gs as I,
3238
- Rt as J,
3239
- yo as K,
3240
- se as L,
3236
+ Ja as H,
3237
+ So as I,
3238
+ wo as J,
3239
+ Gs as K,
3240
+ Rt as L,
3241
3241
  Na as M,
3242
- z as N,
3242
+ yo as N,
3243
3243
  H as O,
3244
3244
  Oa as P,
3245
- Ue as Q,
3245
+ se as Q,
3246
3246
  Xa as R,
3247
3247
  uo as S,
3248
3248
  Co as T,
3249
- Cs as U,
3250
- Tn as V,
3251
- vn as W,
3252
- qa as X,
3253
- ni as Y,
3254
- hs as Z,
3249
+ z as U,
3250
+ Ue as V,
3251
+ Cs as W,
3252
+ Tn as X,
3253
+ vn as Y,
3254
+ qa as Z,
3255
3255
  os as _,
3256
- Ho as a,
3256
+ q as a,
3257
3257
  ea as a$,
3258
3258
  gi as a0,
3259
3259
  tt as a1,
@@ -3318,7 +3318,7 @@ export {
3318
3318
  $s as ax,
3319
3319
  xt as ay,
3320
3320
  he as az,
3321
- q as b,
3321
+ ni as b,
3322
3322
  Qe as b$,
3323
3323
  na as b0,
3324
3324
  la as b1,
@@ -3383,7 +3383,7 @@ export {
3383
3383
  Fr as bx,
3384
3384
  Mr as by,
3385
3385
  xr as bz,
3386
- Jo as c,
3386
+ Ho as c,
3387
3387
  en as c$,
3388
3388
  tn as c0,
3389
3389
  za as c1,
@@ -3448,7 +3448,7 @@ export {
3448
3448
  gs as cx,
3449
3449
  cr as cy,
3450
3450
  Tr as cz,
3451
- $ as d,
3451
+ M as d,
3452
3452
  to as d0,
3453
3453
  Za as d1,
3454
3454
  Bo as d2,
@@ -3491,26 +3491,26 @@ export {
3491
3491
  ja as dx,
3492
3492
  ko as dy,
3493
3493
  Io as dz,
3494
- ei as e,
3494
+ Jo as e,
3495
3495
  as as f,
3496
3496
  ai as g,
3497
- S as h,
3498
- nt as i,
3499
- V as j,
3497
+ ei as h,
3498
+ $ as i,
3499
+ S as j,
3500
3500
  Fn as k,
3501
- M as l,
3501
+ nt as l,
3502
3502
  b as m,
3503
- T as n,
3503
+ V as n,
3504
3504
  N as o,
3505
3505
  Qt as p,
3506
- Ct as q,
3506
+ T as q,
3507
3507
  ri as r,
3508
3508
  si as s,
3509
3509
  A as t,
3510
- Jn as u,
3511
- p as v,
3512
- An as w,
3513
- wr as x,
3514
- Kr as y,
3515
- K as z
3510
+ Ct as u,
3511
+ hs as v,
3512
+ Jn as w,
3513
+ p as x,
3514
+ An as y,
3515
+ wr as z
3516
3516
  };
@@ -1,20 +1,20 @@
1
- import { W as Mt } from "./backend_webgpu-DI2wXEC2.js";
2
- import { ae as Et, ab as X, v as L, dc as Ut, dd as Ht, bX as Gt, N as D, _ as j, bb as Xt, ai as Ye, aU as Kt, a7 as qt, am as fe, bP as Yt, c7 as jt, c8 as Qt, bV as Zt, cO as Jt, ar as es, ac as De, ah as te, aW as ts, bm as ss, bn as os, bo as is, c9 as as, ca as rs, cb as ns, cc as us, cd as ds, ce as ls, aM as cs, aP as hs, bp as ps, cy as fs, cP as ms, cQ as gs, B as xs, S as Cs, br as ws, bd as ys, de as Ss, aQ as bs, aq as vs, bS as ks, bT as Is, af as Rs, bY as Ps, C as $s, cS as Ds, ao as Ns, x as zs, bt as As, cD as Fs, bu as Ws, cz as Ls, cT as Vs, cA as Bs, bv as Ts, bw as _s, bf as Os, bx as Ms, by as Es, cB as Us, cf as Hs, bz as Gs, cF as Xs, cG as Ks, df as qs, cg as Ys, cU as js, cV as Qs, dg as Zs, c0 as Js, K as eo, be as to, aG as so, cW as oo, bA as io, bB as ao, aN as ro, y as no, bZ as uo, cp as lo, bg as co, F as ho, b_ as po, dh as fo, a9 as at, bs as mo, cE as go, di as xo, al as Co, G as wo, a5 as ke, a$ as yo, b0 as So, cq as bo, ch as vo, ci as ko, cj as Io, aH as Ro, b1 as Po, b2 as $o, dj as Do, aO as No, b3 as zo, b4 as Ao, bD as Fo, cl as Wo, ck as Lo, cY as Vo, b$ as Bo, bE as To, cC as _o, cZ as Oo, c_ as Mo, dk as Eo, aZ as Uo, b5 as Ho, cm as Go, M as Xo, D as Ko, dl as qo, ap as Yo, bi as jo, bj as Qo, bF as Zo, d3 as Jo, bG as ei, P as ti, a6 as si, bH as oi, c$ as ii, aI as ai, c1 as ri, X as ni, aX as ui, cn as di, A as li, aJ as ci, b9 as hi, d0 as pi, ba as fi, d1 as mi, bJ as gi, bh as xi, b6 as Ci, bK as wi, ak as yi, dm as Si, aL as bi, bL as vi, aF as ki, co as Ii, bM as Ri, bN as Pi, bC as $i, bI as Di, dn as Ni, dp as zi, T as Ai, av as rt, dq as Fi, L as Wi, H as Li, c3 as Vi, d2 as Bi, b7 as Ti, aK as _i, cr as Oi, dr as Mi, c5 as Ei, cs as Ui, bq as Hi, ds as Gi, ct as Xi, bl as Ki, b8 as qi, bO as Yi, a as ji } from "./index-DSGwv2Yx.js";
3
- import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-DuofJBMo.js";
1
+ import { W as Mt } from "./backend_webgpu-BqASlsbV.js";
2
+ import { ae as Et, ab as X, x as L, dc as Ut, dd as Ht, bX as Gt, U as D, _ as j, bb as Xt, ai as Ye, aU as Kt, a7 as qt, am as fe, bP as Yt, c7 as jt, c8 as Qt, bV as Zt, cO as Jt, ar as es, ac as De, ah as te, aW as ts, bm as ss, bn as os, bo as is, c9 as as, ca as rs, cb as ns, cc as us, cd as ds, ce as ls, aM as cs, aP as hs, bp as ps, cy as fs, cP as ms, cQ as gs, D as xs, S as Cs, br as ws, bd as ys, de as Ss, aQ as bs, aq as vs, bS as ks, bT as Is, af as Rs, bY as Ps, C as $s, cS as Ds, ao as Ns, z as zs, bt as As, cD as Fs, bu as Ws, cz as Ls, cT as Vs, cA as Bs, bv as Ts, bw as _s, bf as Os, bx as Ms, by as Es, cB as Us, cf as Hs, bz as Gs, cF as Xs, cG as Ks, df as qs, cg as Ys, cU as js, cV as Qs, dg as Zs, c0 as Js, N as eo, be as to, aG as so, cW as oo, bA as io, bB as ao, aN as ro, A as no, bZ as uo, cp as lo, bg as co, F as ho, b_ as po, dh as fo, a9 as at, bs as mo, cE as go, di as xo, al as Co, G as wo, a5 as ke, a$ as yo, b0 as So, cq as bo, ch as vo, ci as ko, cj as Io, aH as Ro, b1 as Po, b2 as $o, dj as Do, aO as No, b3 as zo, b4 as Ao, bD as Fo, cl as Wo, ck as Lo, cY as Vo, b$ as Bo, bE as To, cC as _o, cZ as Oo, c_ as Mo, dk as Eo, aZ as Uo, b5 as Ho, cm as Go, M as Xo, I as Ko, dl as qo, ap as Yo, bi as jo, bj as Qo, bF as Zo, d3 as Jo, bG as ei, P as ti, a6 as si, bH as oi, c$ as ii, aI as ai, c1 as ri, Z as ni, aX as ui, cn as di, H as li, aJ as ci, b9 as hi, d0 as pi, ba as fi, d1 as mi, bJ as gi, bh as xi, b6 as Ci, bK as wi, ak as yi, dm as Si, aL as bi, bL as vi, aF as ki, co as Ii, bM as Ri, bN as Pi, bC as $i, bI as Di, dn as Ni, dp as zi, T as Ai, av as rt, dq as Fi, Q as Wi, J as Li, c3 as Vi, d2 as Bi, b7 as Ti, aK as _i, cr as Oi, dr as Mi, c5 as Ei, cs as Ui, bq as Hi, ds as Gi, ct as Xi, bl as Ki, b8 as qi, bO as Yi, c as ji } from "./index-CUXkjxiT.js";
3
+ import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-DYlGSwOJ.js";
4
4
  import { g as _e, B as F } from "./binary_op_util-pKXltfxI.js";
5
- import { S as Ji, a as ea } from "./selu_util-BXdhy_W6.js";
6
- import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga } from "./backend_util-qwSFfxYx.js";
7
- import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as xa, a as Ca } from "./webgpu_program-CbjdYLYk.js";
8
- import { r as R, a as wa } from "./Reshape-pwprEaej.js";
9
- import { s as ya } from "./shared-DRWDyk9w.js";
10
- import { c as Oe, a as Ce, b as we, d as Me, e as Sa, g as ft } from "./axis_util-QWWgLjut.js";
11
- import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-DQY6_ABw.js";
12
- import { p as ba, a as va, s as ka, b as Ia } from "./slice_util-DPY56GzQ.js";
13
- import { z as Ra } from "./zeros-Bw0puq_w.js";
14
- import { c as me, a as Pa } from "./concat_util-C1Mxe27t.js";
15
- import { c as $a, a as Da } from "./segment_util-4zuHV5IG.js";
5
+ import { S as Ji, a as ea } from "./selu_util-BLhIqRkw.js";
6
+ import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga } from "./backend_util-GaFarB78.js";
7
+ import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as xa, a as Ca } from "./webgpu_program-B4HmApL1.js";
8
+ import { r as R, a as wa } from "./Reshape-CPBkTIH2.js";
9
+ import { s as ya } from "./shared-CagdqkLh.js";
10
+ import { c as Oe, a as Ce, b as we, d as Me, e as Sa, g as ft } from "./axis_util-GTVlo58H.js";
11
+ import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-BTxPtq1r.js";
12
+ import { p as ba, a as va, s as ka, b as Ia } from "./slice_util-CC35pLmT.js";
13
+ import { z as Ra } from "./zeros-DvZpK8s6.js";
14
+ import { c as me, a as Pa } from "./concat_util-D18dJ4fD.js";
15
+ import { c as $a, a as Da } from "./segment_util-Dasb2Zaf.js";
16
16
  import { n as Na, a as za } from "./non_max_suppression_impl-B2W7YjZB.js";
17
- import { c as He } from "./scatter_nd_util-C-x73Cj6.js";
17
+ import { c as He } from "./scatter_nd_util-ByNJaL6I.js";
18
18
  Qi() && Et(
19
19
  "webgpu",
20
20
  async () => {
@@ -1,6 +1,6 @@
1
- import { _ as B, N as G, aU as K, a7 as W, aH as z, aV as V, ab as N, aI as F, am as S } from "./index-DSGwv2Yx.js";
2
- import { u as O, f as H } from "./gpgpu_math-3bCb5ooU.js";
3
- import { f as v } from "./backend_util-qwSFfxYx.js";
1
+ import { _ as B, U as G, aU as K, a7 as W, aH as z, aV as V, ab as N, aI as F, am as S } from "./index-CUXkjxiT.js";
2
+ import { u as O, f as H } from "./gpgpu_math-BwvV12df.js";
3
+ import { f as v } from "./backend_util-GaFarB78.js";
4
4
  function Y(t, e) {
5
5
  return ["x", "y", "z", "w", "u", "v"].slice(0, e).map((s) => `${t}.${s}`);
6
6
  }
@@ -200,12 +200,12 @@ const be = {
200
200
  backendName: "webgl",
201
201
  kernelFunc: oe
202
202
  };
203
- const k = "return (a < 0.) ? b * a : a;", U = `
203
+ const U = "return (a < 0.) ? b * a : a;", k = `
204
204
  vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
205
205
  return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
206
206
  `;
207
207
  function ue(t) {
208
- const { inputs: e, backend: s } = t, { x: r, alpha: u } = e, n = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") ? new E(U, r.shape, u.shape) : new b(k, r.shape, u.shape);
208
+ const { inputs: e, backend: s } = t, { x: r, alpha: u } = e, n = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") ? new E(k, r.shape, u.shape) : new b(U, r.shape, u.shape);
209
209
  return s.runWebGLProgram(n, [r, u], "float32");
210
210
  }
211
211
  const Ne = {
@@ -273,7 +273,7 @@ function Ae(t, e = !1) {
273
273
  if (t === "relu6")
274
274
  return e ? ae : Q;
275
275
  if (t === "prelu")
276
- return e ? U : k;
276
+ return e ? k : U;
277
277
  if (t === "leakyrelu")
278
278
  return e ? R : w;
279
279
  if (t === "sigmoid")
@@ -1,4 +1,4 @@
1
- import { a2 as p, e as s, a4 as g } from "../index-DSGwv2Yx.js";
1
+ import { a2 as p, h as s, a4 as g } from "../index-CUXkjxiT.js";
2
2
  import b from "./WeightStore.js";
3
3
  class T {
4
4
  parent;
@@ -59,11 +59,11 @@ class T {
59
59
  checkpointingFn(t, ...e) {
60
60
  const r = this.trainableVariables;
61
61
  return p((...i) => {
62
- const o = i[i.length - 1], a = i.slice(0, e.length), n = this.forward(t, ...a);
63
- return o(a), { value: n, gradFunc: (h, l) => {
62
+ const o = i[i.length - 1], a = i.slice(0, e.length), h = this.forward(t, ...a);
63
+ return o(a), { value: h, gradFunc: (n, l) => {
64
64
  const c = s().state.activeTape;
65
65
  s().state.activeTape = [];
66
- const d = g((...u) => this.forward(t, ...u.slice(0, a.length)))([...l, ...r], h);
66
+ const d = g((...u) => this.forward(t, ...u.slice(0, a.length)))([...l, ...r], n);
67
67
  return s().state.activeTape = c, d;
68
68
  } };
69
69
  })(...e, ...r);
@@ -15,6 +15,7 @@ interface AttentionForwardAttributes extends ForwardAttributes {
15
15
  attentionScores?: AttentionScores;
16
16
  pastKV?: KVCache;
17
17
  seed?: number;
18
+ ropePositionOffset?: number;
18
19
  }
19
20
  export interface CausalSelfAttentionConfig {
20
21
  useQKNorm?: boolean;
@@ -1,20 +1,20 @@
1
1
  import { attentionMask as R } from "../ops/attentionMask.js";
2
2
  import J from "./BaseLayer.js";
3
- import { r as v } from "../rope-CC5RjmKU.js";
3
+ import { r as v } from "../rope-DcrZM_e6.js";
4
4
  import { appendCache as A } from "../ops/appendCache.js";
5
- import { k as c, t as L } from "../index-DSGwv2Yx.js";
5
+ import { k as c, t as L } from "../index-CUXkjxiT.js";
6
6
  import { softmax16 as y } from "../ops/softmax16.js";
7
- import { b as M } from "../matMul16-BIT70Vya.js";
8
- import { p as K } from "../pack16-WlOSOuZA.js";
7
+ import { b as M } from "../matMul16-BcVC_E62.js";
8
+ import { p as K } from "../pack16-bqltoUlR.js";
9
9
  import { transpose16 as j } from "../ops/transpose16.js";
10
10
  import { dot16 as E } from "../ops/dot16.js";
11
11
  import { reshape16 as _ } from "../ops/reshape16.js";
12
- import { isPackedTensor as l } from "../utilities/packed.js";
12
+ import { isPackedTensor as f } from "../utilities/packed.js";
13
13
  import { qkv as q } from "../ops/qkv.js";
14
14
  import { normRMS as O } from "../ops/normRMS.js";
15
15
  import { dropout16 as x } from "../ops/dropout16.js";
16
- import { v as P } from "../variable-SSATClyt.js";
17
- import { r as S } from "../random_normal-CIm8lk2-.js";
16
+ import { v as P } from "../variable-Ck482e3n.js";
17
+ import { r as S } from "../random_normal-IBRrha8a.js";
18
18
  class it extends J {
19
19
  constructor(t, o, s, i) {
20
20
  super(o, i), this.attentionConfig = s, this.index = t, this.units = o.nEmbed * 3, this.projUnits = o.nEmbed, this.ATTN = `block_${this.index}_cAttn`, this.PROJ = `block_${this.index}_cProj`, this.addVariable(this.ATTN), this.addVariable(this.PROJ), this.divisor = 1 / Math.sqrt(o.nEmbed / o.nHead);
@@ -39,11 +39,11 @@ class it extends J {
39
39
  return i.dispose(), e;
40
40
  }
41
41
  getQKV(t) {
42
- const o = l(t) ? K(this.getVariable(this.ATTN)) : this.getVariable(this.ATTN), s = q(t, o, this.config.nHead);
43
- return l(t) && o.dispose(), s;
42
+ const o = f(t) ? K(this.getVariable(this.ATTN)) : this.getVariable(this.ATTN), s = q(t, o, this.config.nHead);
43
+ return f(t) && o.dispose(), s;
44
44
  }
45
45
  getOutputProjection(t) {
46
- const o = t.shape[0], s = t.shape[2], i = this.config.nEmbed, e = l(t), r = j(t, [0, 2, 1, 3]), n = _(r, [o, s, e ? i / 2 : i]);
46
+ const o = t.shape[0], s = t.shape[2], i = this.config.nEmbed, e = f(t), r = j(t, [0, 2, 1, 3]), n = _(r, [o, s, e ? i / 2 : i]);
47
47
  r.dispose();
48
48
  const p = e ? K(this.getVariable(this.PROJ)) : this.getVariable(this.PROJ), a = E(n, p);
49
49
  return e && p.dispose(), n.dispose(), a;
@@ -59,13 +59,13 @@ class it extends J {
59
59
  forward(t, o) {
60
60
  return L(() => {
61
61
  this.startMemory();
62
- const [s, i, e] = this.getQKV(o), r = t.pastKV ? t.pastKV.cumulativeLength : 0, n = t.ropeCache, p = n ? v(s, n, r) : s, a = n ? v(i, n, r) : i, h = this.attentionConfig.useQKNorm ?? !1, m = h ? O(p) : p;
62
+ const [s, i, e] = this.getQKV(o), r = t.pastKV ? t.pastKV.cumulativeLength : t.ropePositionOffset || 0, n = t.ropeCache, p = n ? v(s, n, r) : s, a = n ? v(i, n, r) : i, h = this.attentionConfig.useQKNorm ?? !1, m = h ? O(p) : p;
63
63
  h && p.dispose();
64
- const f = h ? O(a) : a;
64
+ const l = h ? O(a) : a;
65
65
  h && a.dispose(), n && (s.dispose(), i.dispose());
66
66
  const T = t.pastKV ? t.pastKV.length : 0;
67
- t.pastKV && !t.training && this.updateCache(f, e, t.pastKV);
68
- const u = t.pastKV?.k ? t.pastKV.k : f, V = t.pastKV?.v ? t.pastKV.v : e;
67
+ t.pastKV && !t.training && this.updateCache(l, e, t.pastKV);
68
+ const u = t.pastKV?.k ? t.pastKV.k : l, V = t.pastKV?.v ? t.pastKV.v : e;
69
69
  let d;
70
70
  T > 0 ? d = this.getAttentionScores(m, u, T) : d = this.getAttentionScores(m, u), m.dispose(), t.pastKV || u.dispose();
71
71
  const g = M(d, V), b = t.attentionScores !== void 0 && t.attentionScores.attentionOut !== void 0;
@@ -1,8 +1,8 @@
1
- import { b as m, t as n } from "../index-DSGwv2Yx.js";
1
+ import { a as m, t as n } from "../index-CUXkjxiT.js";
2
2
  import { p } from "../index-DmeWGGmS.js";
3
- import { v as g } from "../variable-SSATClyt.js";
4
- import { r as S } from "../random_normal-CIm8lk2-.js";
5
- import { z as _ } from "../zeros-Bw0puq_w.js";
3
+ import { v as g } from "../variable-Ck482e3n.js";
4
+ import { r as S } from "../random_normal-IBRrha8a.js";
5
+ import { z as _ } from "../zeros-DvZpK8s6.js";
6
6
  class B {
7
7
  weightStore;
8
8
  alpha;
@@ -1,10 +1,10 @@
1
- import { t as M } from "../index-DSGwv2Yx.js";
1
+ import { t as M } from "../index-CUXkjxiT.js";
2
2
  import f from "./BaseLayer.js";
3
- import { b as h } from "../matMul16-BIT70Vya.js";
3
+ import { b as h } from "../matMul16-BcVC_E62.js";
4
4
  import { reshape16 as d } from "../ops/reshape16.js";
5
5
  import { dropout16 as L } from "../ops/dropout16.js";
6
- import { v as n } from "../variable-SSATClyt.js";
7
- import { r as m } from "../random_normal-CIm8lk2-.js";
6
+ import { v as n } from "../variable-Ck482e3n.js";
7
+ import { r as m } from "../random_normal-IBRrha8a.js";
8
8
  class N extends f {
9
9
  index;
10
10
  hiddenUnits;
@@ -1,9 +1,9 @@
1
- import { t as c, an as p, b as s } from "../index-DSGwv2Yx.js";
1
+ import { t as c, an as p, a as s } from "../index-CUXkjxiT.js";
2
2
  import f from "./BaseLayer.js";
3
- import { E as u } from "../random_width-B_fVXhGx.js";
4
- import { r as b } from "../exports_initializers-DIOZQt_L.js";
5
- import { m as g } from "../mod-BXjLYwvM.js";
6
- import { r as l } from "../range-BDxO73mk.js";
3
+ import { E as u } from "../random_width-DN5ZtQkM.js";
4
+ import { r as b } from "../exports_initializers-CYzKLjN7.js";
5
+ import { m as g } from "../mod-CSdCpRjf.js";
6
+ import { r as l } from "../range-C-CjF-LI.js";
7
7
  function h(e) {
8
8
  return new u(e);
9
9
  }
@@ -1,8 +1,8 @@
1
- import { t as i } from "../index-DSGwv2Yx.js";
1
+ import { t as i } from "../index-CUXkjxiT.js";
2
2
  import e from "./BaseLayer.js";
3
3
  import { normRMS as m } from "../ops/normRMS.js";
4
- import { v as a } from "../variable-SSATClyt.js";
5
- import { o as M } from "../ones-DbVB5N58.js";
4
+ import { v as a } from "../variable-Ck482e3n.js";
5
+ import { o as M } from "../ones-BytntneX.js";
6
6
  class l extends e {
7
7
  GAMMA;
8
8
  rmsConfig;
@@ -1,7 +1,7 @@
1
- import { d as a, b as n, p as c, t as p, k as r } from "../index-DSGwv2Yx.js";
2
- import { r as l } from "../zeros_like-rOHr54NY.js";
3
- import { c as f, s as m } from "../unsorted_segment_sum-BfFVV9Zm.js";
4
- import { r as h } from "../range-BDxO73mk.js";
1
+ import { i as a, a as n, p as c, t as p, k as r } from "../index-CUXkjxiT.js";
2
+ import { r as l } from "../zeros_like-CWjDdwr-.js";
3
+ import { c as f, s as m } from "../unsorted_segment_sum-DJvk5xnh.js";
4
+ import { r as h } from "../range-C-CjF-LI.js";
5
5
  class x {
6
6
  constructor(s) {
7
7
  this.config = s;
@@ -1,13 +1,13 @@
1
- import "../random_width-B_fVXhGx.js";
2
- import "../index-DSGwv2Yx.js";
3
- import { r as s } from "../exports_initializers-DIOZQt_L.js";
1
+ import "../random_width-DN5ZtQkM.js";
2
+ import "../index-CUXkjxiT.js";
3
+ import { r as s } from "../exports_initializers-CYzKLjN7.js";
4
4
  import a from "./BaseLayer.js";
5
5
  import { dot16 as o } from "../ops/dot16.js";
6
6
  import { isPackedTensor as r } from "../utilities/packed.js";
7
- import { p as m } from "../pack16-WlOSOuZA.js";
7
+ import { p as m } from "../pack16-bqltoUlR.js";
8
8
  import { transpose16 as d } from "../ops/transpose16.js";
9
- import { v as p } from "../variable-SSATClyt.js";
10
- import { g as h } from "../gather-DEyjXNb1.js";
9
+ import { v as p } from "../variable-Ck482e3n.js";
10
+ import { g as h } from "../gather-qIqEqaGn.js";
11
11
  class g extends a {
12
12
  vocabSize;
13
13
  embedDim;
@@ -8,6 +8,7 @@ interface BlockAttributes extends ForwardAttributes {
8
8
  pastKV?: KVCache;
9
9
  seed?: number;
10
10
  attentionScores?: AttentionScores;
11
+ ropePositionOffset?: number;
11
12
  }
12
13
  export type TransformerBlockConfig = MLPConfig & RMSNormConfig & CausalSelfAttentionConfig;
13
14
  export default class Block extends BaseLayer<BlockAttributes> {
@@ -2,7 +2,7 @@ import p from "./CausalSelfAttention.js";
2
2
  import h from "./MLP.js";
3
3
  import l from "./RMSNorm.js";
4
4
  import m from "./BaseLayer.js";
5
- import { k as n, t as u } from "../index-DSGwv2Yx.js";
5
+ import { k as n, t as u } from "../index-CUXkjxiT.js";
6
6
  import { add16 as d } from "../ops/add16.js";
7
7
  class _ extends m {
8
8
  ln1;
@@ -1,6 +1,6 @@
1
- import { u as o } from "../index-DSGwv2Yx.js";
1
+ import { w as o } from "../index-CUXkjxiT.js";
2
2
  import { p as h } from "../index-DmeWGGmS.js";
3
- import { v as b } from "../variable-SSATClyt.js";
3
+ import { v as b } from "../variable-Ck482e3n.js";
4
4
  class d {
5
5
  _variables = /* @__PURE__ */ new Map();
6
6
  touchedVariables = /* @__PURE__ */ new Set();
@@ -1,7 +1,7 @@
1
1
  import p from "../tokeniser/CharTokeniser.js";
2
2
  import _ from "../tokeniser/bpe.js";
3
3
  import { load_safetensors as b } from "../utilities/safetensors.js";
4
- import { aa as c } from "../index-DSGwv2Yx.js";
4
+ import { aa as c } from "../index-CUXkjxiT.js";
5
5
  import { dummyPassAsync as u } from "../utilities/dummy.js";
6
6
  import y from "../models/factory.js";
7
7
  function h(e) {
@@ -1,22 +1,22 @@
1
- import { aa as y } from "../index-DSGwv2Yx.js";
2
- import "../random_width-B_fVXhGx.js";
3
- import "../zeros_like-rOHr54NY.js";
1
+ import { aa as y } from "../index-CUXkjxiT.js";
2
+ import "../random_width-DN5ZtQkM.js";
3
+ import "../zeros_like-CWjDdwr-.js";
4
4
  import "../Generator.js";
5
5
  import "../index-Cp39cXWe.js";
6
- import "../dataset-DlqAN81i.js";
6
+ import "../dataset-CGGp1z9P.js";
7
7
  import "../ops/cpu/attentionMask.js";
8
8
  import "../ops/webgl/attentionMask.js";
9
9
  import "../ops/grads/attentionMask.js";
10
10
  import "../ops/cpu/rope.js";
11
11
  import "../ops/webgl/rope.js";
12
- import "../rope-CC5RjmKU.js";
12
+ import "../rope-DcrZM_e6.js";
13
13
  import "../ops/cpu/appendCache.js";
14
14
  import "../ops/webgl/appendCache.js";
15
15
  import "../ops/grads/softmax16.js";
16
- import "../matMul16-BIT70Vya.js";
16
+ import "../matMul16-BcVC_E62.js";
17
17
  import "../ops/webgl/matMul16.js";
18
18
  import "../ops/cpu/matMul16.js";
19
- import "../pack16-WlOSOuZA.js";
19
+ import "../pack16-bqltoUlR.js";
20
20
  import "../ops/transpose16.js";
21
21
  import "../ops/reshape16.js";
22
22
  import "../ops/cpu/qkv.js";
@@ -44,11 +44,11 @@ import "../ops/cpu/scatterSub.js";
44
44
  import "../ops/webgl/scatterSub.js";
45
45
  import "../papaparse.min-C0cScC2i.js";
46
46
  import "../ops/cpu/matMulGelu.js";
47
- import "../matMulGelu-CsZnh18H.js";
47
+ import "../matMulGelu-JNLZqKQp.js";
48
48
  import "../ops/grads/matMulGelu.js";
49
49
  import "../ops/cpu/gelu.js";
50
50
  import "../ops/webgl/gelu.js";
51
- import "../gelu-DpTCC3eB.js";
51
+ import "../gelu-B220X1Go.js";
52
52
  import "../ops/webgl/log.js";
53
53
  import "../checks/normRMS.js";
54
54
  import "../checks/normRMSGrad.js";
@@ -1,31 +1,38 @@
1
1
  import { z as y } from "../jszip.min-BZhlzntC.js";
2
- import b from "../tokeniser/CharTokeniser.js";
3
- import { save_safetensors as _ } from "../utilities/safetensors.js";
4
- import { VERSION as m } from "./load.js";
5
- async function d(i, a, o, t) {
6
- const g = /* @__PURE__ */ new Map();
7
- i.weightStore.saveWeights(g);
8
- const n = new y();
9
- if (t?.optimizer) {
10
- const e = await t.optimizer.saveMoments();
11
- n.file("optimizer.safetensors", e, { binary: !0 }), n.file("optimizer_config.json", JSON.stringify(t.optimizer.serializeConfig()), {
2
+ import _ from "../tokeniser/CharTokeniser.js";
3
+ import { save_safetensors as b } from "../utilities/safetensors.js";
4
+ import { VERSION as h } from "./load.js";
5
+ function m(i) {
6
+ if (i.length > 1e3) {
7
+ const n = Math.ceil(i.length / 1e3);
8
+ return i.filter((o, a) => a % n === 0 || a === i.length - 1);
9
+ }
10
+ return i;
11
+ }
12
+ async function u(i, n, o, a) {
13
+ const s = /* @__PURE__ */ new Map();
14
+ i.weightStore.saveWeights(s);
15
+ const e = new y();
16
+ if (a?.optimizer) {
17
+ const t = await a.optimizer.saveMoments();
18
+ e.file("optimizer.safetensors", t, { binary: !0 }), e.file("optimizer_config.json", JSON.stringify(a.optimizer.serializeConfig()), {
12
19
  binary: !1
13
20
  });
14
21
  }
15
- t?.trainingLog && n.file("training_log.json", JSON.stringify(t.trainingLog, void 0, 4), {
22
+ a?.trainingLog && e.file("training_log.json", JSON.stringify(m(a.trainingLog), void 0, 4), {
16
23
  binary: !1
17
24
  });
18
- const s = {};
19
- g.forEach((e, f) => {
20
- e.length === 1 && (s[f] = e[0]);
25
+ const g = {};
26
+ s.forEach((t, f) => {
27
+ t.length === 1 && (g[f] = t[0]);
21
28
  });
22
- const c = await _(s);
23
- n.file("model.safetensors", c, { binary: !0 });
29
+ const c = await b(g);
30
+ e.file("model.safetensors", c, { binary: !0 });
24
31
  const p = i.config.modelType;
25
32
  let r;
26
33
  p === "GenAI_NanoGPT_v1" ? r = {
27
34
  model_type: "GenAI_NanoGPT_v1",
28
- vocab_size: a.getVocab().length,
35
+ vocab_size: n.getVocab().length,
29
36
  hidden_size: i.config.nEmbed,
30
37
  num_hidden_layers: i.config.nLayer,
31
38
  num_attention_heads: i.config.nHead,
@@ -34,7 +41,7 @@ async function d(i, a, o, t) {
34
41
  useRope: i.config.useRope
35
42
  } : r = {
36
43
  model_type: "GenAI_NanoGPT_v2",
37
- vocab_size: a.getVocab().length,
44
+ vocab_size: n.getVocab().length,
38
45
  hidden_size: i.config.nEmbed,
39
46
  num_hidden_layers: i.config.nLayer,
40
47
  num_attention_heads: i.config.nHead,
@@ -43,11 +50,11 @@ async function d(i, a, o, t) {
43
50
  loraConfig: i.config.loraConfig ? Object.fromEntries(i.config.loraConfig) : void 0,
44
51
  loraName: i.config.loraName,
45
52
  windowSize: i.config.windowSize
46
- }, n.file("config.json", JSON.stringify(r, void 0, 4), {
53
+ }, e.file("config.json", JSON.stringify(r, void 0, 4), {
47
54
  binary: !1
48
55
  });
49
56
  const l = {
50
- version: m,
57
+ version: h,
51
58
  application: "@genai-fi/nanogpt",
52
59
  meta: o?.metadata,
53
60
  name: o?.name,
@@ -59,25 +66,25 @@ async function d(i, a, o, t) {
59
66
  generationSettings: i.metaData?.generationSettings || void 0,
60
67
  actionLog: i.metaData?.actionLog || void 0
61
68
  };
62
- if (n.file("meta.json", JSON.stringify(l, void 0, 4), {
69
+ if (e.file("meta.json", JSON.stringify(l, void 0, 4), {
63
70
  binary: !1
64
- }), n.file(
71
+ }), e.file(
65
72
  "tokeniser.json",
66
73
  JSON.stringify({
67
- type: a instanceof b ? "char" : "bpe",
68
- vocab: a.getVocab(),
69
- merges: a.getMerges(),
70
- datasetID: a.datasetID,
71
- id: a.id
74
+ type: n instanceof _ ? "char" : "bpe",
75
+ vocab: n.getVocab(),
76
+ merges: n.getMerges(),
77
+ datasetID: n.datasetID,
78
+ id: n.id
72
79
  }),
73
80
  {
74
81
  binary: !1
75
82
  }
76
83
  ), o?.files)
77
- for (const [e, f] of Object.entries(o.files))
78
- n.file(e, JSON.stringify(f), { binary: !1 });
79
- return n.generateAsync({ type: "blob" });
84
+ for (const [t, f] of Object.entries(o.files))
85
+ e.file(t, JSON.stringify(f), { binary: !1 });
86
+ return e.generateAsync({ type: "blob" });
80
87
  }
81
88
  export {
82
- d as saveModel
89
+ u as saveModel
83
90
  };
package/dist/main.js CHANGED
@@ -1,9 +1,9 @@
1
- import "./index-DSGwv2Yx.js";
2
- import "./random_width-B_fVXhGx.js";
3
- import "./zeros_like-rOHr54NY.js";
1
+ import "./index-CUXkjxiT.js";
2
+ import "./random_width-DN5ZtQkM.js";
3
+ import "./zeros_like-CWjDdwr-.js";
4
4
  import { default as io } from "./Generator.js";
5
5
  import "./index-Cp39cXWe.js";
6
- import "./dataset-DlqAN81i.js";
6
+ import "./dataset-CGGp1z9P.js";
7
7
  import { default as fo } from "./models/NanoGPTV1.js";
8
8
  import { default as lo } from "./TeachableLLM.js";
9
9
  import { default as uo } from "./tokeniser/CharTokeniser.js";
@@ -32,15 +32,15 @@ import "./ops/webgl/qkv.js";
32
32
  import "./ops/grads/qkv.js";
33
33
  import "./ops/cpu/rope.js";
34
34
  import "./ops/webgl/rope.js";
35
- import "./rope-CC5RjmKU.js";
35
+ import "./rope-DcrZM_e6.js";
36
36
  import "./ops/cpu/appendCache.js";
37
37
  import "./ops/webgl/appendCache.js";
38
38
  import "./ops/cpu/matMulGelu.js";
39
- import "./matMulGelu-CsZnh18H.js";
39
+ import "./matMulGelu-JNLZqKQp.js";
40
40
  import "./ops/grads/matMulGelu.js";
41
41
  import "./ops/cpu/gelu.js";
42
42
  import "./ops/webgl/gelu.js";
43
- import "./gelu-DpTCC3eB.js";
43
+ import "./gelu-B220X1Go.js";
44
44
  import "./ops/cpu/normRMS.js";
45
45
  import "./ops/webgl/normRMS.js";
46
46
  import "./ops/grads/normRMS.js";
@@ -49,9 +49,9 @@ import "./ops/cpu/adamMoments.js";
49
49
  import "./ops/webgl/adamMoments.js";
50
50
  import "./ops/cpu/adamAdjust.js";
51
51
  import "./ops/webgl/adamAdjust.js";
52
- import { u as e, p as m } from "./pack16-WlOSOuZA.js";
52
+ import { u as e, p as m } from "./pack16-bqltoUlR.js";
53
53
  import "./ops/grads/softmax16.js";
54
- import "./matMul16-BIT70Vya.js";
54
+ import "./matMul16-BcVC_E62.js";
55
55
  import "./ops/webgl/matMul16.js";
56
56
  import "./ops/cpu/matMul16.js";
57
57
  import "./ops/transpose16.js";
@@ -1,9 +1,9 @@
1
- import { c as y, e as h } from "./index-DSGwv2Yx.js";
1
+ import { e as y, h } from "./index-CUXkjxiT.js";
2
2
  import "./ops/webgl/matMul16.js";
3
3
  import "./ops/cpu/matMul16.js";
4
4
  import { isPackedTensor as v } from "./utilities/packed.js";
5
- import { p as g } from "./pack16-WlOSOuZA.js";
6
- import { d as k } from "./gelu-DpTCC3eB.js";
5
+ import { p as g } from "./pack16-bqltoUlR.js";
6
+ import { d as k } from "./gelu-B220X1Go.js";
7
7
  import { transpose16 as S } from "./ops/transpose16.js";
8
8
  import { reshape16 as w } from "./ops/reshape16.js";
9
9
  import { mul16 as D } from "./ops/mul16.js";