@genai-fi/nanogpt 0.20.0 → 0.20.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433) hide show
  1. package/dist/BaseTokeniser-DSg9zcYq.js +221 -0
  2. package/dist/DatasetBuilder-DgURD85T.js +712 -0
  3. package/dist/Generator.d.ts +82 -0
  4. package/dist/Generator.js +2 -0
  5. package/dist/RealDiv-DBu0FQqT.js +362 -0
  6. package/dist/Reshape-CABOPB9d.js +94 -0
  7. package/dist/Reshape-DqO3r8BC.js +17 -0
  8. package/dist/TeachableLLM.d.ts +70 -0
  9. package/dist/TeachableLLM.js +2 -0
  10. package/dist/Trainer.d.ts +43 -0
  11. package/dist/Trainer.js +2 -0
  12. package/dist/backend.d.ts +2 -0
  13. package/dist/backend.js +13 -0
  14. package/dist/backend_util-Cg-roD1p.js +399 -0
  15. package/dist/binary_op_util-CrYk9LXL.js +103 -0
  16. package/dist/checks/appendCache.d.ts +1 -0
  17. package/dist/checks/appendCache.js +55 -0
  18. package/dist/checks/attentionMask.d.ts +1 -0
  19. package/dist/checks/attentionMask.js +56 -0
  20. package/dist/checks/check.d.ts +9 -0
  21. package/dist/checks/check.js +32 -0
  22. package/dist/checks/gelu.d.ts +1 -0
  23. package/dist/checks/gelu.js +46 -0
  24. package/dist/checks/index.d.ts +26 -0
  25. package/dist/checks/index.js +28 -0
  26. package/dist/checks/matMulGelu.d.ts +1 -0
  27. package/dist/checks/matMulGelu.js +84 -0
  28. package/dist/checks/normRMS.d.ts +1 -0
  29. package/dist/checks/normRMS.js +28 -0
  30. package/dist/checks/normRMSGrad.d.ts +1 -0
  31. package/dist/checks/normRMSGrad.js +22 -0
  32. package/dist/checks/packUnpack.d.ts +1 -0
  33. package/dist/checks/packUnpack.js +46 -0
  34. package/dist/checks/qkv.d.ts +1 -0
  35. package/dist/checks/qkv.js +34 -0
  36. package/dist/checks/rope.d.ts +1 -0
  37. package/dist/checks/rope.js +30 -0
  38. package/dist/checks/weights.d.ts +14 -0
  39. package/dist/checks/weights.js +27 -0
  40. package/dist/chunk-BPntVaq0.js +23 -0
  41. package/dist/complex_util-CkazZsaH.js +60 -0
  42. package/dist/concat_util-CWDZCBlA.js +19 -0
  43. package/dist/data/docx.d.ts +2 -0
  44. package/dist/data/docx.js +3046 -0
  45. package/dist/data/pdf.d.ts +2 -0
  46. package/dist/data/pdf.js +17 -0
  47. package/dist/data/textLoader.d.ts +7 -0
  48. package/dist/data/textLoader.js +613 -0
  49. package/dist/dist-BewPQWjc.js +7572 -0
  50. package/dist/dist-DVmq73nz.js +8775 -0
  51. package/dist/dist-DXwIvKxl.js +896 -0
  52. package/dist/dist-VEU5mfO0.js +7545 -0
  53. package/dist/gelu-Bf1HW1RY.js +27 -0
  54. package/dist/gpgpu_math-DvLcCH6u.js +1612 -0
  55. package/dist/inference/types.d.ts +16 -0
  56. package/dist/inference/types.js +0 -0
  57. package/dist/kernel_funcs_utils-HiXOOx3f.js +229 -0
  58. package/dist/layers/BaseLayer.d.ts +44 -0
  59. package/dist/layers/BaseLayer.js +76 -0
  60. package/dist/layers/CausalSelfAttention.d.ts +39 -0
  61. package/dist/layers/CausalSelfAttention.js +99 -0
  62. package/dist/layers/LoRA.d.ts +14 -0
  63. package/dist/layers/LoRA.js +48 -0
  64. package/dist/layers/MLP.d.ts +17 -0
  65. package/dist/layers/MLP.js +34 -0
  66. package/dist/layers/PositionEmbedding.d.ts +8 -0
  67. package/dist/layers/PositionEmbedding.js +27 -0
  68. package/dist/layers/RMSNorm.d.ts +12 -0
  69. package/dist/layers/RMSNorm.js +20 -0
  70. package/dist/layers/RoPECache.d.ts +18 -0
  71. package/dist/layers/RoPECache.js +337 -0
  72. package/dist/layers/TiedEmbedding.d.ts +13 -0
  73. package/dist/layers/TiedEmbedding.js +32 -0
  74. package/dist/layers/TransformerBlock.d.ts +27 -0
  75. package/dist/layers/TransformerBlock.js +51 -0
  76. package/dist/layers/WeightStore.d.ts +20 -0
  77. package/dist/layers/WeightStore.js +69 -0
  78. package/dist/loader/load.d.ts +6 -0
  79. package/dist/loader/load.js +2 -0
  80. package/dist/loader/loadHF.d.ts +8 -0
  81. package/dist/loader/loadHF.js +2 -0
  82. package/dist/loader/loadTransformers.d.ts +4 -0
  83. package/dist/loader/loadTransformers.js +2 -0
  84. package/dist/loader/loadZipMeta.d.ts +3 -0
  85. package/dist/loader/loadZipMeta.js +16 -0
  86. package/dist/loader/newZipLoad.d.ts +3 -0
  87. package/dist/loader/newZipLoad.js +2 -0
  88. package/dist/loader/oldZipLoad.d.ts +9 -0
  89. package/dist/loader/oldZipLoad.js +2 -0
  90. package/dist/loader/save.d.ts +16 -0
  91. package/dist/loader/save.js +2 -0
  92. package/dist/loader/types.d.ts +68 -0
  93. package/dist/loader/types.js +0 -0
  94. package/dist/main-D5CbfCiV.js +13500 -0
  95. package/dist/main.d.ts +50 -0
  96. package/dist/main.js +16 -0
  97. package/dist/matMul16-BNfZSnNM.js +81 -0
  98. package/dist/matMulGelu-CPTntosE.js +162 -0
  99. package/dist/models/NanoGPTV1.d.ts +16 -0
  100. package/dist/models/NanoGPTV1.js +2 -0
  101. package/dist/models/NanoGPTV2.d.ts +16 -0
  102. package/dist/models/NanoGPTV2.js +2 -0
  103. package/dist/models/config.d.ts +27 -0
  104. package/dist/models/config.js +37 -0
  105. package/dist/models/factory.d.ts +3 -0
  106. package/dist/models/factory.js +2 -0
  107. package/dist/models/model.d.ts +44 -0
  108. package/dist/models/model.js +2 -0
  109. package/dist/ops/adamAdjust.d.ts +2 -0
  110. package/dist/ops/adamAdjust.js +18 -0
  111. package/dist/ops/adamMoments.d.ts +2 -0
  112. package/dist/ops/adamMoments.js +16 -0
  113. package/dist/ops/add16.d.ts +2 -0
  114. package/dist/ops/add16.js +12 -0
  115. package/dist/ops/appendCache.d.ts +2 -0
  116. package/dist/ops/appendCache.js +25 -0
  117. package/dist/ops/attentionMask.d.ts +2 -0
  118. package/dist/ops/attentionMask.js +16 -0
  119. package/dist/ops/concat16.d.ts +2 -0
  120. package/dist/ops/concat16.js +8 -0
  121. package/dist/ops/cpu/adamAdjust.d.ts +1 -0
  122. package/dist/ops/cpu/adamAdjust.js +16 -0
  123. package/dist/ops/cpu/adamMoments.d.ts +1 -0
  124. package/dist/ops/cpu/adamMoments.js +16 -0
  125. package/dist/ops/cpu/appendCache.d.ts +1 -0
  126. package/dist/ops/cpu/appendCache.js +65 -0
  127. package/dist/ops/cpu/attentionMask.d.ts +1 -0
  128. package/dist/ops/cpu/attentionMask.js +16 -0
  129. package/dist/ops/cpu/fusedSoftmax.d.ts +9 -0
  130. package/dist/ops/cpu/fusedSoftmax.js +22 -0
  131. package/dist/ops/cpu/gatherSub.d.ts +1 -0
  132. package/dist/ops/cpu/gatherSub.js +12 -0
  133. package/dist/ops/cpu/gelu.d.ts +1 -0
  134. package/dist/ops/cpu/gelu.js +36 -0
  135. package/dist/ops/cpu/matMul16.d.ts +1 -0
  136. package/dist/ops/cpu/matMul16.js +14 -0
  137. package/dist/ops/cpu/matMulGelu.d.ts +1 -0
  138. package/dist/ops/cpu/matMulGelu.js +41 -0
  139. package/dist/ops/cpu/matMulMul.d.ts +1 -0
  140. package/dist/ops/cpu/matMulMul.js +20 -0
  141. package/dist/ops/cpu/mulDropout.d.ts +1 -0
  142. package/dist/ops/cpu/mulDropout.js +20 -0
  143. package/dist/ops/cpu/normRMS.d.ts +1 -0
  144. package/dist/ops/cpu/normRMS.js +35 -0
  145. package/dist/ops/cpu/qkv.d.ts +5 -0
  146. package/dist/ops/cpu/qkv.js +73 -0
  147. package/dist/ops/cpu/rope.d.ts +6 -0
  148. package/dist/ops/cpu/rope.js +81 -0
  149. package/dist/ops/cpu/scatterSub.d.ts +1 -0
  150. package/dist/ops/cpu/scatterSub.js +12 -0
  151. package/dist/ops/dot16.d.ts +2 -0
  152. package/dist/ops/dot16.js +29 -0
  153. package/dist/ops/dropout.d.ts +2 -0
  154. package/dist/ops/dropout.js +11 -0
  155. package/dist/ops/dropout16.d.ts +2 -0
  156. package/dist/ops/dropout16.js +22 -0
  157. package/dist/ops/gatherSub.d.ts +2 -0
  158. package/dist/ops/gatherSub.js +13 -0
  159. package/dist/ops/gelu.d.ts +3 -0
  160. package/dist/ops/gelu.js +2 -0
  161. package/dist/ops/globalNorm.d.ts +2 -0
  162. package/dist/ops/globalNorm.js +19 -0
  163. package/dist/ops/grads/add16.d.ts +1 -0
  164. package/dist/ops/grads/add16.js +27 -0
  165. package/dist/ops/grads/attentionMask.d.ts +1 -0
  166. package/dist/ops/grads/attentionMask.js +26 -0
  167. package/dist/ops/grads/dropout16.d.ts +1 -0
  168. package/dist/ops/grads/dropout16.js +1 -0
  169. package/dist/ops/grads/gelu.d.ts +2 -0
  170. package/dist/ops/grads/gelu.js +2 -0
  171. package/dist/ops/grads/matMul16.d.ts +2 -0
  172. package/dist/ops/grads/matMul16.js +2 -0
  173. package/dist/ops/grads/matMulGelu.d.ts +1 -0
  174. package/dist/ops/grads/matMulGelu.js +22 -0
  175. package/dist/ops/grads/mul16.d.ts +1 -0
  176. package/dist/ops/grads/mul16.js +1 -0
  177. package/dist/ops/grads/normRMS.d.ts +3 -0
  178. package/dist/ops/grads/normRMS.js +37 -0
  179. package/dist/ops/grads/pack16.d.ts +2 -0
  180. package/dist/ops/grads/pack16.js +2 -0
  181. package/dist/ops/grads/qkv.d.ts +3 -0
  182. package/dist/ops/grads/qkv.js +46 -0
  183. package/dist/ops/grads/rope.d.ts +2 -0
  184. package/dist/ops/grads/rope.js +2 -0
  185. package/dist/ops/grads/softmax16.d.ts +2 -0
  186. package/dist/ops/grads/softmax16.js +23 -0
  187. package/dist/ops/grads/unpack16.d.ts +2 -0
  188. package/dist/ops/grads/unpack16.js +2 -0
  189. package/dist/ops/grads/utils.d.ts +4 -0
  190. package/dist/ops/grads/utils.js +12 -0
  191. package/dist/ops/log.d.ts +0 -0
  192. package/dist/ops/log.js +1 -0
  193. package/dist/ops/matMul16.d.ts +15 -0
  194. package/dist/ops/matMul16.js +2 -0
  195. package/dist/ops/matMulGelu.d.ts +3 -0
  196. package/dist/ops/matMulGelu.js +20 -0
  197. package/dist/ops/matMulMul.d.ts +2 -0
  198. package/dist/ops/matMulMul.js +16 -0
  199. package/dist/ops/mul16.d.ts +2 -0
  200. package/dist/ops/mul16.js +43 -0
  201. package/dist/ops/mulDrop.d.ts +2 -0
  202. package/dist/ops/mulDrop.js +15 -0
  203. package/dist/ops/normRMS.d.ts +2 -0
  204. package/dist/ops/normRMS.js +22 -0
  205. package/dist/ops/pack16.d.ts +2 -0
  206. package/dist/ops/pack16.js +2 -0
  207. package/dist/ops/qkv.d.ts +2 -0
  208. package/dist/ops/qkv.js +16 -0
  209. package/dist/ops/reshape16.d.ts +2 -0
  210. package/dist/ops/reshape16.js +33 -0
  211. package/dist/ops/rope.d.ts +3 -0
  212. package/dist/ops/rope.js +2 -0
  213. package/dist/ops/scatterSub.d.ts +2 -0
  214. package/dist/ops/scatterSub.js +13 -0
  215. package/dist/ops/slice16.d.ts +2 -0
  216. package/dist/ops/slice16.js +11 -0
  217. package/dist/ops/softmax16.d.ts +2 -0
  218. package/dist/ops/softmax16.js +9 -0
  219. package/dist/ops/sub16.d.ts +2 -0
  220. package/dist/ops/sub16.js +11 -0
  221. package/dist/ops/sum16.d.ts +2 -0
  222. package/dist/ops/sum16.js +13 -0
  223. package/dist/ops/transpose16.d.ts +3 -0
  224. package/dist/ops/transpose16.js +32 -0
  225. package/dist/ops/unpack16.d.ts +2 -0
  226. package/dist/ops/unpack16.js +2 -0
  227. package/dist/ops/webgl/adamAdjust.d.ts +1 -0
  228. package/dist/ops/webgl/adamAdjust.js +82 -0
  229. package/dist/ops/webgl/adamMoments.d.ts +1 -0
  230. package/dist/ops/webgl/adamMoments.js +44 -0
  231. package/dist/ops/webgl/appendCache.d.ts +1 -0
  232. package/dist/ops/webgl/appendCache.js +53 -0
  233. package/dist/ops/webgl/attentionMask.d.ts +1 -0
  234. package/dist/ops/webgl/attentionMask.js +64 -0
  235. package/dist/ops/webgl/dropout16.d.ts +1 -0
  236. package/dist/ops/webgl/dropout16.js +12 -0
  237. package/dist/ops/webgl/fusedSoftmax.d.ts +11 -0
  238. package/dist/ops/webgl/fusedSoftmax.js +70 -0
  239. package/dist/ops/webgl/gatherSub.d.ts +1 -0
  240. package/dist/ops/webgl/gatherSub.js +28 -0
  241. package/dist/ops/webgl/gelu.d.ts +2 -0
  242. package/dist/ops/webgl/gelu.js +48 -0
  243. package/dist/ops/webgl/log.d.ts +17 -0
  244. package/dist/ops/webgl/log.js +14 -0
  245. package/dist/ops/webgl/matMul16.d.ts +1 -0
  246. package/dist/ops/webgl/matMul16.js +37 -0
  247. package/dist/ops/webgl/matMulGelu.d.ts +21 -0
  248. package/dist/ops/webgl/matMulGelu.js +2 -0
  249. package/dist/ops/webgl/matMulMul.d.ts +14 -0
  250. package/dist/ops/webgl/matMulMul.js +24 -0
  251. package/dist/ops/webgl/mulDropout.d.ts +1 -0
  252. package/dist/ops/webgl/mulDropout.js +32 -0
  253. package/dist/ops/webgl/normRMS.d.ts +1 -0
  254. package/dist/ops/webgl/normRMS.js +114 -0
  255. package/dist/ops/webgl/qkv.d.ts +1 -0
  256. package/dist/ops/webgl/qkv.js +54 -0
  257. package/dist/ops/webgl/rope.d.ts +1 -0
  258. package/dist/ops/webgl/rope.js +72 -0
  259. package/dist/ops/webgl/scatterSub.d.ts +1 -0
  260. package/dist/ops/webgl/scatterSub.js +28 -0
  261. package/dist/ops/webgpu/adamAdjust.d.ts +1 -0
  262. package/dist/ops/webgpu/adamAdjust.js +77 -0
  263. package/dist/ops/webgpu/adamMoments.d.ts +1 -0
  264. package/dist/ops/webgpu/adamMoments.js +76 -0
  265. package/dist/ops/webgpu/add16.d.ts +1 -0
  266. package/dist/ops/webgpu/add16.js +14 -0
  267. package/dist/ops/webgpu/appendCache.d.ts +1 -0
  268. package/dist/ops/webgpu/appendCache.js +130 -0
  269. package/dist/ops/webgpu/attentionMask.d.ts +1 -0
  270. package/dist/ops/webgpu/attentionMask.js +42 -0
  271. package/dist/ops/webgpu/attentionMask32_program.d.ts +19 -0
  272. package/dist/ops/webgpu/attentionMask32_program.js +62 -0
  273. package/dist/ops/webgpu/clipScale.d.ts +1 -0
  274. package/dist/ops/webgpu/clipScale.js +45 -0
  275. package/dist/ops/webgpu/concat16.d.ts +19 -0
  276. package/dist/ops/webgpu/concat16.js +111 -0
  277. package/dist/ops/webgpu/dropout16.d.ts +1 -0
  278. package/dist/ops/webgpu/dropout16.js +59 -0
  279. package/dist/ops/webgpu/gatherSub.d.ts +1 -0
  280. package/dist/ops/webgpu/gatherSub.js +52 -0
  281. package/dist/ops/webgpu/gelu.d.ts +14 -0
  282. package/dist/ops/webgpu/gelu.js +147 -0
  283. package/dist/ops/webgpu/index.d.ts +0 -0
  284. package/dist/ops/webgpu/index.js +26 -0
  285. package/dist/ops/webgpu/matMul16.d.ts +1 -0
  286. package/dist/ops/webgpu/matMul16.js +70 -0
  287. package/dist/ops/webgpu/matMul16_program.d.ts +42 -0
  288. package/dist/ops/webgpu/matMul16_program.js +303 -0
  289. package/dist/ops/webgpu/mul16.d.ts +1 -0
  290. package/dist/ops/webgpu/mul16.js +14 -0
  291. package/dist/ops/webgpu/norm2.d.ts +1 -0
  292. package/dist/ops/webgpu/norm2.js +46 -0
  293. package/dist/ops/webgpu/normRMS.d.ts +1 -0
  294. package/dist/ops/webgpu/normRMS.js +26 -0
  295. package/dist/ops/webgpu/normRMS16_program.d.ts +10 -0
  296. package/dist/ops/webgpu/normRMS16_program.js +28 -0
  297. package/dist/ops/webgpu/normRMS32_program.d.ts +10 -0
  298. package/dist/ops/webgpu/normRMS32_program.js +28 -0
  299. package/dist/ops/webgpu/normRMSGrad.d.ts +1 -0
  300. package/dist/ops/webgpu/normRMSGrad.js +225 -0
  301. package/dist/ops/webgpu/pack16.d.ts +1 -0
  302. package/dist/ops/webgpu/pack16.js +21 -0
  303. package/dist/ops/webgpu/pack16_program.d.ts +19 -0
  304. package/dist/ops/webgpu/pack16_program.js +93 -0
  305. package/dist/ops/webgpu/qkv.d.ts +1 -0
  306. package/dist/ops/webgpu/qkv.js +64 -0
  307. package/dist/ops/webgpu/rope.d.ts +1 -0
  308. package/dist/ops/webgpu/rope.js +163 -0
  309. package/dist/ops/webgpu/scatterSub.d.ts +1 -0
  310. package/dist/ops/webgpu/scatterSub.js +53 -0
  311. package/dist/ops/webgpu/slice16.d.ts +7 -0
  312. package/dist/ops/webgpu/slice16.js +74 -0
  313. package/dist/ops/webgpu/softmax16.d.ts +17 -0
  314. package/dist/ops/webgpu/softmax16.js +18 -0
  315. package/dist/ops/webgpu/softmax16_program.d.ts +13 -0
  316. package/dist/ops/webgpu/softmax16_program.js +89 -0
  317. package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +17 -0
  318. package/dist/ops/webgpu/softmax16_subgroup_program.js +70 -0
  319. package/dist/ops/webgpu/softmax16grad.d.ts +1 -0
  320. package/dist/ops/webgpu/softmax16grad.js +31 -0
  321. package/dist/ops/webgpu/sub16.d.ts +1 -0
  322. package/dist/ops/webgpu/sub16.js +14 -0
  323. package/dist/ops/webgpu/sum16.d.ts +1 -0
  324. package/dist/ops/webgpu/sum16.js +29 -0
  325. package/dist/ops/webgpu/transpose16.d.ts +1 -0
  326. package/dist/ops/webgpu/transpose16.js +37 -0
  327. package/dist/ops/webgpu/transpose16_program.d.ts +16 -0
  328. package/dist/ops/webgpu/transpose16_program.js +51 -0
  329. package/dist/ops/webgpu/transpose16_shared_program.d.ts +15 -0
  330. package/dist/ops/webgpu/transpose16_shared_program.js +79 -0
  331. package/dist/ops/webgpu/unpack16.d.ts +1 -0
  332. package/dist/ops/webgpu/unpack16.js +60 -0
  333. package/dist/ops/webgpu/utils/binary_op.d.ts +35 -0
  334. package/dist/ops/webgpu/utils/binary_op.js +141 -0
  335. package/dist/ops/webgpu/utils/deviceInfo.d.ts +7 -0
  336. package/dist/ops/webgpu/utils/deviceInfo.js +11 -0
  337. package/dist/ops/webgpu/utils/reductions.d.ts +43 -0
  338. package/dist/ops/webgpu/utils/reductions.js +263 -0
  339. package/dist/pack16-Ck-spx_F.js +39 -0
  340. package/dist/patches/webgpu_backend.d.ts +18 -0
  341. package/dist/patches/webgpu_backend.js +43 -0
  342. package/dist/patches/webgpu_base.d.ts +21 -0
  343. package/dist/patches/webgpu_base.js +22 -0
  344. package/dist/patches/webgpu_program.d.ts +36 -0
  345. package/dist/patches/webgpu_program.js +293 -0
  346. package/dist/pdf-UoDqCYzz.js +16726 -0
  347. package/dist/picomatch-3tUnMMbd.js +1063 -0
  348. package/dist/rope-CbeGlsV8.js +25 -0
  349. package/dist/selu_util-zkAx5doH.js +24 -0
  350. package/dist/shared-D1coEFea.js +1314 -0
  351. package/dist/shared-DOgWaqvL.js +5 -0
  352. package/dist/slice_util-Dgb3ANWI.js +208 -0
  353. package/dist/tfjs_backend-BjuQ5FqB.js +614 -0
  354. package/dist/tokeniser/BaseTokeniser.d.ts +33 -0
  355. package/dist/tokeniser/BaseTokeniser.js +2 -0
  356. package/dist/tokeniser/CharTokeniser.d.ts +24 -0
  357. package/dist/tokeniser/CharTokeniser.js +92 -0
  358. package/dist/tokeniser/bpe.d.ts +28 -0
  359. package/dist/tokeniser/bpe.js +170 -0
  360. package/dist/tokeniser/messages.d.ts +61 -0
  361. package/dist/tokeniser/messages.js +0 -0
  362. package/dist/tokeniser/type.d.ts +34 -0
  363. package/dist/tokeniser/type.js +0 -0
  364. package/dist/training/AdamW.d.ts +36 -0
  365. package/dist/training/AdamW.js +128 -0
  366. package/dist/training/BasicTrainer.d.ts +63 -0
  367. package/dist/training/BasicTrainer.js +265 -0
  368. package/dist/training/DatasetBuilder.d.ts +26 -0
  369. package/dist/training/DatasetBuilder.js +2 -0
  370. package/dist/training/Evaluator.d.ts +19 -0
  371. package/dist/training/Evaluator.js +48 -0
  372. package/dist/training/LRScheduler.d.ts +12 -0
  373. package/dist/training/LRScheduler.js +38 -0
  374. package/dist/training/PreTrainer.d.ts +11 -0
  375. package/dist/training/PreTrainer.js +22 -0
  376. package/dist/training/SFTTrainer.d.ts +12 -0
  377. package/dist/training/SFTTrainer.js +24 -0
  378. package/dist/training/loss.d.ts +3 -0
  379. package/dist/training/loss.js +19 -0
  380. package/dist/training/orthoGrad.d.ts +2 -0
  381. package/dist/training/orthoGrad.js +10 -0
  382. package/dist/training/sparseCrossEntropy.d.ts +7 -0
  383. package/dist/training/sparseCrossEntropy.js +47 -0
  384. package/dist/training/tasks/ConversationTask.d.ts +18 -0
  385. package/dist/training/tasks/ConversationTask.js +38 -0
  386. package/dist/training/tasks/PretrainingTask.d.ts +17 -0
  387. package/dist/training/tasks/PretrainingTask.js +42 -0
  388. package/dist/training/tasks/StartSentenceTask.d.ts +18 -0
  389. package/dist/training/tasks/StartSentenceTask.js +45 -0
  390. package/dist/training/tasks/Task.d.ts +22 -0
  391. package/dist/training/tasks/Task.js +55 -0
  392. package/dist/training/tasks/splitter.d.ts +5 -0
  393. package/dist/training/tasks/splitter.js +18 -0
  394. package/dist/training/types.d.ts +78 -0
  395. package/dist/training/types.js +0 -0
  396. package/dist/training/validation.d.ts +17 -0
  397. package/dist/training/validation.js +2 -0
  398. package/dist/utilities/arrayClose.d.ts +1 -0
  399. package/dist/utilities/arrayClose.js +16 -0
  400. package/dist/utilities/datasetID.d.ts +2 -0
  401. package/dist/utilities/datasetID.js +18 -0
  402. package/dist/utilities/dummy.d.ts +9 -0
  403. package/dist/utilities/dummy.js +36 -0
  404. package/dist/utilities/multinomialCPU.d.ts +2 -0
  405. package/dist/utilities/multinomialCPU.js +9 -0
  406. package/dist/utilities/naming.d.ts +4 -0
  407. package/dist/utilities/naming.js +0 -0
  408. package/dist/utilities/packed.d.ts +4 -0
  409. package/dist/utilities/packed.js +13 -0
  410. package/dist/utilities/parameters.d.ts +11 -0
  411. package/dist/utilities/parameters.js +38 -0
  412. package/dist/utilities/performance.d.ts +2 -0
  413. package/dist/utilities/performance.js +16 -0
  414. package/dist/utilities/profile.d.ts +17 -0
  415. package/dist/utilities/profile.js +33 -0
  416. package/dist/utilities/safetensors.d.ts +3 -0
  417. package/dist/utilities/safetensors.js +53 -0
  418. package/dist/utilities/sentences.d.ts +5 -0
  419. package/dist/utilities/sentences.js +32 -0
  420. package/dist/utilities/tokenParse.d.ts +1 -0
  421. package/dist/utilities/tokenParse.js +17 -0
  422. package/dist/utilities/topP.d.ts +1 -0
  423. package/dist/utilities/topP.js +12 -0
  424. package/dist/utilities/waitForModel.d.ts +2 -0
  425. package/dist/utilities/waitForModel.js +12 -0
  426. package/dist/utilities/weights.d.ts +12 -0
  427. package/dist/utilities/weights.js +40 -0
  428. package/dist/utilities/yielder.d.ts +1 -0
  429. package/dist/utilities/yielder.js +7 -0
  430. package/dist/webgpu-Dt7BMzWz.js +525 -0
  431. package/dist/webgpu_program-WOyIVMlZ.js +392 -0
  432. package/dist/webgpu_util-B_F3SShA.js +106 -0
  433. package/package.json +1 -1
@@ -0,0 +1,614 @@
1
+ import { $ as e, An as t, Gr as n, In as r, L as i, Mi as a, Ps as o, Q as s, Ur as c, Wr as l, Wt as u, X as d, Yr as f, Z as p, _i as m, _r as h, an as g, b as _, br as v, cn as y, di as b, er as x, gr as S, ir as C, js as w, mi as T, mr as E, nr as D, oc as O, pi as k, pt as A, qr as j, rr as M, ti as N, tr as P, w as F, xi as I } from "./dist-BewPQWjc.js";
2
+ import { a as ee, i as te, o as ne, r as re } from "./selu_util-zkAx5doH.js";
3
+ //#region node_modules/@tensorflow/tfjs-core/dist/ops/fused/mat_mul.js
4
+ function ie({ a: e, b: t, transposeA: n = !1, transposeB: i = !1, bias: a, activation: s = "linear", preluActivationWeights: c, leakyreluAlpha: l = .2 }) {
5
+ if (ne(m.state.gradientDepth, s) === !1) {
6
+ let r = S(e, t, n, i);
7
+ return a != null && (r = j(r, a)), re(r, s, c, l);
8
+ }
9
+ let d = T(e, "a", "fused matMul"), f = T(t, "b", "fused matMul");
10
+ [d, f] = I(d, f);
11
+ let p = n ? d.shape[d.rank - 2] : d.shape[d.rank - 1], h = i ? f.shape[f.rank - 1] : f.shape[f.rank - 2], g = n ? d.shape[d.rank - 1] : d.shape[d.rank - 2], _ = i ? f.shape[f.rank - 2] : f.shape[f.rank - 1], y = d.shape.slice(0, -2), b = f.shape.slice(0, -2), x = O(y), C = O(b);
12
+ o(p === h, () => `Error in fused matMul: inner shapes (${p}) and (${h}) of Tensors with shapes ${d.shape} and ${f.shape} and transposeA=${n} and transposeB=${i} must match.`);
13
+ let E = r(d.shape.slice(0, -2), f.shape.slice(0, -2)).concat([g, _]), D = n ? v(d, [
14
+ x,
15
+ p,
16
+ g
17
+ ]) : v(d, [
18
+ x,
19
+ g,
20
+ p
21
+ ]), k = i ? v(f, [
22
+ C,
23
+ _,
24
+ h
25
+ ]) : v(f, [
26
+ C,
27
+ h,
28
+ _
29
+ ]), A;
30
+ a != null && (A = T(a, "bias", "fused matMul"), [A] = I(A, d), r(E, A.shape));
31
+ let M;
32
+ c != null && (M = T(c, "prelu weights", "fused matMul"));
33
+ let N = (e, t) => {
34
+ let [r, o, c, l] = t, u = ee(v(e, c.shape), c, s), d, f;
35
+ if (!n && !i ? (d = S(u, o, !1, !0), f = S(r, u, !0, !1)) : !n && i ? (d = S(u, o, !1, !1), f = S(u, r, !0, !1)) : n && !i ? (d = S(o, u, !1, !0), f = S(r, u, !1, !1)) : (d = S(o, u, !0, !0), f = S(u, r, !0, !0)), a != null) {
36
+ let e = te(l, u);
37
+ return [
38
+ d,
39
+ f,
40
+ e
41
+ ];
42
+ } else return [d, f];
43
+ }, P = {
44
+ a: D,
45
+ b: k,
46
+ bias: A,
47
+ preluActivationWeights: M
48
+ }, F = {
49
+ transposeA: n,
50
+ transposeB: i,
51
+ activation: s,
52
+ leakyreluAlpha: l
53
+ };
54
+ return a == null ? u((e, t, n) => {
55
+ let r = m.runKernel(w, P, F);
56
+ return n([
57
+ e,
58
+ t,
59
+ r
60
+ ]), {
61
+ value: v(r, E),
62
+ gradFunc: N
63
+ };
64
+ })(D, k) : u((e, t, n, r) => {
65
+ let i = m.runKernel(w, P, F);
66
+ return r([
67
+ e,
68
+ t,
69
+ i,
70
+ n
71
+ ]), {
72
+ value: v(i, E),
73
+ gradFunc: N
74
+ };
75
+ })(D, k, A);
76
+ }
77
+ var L = /* @__PURE__ */ k({ fusedMatMul_: ie }), ae = class e extends Error {
78
+ constructor(t) {
79
+ super(t), Object.setPrototypeOf(this, e.prototype);
80
+ }
81
+ }, oe = class e extends Error {
82
+ constructor(t) {
83
+ super(t), Object.setPrototypeOf(this, e.prototype);
84
+ }
85
+ }, R = class e extends Error {
86
+ constructor(t) {
87
+ super(t), Object.setPrototypeOf(this, e.prototype);
88
+ }
89
+ }, z = class e extends Error {
90
+ constructor(t) {
91
+ super(t), Object.setPrototypeOf(this, e.prototype);
92
+ }
93
+ }, se = class e extends Error {
94
+ constructor(t) {
95
+ super(t), Object.setPrototypeOf(this, e.prototype);
96
+ }
97
+ };
98
+ //#endregion
99
+ //#region node_modules/@tensorflow/tfjs-layers/dist/utils/generic_utils.js
100
+ function ce(e, t) {
101
+ if (Array.isArray(e)) {
102
+ let n = [];
103
+ for (let r = 0; r < t; r++) n = n.concat(e);
104
+ return n;
105
+ } else {
106
+ let n = Array(t);
107
+ return n.fill(e), n;
108
+ }
109
+ }
110
+ function B(e, t) {
111
+ if (!e) throw new se(t);
112
+ }
113
+ function le(e, t) {
114
+ let n = 0;
115
+ for (let r of e) r === t && n++;
116
+ return n;
117
+ }
118
+ function ue(e) {
119
+ return e.length === 1 ? e[0] : e;
120
+ }
121
+ function de(e) {
122
+ return Array.isArray(e) ? e : [e];
123
+ }
124
+ function fe(e) {
125
+ let t = e.replace(/(.)([A-Z][a-z0-9]+)/g, "$1_$2").replace(/([a-z])([A-Z])/g, "$1_$2").toLowerCase();
126
+ return t[0] === "_" ? "private" + t : t;
127
+ }
128
+ function pe(e) {
129
+ return e.length <= 1 || e.indexOf("_") === -1 ? e : e.replace(/[_]+(\w|$)/g, (e, t) => t.toUpperCase());
130
+ }
131
+ var V = {};
132
+ function me(e) {
133
+ if (e == null) return null;
134
+ let t = {};
135
+ return t.className = e.getClassName(), t.config = e.getConfig(), t;
136
+ }
137
+ function H(e) {
138
+ if (!(typeof e != "object" || !e)) if (Array.isArray(e)) e.forEach((e) => H(e));
139
+ else {
140
+ let t = Object.keys(e);
141
+ for (let n of t) {
142
+ let t = e[n];
143
+ typeof t == "object" && t && (!Array.isArray(t) && t.type === "ndarray" && typeof t.value == "number" ? e[n] = t.value : H(t));
144
+ }
145
+ }
146
+ }
147
+ function he(e, t = {}, n = {}, r = "object", i = !1) {
148
+ if (typeof e == "string") {
149
+ let i = e, a;
150
+ if (i in n) a = n[i];
151
+ else if (i in V) a = V[i];
152
+ else if (a = t[i], a == null) throw new R(`Unknown ${r}: ${e}. This may be due to one of the following reasons:\n1. The ${r} is defined in Python, in which case it needs to be ported to TensorFlow.js or your JavaScript code.\n2. The custom ${r} is defined in JavaScript, but is not registered properly with tf.serialization.registerClass().`);
153
+ return a;
154
+ } else {
155
+ let a = e;
156
+ if (a.className == null || a.config == null) throw new R(`${r}: Improper config format: ${JSON.stringify(a)}.\n'className' and 'config' must set.`);
157
+ let o = a.className, s, c;
158
+ if (o in n ? [s, c] = n[o] : o in V ? [s, c] = V.className : o in t && ([s, c] = t[o]), s == null) throw new R(`Unknown ${r}: ${o}. This may be due to one of the following reasons:\n1. The ${r} is defined in Python, in which case it needs to be ported to TensorFlow.js or your JavaScript code.\n2. The custom ${r} is defined in JavaScript, but is not registered properly with tf.serialization.registerClass().`);
159
+ if (c != null) {
160
+ let e = {};
161
+ for (let t of Object.keys(V)) e[t] = V[t];
162
+ for (let t of Object.keys(n)) e[t] = n[t];
163
+ let t = a.config;
164
+ t.customObjects = e;
165
+ let r = Object.assign({}, V);
166
+ for (let e of Object.keys(n)) V[e] = n[e];
167
+ H(a.config);
168
+ let o = c(s, a.config, n, i);
169
+ return V = Object.assign({}, r), o;
170
+ } else {
171
+ let e = Object.assign({}, V);
172
+ for (let e of Object.keys(n)) V[e] = n[e];
173
+ let t = new s(a.config);
174
+ return V = Object.assign({}, e), t;
175
+ }
176
+ }
177
+ }
178
+ function ge(e, t) {
179
+ return e < t ? -1 : +(e > t);
180
+ }
181
+ function _e(e, t) {
182
+ return -1 * ge(e, t);
183
+ }
184
+ function ve(e) {
185
+ if (e == null) return e;
186
+ let t = [];
187
+ for (let n of e) t.indexOf(n) === -1 && t.push(n);
188
+ return t;
189
+ }
190
+ function ye(e) {
191
+ if (e == null) throw new R(`Invalid value in obj: ${JSON.stringify(e)}`);
192
+ for (let t in e) if (e.hasOwnProperty(t)) return !1;
193
+ return !0;
194
+ }
195
+ function U(e, t, n) {
196
+ if (n != null && e.indexOf(n) < 0) throw new R(`${n} is not a valid ${t}. Valid values are ${e} or null/undefined.`);
197
+ }
198
+ function be(e, t, n = 0, r = Infinity) {
199
+ return B(n >= 0), B(r >= n), Array.isArray(e) && e.length >= n && e.length <= r && e.every((e) => typeof e === t);
200
+ }
201
+ function W(e, t) {
202
+ Array.isArray(e) ? (o(e.length > 0, () => `${t} is unexpectedly an empty array.`), e.forEach((e, n) => W(e, `element ${n + 1} of ${t}`))) : o(Number.isInteger(e) && e > 0, () => `Expected ${t} to be a positive integer, but got ${G(e)}.`);
203
+ }
204
+ function G(e) {
205
+ return e === null ? "null" : Array.isArray(e) ? "[" + e.map((e) => G(e)).join(",") + "]" : typeof e == "string" ? `"${e}"` : `${e}`;
206
+ }
207
+ function xe(e, t, n) {
208
+ let r = n == null ? a() : n(), i;
209
+ return (...o) => {
210
+ let s = n == null ? a() : n();
211
+ return s - r < t ? i : (r = s, i = e(...o), i);
212
+ };
213
+ }
214
+ function Se(e) {
215
+ return e === "relu" ? "relu" : e === "linear" ? "linear" : e === "elu" ? "elu" : null;
216
+ }
217
+ //#endregion
218
+ //#region node_modules/@tensorflow/tfjs-layers/dist/keras_format/common.js
219
+ var Ce = ["channelsFirst", "channelsLast"], we = ["nearest", "bilinear"], Te = [
220
+ "valid",
221
+ "same",
222
+ "causal"
223
+ ], Ee = ["max", "avg"], De = [
224
+ "sum",
225
+ "mul",
226
+ "concat",
227
+ "ave"
228
+ ], K = /* @__PURE__ */ new Map();
229
+ function q(e) {
230
+ U(Ce, "DataFormat", e);
231
+ }
232
+ function Oe(e) {
233
+ U(we, "InterpolationFormat", e);
234
+ }
235
+ function ke(e) {
236
+ U(Te, "PaddingMode", e);
237
+ }
238
+ function Ae(e) {
239
+ U(Ee, "PoolMode", e);
240
+ }
241
+ var J = [], je = "/";
242
+ function Me(e, t) {
243
+ J.push(e);
244
+ try {
245
+ let e = t();
246
+ return J.pop(), e;
247
+ } catch (e) {
248
+ throw J.pop(), e;
249
+ }
250
+ }
251
+ function Ne() {
252
+ return J.length === 0 ? "" : J.join(je) + je;
253
+ }
254
+ function Pe(e) {
255
+ if (!Le(e)) throw Error("Not a valid tensor name: '" + e + "'");
256
+ return Ne() + e;
257
+ }
258
+ function Fe(e) {
259
+ if (!Le(e)) throw Error("Not a valid tensor name: '" + e + "'");
260
+ K.has(e) || K.set(e, 0);
261
+ let t = K.get(e);
262
+ if (K.set(e, K.get(e) + 1), t > 0) {
263
+ let n = `${e}_${t}`;
264
+ return K.set(n, 1), n;
265
+ } else return e;
266
+ }
267
+ var Ie = /* @__PURE__ */ new RegExp(/^[A-Za-z0-9][-A-Za-z0-9\._\/]*$/);
268
+ function Le(e) {
269
+ return !!e.match(Ie);
270
+ }
271
+ //#endregion
272
+ //#region node_modules/@tensorflow/tfjs-layers/dist/utils/math_utils.js
273
+ function Re(e) {
274
+ return e === parseInt(e.toString(), 10);
275
+ }
276
+ function Y(e, t, n) {
277
+ t ??= 0, n ??= e.length;
278
+ let r = 1;
279
+ for (let i = t; i < n; ++i) r *= e[i];
280
+ return r;
281
+ }
282
+ function ze(e) {
283
+ if (e.length === 0) return NaN;
284
+ let t = Infinity;
285
+ for (let n = 0; n < e.length; n++) {
286
+ let r = e[n];
287
+ r < t && (t = r);
288
+ }
289
+ return t;
290
+ }
291
+ function Be(e) {
292
+ if (e.length === 0) return NaN;
293
+ let t = -Infinity;
294
+ for (let n = 0; n < e.length; n++) {
295
+ let r = e[n];
296
+ r > t && (t = r);
297
+ }
298
+ return t;
299
+ }
300
/**
 * Lists the values `e`, `e + 1`, ... that are strictly below `t`.
 *
 * @param e First value of the sequence.
 * @param t Exclusive upper bound.
 * @returns A new array; empty when `e` equals `t`.
 * @throws R when `t` is smaller than `e`.
 */
function Ve(e, t) {
  if (t < e) throw new R(`end (${t}) < begin (${e}) is forbidden.`);
  const values = [];
  let current = e;
  while (current < t) {
    values.push(current);
    ++current;
  }
  return values;
}
306
+ //#endregion
307
+ //#region node_modules/@tensorflow/tfjs-layers/dist/backend/common.js
308
// Cached epsilon value; stays undefined until `Ue` is first called.
var He;
/**
 * Returns the epsilon value, computing it on first use.
 *
 * The value comes from `N().epsilon()` and is kept in `He`, so later calls
 * skip the lookup as long as the cached value is not null/undefined.
 *
 * @returns The cached epsilon.
 */
function Ue() {
  if (He == null) He = N().epsilon();
  return He;
}
312
/**
 * Supplies the default data-format string.
 *
 * @returns Always the literal "channelsLast".
 */
function X() {
  const defaultFormat = "channelsLast";
  return defaultFormat;
}
315
+ //#endregion
316
+ //#region node_modules/@tensorflow/tfjs-layers/dist/backend/tfjs_backend.js
317
/**
 * Forwards both arguments to the imported op `f` and returns its result.
 *
 * NOTE(review): `f` is also called with "int32" in `tt`, so it is presumably
 * the dtype cast op — confirm against the bundle's imports.
 *
 * @param e First argument for `f` (presumably a tensor).
 * @param t Second argument for `f` (presumably a dtype).
 * @returns Whatever `f(e, t)` returns.
 */
function We(e, t) {
  const result = f(e, t);
  return result;
}
320
/**
 * Inserts a dimension of size 1 into the shape of `e` and reshapes via `v`.
 *
 * @param e Object with a `shape` array; passed to `v` with the new shape.
 * @param t Position of the new dimension. A negative value is resolved as
 *   `shape.length + t + 1`, so the default -1 appends the dimension at the end.
 * @returns The result of `v(e, newShape)`.
 */
function Ge(e, t = -1) {
  const dims = e.shape.slice();
  let axis = t;
  if (axis < 0) axis = dims.length + axis + 1;
  dims.splice(axis, 0, 1);
  return v(e, dims);
}
324
/**
 * Repeats a rank-2 input `t` times along a newly inserted middle axis.
 *
 * Inside the `b` wrapper the input gets a size-1 dimension at position 1
 * (via `Ge`) and is then passed to `Qe` with repetition counts `[1, t, 1]`.
 *
 * @param e Input whose `shape` must have exactly two entries.
 * @param t Repetition count for the inserted axis.
 * @returns Whatever `b` returns for the wrapped computation.
 * @throws R when `e.shape.length` is not 2.
 */
function Ke(e, t) {
  return b(() => {
    if (e.shape.length !== 2) throw new R(`repeat() expects a rank-2 tensor, but received a rank-${e.shape.length} tensor.`);
    const expanded = Ge(e, 1);
    const repetitions = [1, t, 1];
    return Qe(expanded, repetitions);
  });
}
334
/**
 * Reshapes `e` to a single dimension whose size is the product of all entries
 * of `e.shape`.
 *
 * @param e Object with a `shape` array; passed to `v`.
 * @returns The result of `v(e, [totalSize])`.
 */
function qe(e) {
  const totalSize = Y(e.shape);
  return v(e, [totalSize]);
}
337
/**
 * Reshapes `e` to two dimensions: the first dimension is kept and all
 * remaining dimensions are multiplied together.
 *
 * @param e Object with `rank` and `shape`; passed to `v`.
 * @returns The result of `v(e, [shape[0], product of shape[1..]])`.
 * @throws R when `e.rank` is 1 or less.
 */
function Je(e) {
  if (e.rank <= 1) throw new R(`batchFlatten requires a minimum rank of 2. Got rank: ${e.rank}.`);
  const leading = e.shape[0];
  const remaining = Y(e.shape, 1);
  return v(e, [leading, remaining]);
}
341
/**
 * Slices `t` along its first axis, keeping every other axis whole.
 *
 * Rank 1 calls `e(t, n, r)` directly. Ranks 2 to 6 call a rank-specific slice
 * op (`s`, `p`, `d`, and `E` for both 5 and 6) with `begin = [n, 0, ...]` and
 * `size = [r, shape[1], ...]`. The whole computation runs inside `b`.
 *
 * @param t Input with `rank` and `shape`.
 * @param n Start offset on the first axis.
 * @param r Number of entries to take on the first axis.
 * @returns Whatever `b` returns for the wrapped slice.
 * @throws R for any rank other than 1 to 6.
 */
function Z(t, n, r) {
  return b(() => {
    const rank = t.rank;
    if (rank === 1) return e(t, n, r);
    let sliceOp;
    if (rank === 2) sliceOp = s;
    else if (rank === 3) sliceOp = p;
    else if (rank === 4) sliceOp = d;
    else if (rank === 5 || rank === 6) sliceOp = E;
    else throw new R(`sliceAlongFirstAxis() received an unsupported tensor rank: ${t.rank}`);
    // First axis carries the requested window; the rest start at 0 and span
    // their full extent.
    const begin = [n];
    const size = [r];
    for (let axis = 1; axis < rank; ++axis) {
      begin.push(0);
      size.push(t.shape[axis]);
    }
    return sliceOp(t, begin, size);
  });
}
398
/**
 * Slices `t` along its last axis, keeping every other axis whole.
 *
 * Rank 1 calls `e(t, n, r)` directly. Ranks 2 to 4 call a rank-specific slice
 * op (`s`, `p`, `d`) with `begin = [0, ..., n]` and
 * `size = [shape[0], ..., r]`. The whole computation runs inside `b`.
 *
 * @param t Input with `rank` and `shape`.
 * @param n Start offset on the last axis.
 * @param r Number of entries to take on the last axis.
 * @returns Whatever `b` returns for the wrapped slice.
 * @throws R for any rank other than 1 to 4.
 */
function Q(t, n, r) {
  return b(() => {
    const rank = t.rank;
    if (rank === 1) return e(t, n, r);
    let sliceOp;
    if (rank === 2) sliceOp = s;
    else if (rank === 3) sliceOp = p;
    else if (rank === 4) sliceOp = d;
    else throw new R(`sliceAlongLastAxis() received an unsupported tensor rank: ${t.rank}`);
    // Leading axes start at 0 and span their full extent; the last axis
    // carries the requested window.
    const begin = [];
    const size = [];
    for (let axis = 0; axis < rank - 1; ++axis) {
      begin.push(0);
      size.push(t.shape[axis]);
    }
    begin.push(n);
    size.push(r);
    return sliceOp(t, begin, size);
  });
}
427
/**
 * Slices `t` along the axis selected by `i`, keeping every other axis whole.
 *
 * `i` is 1-based: 1 selects the first axis and `t.rank` selects the last one.
 * Rank 1 ignores `i` and calls `e(t, n, r)`. For ranks 2 to 4 the first axis
 * is delegated to `Z`, the last axis to `Q`, and interior axes call the
 * rank-specific slice op (`p` for rank 3, `d` for rank 4) directly. The whole
 * computation runs inside `b`.
 *
 * The unsupported-rank error used to name `sliceAlongLastAxis()`, which is a
 * different function; it now names this one so the failure can be traced to
 * the right call site.
 *
 * @param t Input with `rank` and `shape`.
 * @param n Start offset on the selected axis.
 * @param r Number of entries to take on the selected axis.
 * @param i 1-based axis index.
 * @returns Whatever `b` returns for the wrapped slice.
 * @throws R when the rank is not 1 to 4, or when `i` is not an axis of `t`.
 */
function Ye(t, n, r, i) {
  return b(() => {
    const rank = t.rank;
    if (rank === 1) return e(t, n, r);
    if (rank !== 2 && rank !== 3 && rank !== 4) throw new R(`sliceAlongAxis() received an unsupported tensor rank: ${t.rank}`);
    if (i === 1) return Z(t, n, r);
    if (i === rank) return Q(t, n, r);
    // Interior axes exist only for rank 3 (axis 2) and rank 4 (axes 2 and 3).
    const interior = i === 2 || (i === 3 && rank === 4);
    if (!interior) throw new R(`The axis is not within the rank of the tensor ${i}`);
    const target = i - 1;
    const begin = [];
    const size = [];
    for (let axis = 0; axis < rank; ++axis) {
      begin.push(axis === target ? n : 0);
      size.push(axis === target ? r : t.shape[axis]);
    }
    return rank === 3 ? p(t, begin, size) : d(t, begin, size);
  });
}
481
/**
 * Concatenates the inputs in `e` through the imported op `h`.
 *
 * Axis handling, as written: any negative `t` is replaced by the rank of the
 * first input, and an axis equal to that rank is then passed to `h` as -1. In
 * effect every negative axis ends up as -1.
 *
 * @param e Array of inputs; `e[0].rank` is read.
 * @param t Requested axis; defaults to -1.
 * @returns Whatever `h(e, axis)` returns.
 */
function Xe(e, t = -1) {
  let axis = t;
  if (axis < 0) {
    const rank = e[0].rank;
    axis = rank === 0 ? 0 : rank;
  }
  if (axis === e[0].rank) axis = -1;
  return h(e, axis);
}
485
/**
 * Joins `e` and `t` along axis 0 using the op that matches the rank of `e`.
 *
 * Rank 1 calls `M([e, t])`; ranks 2, 3 and 4 call `D`, `P` and `x` with the
 * pair and axis 0.
 *
 * @param e First input; its `rank` selects the op.
 * @param t Second input.
 * @returns The result of the selected op.
 * @throws R for any rank other than 1 to 4.
 */
function Ze(e, t) {
  const rank = e.rank;
  if (rank === 1) return M([e, t]);
  if (rank === 2) return D([e, t], 0);
  if (rank === 3) return P([e, t], 0);
  if (rank === 4) return x([e, t], 0);
  throw new R(`concatAlongFirstAxis() received an unsupported tensor rank: ${e.rank}`);
}
494
/**
 * Passes `e` and a list of per-dimension counts to the imported op `y`.
 *
 * @param e Input whose `rank` must equal the number of counts.
 * @param t A single count or an array of counts; a non-array is wrapped into a
 *   one-element array.
 * @returns Whatever `y(e, counts)` returns.
 * @throws R when the number of counts differs from `e.rank`.
 */
function Qe(e, t) {
  if (!Array.isArray(t)) t = [t];
  const matches = e.rank === t.length;
  if (!matches) throw new R(`The length of input n (${t.length}) does not match the number of dimensions in input x (${e.rank})`);
  return y(e, t);
}
498
/**
 * Forwards its arguments to the imported op `A`, filling in defaults for the
 * second and third.
 *
 * NOTE(review): the defaults 0 and 1 suggest mean and standard deviation of a
 * normal sampler — confirm against the bundle's imports.
 *
 * @param e First argument for `A` (presumably a shape).
 * @param t Second argument; defaults to 0.
 * @param n Third argument; defaults to 1.
 * @param r Fourth argument, passed through unchanged.
 * @param i Fifth argument, passed through unchanged.
 * @returns Whatever `A` returns.
 */
function $e(e, t = 0, n = 1, r, i) {
  const sample = A(e, t, n, r, i);
  return sample;
}
501
/**
 * Matrix-style product of `e` and `t` with optional bias and activation,
 * computed by the fused op `L`.
 *
 * When both inputs are rank 2 they go to `L` unchanged. Otherwise `e` is
 * reshaped to `[-1, lastDim(e)]`, `t` is permuted so its second-to-last axis
 * comes first and reshaped to `[thatDim, -1]`, and the 2-D result is reshaped
 * to the leading dims of `e` followed by the dims of `t` minus its
 * second-to-last one.
 *
 * @param e Left input (rank >= 2).
 * @param t Right input (rank >= 2).
 * @param n Value passed to `L` as `activation`.
 * @param r Optional bias; when truthy it is reshaped with `$` using the
 *   default data format from `X()`.
 * @returns The result of `L`, reshaped in the higher-rank case.
 * @throws z when either rank is below 2, or when `t.rank >= 3` and the last
 *   dim of `e` differs from the second-to-last dim of `t`.
 */
function et(e, t, n, r) {
  if (e.rank < 2 || t.rank < 2) throw new z(`dot requires both inputs to be rank >= 2 but got x shape = ${e.shape} and y shape = ${t.shape}`);
  if (t.rank >= 3 && e.shape.slice(-1)[0] !== t.shape.slice(-2)[0]) throw new z(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${e.shape} and y shape = ${t.shape}`);
  if (e.rank === 2 && t.rank === 2) {
    const plainBias = r ? $(e.rank, r, X()) : null;
    return L({
      a: e,
      b: t,
      transposeA: !1,
      transposeB: !1,
      bias: plainBias,
      activation: n
    });
  }
  // Collapse every leading dim of the left input into one.
  const leftLeading = e.shape.slice();
  const leftLast = leftLeading.pop();
  e = v(e, [-1, leftLast]);
  // Split the right shape into (rest..., inner, last).
  const rightRest = t.shape.slice();
  const rightLast = rightRest.pop();
  const rightInner = rightRest.pop();
  const rightRank = t.rank;
  // Permutation that moves the second-to-last axis to the front.
  const permutation = Array.from({ length: rightRank }, (_unused, axis) => {
    if (axis === 0) return rightRank - 2;
    return axis <= rightRank - 2 ? axis - 1 : axis;
  });
  t = v(F(t, permutation), [rightInner, -1]);
  const outputShape = [...leftLeading, ...rightRest, rightLast];
  const reshapedBias = r ? $(e.rank, r, X()) : null;
  const product = L({
    a: e,
    b: t,
    transposeA: !1,
    transposeB: !1,
    bias: reshapedBias,
    activation: n
  });
  return v(product, outputShape);
}
528
/**
 * Calls the imported op `g` on `e` with int32 indices.
 *
 * An array `t` is converted with `i(t, "int32")`; anything else goes through
 * `f(t, "int32")`. The work runs inside `b`.
 *
 * @param e First argument for `g`.
 * @param t Indices as an array or as a value accepted by `f`.
 * @param n Third argument for `g` (presumably the axis).
 * @returns Whatever `b` returns for the wrapped call.
 */
function tt(e, t, n) {
  return b(() => {
    const indices = Array.isArray(t) ? i(t, "int32") : f(t, "int32");
    return g(e, indices, n);
  });
}
531
/**
 * Passes `e` as both operands to the imported binary op `l`.
 *
 * NOTE(review): `l` is presumably element-wise multiplication, which would
 * make this a square — confirm against the bundle's imports.
 *
 * @param e Operand used on both sides.
 * @returns Whatever `l(e, e)` returns.
 */
function nt(e) {
  const result = l(e, e);
  return result;
}
534
/**
 * Reshapes a bias `t` so it lines up with an input of rank `e` in data
 * format `n`.
 *
 * For `e` below 3 the bias is returned untouched. For `e` of 3, 4 or 5:
 * - "channelsFirst": a one-dimensional bias becomes `[1, C, 1, ...]`; a
 *   full-rank bias has its last dim moved to position 1 behind a leading 1.
 * - "channelsLast": a one-dimensional bias becomes `[1, ..., 1, C]`; a
 *   full-rank bias just gets a leading 1.
 * Any other combination (including an unrecognised `n`) falls through to the
 * final error.
 *
 * @param e Rank of the input the bias will be added to.
 * @param t Bias with `rank` and `shape`.
 * @param n Data format string.
 * @returns `t` itself or the result of `v(t, targetShape)`.
 * @throws R when the bias rank is neither 1 nor `e`, or when no branch
 *   applies.
 */
function $(e, t, n) {
  const biasShape = t.shape;
  if (t.rank !== 1 && t.rank !== e) throw new R(`Unexpected bias dimensions: ${t.rank}; expected it to be 1 or ${e}`);
  if (e === 3 || e === 4 || e === 5) {
    const isVector = biasShape.length === 1;
    // Number of dims that are neither batch nor channel.
    const spatial = e - 2;
    if (n === "channelsFirst") {
      const target = [1];
      if (isVector) {
        target.push(biasShape[0]);
        for (let pos = 0; pos < spatial; ++pos) target.push(1);
      } else {
        target.push(biasShape[spatial]);
        for (let pos = 0; pos < spatial; ++pos) target.push(biasShape[pos]);
      }
      return v(t, target);
    }
    if (n === "channelsLast") {
      if (!isVector) return v(t, [1].concat(biasShape));
      const target = [];
      for (let pos = 0; pos < e - 1; ++pos) target.push(1);
      target.push(biasShape[0]);
      return v(t, target);
    }
  } else if (e < 3) {
    return t;
  }
  throw new R(`Unsupported input rank by biasAdd: ${t.rank}`);
}
594
/**
 * Combines `e` with a bias through the imported op `j`, after reshaping the
 * bias with `$`.
 *
 * Inside `b`: a null/undefined `n` falls back to `X()`, the format is checked
 * with `q`, and the bias is reshaped for `e.rank`.
 *
 * @param e Input with a `rank`.
 * @param t Bias to reshape.
 * @param n Optional data format string.
 * @returns Whatever `b` returns for the wrapped computation.
 */
function rt(e, t, n) {
  return b(() => {
    if (n == null) n = X();
    q(n);
    const shapedBias = $(e.rank, t, n);
    return j(e, shapedBias);
  });
}
597
/**
 * Applies the imported op `t` to `e`; only an alpha of exactly 1 is accepted.
 *
 * @param e Argument passed to `t`.
 * @param n Alpha value; defaults to 1.
 * @returns Whatever `t(e)` returns.
 * @throws z when `n` is anything other than 1.
 */
function it(e, n = 1) {
  if (n === 1) return t(e);
  throw new z(`Support for alpha values other than 1 (${n}) is not implemented yet.`);
}
601
/**
 * Computes `n(e, j(c(e), 1))` inside `b`.
 *
 * NOTE(review): if `n`, `j` and `c` are div, add and abs, this is
 * x / (|x| + 1) — confirm against the bundle's imports.
 *
 * @param e Input passed to `c` and used as the first operand of `n`.
 * @returns Whatever `b` returns for the wrapped computation.
 */
function at(e) {
  return b(() => {
    const denominator = j(c(e), 1);
    return n(e, denominator);
  });
}
604
/**
 * Calls the imported op `_` with all four arguments inside `b`.
 *
 * @param e First argument for `_`.
 * @param t Second argument for `_`.
 * @param n Third argument for `_`.
 * @param r Fourth argument for `_`.
 * @returns Whatever `b` returns for the wrapped call.
 */
function ot(e, t, n, r) {
  return b(() => {
    const result = _(e, t, n, r);
    return result;
  });
}
607
/**
 * Computes `C(j(.5, l(.2, e)), 0, 1)` inside `b`.
 *
 * NOTE(review): if `l`, `j` and `C` are mul, add and clip, this is
 * 0.2 * x + 0.5 limited to [0, 1] — confirm against the bundle's imports.
 *
 * @param e Input passed as the second operand of `l`.
 * @returns Whatever `b` returns for the wrapped computation.
 */
function st(e) {
  return b(() => {
    const scaled = l(.2, e);
    const shifted = j(.5, scaled);
    return C(shifted, 0, 1);
  });
}
610
/**
 * Invokes exactly one of two callbacks depending on a flag.
 *
 * @param e Callback invoked when `n` is truthy.
 * @param t Callback invoked when `n` is falsy.
 * @param n Selector flag; defaults to `false`.
 * @returns The return value of the callback that was invoked.
 */
function ct(e, t, n = !1) {
  if (n) {
    return e();
  }
  return t();
}
613
+ //#endregion
614
+ export { ae as $, ke as A, le as B, Y as C, Ve as D, ze as E, De as F, ce as G, he as H, B as I, ue as J, _e as K, W as L, Pe as M, Fe as N, q as O, Me as P, ve as Q, be as R, X as S, Be as T, ye as U, xe as V, Se as W, de as X, pe as Y, fe as Z, Z as _, Xe as a, Qe as b, it as c, tt as d, z as et, st as f, Ye as g, Ke as h, Ze as i, Ae as j, Oe as k, Ge as l, $e as m, rt as n, R as nt, et as o, ct as p, me as q, We as r, ot as s, Je as t, oe as tt, qe as u, at as v, Re as w, Ue as x, nt as y, U as z };
@@ -0,0 +1,33 @@
1
+ import { Conversation, ITokeniser } from './type';
2
+ import { default as EE } from 'eventemitter3';
3
+ export declare const SPECIALS: string[];
4
/**
 * Abstract base class declaration shared by the tokenisers.
 *
 * It extends the eventemitter3 emitter typed with the single event name
 * `'trainStatus'` and implements `ITokeniser` from `./type`. Only signatures
 * are declared here; behaviour lives in the compiled JavaScript.
 */
export default abstract class BaseTokeniser extends EE<'trainStatus'> implements ITokeniser {
    /** Identifier of this tokeniser instance. */
    id: string;
    /** Optional identifier of an associated dataset. */
    datasetID?: string;
    /** Special-token strings mapped to their numeric indices. */
    protected specialTokens: Map<string, number>;
    /** Numeric indices that belong to special tokens. */
    protected specialTokenSet: Set<number>;

    // ---- State every concrete tokeniser has to provide ----
    abstract vocabSize: number;
    abstract eosToken: number;
    abstract bosToken: number;
    abstract trained: boolean;

    /**
     * Adds a token, optionally at a given index.
     * @returns A number (presumably the index assigned to the token — confirm
     *   in the implementation).
     */
    abstract addToken(token: string, index?: number): number;
    /** Reports whether `index` is a special token. */
    isSpecialToken(index: number): boolean;
    protected addSpecialTokens(): void;
    protected addSpecialToken(token: string, index: number): void;
    /** NOTE(review): presumably assigns a fresh value to `id` — confirm in the implementation. */
    generateID(): void;

    /**
     * Trains the tokeniser on conversations.
     * @param text Conversations to train on.
     * @param cb Optional callback receiving a vocabulary number.
     * @param datasetID Optional dataset identifier.
     * @returns A promise resolving to a number.
     */
    abstract train(text: Conversation[][], cb?: (vocab: number) => void, datasetID?: string): Promise<number>;
    abstract getVocab(): string[];
    abstract getMerges(): [string, string][];
    abstract destroy(): void;

    // ---- Encoding ----
    abstract encode(text: string): number[];
    encodeSequence(text: string): number[];
    encodeAsSequence(conversation: Conversation[], completion?: boolean): number[];
    /** Without a `masking` argument the result is the token list only. */
    encodeConversation(conversation: Conversation[], completion?: boolean): number[];
    /** With a `masking` argument the result carries tokens and a parallel boolean mask. */
    encodeConversation(conversation: Conversation[], completion: boolean, masking: boolean): {
        tokens: number[];
        mask: boolean[];
    };

    // ---- Decoding ----
    abstract decode(tokens: number[]): string;
    decodeConversation(tokens: number[] | Uint16Array): Conversation[];
    /** Looks up the index of a special token; `undefined` when there is none. */
    getSpecialTokenIndex(token: string): number | undefined;
}
@@ -0,0 +1,2 @@
1
+ import { n as e, t } from "../BaseTokeniser-DSg9zcYq.js";
2
+ export { e as SPECIALS, t as default };
@@ -0,0 +1,24 @@
1
+ import { default as BaseTokeniser } from './BaseTokeniser';
2
+ import { Conversation } from './type';
3
/**
 * Declaration of the concrete tokeniser built on `BaseTokeniser`.
 *
 * NOTE(review): the class name suggests one token per character — confirm in
 * the implementation; only signatures are visible in this declaration file.
 */
export default class CharTokeniser extends BaseTokeniser {
    vocabSize: number;
    eosToken: number;
    bosToken: number;
    unkToken: number;
    /** Vocabulary entries as strings. */
    vocab: string[];
    private cache;
    private _trained;

    /**
     * @param vocabSizeOrVocab Either a number or a list of strings
     *   (presumably a target vocabulary size or a ready-made vocabulary).
     */
    constructor(vocabSizeOrVocab: number | string[]);

    /** Read-only accessor that satisfies the abstract `trained` member of the base class. */
    get trained(): boolean;
    addToken(token: string, index?: number): number;
    destroy(): void;
    train(text: Conversation[][], cb?: (vocab: number) => void, datasetID?: string): Promise<number>;

    /** With `numeric: true` each text becomes a list of numbers. */
    tokenise(text: string[], numeric: true): number[][];
    /** Without `numeric` each text becomes a list of strings. */
    tokenise(text: string[]): string[][];
    detokenise(tokens: (number[] | Uint16Array)[]): string[];

    encode(text: string): number[];
    /** Accepts `Uint16Array` in addition to the `number[]` declared on the base class. */
    decode(tokens: number[] | Uint16Array): string;
    getVocab(): string[];
    getMerges(): [string, string][];

    /**
     * @param text Source strings.
     * @param windowSize Optional window size.
     * @returns A promise resolving to a pair of number arrays.
     */
    createTrainingData(text: string[], windowSize?: number): Promise<[number[], number[]]>;
}