@genai-fi/nanogpt 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (482) hide show
  1. package/package.json +9 -10
  2. package/dist/Generator.d.ts +0 -82
  3. package/dist/Generator.js +0 -11941
  4. package/dist/RealDiv-CGwv0liw.js +0 -365
  5. package/dist/Reshape-BW__R4mZ.js +0 -79
  6. package/dist/Reshape-CPBkTIH2.js +0 -14
  7. package/dist/TeachableLLM.d.ts +0 -70
  8. package/dist/TeachableLLM.js +0 -273
  9. package/dist/Trainer.d.ts +0 -43
  10. package/dist/Trainer.js +0 -244
  11. package/dist/_commonjsHelpers-ByX85dGu.js +0 -33
  12. package/dist/axis_util-GTVlo58H.js +0 -55
  13. package/dist/backend.d.ts +0 -2
  14. package/dist/backend.js +0 -13
  15. package/dist/backend_util-GaFarB78.js +0 -425
  16. package/dist/backend_webgpu-BqASlsbV.js +0 -545
  17. package/dist/binary_op_util-pKXltfxI.js +0 -192
  18. package/dist/broadcast_to-eS93CCN_.js +0 -28
  19. package/dist/checks/appendCache.d.ts +0 -1
  20. package/dist/checks/appendCache.js +0 -22
  21. package/dist/checks/attentionMask.d.ts +0 -1
  22. package/dist/checks/attentionMask.js +0 -37
  23. package/dist/checks/check.d.ts +0 -9
  24. package/dist/checks/check.js +0 -20
  25. package/dist/checks/gelu.d.ts +0 -1
  26. package/dist/checks/gelu.js +0 -18
  27. package/dist/checks/index.d.ts +0 -26
  28. package/dist/checks/index.js +0 -28
  29. package/dist/checks/matMulGelu.d.ts +0 -1
  30. package/dist/checks/matMulGelu.js +0 -28
  31. package/dist/checks/normRMS.d.ts +0 -1
  32. package/dist/checks/normRMS.js +0 -16
  33. package/dist/checks/normRMSGrad.d.ts +0 -1
  34. package/dist/checks/normRMSGrad.js +0 -12
  35. package/dist/checks/packUnpack.d.ts +0 -1
  36. package/dist/checks/packUnpack.js +0 -18
  37. package/dist/checks/qkv.d.ts +0 -1
  38. package/dist/checks/qkv.js +0 -34
  39. package/dist/checks/rope.d.ts +0 -1
  40. package/dist/checks/rope.js +0 -36
  41. package/dist/checks/weights.d.ts +0 -14
  42. package/dist/checks/weights.js +0 -31
  43. package/dist/clip_by_value-DDA7rrcT.js +0 -12
  44. package/dist/complex-DI35Q-gW.js +0 -11
  45. package/dist/complex_util-Yc1A_gV1.js +0 -55
  46. package/dist/concat-CAQpCret.js +0 -17
  47. package/dist/concat_util-D18dJ4fD.js +0 -22
  48. package/dist/data/docx.d.ts +0 -2
  49. package/dist/data/docx.js +0 -15
  50. package/dist/data/parquet.d.ts +0 -2
  51. package/dist/data/parquet.js +0 -17
  52. package/dist/data/pdf.d.ts +0 -2
  53. package/dist/data/pdf.js +0 -14
  54. package/dist/data/textLoader.d.ts +0 -7
  55. package/dist/data/textLoader.js +0 -108
  56. package/dist/dataset-CGGp1z9P.js +0 -1124
  57. package/dist/dropout_util--NxWuYg2.js +0 -27
  58. package/dist/expand_dims-Bkd1YD5x.js +0 -11
  59. package/dist/exports_initializers-CYzKLjN7.js +0 -7
  60. package/dist/floor-BQtb-Azg.js +0 -9
  61. package/dist/gather-qIqEqaGn.js +0 -9
  62. package/dist/gelu-B220X1Go.js +0 -26
  63. package/dist/gpgpu_math-BwvV12df.js +0 -2022
  64. package/dist/index-CUXkjxiT.js +0 -3516
  65. package/dist/index-CieiGp4Y.js +0 -349
  66. package/dist/index-CjOWnMXP.js +0 -7308
  67. package/dist/index-Cp39cXWe.js +0 -1016
  68. package/dist/index-D5v913EJ.js +0 -4
  69. package/dist/index-DmeWGGmS.js +0 -1074
  70. package/dist/index-DvYrXKkX.js +0 -113
  71. package/dist/index-Ksja3su6.js +0 -151
  72. package/dist/index-xuotMAFm.js +0 -118
  73. package/dist/inference/types.d.ts +0 -16
  74. package/dist/inference/types.js +0 -1
  75. package/dist/jszip.min-BZhlzntC.js +0 -2313
  76. package/dist/kernel_funcs_utils-pq0CK9co.js +0 -306
  77. package/dist/layers/BaseLayer.d.ts +0 -44
  78. package/dist/layers/BaseLayer.js +0 -74
  79. package/dist/layers/CausalSelfAttention.d.ts +0 -39
  80. package/dist/layers/CausalSelfAttention.js +0 -86
  81. package/dist/layers/LoRA.d.ts +0 -14
  82. package/dist/layers/LoRA.js +0 -58
  83. package/dist/layers/MLP.d.ts +0 -17
  84. package/dist/layers/MLP.js +0 -44
  85. package/dist/layers/PositionEmbedding.d.ts +0 -8
  86. package/dist/layers/PositionEmbedding.js +0 -31
  87. package/dist/layers/RMSNorm.d.ts +0 -12
  88. package/dist/layers/RMSNorm.js +0 -22
  89. package/dist/layers/RoPECache.d.ts +0 -18
  90. package/dist/layers/RoPECache.js +0 -50
  91. package/dist/layers/TiedEmbedding.d.ts +0 -13
  92. package/dist/layers/TiedEmbedding.js +0 -36
  93. package/dist/layers/TransformerBlock.d.ts +0 -27
  94. package/dist/layers/TransformerBlock.js +0 -40
  95. package/dist/layers/WeightStore.d.ts +0 -20
  96. package/dist/layers/WeightStore.js +0 -76
  97. package/dist/loader/load.d.ts +0 -6
  98. package/dist/loader/load.js +0 -68
  99. package/dist/loader/loadHF.d.ts +0 -8
  100. package/dist/loader/loadHF.js +0 -22
  101. package/dist/loader/loadTransformers.d.ts +0 -4
  102. package/dist/loader/loadTransformers.js +0 -44
  103. package/dist/loader/loadZipMeta.d.ts +0 -3
  104. package/dist/loader/loadZipMeta.js +0 -16
  105. package/dist/loader/newZipLoad.d.ts +0 -3
  106. package/dist/loader/newZipLoad.js +0 -31
  107. package/dist/loader/oldZipLoad.d.ts +0 -9
  108. package/dist/loader/oldZipLoad.js +0 -80
  109. package/dist/loader/save.d.ts +0 -16
  110. package/dist/loader/save.js +0 -90
  111. package/dist/loader/types.d.ts +0 -67
  112. package/dist/loader/types.js +0 -1
  113. package/dist/main.d.ts +0 -50
  114. package/dist/main.js +0 -109
  115. package/dist/matMul16-BcVC_E62.js +0 -80
  116. package/dist/matMulGelu-JNLZqKQp.js +0 -163
  117. package/dist/mat_mul-DhG0Newp.js +0 -11
  118. package/dist/mod-CSdCpRjf.js +0 -11
  119. package/dist/models/NanoGPTV1.d.ts +0 -16
  120. package/dist/models/NanoGPTV1.js +0 -99
  121. package/dist/models/NanoGPTV2.d.ts +0 -16
  122. package/dist/models/NanoGPTV2.js +0 -90
  123. package/dist/models/config.d.ts +0 -27
  124. package/dist/models/config.js +0 -50
  125. package/dist/models/factory.d.ts +0 -3
  126. package/dist/models/factory.js +0 -16
  127. package/dist/models/model.d.ts +0 -44
  128. package/dist/models/model.js +0 -134
  129. package/dist/non_max_suppression_impl-B2W7YjZB.js +0 -102
  130. package/dist/not_equal-hurPF26l.js +0 -64
  131. package/dist/ones-BytntneX.js +0 -14
  132. package/dist/ops/adamAdjust.d.ts +0 -2
  133. package/dist/ops/adamAdjust.js +0 -9
  134. package/dist/ops/adamMoments.d.ts +0 -2
  135. package/dist/ops/adamMoments.js +0 -9
  136. package/dist/ops/add16.d.ts +0 -2
  137. package/dist/ops/add16.js +0 -9
  138. package/dist/ops/appendCache.d.ts +0 -2
  139. package/dist/ops/appendCache.js +0 -22
  140. package/dist/ops/attentionMask.d.ts +0 -2
  141. package/dist/ops/attentionMask.js +0 -10
  142. package/dist/ops/concat16.d.ts +0 -2
  143. package/dist/ops/concat16.js +0 -9
  144. package/dist/ops/cpu/adamAdjust.d.ts +0 -1
  145. package/dist/ops/cpu/adamAdjust.js +0 -18
  146. package/dist/ops/cpu/adamMoments.d.ts +0 -1
  147. package/dist/ops/cpu/adamMoments.js +0 -16
  148. package/dist/ops/cpu/appendCache.d.ts +0 -1
  149. package/dist/ops/cpu/appendCache.js +0 -23
  150. package/dist/ops/cpu/attentionMask.d.ts +0 -1
  151. package/dist/ops/cpu/attentionMask.js +0 -22
  152. package/dist/ops/cpu/fusedSoftmax.d.ts +0 -9
  153. package/dist/ops/cpu/fusedSoftmax.js +0 -29
  154. package/dist/ops/cpu/gatherSub.d.ts +0 -1
  155. package/dist/ops/cpu/gatherSub.js +0 -18
  156. package/dist/ops/cpu/gelu.d.ts +0 -1
  157. package/dist/ops/cpu/gelu.js +0 -40
  158. package/dist/ops/cpu/matMul16.d.ts +0 -1
  159. package/dist/ops/cpu/matMul16.js +0 -15
  160. package/dist/ops/cpu/matMulGelu.d.ts +0 -1
  161. package/dist/ops/cpu/matMulGelu.js +0 -53
  162. package/dist/ops/cpu/matMulMul.d.ts +0 -1
  163. package/dist/ops/cpu/matMulMul.js +0 -23
  164. package/dist/ops/cpu/mulDropout.d.ts +0 -1
  165. package/dist/ops/cpu/mulDropout.js +0 -23
  166. package/dist/ops/cpu/normRMS.d.ts +0 -1
  167. package/dist/ops/cpu/normRMS.js +0 -39
  168. package/dist/ops/cpu/qkv.d.ts +0 -5
  169. package/dist/ops/cpu/qkv.js +0 -41
  170. package/dist/ops/cpu/rope.d.ts +0 -6
  171. package/dist/ops/cpu/rope.js +0 -38
  172. package/dist/ops/cpu/scatterSub.d.ts +0 -1
  173. package/dist/ops/cpu/scatterSub.js +0 -23
  174. package/dist/ops/dot16.d.ts +0 -2
  175. package/dist/ops/dot16.js +0 -42
  176. package/dist/ops/dropout.d.ts +0 -2
  177. package/dist/ops/dropout.js +0 -14
  178. package/dist/ops/dropout16.d.ts +0 -2
  179. package/dist/ops/dropout16.js +0 -25
  180. package/dist/ops/gatherSub.d.ts +0 -2
  181. package/dist/ops/gatherSub.js +0 -9
  182. package/dist/ops/gelu.d.ts +0 -3
  183. package/dist/ops/gelu.js +0 -8
  184. package/dist/ops/globalNorm.d.ts +0 -2
  185. package/dist/ops/globalNorm.js +0 -13
  186. package/dist/ops/grads/add16.d.ts +0 -1
  187. package/dist/ops/grads/add16.js +0 -26
  188. package/dist/ops/grads/attentionMask.d.ts +0 -1
  189. package/dist/ops/grads/attentionMask.js +0 -21
  190. package/dist/ops/grads/dropout16.d.ts +0 -1
  191. package/dist/ops/grads/dropout16.js +0 -2
  192. package/dist/ops/grads/gelu.d.ts +0 -2
  193. package/dist/ops/grads/gelu.js +0 -5
  194. package/dist/ops/grads/matMul16.d.ts +0 -2
  195. package/dist/ops/grads/matMul16.js +0 -9
  196. package/dist/ops/grads/matMulGelu.d.ts +0 -1
  197. package/dist/ops/grads/matMulGelu.js +0 -17
  198. package/dist/ops/grads/mul16.d.ts +0 -1
  199. package/dist/ops/grads/mul16.js +0 -4
  200. package/dist/ops/grads/normRMS.d.ts +0 -3
  201. package/dist/ops/grads/normRMS.js +0 -33
  202. package/dist/ops/grads/pack16.d.ts +0 -2
  203. package/dist/ops/grads/pack16.js +0 -6
  204. package/dist/ops/grads/qkv.d.ts +0 -3
  205. package/dist/ops/grads/qkv.js +0 -34
  206. package/dist/ops/grads/rope.d.ts +0 -2
  207. package/dist/ops/grads/rope.js +0 -5
  208. package/dist/ops/grads/softmax16.d.ts +0 -2
  209. package/dist/ops/grads/softmax16.js +0 -25
  210. package/dist/ops/grads/unpack16.d.ts +0 -2
  211. package/dist/ops/grads/unpack16.js +0 -5
  212. package/dist/ops/grads/utils.d.ts +0 -4
  213. package/dist/ops/grads/utils.js +0 -14
  214. package/dist/ops/log.d.ts +0 -0
  215. package/dist/ops/log.js +0 -1
  216. package/dist/ops/matMul16.d.ts +0 -15
  217. package/dist/ops/matMul16.js +0 -13
  218. package/dist/ops/matMulGelu.d.ts +0 -3
  219. package/dist/ops/matMulGelu.js +0 -14
  220. package/dist/ops/matMulMul.d.ts +0 -2
  221. package/dist/ops/matMulMul.js +0 -9
  222. package/dist/ops/mul16.d.ts +0 -2
  223. package/dist/ops/mul16.js +0 -39
  224. package/dist/ops/mulDrop.d.ts +0 -2
  225. package/dist/ops/mulDrop.js +0 -9
  226. package/dist/ops/normRMS.d.ts +0 -2
  227. package/dist/ops/normRMS.js +0 -19
  228. package/dist/ops/pack16.d.ts +0 -2
  229. package/dist/ops/pack16.js +0 -5
  230. package/dist/ops/qkv.d.ts +0 -2
  231. package/dist/ops/qkv.js +0 -10
  232. package/dist/ops/reshape16.d.ts +0 -2
  233. package/dist/ops/reshape16.js +0 -41
  234. package/dist/ops/rope.d.ts +0 -3
  235. package/dist/ops/rope.js +0 -7
  236. package/dist/ops/scatterSub.d.ts +0 -2
  237. package/dist/ops/scatterSub.js +0 -9
  238. package/dist/ops/slice16.d.ts +0 -2
  239. package/dist/ops/slice16.js +0 -9
  240. package/dist/ops/softmax16.d.ts +0 -2
  241. package/dist/ops/softmax16.js +0 -9
  242. package/dist/ops/sub16.d.ts +0 -2
  243. package/dist/ops/sub16.js +0 -8
  244. package/dist/ops/sum16.d.ts +0 -2
  245. package/dist/ops/sum16.js +0 -13
  246. package/dist/ops/transpose16.d.ts +0 -3
  247. package/dist/ops/transpose16.js +0 -40
  248. package/dist/ops/unpack16.d.ts +0 -2
  249. package/dist/ops/unpack16.js +0 -6
  250. package/dist/ops/webgl/adamAdjust.d.ts +0 -1
  251. package/dist/ops/webgl/adamAdjust.js +0 -49
  252. package/dist/ops/webgl/adamMoments.d.ts +0 -1
  253. package/dist/ops/webgl/adamMoments.js +0 -40
  254. package/dist/ops/webgl/appendCache.d.ts +0 -1
  255. package/dist/ops/webgl/appendCache.js +0 -44
  256. package/dist/ops/webgl/attentionMask.d.ts +0 -1
  257. package/dist/ops/webgl/attentionMask.js +0 -45
  258. package/dist/ops/webgl/dropout16.d.ts +0 -1
  259. package/dist/ops/webgl/dropout16.js +0 -11
  260. package/dist/ops/webgl/fusedSoftmax.d.ts +0 -11
  261. package/dist/ops/webgl/fusedSoftmax.js +0 -80
  262. package/dist/ops/webgl/gatherSub.d.ts +0 -1
  263. package/dist/ops/webgl/gatherSub.js +0 -27
  264. package/dist/ops/webgl/gelu.d.ts +0 -2
  265. package/dist/ops/webgl/gelu.js +0 -50
  266. package/dist/ops/webgl/log.d.ts +0 -17
  267. package/dist/ops/webgl/log.js +0 -23
  268. package/dist/ops/webgl/matMul16.d.ts +0 -1
  269. package/dist/ops/webgl/matMul16.js +0 -45
  270. package/dist/ops/webgl/matMulGelu.d.ts +0 -21
  271. package/dist/ops/webgl/matMulGelu.js +0 -9
  272. package/dist/ops/webgl/matMulMul.d.ts +0 -14
  273. package/dist/ops/webgl/matMulMul.js +0 -28
  274. package/dist/ops/webgl/mulDropout.d.ts +0 -1
  275. package/dist/ops/webgl/mulDropout.js +0 -41
  276. package/dist/ops/webgl/normRMS.d.ts +0 -1
  277. package/dist/ops/webgl/normRMS.js +0 -93
  278. package/dist/ops/webgl/qkv.d.ts +0 -1
  279. package/dist/ops/webgl/qkv.js +0 -46
  280. package/dist/ops/webgl/rope.d.ts +0 -1
  281. package/dist/ops/webgl/rope.js +0 -56
  282. package/dist/ops/webgl/scatterSub.d.ts +0 -1
  283. package/dist/ops/webgl/scatterSub.js +0 -27
  284. package/dist/ops/webgpu/adamAdjust.d.ts +0 -1
  285. package/dist/ops/webgpu/adamAdjust.js +0 -57
  286. package/dist/ops/webgpu/adamMoments.d.ts +0 -1
  287. package/dist/ops/webgpu/adamMoments.js +0 -60
  288. package/dist/ops/webgpu/add16.d.ts +0 -1
  289. package/dist/ops/webgpu/add16.js +0 -13
  290. package/dist/ops/webgpu/appendCache.d.ts +0 -1
  291. package/dist/ops/webgpu/appendCache.js +0 -105
  292. package/dist/ops/webgpu/attentionMask.d.ts +0 -1
  293. package/dist/ops/webgpu/attentionMask.js +0 -26
  294. package/dist/ops/webgpu/attentionMask32_program.d.ts +0 -19
  295. package/dist/ops/webgpu/attentionMask32_program.js +0 -54
  296. package/dist/ops/webgpu/clipScale.d.ts +0 -1
  297. package/dist/ops/webgpu/clipScale.js +0 -58
  298. package/dist/ops/webgpu/concat16.d.ts +0 -19
  299. package/dist/ops/webgpu/concat16.js +0 -126
  300. package/dist/ops/webgpu/dropout16.d.ts +0 -1
  301. package/dist/ops/webgpu/dropout16.js +0 -51
  302. package/dist/ops/webgpu/gatherSub.d.ts +0 -1
  303. package/dist/ops/webgpu/gatherSub.js +0 -39
  304. package/dist/ops/webgpu/gelu.d.ts +0 -14
  305. package/dist/ops/webgpu/gelu.js +0 -141
  306. package/dist/ops/webgpu/index.d.ts +0 -0
  307. package/dist/ops/webgpu/index.js +0 -26
  308. package/dist/ops/webgpu/matMul16.d.ts +0 -1
  309. package/dist/ops/webgpu/matMul16.js +0 -65
  310. package/dist/ops/webgpu/matMul16_program.d.ts +0 -42
  311. package/dist/ops/webgpu/matMul16_program.js +0 -343
  312. package/dist/ops/webgpu/mul16.d.ts +0 -1
  313. package/dist/ops/webgpu/mul16.js +0 -13
  314. package/dist/ops/webgpu/norm2.d.ts +0 -1
  315. package/dist/ops/webgpu/norm2.js +0 -76
  316. package/dist/ops/webgpu/normRMS.d.ts +0 -1
  317. package/dist/ops/webgpu/normRMS.js +0 -34
  318. package/dist/ops/webgpu/normRMS16_program.d.ts +0 -10
  319. package/dist/ops/webgpu/normRMS16_program.js +0 -25
  320. package/dist/ops/webgpu/normRMS32_program.d.ts +0 -10
  321. package/dist/ops/webgpu/normRMS32_program.js +0 -25
  322. package/dist/ops/webgpu/normRMSGrad.d.ts +0 -1
  323. package/dist/ops/webgpu/normRMSGrad.js +0 -284
  324. package/dist/ops/webgpu/pack16.d.ts +0 -1
  325. package/dist/ops/webgpu/pack16.js +0 -18
  326. package/dist/ops/webgpu/pack16_program.d.ts +0 -19
  327. package/dist/ops/webgpu/pack16_program.js +0 -92
  328. package/dist/ops/webgpu/qkv.d.ts +0 -1
  329. package/dist/ops/webgpu/qkv.js +0 -24
  330. package/dist/ops/webgpu/rope.d.ts +0 -1
  331. package/dist/ops/webgpu/rope.js +0 -135
  332. package/dist/ops/webgpu/scatterSub.d.ts +0 -1
  333. package/dist/ops/webgpu/scatterSub.js +0 -40
  334. package/dist/ops/webgpu/slice16.d.ts +0 -7
  335. package/dist/ops/webgpu/slice16.js +0 -69
  336. package/dist/ops/webgpu/softmax16.d.ts +0 -17
  337. package/dist/ops/webgpu/softmax16.js +0 -21
  338. package/dist/ops/webgpu/softmax16_program.d.ts +0 -13
  339. package/dist/ops/webgpu/softmax16_program.js +0 -73
  340. package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +0 -17
  341. package/dist/ops/webgpu/softmax16_subgroup_program.js +0 -75
  342. package/dist/ops/webgpu/softmax16grad.d.ts +0 -1
  343. package/dist/ops/webgpu/softmax16grad.js +0 -37
  344. package/dist/ops/webgpu/sub16.d.ts +0 -1
  345. package/dist/ops/webgpu/sub16.js +0 -13
  346. package/dist/ops/webgpu/sum16.d.ts +0 -1
  347. package/dist/ops/webgpu/sum16.js +0 -38
  348. package/dist/ops/webgpu/transpose16.d.ts +0 -1
  349. package/dist/ops/webgpu/transpose16.js +0 -34
  350. package/dist/ops/webgpu/transpose16_program.d.ts +0 -16
  351. package/dist/ops/webgpu/transpose16_program.js +0 -50
  352. package/dist/ops/webgpu/transpose16_shared_program.d.ts +0 -15
  353. package/dist/ops/webgpu/transpose16_shared_program.js +0 -70
  354. package/dist/ops/webgpu/unpack16.d.ts +0 -1
  355. package/dist/ops/webgpu/unpack16.js +0 -48
  356. package/dist/ops/webgpu/utils/binary_op.d.ts +0 -35
  357. package/dist/ops/webgpu/utils/binary_op.js +0 -139
  358. package/dist/ops/webgpu/utils/deviceInfo.d.ts +0 -7
  359. package/dist/ops/webgpu/utils/deviceInfo.js +0 -11
  360. package/dist/ops/webgpu/utils/reductions.d.ts +0 -43
  361. package/dist/ops/webgpu/utils/reductions.js +0 -275
  362. package/dist/ops-CsXeTq1P.js +0 -476
  363. package/dist/pack16-bqltoUlR.js +0 -39
  364. package/dist/papaparse.min-C0cScC2i.js +0 -418
  365. package/dist/parquet-Bqjmp2vo.js +0 -44231
  366. package/dist/patches/webgpu_backend.d.ts +0 -18
  367. package/dist/patches/webgpu_backend.js +0 -56
  368. package/dist/patches/webgpu_base.d.ts +0 -21
  369. package/dist/patches/webgpu_base.js +0 -34
  370. package/dist/patches/webgpu_program.d.ts +0 -36
  371. package/dist/patches/webgpu_program.js +0 -400
  372. package/dist/pdf-NIhmP3sq.js +0 -19477
  373. package/dist/rand_util-CZ7yLoUm.js +0 -50
  374. package/dist/random_normal-IBRrha8a.js +0 -14
  375. package/dist/random_width-DN5ZtQkM.js +0 -9796
  376. package/dist/range-C-CjF-LI.js +0 -10
  377. package/dist/relu-J_X6MUzx.js +0 -9
  378. package/dist/reshape-BDOuCSNW.js +0 -9
  379. package/dist/resize_nearest_neighbor-BojqlfRe.js +0 -150
  380. package/dist/rope-DcrZM_e6.js +0 -24
  381. package/dist/scatter_nd_util-ByNJaL6I.js +0 -46
  382. package/dist/segment_util-Dasb2Zaf.js +0 -43
  383. package/dist/selu_util-BLhIqRkw.js +0 -44
  384. package/dist/shared-3agzAqQ_.js +0 -53
  385. package/dist/shared-CagdqkLh.js +0 -2143
  386. package/dist/slice-BzS11Qh0.js +0 -12
  387. package/dist/slice_util-CC35pLmT.js +0 -153
  388. package/dist/softmax-D4q1LJN7.js +0 -12
  389. package/dist/split-C2Sj255c.js +0 -9
  390. package/dist/squeeze-ho4wLUek.js +0 -10
  391. package/dist/stack-DudVrtmG.js +0 -11
  392. package/dist/step-BTxPtq1r.js +0 -261
  393. package/dist/sum-BpiwSWvg.js +0 -11
  394. package/dist/tensor-BWFldCso.js +0 -8
  395. package/dist/tensor1d-LMGMIUlr.js +0 -11
  396. package/dist/tensor2d-BnXMKScO.js +0 -14
  397. package/dist/tensor4d-C6UCG_u8.js +0 -14
  398. package/dist/tfjs_backend-BGnG-ppu.js +0 -654
  399. package/dist/tile-CFy-xTO6.js +0 -11
  400. package/dist/tokeniser/BaseTokeniser.d.ts +0 -33
  401. package/dist/tokeniser/BaseTokeniser.js +0 -124
  402. package/dist/tokeniser/CharTokeniser.d.ts +0 -24
  403. package/dist/tokeniser/CharTokeniser.js +0 -107
  404. package/dist/tokeniser/bpe.d.ts +0 -28
  405. package/dist/tokeniser/bpe.js +0 -173
  406. package/dist/tokeniser/messages.d.ts +0 -61
  407. package/dist/tokeniser/messages.js +0 -1
  408. package/dist/tokeniser/type.d.ts +0 -34
  409. package/dist/tokeniser/type.js +0 -1
  410. package/dist/training/AdamW.d.ts +0 -36
  411. package/dist/training/AdamW.js +0 -138
  412. package/dist/training/BasicTrainer.d.ts +0 -63
  413. package/dist/training/BasicTrainer.js +0 -265
  414. package/dist/training/DatasetBuilder.d.ts +0 -26
  415. package/dist/training/DatasetBuilder.js +0 -86
  416. package/dist/training/Evaluator.d.ts +0 -19
  417. package/dist/training/Evaluator.js +0 -39
  418. package/dist/training/LRScheduler.d.ts +0 -12
  419. package/dist/training/LRScheduler.js +0 -34
  420. package/dist/training/PreTrainer.d.ts +0 -11
  421. package/dist/training/PreTrainer.js +0 -20
  422. package/dist/training/SFTTrainer.d.ts +0 -12
  423. package/dist/training/SFTTrainer.js +0 -22
  424. package/dist/training/loss.d.ts +0 -3
  425. package/dist/training/loss.js +0 -24
  426. package/dist/training/orthoGrad.d.ts +0 -2
  427. package/dist/training/orthoGrad.js +0 -10
  428. package/dist/training/sparseCrossEntropy.d.ts +0 -7
  429. package/dist/training/sparseCrossEntropy.js +0 -69
  430. package/dist/training/tasks/ConversationTask.d.ts +0 -18
  431. package/dist/training/tasks/ConversationTask.js +0 -40
  432. package/dist/training/tasks/PretrainingTask.d.ts +0 -17
  433. package/dist/training/tasks/PretrainingTask.js +0 -47
  434. package/dist/training/tasks/StartSentenceTask.d.ts +0 -18
  435. package/dist/training/tasks/StartSentenceTask.js +0 -49
  436. package/dist/training/tasks/Task.d.ts +0 -22
  437. package/dist/training/tasks/Task.js +0 -68
  438. package/dist/training/tasks/splitter.d.ts +0 -5
  439. package/dist/training/tasks/splitter.js +0 -21
  440. package/dist/training/types.d.ts +0 -78
  441. package/dist/training/types.js +0 -1
  442. package/dist/training/validation.d.ts +0 -17
  443. package/dist/training/validation.js +0 -84
  444. package/dist/transpose-9kRxIXWR.js +0 -36
  445. package/dist/unsorted_segment_sum-DJvk5xnh.js +0 -277
  446. package/dist/utilities/arrayClose.d.ts +0 -1
  447. package/dist/utilities/arrayClose.js +0 -20
  448. package/dist/utilities/datasetID.d.ts +0 -2
  449. package/dist/utilities/datasetID.js +0 -21
  450. package/dist/utilities/dummy.d.ts +0 -9
  451. package/dist/utilities/dummy.js +0 -43
  452. package/dist/utilities/multinomialCPU.d.ts +0 -2
  453. package/dist/utilities/multinomialCPU.js +0 -13
  454. package/dist/utilities/naming.d.ts +0 -4
  455. package/dist/utilities/naming.js +0 -1
  456. package/dist/utilities/packed.d.ts +0 -4
  457. package/dist/utilities/packed.js +0 -15
  458. package/dist/utilities/parameters.d.ts +0 -11
  459. package/dist/utilities/parameters.js +0 -57
  460. package/dist/utilities/performance.d.ts +0 -2
  461. package/dist/utilities/performance.js +0 -16
  462. package/dist/utilities/profile.d.ts +0 -17
  463. package/dist/utilities/profile.js +0 -38
  464. package/dist/utilities/safetensors.d.ts +0 -3
  465. package/dist/utilities/safetensors.js +0 -83
  466. package/dist/utilities/sentences.d.ts +0 -5
  467. package/dist/utilities/sentences.js +0 -41
  468. package/dist/utilities/tokenParse.d.ts +0 -1
  469. package/dist/utilities/tokenParse.js +0 -21
  470. package/dist/utilities/topP.d.ts +0 -1
  471. package/dist/utilities/topP.js +0 -13
  472. package/dist/utilities/waitForModel.d.ts +0 -2
  473. package/dist/utilities/waitForModel.js +0 -12
  474. package/dist/utilities/weights.d.ts +0 -12
  475. package/dist/utilities/weights.js +0 -45
  476. package/dist/utilities/yielder.d.ts +0 -1
  477. package/dist/utilities/yielder.js +0 -7
  478. package/dist/variable-Ck482e3n.js +0 -7
  479. package/dist/webgpu_program-B4HmApL1.js +0 -525
  480. package/dist/webgpu_util-DYlGSwOJ.js +0 -64
  481. package/dist/zeros-DvZpK8s6.js +0 -13
  482. package/dist/zeros_like-CWjDdwr-.js +0 -721
@@ -1,80 +0,0 @@
1
- import { aa as y } from "../index-CUXkjxiT.js";
2
- import "../random_width-DN5ZtQkM.js";
3
- import "../zeros_like-CWjDdwr-.js";
4
- import "../Generator.js";
5
- import "../index-Cp39cXWe.js";
6
- import "../dataset-CGGp1z9P.js";
7
- import "../ops/cpu/attentionMask.js";
8
- import "../ops/webgl/attentionMask.js";
9
- import "../ops/grads/attentionMask.js";
10
- import "../ops/cpu/rope.js";
11
- import "../ops/webgl/rope.js";
12
- import "../rope-DcrZM_e6.js";
13
- import "../ops/cpu/appendCache.js";
14
- import "../ops/webgl/appendCache.js";
15
- import "../ops/grads/softmax16.js";
16
- import "../matMul16-BcVC_E62.js";
17
- import "../ops/webgl/matMul16.js";
18
- import "../ops/cpu/matMul16.js";
19
- import "../pack16-bqltoUlR.js";
20
- import "../ops/transpose16.js";
21
- import "../ops/reshape16.js";
22
- import "../ops/cpu/qkv.js";
23
- import "../ops/webgl/qkv.js";
24
- import "../ops/grads/qkv.js";
25
- import "../ops/cpu/normRMS.js";
26
- import "../ops/webgl/normRMS.js";
27
- import "../ops/grads/normRMS.js";
28
- import "../ops/dropout16.js";
29
- import "../ops/webgl/dropout16.js";
30
- import "../ops/grads/add16.js";
31
- import "../jszip.min-BZhlzntC.js";
32
- import g from "../tokeniser/CharTokeniser.js";
33
- import k from "../tokeniser/bpe.js";
34
- import { dummyPassAsync as u } from "../utilities/dummy.js";
35
- import b from "../models/factory.js";
36
- import "../ops/cpu/adamAdjust.js";
37
- import "../ops/webgl/adamAdjust.js";
38
- import "../ops/cpu/adamMoments.js";
39
- import "../ops/webgl/adamMoments.js";
40
- import "../index-DvYrXKkX.js";
41
- import "../ops/cpu/gatherSub.js";
42
- import "../ops/webgl/gatherSub.js";
43
- import "../ops/cpu/scatterSub.js";
44
- import "../ops/webgl/scatterSub.js";
45
- import "../papaparse.min-C0cScC2i.js";
46
- import "../ops/cpu/matMulGelu.js";
47
- import "../matMulGelu-JNLZqKQp.js";
48
- import "../ops/grads/matMulGelu.js";
49
- import "../ops/cpu/gelu.js";
50
- import "../ops/webgl/gelu.js";
51
- import "../gelu-B220X1Go.js";
52
- import "../ops/webgl/log.js";
53
- import "../checks/normRMS.js";
54
- import "../checks/normRMSGrad.js";
55
- import { importWeights as M } from "../utilities/weights.js";
56
- async function jt(r, m) {
57
- const p = /* @__PURE__ */ new Map(), s = await r.file("manifest.json")?.async("string");
58
- if (!s)
59
- throw new Error("Manifest file not found in the zip archive");
60
- const a = JSON.parse(s);
61
- for (const [t, o] of Object.entries(a.weightSpec))
62
- p.set(t, { spec: o, data: new Float32Array() });
63
- const c = await r.file("tokeniser.json")?.async("string");
64
- if (!c)
65
- throw new Error("Tokeniser file not found in the zip archive");
66
- const i = JSON.parse(c), l = (i.type ?? "char") === "char" ? new g(i.vocab) : new k(i.vocab, i.merges), f = /* @__PURE__ */ new Map();
67
- for (const t of Object.keys(r.files))
68
- if (t.endsWith(".bin")) {
69
- const o = t.replace(".bin", ""), w = await r.file(t).async("arraybuffer"), d = new Float32Array(w), n = p.get(o) || { spec: [], data: new Float32Array() };
70
- n.data = d, p.set(o, n);
71
- const h = await M(n);
72
- f.set(o, h);
73
- }
74
- y();
75
- const e = b(a.config);
76
- return e.metaData = m, await u(e), e.weightStore.loadWeights(f, !!m.url), { model: e, tokeniser: l, metaData: m };
77
- }
78
- export {
79
- jt as default
80
- };
@@ -1,16 +0,0 @@
1
- import { ITokeniser } from '../tokeniser/type';
2
- import { default as Model, ModelForwardAttributes } from '../models/model';
3
- import { AdamWOptimizer } from '../training/AdamW';
4
- import { TrainingLogEntry } from '../training/types';
5
- import { GPTConfig } from '../models/config';
6
- export interface SaveOptions {
7
- name?: string;
8
- metadata?: Record<string, unknown>;
9
- files?: Record<string, unknown>;
10
- includeOptimizer?: boolean;
11
- }
12
- export interface ExtraSaveItems {
13
- optimizer?: AdamWOptimizer;
14
- trainingLog?: TrainingLogEntry[];
15
- }
16
- export declare function saveModel(model: Model<ModelForwardAttributes, GPTConfig>, tokeniser: ITokeniser, options?: SaveOptions, extraItems?: ExtraSaveItems): Promise<Blob>;
@@ -1,90 +0,0 @@
1
- import { z as y } from "../jszip.min-BZhlzntC.js";
2
- import _ from "../tokeniser/CharTokeniser.js";
3
- import { save_safetensors as b } from "../utilities/safetensors.js";
4
- import { VERSION as h } from "./load.js";
5
- function m(i) {
6
- if (i.length > 1e3) {
7
- const n = Math.ceil(i.length / 1e3);
8
- return i.filter((o, a) => a % n === 0 || a === i.length - 1);
9
- }
10
- return i;
11
- }
12
- async function u(i, n, o, a) {
13
- const s = /* @__PURE__ */ new Map();
14
- i.weightStore.saveWeights(s);
15
- const e = new y();
16
- if (a?.optimizer) {
17
- const t = await a.optimizer.saveMoments();
18
- e.file("optimizer.safetensors", t, { binary: !0 }), e.file("optimizer_config.json", JSON.stringify(a.optimizer.serializeConfig()), {
19
- binary: !1
20
- });
21
- }
22
- a?.trainingLog && e.file("training_log.json", JSON.stringify(m(a.trainingLog), void 0, 4), {
23
- binary: !1
24
- });
25
- const g = {};
26
- s.forEach((t, f) => {
27
- t.length === 1 && (g[f] = t[0]);
28
- });
29
- const c = await b(g);
30
- e.file("model.safetensors", c, { binary: !0 });
31
- const p = i.config.modelType;
32
- let r;
33
- p === "GenAI_NanoGPT_v1" ? r = {
34
- model_type: "GenAI_NanoGPT_v1",
35
- vocab_size: n.getVocab().length,
36
- hidden_size: i.config.nEmbed,
37
- num_hidden_layers: i.config.nLayer,
38
- num_attention_heads: i.config.nHead,
39
- block_size: i.config.blockSize,
40
- mlpFactor: i.config.mlpFactor,
41
- useRope: i.config.useRope
42
- } : r = {
43
- model_type: "GenAI_NanoGPT_v2",
44
- vocab_size: n.getVocab().length,
45
- hidden_size: i.config.nEmbed,
46
- num_hidden_layers: i.config.nLayer,
47
- num_attention_heads: i.config.nHead,
48
- block_size: i.config.blockSize,
49
- mlpFactor: i.config.mlpFactor,
50
- loraConfig: i.config.loraConfig ? Object.fromEntries(i.config.loraConfig) : void 0,
51
- loraName: i.config.loraName,
52
- windowSize: i.config.windowSize
53
- }, e.file("config.json", JSON.stringify(r, void 0, 4), {
54
- binary: !1
55
- });
56
- const l = {
57
- version: h,
58
- application: "@genai-fi/nanogpt",
59
- meta: o?.metadata,
60
- name: o?.name,
61
- training: i.metaData?.training || void 0,
62
- reference: i.metaData?.url || void 0,
63
- phase: i.metaData?.phase || void 0,
64
- pretrainingData: i.metaData?.pretrainingData || void 0,
65
- pretrainingSettings: i.metaData?.pretrainingSettings || void 0,
66
- generationSettings: i.metaData?.generationSettings || void 0,
67
- actionLog: i.metaData?.actionLog || void 0
68
- };
69
- if (e.file("meta.json", JSON.stringify(l, void 0, 4), {
70
- binary: !1
71
- }), e.file(
72
- "tokeniser.json",
73
- JSON.stringify({
74
- type: n instanceof _ ? "char" : "bpe",
75
- vocab: n.getVocab(),
76
- merges: n.getMerges(),
77
- datasetID: n.datasetID,
78
- id: n.id
79
- }),
80
- {
81
- binary: !1
82
- }
83
- ), o?.files)
84
- for (const [t, f] of Object.entries(o.files))
85
- e.file(t, JSON.stringify(f), { binary: !1 });
86
- return e.generateAsync({ type: "blob" });
87
- }
88
- export {
89
- u as saveModel
90
- };
@@ -1,67 +0,0 @@
1
- import { GenerateOptions } from '../inference/types';
2
- import { LoRAConfig } from '../models/config';
3
- import { default as Model, ModelForwardAttributes, TrainingState } from '../models/model';
4
- import { ITokeniser } from '../tokeniser/type';
5
- import { AdamWOptimizer } from '../training/AdamW';
6
- import { TrainingLogEntry, TrainingOptions } from '../training/types';
7
/**
 * Fields common to every serialised model configuration variant.
 *
 * `model_type` is the discriminator that the V1/V2 sub-interfaces narrow.
 */
export interface TransformersConfigBase {
    /** Names the configuration variant. */
    model_type:
        | 'GenAI_NanoGPT_v1'
        | 'GenAI_NanoGPT_v2';
    vocab_size: number;
    hidden_size: number;
    num_hidden_layers: number;
    num_attention_heads: number;
    block_size: number;
    mlpFactor: number;
    /** Optional LoRA settings, keyed by string. */
    loraConfig?: Record<string, LoRAConfig>;
    loraName?: string;
}
18
/** Version 1 configuration: the base fields plus a mandatory `useRope` flag. */
export interface TransformersConfigV1 extends TransformersConfigBase {
    model_type: 'GenAI_NanoGPT_v1';
    useRope: boolean;
}
22
/** Version 2 configuration: the base fields plus an optional `windowSize`. */
export interface TransformersConfigV2 extends TransformersConfigBase {
    model_type: 'GenAI_NanoGPT_v2';
    // NOTE(review): declared as a string, not a number - confirm the expected format.
    windowSize?: string;
}
26
/** Union of all supported configuration variants, discriminated by `model_type`. */
export type TransformersConfig =
    | TransformersConfigV1
    | TransformersConfigV2;
27
/** Serialised form of a tokeniser. */
export interface TransformersTokeniser {
    /** Tokeniser family: character-level or byte-pair encoding. */
    type: 'char' | 'bpe';
    vocab: Array<string>;
    /** Pairs of strings describing merges. */
    merges: Array<[string, string]>;
    datasetID?: string;
    id?: string;
}
34
/** Lifecycle phase recorded for a model. */
export type ModelPhase =
    | 'untrained'
    | 'pretrained'
    | 'finetuned';
35
/** Identifier and display name of a dataset. */
export interface DatasetMetadata {
    id: string;
    name: string;
}
39
/** One entry in a model's action log. */
export interface ActionLogEntry {
    /** Kind of action that was performed. */
    action:
        | 'pretrain'
        | 'generate'
        | 'finetune';
    // NOTE(review): units of `timestamp` and `duration` are not visible here - confirm.
    timestamp: number;
    duration: number;
    tokensProcessed: number;
    /** The options the action was run with. */
    options: TrainingOptions | GenerateOptions;
}
46
/**
 * Metadata stored alongside a model.
 *
 * Only `version` and `application` are required; the index signature allows
 * arbitrary additional keys.
 */
export interface TransformersMetadata {
    name?: string;
    version: number;
    application: string;
    training?: TrainingState;
    reference?: string;
    id?: string;
    url?: string;
    phase?: ModelPhase;
    pretrainingData?: Array<DatasetMetadata>;
    pretrainingSettings?: TrainingOptions;
    generationSettings?: GenerateOptions;
    actionLog?: Array<ActionLogEntry>;
    /** Any further keys are accepted with unknown value type. */
    [key: string]: unknown;
}
61
/** Bundle describing a loaded model: the model, its tokeniser, metadata and optional extras. */
export interface LoadResult {
    model: Model<ModelForwardAttributes>;
    tokeniser: ITokeniser;
    metaData: TransformersMetadata;
    /** Optional optimizer instance. */
    optimizer?: AdamWOptimizer;
    /** Optional list of training log entries. */
    log?: Array<TrainingLogEntry>;
}
@@ -1 +0,0 @@
1
-
package/dist/main.d.ts DELETED
@@ -1,50 +0,0 @@
1
- import { default as PretrainingTask } from './training/tasks/PretrainingTask';
2
- import { default as StartSentenceTask } from './training/tasks/StartSentenceTask';
3
- import { default as ConversationTask } from './training/tasks/ConversationTask';
4
- import { pack16 } from './ops/pack16';
5
- import { unpack16 } from './ops/unpack16';
6
- import { default as CausalSelfAttention } from './layers/CausalSelfAttention';
7
- import { default as MLP } from './layers/MLP';
8
- import { default as TransformerBlock } from './layers/TransformerBlock';
9
- import { default as RoPECache } from './layers/RoPECache';
10
- export { default as NanoGPT } from './models/NanoGPTV1';
11
- export { default as TeachableLLM } from './TeachableLLM';
12
- export { default as CharTokeniser } from './tokeniser/CharTokeniser';
13
- export { default as BPETokeniser } from './tokeniser/bpe';
14
- export { default as waitForModel } from './utilities/waitForModel';
15
- export { default as generateDatasetID } from './utilities/datasetID';
16
- export { default as loadTextData } from './data/textLoader';
17
- export type { DatasetMetadata } from './loader/types';
18
- export { default as Generator, type IGenerator } from './Generator';
19
- export { default as Evaluator } from './training/Evaluator';
20
- export { default as Trainer } from './Trainer';
21
- export type { IGenerateOptions } from './Generator';
22
- export { type ModelForwardAttributes, default as Model } from './models/model';
23
- export type { ITokeniser, Conversation, Roles } from './tokeniser/type';
24
- export type { TrainingOptions, TrainingLogEntry } from './training/types';
25
- export type { GPTConfig } from './models/config';
26
- export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
27
- export { default as topP } from './utilities/topP';
28
- export { Task, tokensFromTasks } from './training/tasks/Task';
29
/** Namespace object exposing the training task classes. */
export declare const tasks: {
    PretrainingTask: typeof PretrainingTask;
    StartSentenceTask: typeof StartSentenceTask;
    ConversationTask: typeof ConversationTask;
};
34
/** Namespace object exposing the 16-bit pack/unpack ops. */
declare const ops: {
    pack16: typeof pack16;
    unpack16: typeof unpack16;
};
38
- export { ops };
39
- export { selectBackend } from './backend';
40
- export { default as performanceTest } from './utilities/performance';
41
/** Namespace object exposing the layer classes. */
export declare const layers: {
    CausalSelfAttention: typeof CausalSelfAttention;
    MLP: typeof MLP;
    TransformerBlock: typeof TransformerBlock;
    RoPECache: typeof RoPECache;
};
47
- export { AdamWOptimizer } from './training/AdamW';
48
- export { default as checks } from './checks';
49
- export type { TensorStatistics } from './checks/weights';
50
- export { sentenceEmbeddings, sentenceEmbeddingsTensor } from './utilities/sentences';
package/dist/main.js DELETED
@@ -1,109 +0,0 @@
1
- import "./index-CUXkjxiT.js";
2
- import "./random_width-DN5ZtQkM.js";
3
- import "./zeros_like-CWjDdwr-.js";
4
- import { default as io } from "./Generator.js";
5
- import "./index-Cp39cXWe.js";
6
- import "./dataset-CGGp1z9P.js";
7
- import { default as fo } from "./models/NanoGPTV1.js";
8
- import { default as lo } from "./TeachableLLM.js";
9
- import { default as uo } from "./tokeniser/CharTokeniser.js";
10
- import { default as ko } from "./tokeniser/bpe.js";
11
- import { default as go } from "./utilities/waitForModel.js";
12
- import { default as Co } from "./utilities/datasetID.js";
13
- import { default as Eo } from "./data/textLoader.js";
14
- import { default as Bo } from "./training/Evaluator.js";
15
- import { default as vo } from "./Trainer.js";
16
- import { default as Do } from "./models/model.js";
17
- import { estimateMemoryUsage as So, estimateParameterCount as Ao, estimateResources as Fo, estimateTrainingMemoryUsage as Go, validateConfig as Ro } from "./utilities/parameters.js";
18
- import { default as wo } from "./utilities/topP.js";
19
- import { Task as Io, tokensFromTasks as No } from "./training/tasks/Task.js";
20
- import o from "./training/tasks/PretrainingTask.js";
21
- import r from "./training/tasks/StartSentenceTask.js";
22
- import t from "./training/tasks/ConversationTask.js";
23
- import "./ops/cpu/scatterSub.js";
24
- import "./ops/webgl/scatterSub.js";
25
- import "./ops/cpu/gatherSub.js";
26
- import "./ops/webgl/gatherSub.js";
27
- import "./ops/cpu/attentionMask.js";
28
- import "./ops/webgl/attentionMask.js";
29
- import "./ops/grads/attentionMask.js";
30
- import "./ops/cpu/qkv.js";
31
- import "./ops/webgl/qkv.js";
32
- import "./ops/grads/qkv.js";
33
- import "./ops/cpu/rope.js";
34
- import "./ops/webgl/rope.js";
35
- import "./rope-DcrZM_e6.js";
36
- import "./ops/cpu/appendCache.js";
37
- import "./ops/webgl/appendCache.js";
38
- import "./ops/cpu/matMulGelu.js";
39
- import "./matMulGelu-JNLZqKQp.js";
40
- import "./ops/grads/matMulGelu.js";
41
- import "./ops/cpu/gelu.js";
42
- import "./ops/webgl/gelu.js";
43
- import "./gelu-B220X1Go.js";
44
- import "./ops/cpu/normRMS.js";
45
- import "./ops/webgl/normRMS.js";
46
- import "./ops/grads/normRMS.js";
47
- import "./ops/webgl/log.js";
48
- import "./ops/cpu/adamMoments.js";
49
- import "./ops/webgl/adamMoments.js";
50
- import "./ops/cpu/adamAdjust.js";
51
- import "./ops/webgl/adamAdjust.js";
52
- import { u as e, p as m } from "./pack16-bqltoUlR.js";
53
- import "./ops/grads/softmax16.js";
54
- import "./matMul16-BcVC_E62.js";
55
- import "./ops/webgl/matMul16.js";
56
- import "./ops/cpu/matMul16.js";
57
- import "./ops/transpose16.js";
58
- import { selectBackend as Wo } from "./backend.js";
59
- import { default as qo } from "./utilities/performance.js";
60
- import a from "./layers/CausalSelfAttention.js";
61
- import p from "./layers/MLP.js";
62
- import i from "./layers/TransformerBlock.js";
63
- import s from "./layers/RoPECache.js";
64
- import { AdamWOptimizer as Jo } from "./training/AdamW.js";
65
- import { default as Qo } from "./checks/index.js";
66
- import { sentenceEmbeddings as Xo, sentenceEmbeddingsTensor as Yo } from "./utilities/sentences.js";
67
// Training task classes, exported as `tasks`.
const to = {
  PretrainingTask: o,
  StartSentenceTask: r,
  ConversationTask: t,
};

// 16-bit packing helpers, exported as `ops`.
const eo = {
  pack16: m,
  unpack16: e,
};

// Layer classes, exported as `layers`.
const mo = {
  CausalSelfAttention: a,
  MLP: p,
  TransformerBlock: i,
  RoPECache: s,
};
80
- export {
81
- Jo as AdamWOptimizer,
82
- ko as BPETokeniser,
83
- uo as CharTokeniser,
84
- Bo as Evaluator,
85
- io as Generator,
86
- Do as Model,
87
- fo as NanoGPT,
88
- Io as Task,
89
- lo as TeachableLLM,
90
- vo as Trainer,
91
- Qo as checks,
92
- So as estimateMemoryUsage,
93
- Ao as estimateParameterCount,
94
- Fo as estimateResources,
95
- Go as estimateTrainingMemoryUsage,
96
- Co as generateDatasetID,
97
- mo as layers,
98
- Eo as loadTextData,
99
- eo as ops,
100
- qo as performanceTest,
101
- Wo as selectBackend,
102
- Xo as sentenceEmbeddings,
103
- Yo as sentenceEmbeddingsTensor,
104
- to as tasks,
105
- No as tokensFromTasks,
106
- wo as topP,
107
- Ro as validateConfig,
108
- go as waitForModel
109
- };
@@ -1,80 +0,0 @@
1
- import { e as y, h } from "./index-CUXkjxiT.js";
2
- import "./ops/webgl/matMul16.js";
3
- import "./ops/cpu/matMul16.js";
4
- import { isPackedTensor as v } from "./utilities/packed.js";
5
- import { p as g } from "./pack16-bqltoUlR.js";
6
- import { d as k } from "./gelu-B220X1Go.js";
7
- import { transpose16 as S } from "./ops/transpose16.js";
8
- import { reshape16 as w } from "./ops/reshape16.js";
9
- import { mul16 as D } from "./ops/mul16.js";
10
/**
 * Gradient configuration for the "MatMul16" kernel.
 *
 * `gradFunc(dy, saved, attrs)` receives the upstream gradient, the saved
 * inputs `[A, B]` and the forward attributes, and returns lazy gradient
 * thunks for `A` and `B`.
 *
 * Steps:
 *  1. If the forward pass recorded `perm` and `originalShape`, `dy` is
 *     transposed with the inverse permutation; if `originalShape` is set,
 *     `dy` is reshaped to it.
 *  2. For the "gelu" / "relu2" activations the product is recomputed and
 *     combined with `dy` (presumably the activation derivative - confirm
 *     against the gelu / mul16 helpers).
 *  3. The matmul gradients are produced for the supported transpose
 *     combinations. When `scale` is defined, it is applied to the `dy`
 *     operand via the scaleA / scaleB helpers.
 *
 * @throws {Error} if `dy` is an array, or if both transposeA and transposeB
 *   are set (not supported).
 */
const G = {
  kernelName: "MatMul16",
  inputsToSave: ["A", "B"],
  outputsToSave: [],
  gradFunc: (r, o, l) => {
    const [s, t] = o;
    if (Array.isArray(r))
      throw new Error("Expected dy to be a single Tensor");
    let e = r;
    const { transposeA: u, transposeB: i, scale: a, activation: p, originalShape: d, perm: m } = l;
    if (m && d) {
      // Build the inverse permutation so the forward transpose can be undone.
      const f = new Array(m.length);
      for (let M = 0; M < m.length; ++M)
        f[m[M]] = M;
      const c = e;
      e = S(e, f), c.dispose();
    }
    if (d) {
      const f = e;
      e = w(e, d), f.dispose();
    }
    if (p === "gelu") {
      const f = e, c = n(s, t, u, i);
      e = k(f, c), f.dispose(), c.dispose();
    } else if (p === "relu2") {
      const f = e, c = n(s, t, u, i, { activation: "relu", scale: 2 });
      e = D(f, c), f.dispose(), c.dispose();
    }
    if (!u && !i)
      // C = A * B:  dA = dy * B^T,  dB = A^T * dy
      return {
        A: () => a !== void 0 ? B(e, t, a, !1, !0) : n(e, t, !1, !0),
        B: () => a !== void 0 ? A(s, e, a, !0, !1) : n(s, e, !0, !1)
      };
    if (!u && i)
      // C = A * B^T:  dA = dy * B,  dB = dy^T * A.
      // The B gradient must have B's shape [N, K]; A^T * dy would be [K, N].
      // dy is the first operand here, so the scale goes through the scaleA helper.
      return {
        A: () => a !== void 0 ? B(e, t, a, !1, !1) : n(e, t, !1, !1),
        B: () => a !== void 0 ? B(e, s, a, !0, !1) : n(e, s, !0, !1)
      };
    if (u && !i)
      // C = A^T * B:  dA = B * dy^T,  dB = A * dy
      return {
        A: () => a !== void 0 ? A(t, e, a, !1, !0) : n(t, e, !1, !0),
        B: () => a !== void 0 ? A(s, e, a, !1, !1) : n(s, e, !1, !1)
      };
    throw new Error("Gradient for transposeA=true and transposeB=true is not supported yet.");
  }
};
56
- y(G);
57
/**
 * Runs the "MatMul16" kernel on two operands.
 *
 * If exactly one operand is already packed, the other is packed temporarily
 * for the call and that temporary is disposed afterwards. If neither operand
 * is packed, both are passed through unchanged.
 *
 * @param r First operand.
 * @param o Second operand.
 * @param l transposeA attribute (default false).
 * @param s transposeB attribute (default false).
 * @param t Extra attributes spread into the kernel attributes.
 * @returns The kernel result.
 */
function n(r, o, l = !1, s = !1, t = {}) {
  const firstIsPacked = v(r);
  const secondIsPacked = v(o);
  const anyPacked = firstIsPacked || secondIsPacked;
  const mustPackFirst = anyPacked && !firstIsPacked;
  const mustPackSecond = anyPacked && !secondIsPacked;
  const lhs = mustPackFirst ? g(r) : r;
  const rhs = mustPackSecond ? g(o) : o;
  const attrs = { transposeA: l, transposeB: s, ...t };
  const product = h().runKernel("MatMul16", { A: lhs, B: rhs }, attrs);
  // Only the temporaries created here are released; caller tensors are untouched.
  if (mustPackFirst) {
    lhs.dispose();
  }
  if (mustPackSecond) {
    rhs.dispose();
  }
  return product;
}
61
/** MatMul16 with the `scale` attribute set to `l`. */
function j(r, o, l, s = !1, t = !1) {
  const attrs = { scale: l };
  return n(r, o, s, t, attrs);
}
64
/** MatMul16 with the `scaleA` attribute set to `l`. */
function B(r, o, l, s = !1, t = !1) {
  const attrs = { scaleA: l };
  return n(r, o, s, t, attrs);
}
67
/** MatMul16 with the `scaleB` attribute set to `l`. */
function A(r, o, l, s = !1, t = !1) {
  const attrs = { scaleB: l };
  return n(r, o, s, t, attrs);
}
70
/** MatMul16 with the `activation` attribute set to "gelu". */
function q(r, o, l = !1, s = !1) {
  const attrs = { activation: "gelu" };
  return n(r, o, l, s, attrs);
}
73
- export {
74
- G as a,
75
- n as b,
76
- q as c,
77
- B as d,
78
- A as e,
79
- j as m
80
- };
@@ -1,163 +0,0 @@
1
- import { c as C, t as R, h as I, U as G, _ as L, x as U, am as F } from "./index-CUXkjxiT.js";
2
- import { r as M } from "./Reshape-BW__R4mZ.js";
3
- import { u as H } from "./gpgpu_math-BwvV12df.js";
4
- import { m as B } from "./mat_mul-DhG0Newp.js";
5
/**
 * WebGL program description for a packed batched matrix multiply with an
 * optional bias and an optional activation snippet.
 *
 * The constructor only builds metadata (`variableNames`, `outputShape`, ...)
 * and the GLSL source in `userCode`; it does not run anything.
 */
class W {
  /**
   * @param aShape Shape of matrixA (indices 0..2 are read).
   * @param bShape Shape of matrixB (index 0 is read).
   * @param outputShape Output shape.
   * @param transposeA Whether matrixA is read transposed.
   * @param transposeB Whether matrixB is read transposed.
   * @param addBias Adds a "bias" input and a bias-add statement.
   * @param activation GLSL body for `activation()`, or null for none.
   * @param hasPreluActivation Adds the "preluActivationWeights" input.
   * @param hasLeakyreluActivation Adds the "leakyreluAlpha" input.
   */
  constructor(
    aShape,
    bShape,
    outputShape,
    transposeA = false,
    transposeB = false,
    addBias = false,
    activation = null,
    hasPreluActivation = false,
    hasLeakyreluActivation = false
  ) {
    this.variableNames = ["matrixA", "matrixB"];
    this.packedInputs = true;
    this.packedOutput = true;
    this.outputShape = outputShape;
    this.enableShapeUniforms = H(this.outputShape.length);

    const sharedDim = transposeA ? aShape[1] : aShape[2];
    // Loop bound of the shader: half the shared dimension, rounded up.
    const sharedDimPacked = Math.ceil(sharedDim / 2);
    const aSample = transposeA ? "i * 2, rc.y" : "rc.y, i * 2";
    const bSample = transposeB ? "rc.z, i * 2" : "i * 2, rc.z";
    const aSwizzle = transposeA ? ["a.xxyy", "a.zzww"] : ["a.xxzz", "a.yyww"];
    const bSwizzle = transposeB ? ["b.xzxz", "b.ywyw"] : ["b.xyxy", "b.zwzw"];

    let activationSnippet = "";
    let applyActivation = "";
    if (activation) {
      if (hasPreluActivation) {
        activationSnippet = `vec4 activation(vec4 a) {
          vec4 b = getPreluActivationWeightsAtOutCoords();
          ${activation}
        }`;
      } else if (hasLeakyreluActivation) {
        activationSnippet = `vec4 activation(vec4 a) {
          vec4 b = getLeakyreluAlphaAtOutCoords();
          ${activation}
        }`;
      } else {
        activationSnippet = `vec4 activation(vec4 x) {
          ${activation}
        }`;
      }
      applyActivation = "result = activation(result);";
    }

    const addBiasSnippet = addBias ? "result += getBiasAtOutCoords();" : "";
    if (addBias) {
      this.variableNames.push("bias");
    }
    if (hasPreluActivation) {
      this.variableNames.push("preluActivationWeights");
    }
    if (hasLeakyreluActivation) {
      this.variableNames.push("leakyreluAlpha");
    }

    // When the batch sizes differ, the smaller one is indexed modulo its size.
    let batchASnippet = "rc.x";
    let batchBSnippet = "rc.x";
    if (aShape[0] < bShape[0]) {
      batchASnippet = `imod(rc.x, ${aShape[0]})`;
    } else if (bShape[0] < aShape[0]) {
      batchBSnippet = `imod(rc.x, ${bShape[0]})`;
    }

    this.userCode = `
      ${activationSnippet}
      // Don't use uniform for sharedDimensionPacked for performance.
      const float sharedDimension = ${sharedDimPacked}.0;

      vec4 dot2x2ARowBCol(ivec3 rc) {
        vec4 result = vec4(0);
        int batchA = ${batchASnippet};
        int batchB = ${batchBSnippet};
        for (int i = 0; i < ${sharedDimPacked}; i++) {
          vec4 a = getMatrixA(batchA, ${aSample});
          vec4 b = getMatrixB(batchB, ${bSample});

          // These swizzled products need to be separately added.
          // See: https://github.com/tensorflow/tfjs/issues/1735
          result += (${aSwizzle[0]} * ${bSwizzle[0]});
          result += (${aSwizzle[1]} * ${bSwizzle[1]});
        }
        return result;
      }

      void main() {
        ivec3 rc = getOutputCoords();
        vec4 result = dot2x2ARowBCol(rc);

        ${addBiasSnippet}

        ${applyActivation}

        setOutput(result);
      }
    `;
  }
}
56
// Numeric constants interpolated into the GLSL snippets below.
const S = 0.7978845608028654;
const w = 0.044715;

// GLSL body for `vec4 activation(vec4 x)`: tanh-based GELU of x.
// tanh is replaced by sign() when the magnitude of its argument exceeds 15.
const j = `
    vec4 x3 = x * x * x;
    vec4 inner = x + ${w} * x3;
    inner = ${S} * inner;
    inner = vec4(
        abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
        abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
        abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
        abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
    );
    inner = 0.5 * (1.0 + inner);
    vec4 result = x * inner;
    return result;
`;

// GLSL body for `vec4 activation(vec4 a)` with a second vector `b` in scope:
// the derivative of the GELU above evaluated at a, multiplied by b.
const q = `
    vec4 a2 = a * a;
    vec4 a3 = a2 * a;
    vec4 u = ${S} * (a + ${w} * a3);
    vec4 t = vec4(
        abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
        abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
        abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
        abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
    );
    vec4 sech2 = 1.0 - t * t;
    vec4 du_dx = ${S} * (1.0 + 3.0 * ${w} * a2);
    vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
    return dgelu * b;
`;

// NOTE(review): exported as `a`; its meaning is not visible in this module.
const se = 1e3;
84
/**
 * Batched matmul on the WebGL backend using the packed program `W`, with an
 * activation snippet and an optional extra `multiplier` input.
 *
 * Both operands are reshaped to rank 3, the program is run, and the result is
 * reshaped to the broadcast batch shape plus `[outerA, outerB]`. The
 * intermediate tensor infos created here are disposed before returning.
 *
 * @param args.a First operand.
 * @param args.b Second operand.
 * @param args.transposeA Whether `a` is used transposed.
 * @param args.transposeB Whether `b` is used transposed.
 * @param args.backend Backend providing `runWebGLProgram` and
 *   `disposeIntermediateTensorInfo`.
 * @param args.activationSnippet GLSL activation body passed to the program.
 * @param args.multiplier Optional extra input; when present it is supplied to
 *   the program as its third input, in the prelu-weights slot.
 * @returns Tensor info of the reshaped result.
 */
function O({
  a: t,
  b: e,
  transposeA: s,
  transposeB: a,
  backend: n,
  activationSnippet: o,
  multiplier: r
}) {
  const aRank = t.shape.length;
  const bRank = e.shape.length;
  const innerA = s ? t.shape[aRank - 2] : t.shape[aRank - 1];
  const innerB = a ? e.shape[bRank - 1] : e.shape[bRank - 2];
  const outerA = s ? t.shape[aRank - 1] : t.shape[aRank - 2];
  const outerB = a ? e.shape[bRank - 2] : e.shape[bRank - 1];
  const aBatchDims = t.shape.slice(0, -2);
  const bBatchDims = e.shape.slice(0, -2);
  const aBatch = G(aBatchDims);
  const bBatch = G(bBatchDims);
  const resultShape = L(t.shape.slice(0, -2), e.shape.slice(0, -2)).concat([outerA, outerB]);

  U(
    innerA === innerB,
    () => `Error in matMul: inner shapes (${innerA}) and (${innerB}) of Tensors with shapes ${t.shape} and ${e.shape} and transposeA=${s} and transposeB=${a} must match.`
  );

  const a3dShape = s ? [aBatch, innerA, outerA] : [aBatch, outerA, innerA];
  const b3dShape = a ? [bBatch, outerB, innerB] : [bBatch, innerB, outerB];
  const a3d = M({ inputs: { x: t }, backend: n, attrs: { shape: a3dShape } });
  const b3d = M({ inputs: { x: e }, backend: n, attrs: { shape: b3dShape } });
  const intermediates = [a3d, b3d];

  const batch = Math.max(aBatch, bBatch);
  const outDtype = F(t.dtype, e.dtype);
  const program = new W(
    a3dShape,
    b3dShape,
    [batch, outerA, outerB],
    s,
    a,
    false,
    o,
    Boolean(r),
    false
  );

  const programInputs = [a3d, b3d];
  if (r) {
    programInputs.push(r);
  }

  const raw = n.runWebGLProgram(program, programInputs, outDtype);
  const reshaped = M({ inputs: { x: raw }, backend: n, attrs: { shape: resultShape } });
  intermediates.push(raw);
  for (const info of intermediates) {
    n.disposeIntermediateTensorInfo(info);
  }
  return reshaped;
}
116
/**
 * Kernel function for "MatMulGelu" on WebGL: multiplies `inputs.x` by
 * `inputs.kernel` without transposes, using the GELU activation snippet.
 *
 * @throws {Error} when either input is undefined.
 */
function J(t) {
  const { inputs, backend } = t;
  const { x, kernel } = inputs;
  if (x === void 0 || kernel === void 0) {
    throw new Error("BatchMatMul requires two input tensors.");
  }
  return O({
    a: x,
    b: kernel,
    transposeA: false,
    transposeB: false,
    backend,
    activationSnippet: j
  });
}
129
// Kernel config binding "MatMulGelu" to the WebGL implementation; registered on module load.
const Q = {
  kernelName: "MatMulGelu",
  backendName: "webgl",
  kernelFunc: J,
};
C(Q);
135
/**
 * Kernel function for "MatMulGeluGrad" on WebGL.
 *
 * Inside a tidy scope it runs the fused matmul of `x` and `kernel` with the
 * GELU-derivative snippet and `dy` as multiplier, then forms two matmuls from
 * that result: one against `kernel` (transposeB) and one against `x`
 * (transposeA).
 *
 * @returns A two-element array: [gradient for x, gradient for kernel].
 */
function V(t) {
  const { dy, x, kernel } = t.inputs;
  const backend = t.backend;
  return R(() => {
    const engine = I();
    const scaledInfo = O({
      a: x,
      b: kernel,
      transposeA: false,
      transposeB: false,
      backend,
      activationSnippet: q,
      multiplier: dy
    });
    const scaled = engine.makeTensorFromTensorInfo(scaledInfo);
    const gradX = B(scaled, kernel, false, true);
    const gradKernel = B(x, scaled, true, false);
    return [gradX, gradKernel];
  });
}
152
// Kernel config binding "MatMulGeluGrad" to the WebGL implementation; registered on module load.
const X = {
  kernelName: "MatMulGeluGrad",
  backendName: "webgl",
  kernelFunc: V,
};
C(X);
158
- export {
159
- W as M,
160
- se as a,
161
- O as b,
162
- J as c
163
- };
@@ -1,11 +0,0 @@
1
- import { o as m, q as s, B as c, E as M, D as p } from "./index-CUXkjxiT.js";
2
/**
 * Matrix-multiplies two tensor-like values by dispatching the imported
 * kernel name `p` on the engine `M`.
 *
 * Both arguments are first converted with `s` (labelled "a" / "b" for op
 * "matMul") and then passed together through `c` before the kernel runs.
 *
 * @param e First operand.
 * @param o Second operand.
 * @param n transposeA attribute (default false).
 * @param l transposeB attribute (default false).
 */
function f(e, o, n = !1, l = !1) {
  const lhsInput = s(e, "a", "matMul");
  const rhsInput = s(o, "b", "matMul");
  const [lhs, rhs] = c(lhsInput, rhsInput);
  const inputs = { a: lhs, b: rhs };
  const attrs = { transposeA: n, transposeB: l };
  return M.runKernel(p, inputs, attrs);
}
8
// Wraps `f` with the imported op helper under the key `matMul_`; exported as `m`.
const i = /* @__PURE__ */ m({
  matMul_: f,
});
9
- export {
10
- i as m
11
- };