@genai-fi/nanogpt 0.20.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (433)
  1. package/dist/BaseTokeniser-DSg9zcYq.js +221 -0
  2. package/dist/DatasetBuilder-DgURD85T.js +712 -0
  3. package/dist/Generator.d.ts +82 -0
  4. package/dist/Generator.js +2 -0
  5. package/dist/RealDiv-DBu0FQqT.js +362 -0
  6. package/dist/Reshape-CABOPB9d.js +94 -0
  7. package/dist/Reshape-DqO3r8BC.js +17 -0
  8. package/dist/TeachableLLM.d.ts +70 -0
  9. package/dist/TeachableLLM.js +2 -0
  10. package/dist/Trainer.d.ts +43 -0
  11. package/dist/Trainer.js +2 -0
  12. package/dist/backend.d.ts +2 -0
  13. package/dist/backend.js +13 -0
  14. package/dist/backend_util-Cg-roD1p.js +399 -0
  15. package/dist/binary_op_util-CrYk9LXL.js +103 -0
  16. package/dist/checks/appendCache.d.ts +1 -0
  17. package/dist/checks/appendCache.js +55 -0
  18. package/dist/checks/attentionMask.d.ts +1 -0
  19. package/dist/checks/attentionMask.js +56 -0
  20. package/dist/checks/check.d.ts +9 -0
  21. package/dist/checks/check.js +32 -0
  22. package/dist/checks/gelu.d.ts +1 -0
  23. package/dist/checks/gelu.js +46 -0
  24. package/dist/checks/index.d.ts +26 -0
  25. package/dist/checks/index.js +28 -0
  26. package/dist/checks/matMulGelu.d.ts +1 -0
  27. package/dist/checks/matMulGelu.js +84 -0
  28. package/dist/checks/normRMS.d.ts +1 -0
  29. package/dist/checks/normRMS.js +28 -0
  30. package/dist/checks/normRMSGrad.d.ts +1 -0
  31. package/dist/checks/normRMSGrad.js +22 -0
  32. package/dist/checks/packUnpack.d.ts +1 -0
  33. package/dist/checks/packUnpack.js +46 -0
  34. package/dist/checks/qkv.d.ts +1 -0
  35. package/dist/checks/qkv.js +34 -0
  36. package/dist/checks/rope.d.ts +1 -0
  37. package/dist/checks/rope.js +30 -0
  38. package/dist/checks/weights.d.ts +14 -0
  39. package/dist/checks/weights.js +27 -0
  40. package/dist/chunk-BPntVaq0.js +23 -0
  41. package/dist/complex_util-CkazZsaH.js +60 -0
  42. package/dist/concat_util-CWDZCBlA.js +19 -0
  43. package/dist/data/docx.d.ts +2 -0
  44. package/dist/data/docx.js +3046 -0
  45. package/dist/data/pdf.d.ts +2 -0
  46. package/dist/data/pdf.js +17 -0
  47. package/dist/data/textLoader.d.ts +7 -0
  48. package/dist/data/textLoader.js +613 -0
  49. package/dist/dist-BewPQWjc.js +7572 -0
  50. package/dist/dist-DVmq73nz.js +8775 -0
  51. package/dist/dist-DXwIvKxl.js +896 -0
  52. package/dist/dist-VEU5mfO0.js +7545 -0
  53. package/dist/gelu-Bf1HW1RY.js +27 -0
  54. package/dist/gpgpu_math-DvLcCH6u.js +1612 -0
  55. package/dist/inference/types.d.ts +16 -0
  56. package/dist/inference/types.js +0 -0
  57. package/dist/kernel_funcs_utils-HiXOOx3f.js +229 -0
  58. package/dist/layers/BaseLayer.d.ts +44 -0
  59. package/dist/layers/BaseLayer.js +76 -0
  60. package/dist/layers/CausalSelfAttention.d.ts +39 -0
  61. package/dist/layers/CausalSelfAttention.js +99 -0
  62. package/dist/layers/LoRA.d.ts +14 -0
  63. package/dist/layers/LoRA.js +48 -0
  64. package/dist/layers/MLP.d.ts +17 -0
  65. package/dist/layers/MLP.js +34 -0
  66. package/dist/layers/PositionEmbedding.d.ts +8 -0
  67. package/dist/layers/PositionEmbedding.js +27 -0
  68. package/dist/layers/RMSNorm.d.ts +12 -0
  69. package/dist/layers/RMSNorm.js +20 -0
  70. package/dist/layers/RoPECache.d.ts +18 -0
  71. package/dist/layers/RoPECache.js +337 -0
  72. package/dist/layers/TiedEmbedding.d.ts +13 -0
  73. package/dist/layers/TiedEmbedding.js +32 -0
  74. package/dist/layers/TransformerBlock.d.ts +27 -0
  75. package/dist/layers/TransformerBlock.js +51 -0
  76. package/dist/layers/WeightStore.d.ts +20 -0
  77. package/dist/layers/WeightStore.js +69 -0
  78. package/dist/loader/load.d.ts +6 -0
  79. package/dist/loader/load.js +2 -0
  80. package/dist/loader/loadHF.d.ts +8 -0
  81. package/dist/loader/loadHF.js +2 -0
  82. package/dist/loader/loadTransformers.d.ts +4 -0
  83. package/dist/loader/loadTransformers.js +2 -0
  84. package/dist/loader/loadZipMeta.d.ts +3 -0
  85. package/dist/loader/loadZipMeta.js +16 -0
  86. package/dist/loader/newZipLoad.d.ts +3 -0
  87. package/dist/loader/newZipLoad.js +2 -0
  88. package/dist/loader/oldZipLoad.d.ts +9 -0
  89. package/dist/loader/oldZipLoad.js +2 -0
  90. package/dist/loader/save.d.ts +16 -0
  91. package/dist/loader/save.js +2 -0
  92. package/dist/loader/types.d.ts +68 -0
  93. package/dist/loader/types.js +0 -0
  94. package/dist/main-CPjeMv0G.js +13500 -0
  95. package/dist/main.d.ts +50 -0
  96. package/dist/main.js +16 -0
  97. package/dist/matMul16-BNfZSnNM.js +81 -0
  98. package/dist/matMulGelu-CPTntosE.js +162 -0
  99. package/dist/models/NanoGPTV1.d.ts +16 -0
  100. package/dist/models/NanoGPTV1.js +2 -0
  101. package/dist/models/NanoGPTV2.d.ts +16 -0
  102. package/dist/models/NanoGPTV2.js +2 -0
  103. package/dist/models/config.d.ts +27 -0
  104. package/dist/models/config.js +37 -0
  105. package/dist/models/factory.d.ts +3 -0
  106. package/dist/models/factory.js +2 -0
  107. package/dist/models/model.d.ts +44 -0
  108. package/dist/models/model.js +2 -0
  109. package/dist/ops/adamAdjust.d.ts +2 -0
  110. package/dist/ops/adamAdjust.js +18 -0
  111. package/dist/ops/adamMoments.d.ts +2 -0
  112. package/dist/ops/adamMoments.js +16 -0
  113. package/dist/ops/add16.d.ts +2 -0
  114. package/dist/ops/add16.js +12 -0
  115. package/dist/ops/appendCache.d.ts +2 -0
  116. package/dist/ops/appendCache.js +25 -0
  117. package/dist/ops/attentionMask.d.ts +2 -0
  118. package/dist/ops/attentionMask.js +16 -0
  119. package/dist/ops/concat16.d.ts +2 -0
  120. package/dist/ops/concat16.js +8 -0
  121. package/dist/ops/cpu/adamAdjust.d.ts +1 -0
  122. package/dist/ops/cpu/adamAdjust.js +16 -0
  123. package/dist/ops/cpu/adamMoments.d.ts +1 -0
  124. package/dist/ops/cpu/adamMoments.js +16 -0
  125. package/dist/ops/cpu/appendCache.d.ts +1 -0
  126. package/dist/ops/cpu/appendCache.js +65 -0
  127. package/dist/ops/cpu/attentionMask.d.ts +1 -0
  128. package/dist/ops/cpu/attentionMask.js +16 -0
  129. package/dist/ops/cpu/fusedSoftmax.d.ts +9 -0
  130. package/dist/ops/cpu/fusedSoftmax.js +22 -0
  131. package/dist/ops/cpu/gatherSub.d.ts +1 -0
  132. package/dist/ops/cpu/gatherSub.js +12 -0
  133. package/dist/ops/cpu/gelu.d.ts +1 -0
  134. package/dist/ops/cpu/gelu.js +36 -0
  135. package/dist/ops/cpu/matMul16.d.ts +1 -0
  136. package/dist/ops/cpu/matMul16.js +14 -0
  137. package/dist/ops/cpu/matMulGelu.d.ts +1 -0
  138. package/dist/ops/cpu/matMulGelu.js +41 -0
  139. package/dist/ops/cpu/matMulMul.d.ts +1 -0
  140. package/dist/ops/cpu/matMulMul.js +20 -0
  141. package/dist/ops/cpu/mulDropout.d.ts +1 -0
  142. package/dist/ops/cpu/mulDropout.js +20 -0
  143. package/dist/ops/cpu/normRMS.d.ts +1 -0
  144. package/dist/ops/cpu/normRMS.js +35 -0
  145. package/dist/ops/cpu/qkv.d.ts +5 -0
  146. package/dist/ops/cpu/qkv.js +73 -0
  147. package/dist/ops/cpu/rope.d.ts +6 -0
  148. package/dist/ops/cpu/rope.js +81 -0
  149. package/dist/ops/cpu/scatterSub.d.ts +1 -0
  150. package/dist/ops/cpu/scatterSub.js +12 -0
  151. package/dist/ops/dot16.d.ts +2 -0
  152. package/dist/ops/dot16.js +29 -0
  153. package/dist/ops/dropout.d.ts +2 -0
  154. package/dist/ops/dropout.js +11 -0
  155. package/dist/ops/dropout16.d.ts +2 -0
  156. package/dist/ops/dropout16.js +22 -0
  157. package/dist/ops/gatherSub.d.ts +2 -0
  158. package/dist/ops/gatherSub.js +13 -0
  159. package/dist/ops/gelu.d.ts +3 -0
  160. package/dist/ops/gelu.js +2 -0
  161. package/dist/ops/globalNorm.d.ts +2 -0
  162. package/dist/ops/globalNorm.js +19 -0
  163. package/dist/ops/grads/add16.d.ts +1 -0
  164. package/dist/ops/grads/add16.js +27 -0
  165. package/dist/ops/grads/attentionMask.d.ts +1 -0
  166. package/dist/ops/grads/attentionMask.js +26 -0
  167. package/dist/ops/grads/dropout16.d.ts +1 -0
  168. package/dist/ops/grads/dropout16.js +1 -0
  169. package/dist/ops/grads/gelu.d.ts +2 -0
  170. package/dist/ops/grads/gelu.js +2 -0
  171. package/dist/ops/grads/matMul16.d.ts +2 -0
  172. package/dist/ops/grads/matMul16.js +2 -0
  173. package/dist/ops/grads/matMulGelu.d.ts +1 -0
  174. package/dist/ops/grads/matMulGelu.js +22 -0
  175. package/dist/ops/grads/mul16.d.ts +1 -0
  176. package/dist/ops/grads/mul16.js +1 -0
  177. package/dist/ops/grads/normRMS.d.ts +3 -0
  178. package/dist/ops/grads/normRMS.js +37 -0
  179. package/dist/ops/grads/pack16.d.ts +2 -0
  180. package/dist/ops/grads/pack16.js +2 -0
  181. package/dist/ops/grads/qkv.d.ts +3 -0
  182. package/dist/ops/grads/qkv.js +46 -0
  183. package/dist/ops/grads/rope.d.ts +2 -0
  184. package/dist/ops/grads/rope.js +2 -0
  185. package/dist/ops/grads/softmax16.d.ts +2 -0
  186. package/dist/ops/grads/softmax16.js +23 -0
  187. package/dist/ops/grads/unpack16.d.ts +2 -0
  188. package/dist/ops/grads/unpack16.js +2 -0
  189. package/dist/ops/grads/utils.d.ts +4 -0
  190. package/dist/ops/grads/utils.js +12 -0
  191. package/dist/ops/log.d.ts +0 -0
  192. package/dist/ops/log.js +1 -0
  193. package/dist/ops/matMul16.d.ts +15 -0
  194. package/dist/ops/matMul16.js +2 -0
  195. package/dist/ops/matMulGelu.d.ts +3 -0
  196. package/dist/ops/matMulGelu.js +20 -0
  197. package/dist/ops/matMulMul.d.ts +2 -0
  198. package/dist/ops/matMulMul.js +16 -0
  199. package/dist/ops/mul16.d.ts +2 -0
  200. package/dist/ops/mul16.js +43 -0
  201. package/dist/ops/mulDrop.d.ts +2 -0
  202. package/dist/ops/mulDrop.js +15 -0
  203. package/dist/ops/normRMS.d.ts +2 -0
  204. package/dist/ops/normRMS.js +22 -0
  205. package/dist/ops/pack16.d.ts +2 -0
  206. package/dist/ops/pack16.js +2 -0
  207. package/dist/ops/qkv.d.ts +2 -0
  208. package/dist/ops/qkv.js +16 -0
  209. package/dist/ops/reshape16.d.ts +2 -0
  210. package/dist/ops/reshape16.js +33 -0
  211. package/dist/ops/rope.d.ts +3 -0
  212. package/dist/ops/rope.js +2 -0
  213. package/dist/ops/scatterSub.d.ts +2 -0
  214. package/dist/ops/scatterSub.js +13 -0
  215. package/dist/ops/slice16.d.ts +2 -0
  216. package/dist/ops/slice16.js +11 -0
  217. package/dist/ops/softmax16.d.ts +2 -0
  218. package/dist/ops/softmax16.js +9 -0
  219. package/dist/ops/sub16.d.ts +2 -0
  220. package/dist/ops/sub16.js +11 -0
  221. package/dist/ops/sum16.d.ts +2 -0
  222. package/dist/ops/sum16.js +13 -0
  223. package/dist/ops/transpose16.d.ts +3 -0
  224. package/dist/ops/transpose16.js +32 -0
  225. package/dist/ops/unpack16.d.ts +2 -0
  226. package/dist/ops/unpack16.js +2 -0
  227. package/dist/ops/webgl/adamAdjust.d.ts +1 -0
  228. package/dist/ops/webgl/adamAdjust.js +82 -0
  229. package/dist/ops/webgl/adamMoments.d.ts +1 -0
  230. package/dist/ops/webgl/adamMoments.js +44 -0
  231. package/dist/ops/webgl/appendCache.d.ts +1 -0
  232. package/dist/ops/webgl/appendCache.js +53 -0
  233. package/dist/ops/webgl/attentionMask.d.ts +1 -0
  234. package/dist/ops/webgl/attentionMask.js +64 -0
  235. package/dist/ops/webgl/dropout16.d.ts +1 -0
  236. package/dist/ops/webgl/dropout16.js +12 -0
  237. package/dist/ops/webgl/fusedSoftmax.d.ts +11 -0
  238. package/dist/ops/webgl/fusedSoftmax.js +70 -0
  239. package/dist/ops/webgl/gatherSub.d.ts +1 -0
  240. package/dist/ops/webgl/gatherSub.js +28 -0
  241. package/dist/ops/webgl/gelu.d.ts +2 -0
  242. package/dist/ops/webgl/gelu.js +48 -0
  243. package/dist/ops/webgl/log.d.ts +17 -0
  244. package/dist/ops/webgl/log.js +14 -0
  245. package/dist/ops/webgl/matMul16.d.ts +1 -0
  246. package/dist/ops/webgl/matMul16.js +37 -0
  247. package/dist/ops/webgl/matMulGelu.d.ts +21 -0
  248. package/dist/ops/webgl/matMulGelu.js +2 -0
  249. package/dist/ops/webgl/matMulMul.d.ts +14 -0
  250. package/dist/ops/webgl/matMulMul.js +24 -0
  251. package/dist/ops/webgl/mulDropout.d.ts +1 -0
  252. package/dist/ops/webgl/mulDropout.js +32 -0
  253. package/dist/ops/webgl/normRMS.d.ts +1 -0
  254. package/dist/ops/webgl/normRMS.js +114 -0
  255. package/dist/ops/webgl/qkv.d.ts +1 -0
  256. package/dist/ops/webgl/qkv.js +54 -0
  257. package/dist/ops/webgl/rope.d.ts +1 -0
  258. package/dist/ops/webgl/rope.js +72 -0
  259. package/dist/ops/webgl/scatterSub.d.ts +1 -0
  260. package/dist/ops/webgl/scatterSub.js +28 -0
  261. package/dist/ops/webgpu/adamAdjust.d.ts +1 -0
  262. package/dist/ops/webgpu/adamAdjust.js +77 -0
  263. package/dist/ops/webgpu/adamMoments.d.ts +1 -0
  264. package/dist/ops/webgpu/adamMoments.js +76 -0
  265. package/dist/ops/webgpu/add16.d.ts +1 -0
  266. package/dist/ops/webgpu/add16.js +14 -0
  267. package/dist/ops/webgpu/appendCache.d.ts +1 -0
  268. package/dist/ops/webgpu/appendCache.js +130 -0
  269. package/dist/ops/webgpu/attentionMask.d.ts +1 -0
  270. package/dist/ops/webgpu/attentionMask.js +42 -0
  271. package/dist/ops/webgpu/attentionMask32_program.d.ts +19 -0
  272. package/dist/ops/webgpu/attentionMask32_program.js +62 -0
  273. package/dist/ops/webgpu/clipScale.d.ts +1 -0
  274. package/dist/ops/webgpu/clipScale.js +45 -0
  275. package/dist/ops/webgpu/concat16.d.ts +19 -0
  276. package/dist/ops/webgpu/concat16.js +111 -0
  277. package/dist/ops/webgpu/dropout16.d.ts +1 -0
  278. package/dist/ops/webgpu/dropout16.js +59 -0
  279. package/dist/ops/webgpu/gatherSub.d.ts +1 -0
  280. package/dist/ops/webgpu/gatherSub.js +52 -0
  281. package/dist/ops/webgpu/gelu.d.ts +14 -0
  282. package/dist/ops/webgpu/gelu.js +147 -0
  283. package/dist/ops/webgpu/index.d.ts +0 -0
  284. package/dist/ops/webgpu/index.js +26 -0
  285. package/dist/ops/webgpu/matMul16.d.ts +1 -0
  286. package/dist/ops/webgpu/matMul16.js +70 -0
  287. package/dist/ops/webgpu/matMul16_program.d.ts +42 -0
  288. package/dist/ops/webgpu/matMul16_program.js +303 -0
  289. package/dist/ops/webgpu/mul16.d.ts +1 -0
  290. package/dist/ops/webgpu/mul16.js +14 -0
  291. package/dist/ops/webgpu/norm2.d.ts +1 -0
  292. package/dist/ops/webgpu/norm2.js +46 -0
  293. package/dist/ops/webgpu/normRMS.d.ts +1 -0
  294. package/dist/ops/webgpu/normRMS.js +26 -0
  295. package/dist/ops/webgpu/normRMS16_program.d.ts +10 -0
  296. package/dist/ops/webgpu/normRMS16_program.js +28 -0
  297. package/dist/ops/webgpu/normRMS32_program.d.ts +10 -0
  298. package/dist/ops/webgpu/normRMS32_program.js +28 -0
  299. package/dist/ops/webgpu/normRMSGrad.d.ts +1 -0
  300. package/dist/ops/webgpu/normRMSGrad.js +225 -0
  301. package/dist/ops/webgpu/pack16.d.ts +1 -0
  302. package/dist/ops/webgpu/pack16.js +21 -0
  303. package/dist/ops/webgpu/pack16_program.d.ts +19 -0
  304. package/dist/ops/webgpu/pack16_program.js +93 -0
  305. package/dist/ops/webgpu/qkv.d.ts +1 -0
  306. package/dist/ops/webgpu/qkv.js +64 -0
  307. package/dist/ops/webgpu/rope.d.ts +1 -0
  308. package/dist/ops/webgpu/rope.js +163 -0
  309. package/dist/ops/webgpu/scatterSub.d.ts +1 -0
  310. package/dist/ops/webgpu/scatterSub.js +53 -0
  311. package/dist/ops/webgpu/slice16.d.ts +7 -0
  312. package/dist/ops/webgpu/slice16.js +74 -0
  313. package/dist/ops/webgpu/softmax16.d.ts +17 -0
  314. package/dist/ops/webgpu/softmax16.js +18 -0
  315. package/dist/ops/webgpu/softmax16_program.d.ts +13 -0
  316. package/dist/ops/webgpu/softmax16_program.js +89 -0
  317. package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +17 -0
  318. package/dist/ops/webgpu/softmax16_subgroup_program.js +70 -0
  319. package/dist/ops/webgpu/softmax16grad.d.ts +1 -0
  320. package/dist/ops/webgpu/softmax16grad.js +31 -0
  321. package/dist/ops/webgpu/sub16.d.ts +1 -0
  322. package/dist/ops/webgpu/sub16.js +14 -0
  323. package/dist/ops/webgpu/sum16.d.ts +1 -0
  324. package/dist/ops/webgpu/sum16.js +29 -0
  325. package/dist/ops/webgpu/transpose16.d.ts +1 -0
  326. package/dist/ops/webgpu/transpose16.js +37 -0
  327. package/dist/ops/webgpu/transpose16_program.d.ts +16 -0
  328. package/dist/ops/webgpu/transpose16_program.js +51 -0
  329. package/dist/ops/webgpu/transpose16_shared_program.d.ts +15 -0
  330. package/dist/ops/webgpu/transpose16_shared_program.js +79 -0
  331. package/dist/ops/webgpu/unpack16.d.ts +1 -0
  332. package/dist/ops/webgpu/unpack16.js +60 -0
  333. package/dist/ops/webgpu/utils/binary_op.d.ts +35 -0
  334. package/dist/ops/webgpu/utils/binary_op.js +141 -0
  335. package/dist/ops/webgpu/utils/deviceInfo.d.ts +7 -0
  336. package/dist/ops/webgpu/utils/deviceInfo.js +11 -0
  337. package/dist/ops/webgpu/utils/reductions.d.ts +43 -0
  338. package/dist/ops/webgpu/utils/reductions.js +263 -0
  339. package/dist/pack16-Ck-spx_F.js +39 -0
  340. package/dist/patches/webgpu_backend.d.ts +18 -0
  341. package/dist/patches/webgpu_backend.js +43 -0
  342. package/dist/patches/webgpu_base.d.ts +21 -0
  343. package/dist/patches/webgpu_base.js +22 -0
  344. package/dist/patches/webgpu_program.d.ts +36 -0
  345. package/dist/patches/webgpu_program.js +293 -0
  346. package/dist/pdf-UoDqCYzz.js +16726 -0
  347. package/dist/picomatch-3tUnMMbd.js +1063 -0
  348. package/dist/rope-CbeGlsV8.js +25 -0
  349. package/dist/selu_util-zkAx5doH.js +24 -0
  350. package/dist/shared-D1coEFea.js +1314 -0
  351. package/dist/shared-DOgWaqvL.js +5 -0
  352. package/dist/slice_util-Dgb3ANWI.js +208 -0
  353. package/dist/tfjs_backend-BjuQ5FqB.js +614 -0
  354. package/dist/tokeniser/BaseTokeniser.d.ts +33 -0
  355. package/dist/tokeniser/BaseTokeniser.js +2 -0
  356. package/dist/tokeniser/CharTokeniser.d.ts +24 -0
  357. package/dist/tokeniser/CharTokeniser.js +92 -0
  358. package/dist/tokeniser/bpe.d.ts +28 -0
  359. package/dist/tokeniser/bpe.js +170 -0
  360. package/dist/tokeniser/messages.d.ts +61 -0
  361. package/dist/tokeniser/messages.js +0 -0
  362. package/dist/tokeniser/type.d.ts +34 -0
  363. package/dist/tokeniser/type.js +0 -0
  364. package/dist/training/AdamW.d.ts +36 -0
  365. package/dist/training/AdamW.js +128 -0
  366. package/dist/training/BasicTrainer.d.ts +63 -0
  367. package/dist/training/BasicTrainer.js +265 -0
  368. package/dist/training/DatasetBuilder.d.ts +26 -0
  369. package/dist/training/DatasetBuilder.js +2 -0
  370. package/dist/training/Evaluator.d.ts +19 -0
  371. package/dist/training/Evaluator.js +48 -0
  372. package/dist/training/LRScheduler.d.ts +12 -0
  373. package/dist/training/LRScheduler.js +38 -0
  374. package/dist/training/PreTrainer.d.ts +11 -0
  375. package/dist/training/PreTrainer.js +22 -0
  376. package/dist/training/SFTTrainer.d.ts +12 -0
  377. package/dist/training/SFTTrainer.js +24 -0
  378. package/dist/training/loss.d.ts +3 -0
  379. package/dist/training/loss.js +19 -0
  380. package/dist/training/orthoGrad.d.ts +2 -0
  381. package/dist/training/orthoGrad.js +10 -0
  382. package/dist/training/sparseCrossEntropy.d.ts +7 -0
  383. package/dist/training/sparseCrossEntropy.js +47 -0
  384. package/dist/training/tasks/ConversationTask.d.ts +18 -0
  385. package/dist/training/tasks/ConversationTask.js +38 -0
  386. package/dist/training/tasks/PretrainingTask.d.ts +17 -0
  387. package/dist/training/tasks/PretrainingTask.js +42 -0
  388. package/dist/training/tasks/StartSentenceTask.d.ts +18 -0
  389. package/dist/training/tasks/StartSentenceTask.js +45 -0
  390. package/dist/training/tasks/Task.d.ts +22 -0
  391. package/dist/training/tasks/Task.js +55 -0
  392. package/dist/training/tasks/splitter.d.ts +5 -0
  393. package/dist/training/tasks/splitter.js +18 -0
  394. package/dist/training/types.d.ts +78 -0
  395. package/dist/training/types.js +0 -0
  396. package/dist/training/validation.d.ts +17 -0
  397. package/dist/training/validation.js +2 -0
  398. package/dist/utilities/arrayClose.d.ts +1 -0
  399. package/dist/utilities/arrayClose.js +16 -0
  400. package/dist/utilities/datasetID.d.ts +2 -0
  401. package/dist/utilities/datasetID.js +18 -0
  402. package/dist/utilities/dummy.d.ts +9 -0
  403. package/dist/utilities/dummy.js +36 -0
  404. package/dist/utilities/multinomialCPU.d.ts +2 -0
  405. package/dist/utilities/multinomialCPU.js +9 -0
  406. package/dist/utilities/naming.d.ts +4 -0
  407. package/dist/utilities/naming.js +0 -0
  408. package/dist/utilities/packed.d.ts +4 -0
  409. package/dist/utilities/packed.js +13 -0
  410. package/dist/utilities/parameters.d.ts +11 -0
  411. package/dist/utilities/parameters.js +38 -0
  412. package/dist/utilities/performance.d.ts +2 -0
  413. package/dist/utilities/performance.js +16 -0
  414. package/dist/utilities/profile.d.ts +17 -0
  415. package/dist/utilities/profile.js +33 -0
  416. package/dist/utilities/safetensors.d.ts +3 -0
  417. package/dist/utilities/safetensors.js +53 -0
  418. package/dist/utilities/sentences.d.ts +5 -0
  419. package/dist/utilities/sentences.js +32 -0
  420. package/dist/utilities/tokenParse.d.ts +1 -0
  421. package/dist/utilities/tokenParse.js +17 -0
  422. package/dist/utilities/topP.d.ts +1 -0
  423. package/dist/utilities/topP.js +12 -0
  424. package/dist/utilities/waitForModel.d.ts +2 -0
  425. package/dist/utilities/waitForModel.js +12 -0
  426. package/dist/utilities/weights.d.ts +12 -0
  427. package/dist/utilities/weights.js +40 -0
  428. package/dist/utilities/yielder.d.ts +1 -0
  429. package/dist/utilities/yielder.js +7 -0
  430. package/dist/webgpu-Dt7BMzWz.js +525 -0
  431. package/dist/webgpu_program-WOyIVMlZ.js +392 -0
  432. package/dist/webgpu_util-B_F3SShA.js +106 -0
  433. package/package.json +1 -1
@@ -0,0 +1,7 @@
1
+ import { Conversation } from '../tokeniser/type';
2
+ export interface DataOptions { // Optional settings accepted by loadTextData; every field may be omitted.
3
+ maxSize?: number; // NOTE(review): presumably an upper bound on how much data is loaded; unit is not visible here — confirm against textLoader.js
4
+ column?: string; // NOTE(review): presumably names the column to read from tabular input — confirm
5
+ hasHeader?: boolean; // NOTE(review): presumably tells the loader whether tabular input starts with a header row — confirm
6
+ }
7
+ export default function loadTextData(file: File, options?: DataOptions, cb?: (progress: number) => void): Promise<Conversation[][]>; // Default export: takes a File, optional DataOptions and an optional callback invoked with a numeric progress value; resolves to Conversation[][].
@@ -0,0 +1,613 @@
1
+ import { a as e, t } from "../chunk-BPntVaq0.js";
2
+ import { loadPDF as n } from "./pdf.js";
3
+ import { loadDOCX as r, t as i } from "./docx.js";
4
+ //#endregion
5
+ //#region lib/data/textLoader.ts
6
+ var a = /* @__PURE__ */ e((/* @__PURE__ */ t(((e, t) => {
7
+ ((n, r) => {
8
+ typeof define == "function" && define.amd ? define([], r) : typeof t == "object" && e !== void 0 ? t.exports = r() : n.Papa = r();
9
+ })(e, function e() {
10
+ var t = typeof self < "u" ? self : typeof window < "u" ? window : t === void 0 ? {} : t, n, r = !t.document && !!t.postMessage, i = t.IS_PAPA_WORKER || !1, a = {}, o = 0, s = {};
11
+ function c(e) { // Base streamer constructor: sets up per-stream state, builds the parser handle, and installs parseChunk/_sendError. e is the user config.
12
+ this._handle = null, this._finished = !1, this._completed = !1, this._halted = !1, this._input = null, this._baseIndex = 0, this._partialLine = "", this._rowCount = 0, this._start = 0, this._nextChunk = null, this.isFirstChunk = !0, this._completeResults = { // Accumulator used only when neither a step nor a chunk callback is configured.
13
+ data: [],
14
+ errors: [],
15
+ meta: {}
16
+ }, function(e) {
17
+ var t = y(e); // y is defined outside this view — NOTE(review): presumably returns a copy of the config; confirm.
18
+ t.chunkSize = parseInt(t.chunkSize), e.step || e.chunk || (t.chunkSize = null), this._handle = new p(t), (this._handle.streamer = this)._config = t; // chunkSize is cleared when no step/chunk callback exists; handle and streamer are cross-linked and this._config becomes t.
19
+ }.call(this, e), this.parseChunk = function(e, n) { // Parses one chunk of text e; n truthy suppresses the user chunk callback (see the !n test below).
20
+ var r = parseInt(this._config.skipFirstNLines) || 0;
21
+ if (this.isFirstChunk && 0 < r) {
22
+ let t = this._config.newline;
23
+ t ||= (a = this._config.quoteChar || "\"", this._handle.guessLineEndings(e, a)), e = [...e.split(t).slice(r)].join(t); // Guess the newline if none is configured, then drop the first r lines of the first chunk.
24
+ }
25
+ this.isFirstChunk && x(this._config.beforeFirstChunk) && (a = this._config.beforeFirstChunk(e)) !== void 0 && (e = a), this.isFirstChunk = !1, this._halted = !1; // beforeFirstChunk may replace the chunk text by returning a non-undefined value. x is defined outside this view — NOTE(review): presumably an is-function check.
26
+ var r = this._partialLine + e, a = (this._partialLine = "", this._handle.parse(r, this._baseIndex, !this._finished)); // Prepend the leftover from the previous chunk, then parse; a holds the parse results.
27
+ if (!this._handle.paused() && !this._handle.aborted()) {
28
+ if (e = a.meta.cursor, r = (this._finished || (this._partialLine = r.substring(e - this._baseIndex), this._baseIndex = e), a && a.data && (this._rowCount += a.data.length), this._finished || this._config.preview && this._rowCount >= this._config.preview), i) t.postMessage({ // Unless finished, text after meta.cursor is kept as the next partial line; r becomes the "done" flag (finished or preview row count reached). i and t are the enclosing scope's worker flag and global object.
29
+ results: a,
30
+ workerId: s.WORKER_ID,
31
+ finished: r
32
+ });
33
+ else if (x(this._config.chunk) && !n) {
34
+ if (this._config.chunk(a, this._handle), this._handle.paused() || this._handle.aborted()) return void (this._halted = !0); // The chunk callback may pause/abort the handle; if so, mark halted and stop here.
35
+ this._completeResults = a = void 0; // Results were handed to the chunk callback, so nothing is retained.
36
+ }
37
+ return this._config.step || this._config.chunk || (this._completeResults.data = this._completeResults.data.concat(a.data), this._completeResults.errors = this._completeResults.errors.concat(a.errors), this._completeResults.meta = a.meta), this._completed || !r || !x(this._config.complete) || a && a.meta.aborted || (this._config.complete(this._completeResults, this._input), this._completed = !0), r || a && a.meta.paused || this._nextChunk(), a; // Accumulate when no step/chunk callback; call complete at most once when done and not aborted; request the next chunk unless done or paused; return a.
38
+ }
39
+ this._halted = !0; // Reached only when the handle reports paused or aborted after parsing.
40
+ }, this._sendError = function(e) { // Reports error e to config.error, or to the worker host when running as a worker.
41
+ x(this._config.error) ? this._config.error(e) : i && this._config.error && t.postMessage({
42
+ workerId: s.WORKER_ID,
43
+ error: e,
44
+ finished: !1
45
+ });
46
+ };
47
+ }
48
+ function l(e) { // Streamer that fetches its input with XMLHttpRequest, optionally in byte ranges of config.chunkSize. e is the user config.
49
+ var t; // The current XMLHttpRequest; shadows the enclosing scope's t inside this function.
50
+ (e ||= {}).chunkSize || (e.chunkSize = s.RemoteChunkSize), c.call(this, e), this._nextChunk = r ? function() { // r comes from the enclosing scope: true when the global has postMessage but no document. In that case the request is opened synchronously (see !r below), so _chunkLoaded is called directly.
51
+ this._readChunk(), this._chunkLoaded();
52
+ } : function() {
53
+ this._readChunk();
54
+ }, this.stream = function(e) { // e is passed to XMLHttpRequest.open as the URL.
55
+ this._input = e, this._nextChunk();
56
+ }, this._readChunk = function() {
57
+ if (this._finished) this._chunkLoaded();
58
+ else {
59
+ if (t = new XMLHttpRequest(), this._config.withCredentials && (t.withCredentials = this._config.withCredentials), r || (t.onload = b(this._chunkLoaded, this), t.onerror = b(this._chunkError, this)), t.open(this._config.downloadRequestBody ? "POST" : "GET", this._input, !r), this._config.downloadRequestHeaders) { // POST when a request body is configured, otherwise GET; async flag is !r. b is defined outside this view — NOTE(review): presumably binds a function to a this value.
60
+ var e, n = this._config.downloadRequestHeaders;
61
+ for (e in n) t.setRequestHeader(e, n[e]);
62
+ }
63
+ var i;
64
+ this._config.chunkSize && (i = this._start + this._config.chunkSize - 1, t.setRequestHeader("Range", "bytes=" + this._start + "-" + i)); // Range end is inclusive, hence the -1.
65
+ try {
66
+ t.send(this._config.downloadRequestBody);
67
+ } catch (e) {
68
+ this._chunkError(e.message);
69
+ }
70
+ r && t.status === 0 && this._chunkError(); // In the synchronous case a status of 0 after send is reported as an error.
71
+ }
72
+ }, this._chunkLoaded = function() {
73
+ t.readyState === 4 && (t.status < 200 || 400 <= t.status ? this._chunkError() : (this._start += this._config.chunkSize || t.responseText.length, this._finished = !this._config.chunkSize || this._start >= ((e) => (e = e.getResponseHeader("Content-Range")) === null ? -1 : parseInt(e.substring(e.lastIndexOf("/") + 1)))(t), this.parseChunk(t.responseText))); // Only acts on readyState 4. Status outside [200, 400) is an error. Total size is the number after the last "/" in Content-Range, or -1 when the header is absent (which makes the stream finished).
74
+ }, this._chunkError = function(e) {
75
+ e = t.statusText || e, this._sendError(Error(e)); // Prefer the XHR status text over the passed-in message.
76
+ };
77
+ }
78
+ function u(e) { // Streamer that reads a sliceable input (one with size and slice, e.g. a File) as text in chunks of config.chunkSize. e is the user config.
79
+ (e ||= {}).chunkSize || (e.chunkSize = s.LocalChunkSize), c.call(this, e);
80
+ var t, n, r = typeof FileReader < "u"; // t: reader instance; n: the input's slice method; r: whether FileReader exists (shadows the enclosing scope's r).
81
+ this.stream = function(e) {
82
+ this._input = e, n = e.slice || e.webkitSlice || e.mozSlice, r ? ((t = new FileReader()).onload = b(this._chunkLoaded, this), t.onerror = b(this._chunkError, this)) : t = new FileReaderSync(), this._nextChunk(); // Falls back to FileReaderSync when FileReader is unavailable. b is defined outside this view — NOTE(review): presumably binds a function to a this value.
83
+ }, this._nextChunk = function() {
84
+ this._finished || this._config.preview && !(this._rowCount < this._config.preview) || this._readChunk(); // Read another chunk unless finished or the preview row limit has been reached.
85
+ }, this._readChunk = function() {
86
+ var e = this._input, i = (this._config.chunkSize && (i = Math.min(this._start + this._config.chunkSize, this._input.size), e = n.call(e, this._start, i)), t.readAsText(e, this._config.encoding)); // i is first the slice end offset, then is overwritten with readAsText's return value.
87
+ r || this._chunkLoaded({ target: { result: i } }); // Without FileReader the read result is forwarded directly in an event-shaped object.
88
+ }, this._chunkLoaded = function(e) {
89
+ this._start += this._config.chunkSize, this._finished = !this._config.chunkSize || this._start >= this._input.size, this.parseChunk(e.target.result);
90
+ }, this._chunkError = function() {
91
+ this._sendError(t.error);
92
+ };
93
+ }
94
+ function d(e) { // Streamer over an in-memory string, consumed config.chunkSize characters at a time. e is the user config.
95
+ var t; // The not-yet-parsed remainder of the input string.
96
+ c.call(this, e ||= {}), this.stream = function(e) {
97
+ return t = e, this._nextChunk();
98
+ }, this._nextChunk = function() {
99
+ var e, n;
100
+ if (!this._finished) return e = this._config.chunkSize, t = e ? (n = t.substring(0, e), t.substring(e)) : (n = t, ""), this._finished = !t, this.parseChunk(n); // n is the chunk to parse; with no chunkSize the whole string is taken at once. Finished when the remainder is empty.
101
+ };
102
+ }
103
+ function f(e) { // Streamer over an event-emitting input that exposes on/removeListener/pause/resume and emits "data", "end" and "error". e is the user config.
104
+ c.call(this, e ||= {});
105
+ var t = [], n = !0, r = !1; // t: queue of pending text chunks; n: true when an incoming chunk may be parsed immediately; r: true once "end" has fired.
106
+ this.pause = function() {
107
+ c.prototype.pause.apply(this, arguments), this._input.pause(); // NOTE(review): no pause is assigned to c.prototype in the visible code — confirm it is defined elsewhere, otherwise this call throws.
108
+ }, this.resume = function() {
109
+ c.prototype.resume.apply(this, arguments), this._input.resume(); // NOTE(review): same concern as pause — c.prototype.resume is not defined in view.
110
+ }, this.stream = function(e) {
111
+ this._input = e, this._input.on("data", this._streamData), this._input.on("end", this._streamEnd), this._input.on("error", this._streamError);
112
+ }, this._checkIsFinished = function() {
113
+ r && t.length === 1 && (this._finished = !0); // Finished when the input has ended and exactly one chunk remains queued.
114
+ }, this._nextChunk = function() {
115
+ this._checkIsFinished(), t.length ? this.parseChunk(t.shift()) : n = !0; // Parse the next queued chunk, or flag that the next arriving chunk should be parsed right away.
116
+ }, this._streamData = b(function(e) { // b is defined outside this view — NOTE(review): presumably binds the function to the given this value.
117
+ try {
118
+ t.push(typeof e == "string" ? e : e.toString(this._config.encoding)), n && (n = !1, this._checkIsFinished(), this.parseChunk(t.shift())); // Non-string data is converted with toString(config.encoding) before queuing.
119
+ } catch (e) {
120
+ this._streamError(e);
121
+ }
122
+ }, this), this._streamError = b(function(e) {
123
+ this._streamCleanUp(), this._sendError(e);
124
+ }, this), this._streamEnd = b(function() {
125
+ this._streamCleanUp(), r = !0, this._streamData(""); // An empty string is queued on end so the finished check and a final parseChunk can run.
126
+ }, this), this._streamCleanUp = b(function() {
127
+ this._input.removeListener("data", this._streamData), this._input.removeListener("end", this._streamEnd), this._input.removeListener("error", this._streamError);
128
+ }, this);
129
+ }
130
+ function p(e) {
131
+ var t, n, r, i, a = 2 ** 53, o = -a, c = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/, l = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/, u = this, d = 0, f = 0, p = !1, g = !1, _ = [], v = {
132
+ data: [],
133
+ errors: [],
134
+ meta: {}
135
+ };
136
+ function b(t) {
137
+ return e.skipEmptyLines === "greedy" ? t.join("").trim() === "" : t.length === 1 && t[0].length === 0;
138
+ }
139
+ function S() {
140
+ if (v && r && (w("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '" + s.DefaultDelimiter + "'"), r = !1), e.skipEmptyLines && (v.data = v.data.filter(function(e) {
141
+ return !b(e);
142
+ })), C()) {
143
+ if (v) if (Array.isArray(v.data[0])) {
144
+ for (var t = 0; C() && t < v.data.length; t++) v.data[t].forEach(n);
145
+ v.data.splice(0, 1);
146
+ } else v.data.forEach(n);
147
+ function n(t, n) {
148
+ x(e.transformHeader) && (t = e.transformHeader(t, n)), _.push(t);
149
+ }
150
+ }
151
+ function n(t, n) {
152
+ for (var r = e.header ? {} : [], i = 0; i < t.length; i++) {
153
+ var s = i, u = t[i], u = ((t, n) => ((t) => (e.dynamicTypingFunction && e.dynamicTyping[t] === void 0 && (e.dynamicTyping[t] = e.dynamicTypingFunction(t)), !0 === (e.dynamicTyping[t] || e.dynamicTyping)))(t) ? n === "true" || n === "TRUE" || n !== "false" && n !== "FALSE" && (((e) => {
154
+ if (c.test(e) && (e = parseFloat(e), o < e && e < a)) return 1;
155
+ })(n) ? parseFloat(n) : l.test(n) ? new Date(n) : n === "" ? null : n) : n)(s = e.header ? i >= _.length ? "__parsed_extra" : _[i] : s, u = e.transform ? e.transform(u, s) : u);
156
+ s === "__parsed_extra" ? (r[s] = r[s] || [], r[s].push(u)) : r[s] = u;
157
+ }
158
+ return e.header && (i > _.length ? w("FieldMismatch", "TooManyFields", "Too many fields: expected " + _.length + " fields but parsed " + i, f + n) : i < _.length && w("FieldMismatch", "TooFewFields", "Too few fields: expected " + _.length + " fields but parsed " + i, f + n)), r;
159
+ }
160
+ var i;
161
+ v && (e.header || e.dynamicTyping || e.transform) && (i = 1, !v.data.length || Array.isArray(v.data[0]) ? (v.data = v.data.map(n), i = v.data.length) : v.data = n(v.data, 0), e.header && v.meta && (v.meta.fields = _), f += i);
162
+ }
163
+ function C() {
164
+ return e.header && _.length === 0;
165
+ }
166
/**
 * Appends an error record to the current result's `errors` list.
 *
 * @param {string} e - error type
 * @param {string} t - error code
 * @param {string} n - human-readable message
 * @param {*} [r] - row reference; the `row` key is only added when this is not `undefined`
 */
function w(e, t, n, r) {
  const entry = { type: e, code: t, message: n };
  if (r !== undefined) {
    entry.row = r;
  }
  v.errors.push(entry);
}
173
+ x(e.step) && (i = e.step, e.step = function(t) {
174
+ v = t, C() ? S() : (S(), v.data.length !== 0 && (d += t.data.length, e.preview && d > e.preview ? n.abort() : (v.data = v.data[0], i(v, u))));
175
+ }), this.parse = function(i, a, o) {
176
+ var c = e.quoteChar || "\"", c = (e.newline ||= this.guessLineEndings(i, c), r = !1, e.delimiter ? x(e.delimiter) && (e.delimiter = e.delimiter(i), v.meta.delimiter = e.delimiter) : ((c = ((t, n, r, i, a) => {
177
+ var o, c, l, u;
178
+ a ||= [
179
+ ",",
180
+ " ",
181
+ "|",
182
+ ";",
183
+ s.RECORD_SEP,
184
+ s.UNIT_SEP
185
+ ];
186
+ for (var d = 0; d < a.length; d++) {
187
+ for (var f, p = a[d], m = 0, g = 0, _ = 0, v = (l = void 0, new h({
188
+ comments: i,
189
+ delimiter: p,
190
+ newline: n,
191
+ preview: 10
192
+ }).parse(t)), y = 0; y < v.data.length; y++) r && b(v.data[y]) ? _++ : (f = v.data[y].length, g += f, l === void 0 ? l = f : 0 < f && (m += Math.abs(f - l), l = f));
193
+ 0 < v.data.length && (g /= v.data.length - _), (c === void 0 || m <= c) && (u === void 0 || u < g) && 1.99 < g && (c = m, o = p, u = g);
194
+ }
195
+ return {
196
+ successful: !!(e.delimiter = o),
197
+ bestDelimiter: o
198
+ };
199
+ })(i, e.newline, e.skipEmptyLines, e.comments, e.delimitersToGuess)).successful ? e.delimiter = c.bestDelimiter : (r = !0, e.delimiter = s.DefaultDelimiter), v.meta.delimiter = e.delimiter), y(e));
200
+ return e.preview && e.header && c.preview++, t = i, n = new h(c), v = n.parse(t, a, o), S(), p ? { meta: { paused: !0 } } : v || { meta: { paused: !1 } };
201
+ }, this.paused = function() {
202
+ return p;
203
+ }, this.pause = function() {
204
+ p = !0, n.abort(), t = x(e.chunk) ? "" : t.substring(n.getCharIndex());
205
+ }, this.resume = function() {
206
+ u.streamer._halted ? (p = !1, u.streamer.parseChunk(t, !0)) : setTimeout(u.resume, 3);
207
+ }, this.aborted = function() {
208
+ return g;
209
+ }, this.abort = function() {
210
+ g = !0, n.abort(), v.meta.aborted = !0, x(e.complete) && e.complete(v), t = "";
211
+ }, this.guessLineEndings = function(e, t) {
212
+ e = e.substring(0, 1048576);
213
+ var t = RegExp(m(t) + "([^]*?)" + m(t), "gm"), n = (e = e.replace(t, "")).split("\r"), t = e.split("\n"), e = 1 < t.length && t[0].length < n[0].length;
214
+ if (n.length === 1 || e) return "\n";
215
+ for (var r = 0, i = 0; i < n.length; i++) n[i][0] === "\n" && r++;
216
+ return r >= n.length / 2 ? "\r\n" : "\r";
217
+ };
218
+ }
219
/**
 * Escapes every regular-expression metacharacter in `e` so the result can be
 * embedded in a RegExp source and match the text literally.
 *
 * @param {string} e - text to escape
 * @returns {string} the escaped text
 */
function m(e) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  const escaped = e.replace(metaCharacters, (found) => "\\" + found);
  return escaped;
}
222
// h(e): low-level CSV parser constructor.
// Reads from the config object `e`: delimiter (t), newline (n), comments (r), step (i), preview (a),
// fastMode (o), quoteChar (u, defaults to '"') and escapeChar (d, defaults to the quote character).
// Installs this.parse, this.abort and this.getCharIndex on the instance.
// Throws Error("Comment character same as delimiter") when the comment marker equals the delimiter.
+ function h(e) {
222
+ var t = (e ||= {}).delimiter, n = e.newline, r = e.comments, i = e.step, a = e.preview, o = e.fastMode, c = null, l = !1, u = e.quoteChar == null ? "\"" : e.quoteChar, d = u;
223
// Option normalisation: a non-string or forbidden delimiter falls back to ",".
+ if (e.escapeChar !== void 0 && (d = e.escapeChar), (typeof t != "string" || -1 < s.BAD_DELIMITERS.indexOf(t)) && (t = ","), r === t) throw Error("Comment character same as delimiter");
224
// comments === true means "#"; an invalid comment marker disables comments; an unrecognised newline falls back to "\n".
+ !0 === r ? r = "#" : (typeof r != "string" || -1 < s.BAD_DELIMITERS.indexOf(r)) && (r = !1), n !== "\n" && n !== "\r" && n !== "\r\n" && (n = "\n");
225
// f: current character offset into the input (returned by getCharIndex); p: set to true by abort().
+ var f = 0, p = !1;
226
// parse(s, h, g): s is the input string (anything else throws "Input must be a string"),
// h is a base offset added to the reported cursor (and, when truthy, suppresses header renaming),
// g, when truthy, makes the parser drop the trailing row that is not terminated by a newline.
// Returns the object built by R(): { data, errors, meta }.
+ this.parse = function(s, h, g) {
227
+ if (typeof s != "string") throw Error("Input must be a string");
228
// C: finished rows, w: errors, T: fields of the row in progress, E: cursor after the last finished row.
+ var _ = s.length, v = t.length, y = n.length, b = r.length, S = x(i), C = [], w = [], T = [], E = f = 0;
229
+ if (!s) return R();
230
// Fast path: used when fastMode is truthy, or when it is not explicitly false and the input holds no quote character.
// Rows and fields are produced with plain split() calls.
+ if (o || !1 !== o && s.indexOf(u) === -1) {
231
+ for (var D = s.split(n), O = 0; O < D.length; O++) {
232
+ if (T = D[O], f += T.length, O !== D.length - 1) f += n.length;
233
+ else if (g) return R();
234
+ if (!r || T.substring(0, b) !== r) {
235
+ if (S) {
236
+ if (C = [], P(T.split(t)), z(), p) return R();
237
+ } else P(T.split(t));
238
+ if (a && a <= O) return C = C.slice(0, a), R(!0);
239
+ }
240
+ }
241
+ return R();
242
+ }
243
// Slow path: scan field by field. k and A cache the offsets of the next delimiter and newline,
// M the offset of the next quote character, and j matches an escaped quote (escape char followed by quote char).
+ for (var k = s.indexOf(t, f), A = s.indexOf(n, f), j = new RegExp(m(d) + m(u), "g"), M = s.indexOf(u, f);;) if (s[f] === u) for (M = f, f++;;) {
244
+ if ((M = s.indexOf(u, M + 1)) === -1) return g || w.push({
245
+ type: "Quotes",
246
+ code: "MissingQuotes",
247
+ message: "Quoted field unterminated",
248
+ row: C.length,
249
+ index: f
250
+ }), I();
251
+ if (M === _ - 1) return I(s.substring(f, M).replace(j, u));
252
+ if (u === d && s[M + 1] === d) M++;
253
+ else if (u === d || M === 0 || s[M - 1] !== d) {
254
+ k !== -1 && k < M + 1 && (k = s.indexOf(t, M + 1));
255
+ var N = F((A = A !== -1 && A < M + 1 ? s.indexOf(n, M + 1) : A) === -1 ? k : Math.min(k, A));
256
+ if (s.substr(M + 1 + N, v) === t) {
257
+ T.push(s.substring(f, M).replace(j, u)), s[f = M + 1 + N + v] !== u && (M = s.indexOf(u, f)), k = s.indexOf(t, f), A = s.indexOf(n, f);
258
+ break;
259
+ }
260
+ if (N = F(A), s.substring(M + 1 + N, M + 1 + N + y) === n) {
261
+ if (T.push(s.substring(f, M).replace(j, u)), L(M + 1 + N + y), k = s.indexOf(t, f), M = s.indexOf(u, f), S && (z(), p)) return R();
262
+ if (a && C.length >= a) return R(!0);
263
+ break;
264
+ }
265
+ w.push({
266
+ type: "Quotes",
267
+ code: "InvalidQuotes",
268
+ message: "Trailing quote on quoted field is malformed",
269
+ row: C.length,
270
+ index: f
271
+ }), M++;
272
+ }
273
+ }
274
+ else if (r && T.length === 0 && s.substring(f, f + b) === r) {
275
+ if (A === -1) return R();
276
+ f = A + y, A = s.indexOf(n, f), k = s.indexOf(t, f);
277
+ } else if (k !== -1 && (k < A || A === -1)) T.push(s.substring(f, k)), f = k + v, k = s.indexOf(t, f);
278
+ else {
279
+ if (A === -1) break;
280
+ if (T.push(s.substring(f, A)), L(A + y), S && (z(), p)) return R();
281
+ if (a && C.length >= a) return R(!0);
282
+ }
283
+ return I();
284
// P: append a finished row and remember the cursor position reached after it.
+ function P(e) {
285
+ C.push(e), E = f;
286
+ }
287
// F: length of the whitespace-only run between the closing quote at M and offset e;
// 0 when e is -1, the run is empty, or it contains non-whitespace text.
+ function F(e) {
288
+ var t = 0;
289
+ return t = e !== -1 && (e = s.substring(M + 1, e)) && e.trim() === "" ? e.length : t;
290
+ }
291
// I: finish parsing. Unless g is truthy, the remaining text (or the supplied value) becomes the last field,
// the row is stored and, in step mode, emitted. Always returns R().
+ function I(e) {
292
+ return g || (e === void 0 && (e = s.substring(f)), T.push(e), f = _, P(T), S && z()), R();
293
+ }
294
// L: move the cursor to e, close the row in progress and look up the next newline.
+ function L(e) {
295
+ f = e, P(T), T = [], A = s.indexOf(n, f);
296
+ }
297
// R: build the result object; a truthy argument marks the result as truncated.
// The first time rows are present (header mode on, h falsy), duplicate names in row 0 are renamed
// to name_1, name_2, ... and the mapping new name -> original name is kept in renamedHeaders.
+ function R(r) {
298
+ if (e.header && !h && C.length && !l) {
299
+ var i = C[0], a = Object.create(null), o = new Set(i);
300
+ let t = !1;
301
+ for (let n = 0; n < i.length; n++) {
302
+ let r = i[n];
303
+ if (a[r = x(e.transformHeader) ? e.transformHeader(r, n) : r]) {
304
+ let e, s = a[r];
305
+ for (; e = r + "_" + s, s++, o.has(e););
306
+ o.add(e), i[n] = e, a[r]++, t = !0, (c = c === null ? {} : c)[e] = r;
307
+ } else a[r] = 1, i[n] = r;
308
+ o.add(r);
309
+ }
310
+ t && console.warn("Duplicate headers found and renamed."), l = !0;
311
+ }
312
+ return {
313
+ data: C,
314
+ errors: w,
315
+ meta: {
316
+ delimiter: t,
317
+ linebreak: n,
318
+ aborted: p,
319
+ truncated: !!r,
320
+ cursor: E + (h || 0),
321
+ renamedHeaders: c
322
+ }
323
+ };
324
+ }
325
// z: hand the rows collected so far to the step callback, then reset the row and error buffers.
+ function z() {
326
+ i(R()), C = [], w = [];
327
+ }
328
+ }, this.abort = function() {
329
+ p = !0;
330
+ }, this.getCharIndex = function() {
331
+ return f;
332
+ };
333
+ }
335
/**
 * Handles a message posted back by a parser worker.
 *
 * Looks the worker up in the registry `a` by `workerId`, forwards an error to
 * `userError`, or feeds the results to `userStep` (row by row) or `userChunk`.
 * When the message is flagged `finished` and the user did not abort, the
 * worker is completed through `_`.
 *
 * @param {{data: object}} e - message event; `e.data` carries workerId, results, error, file, finished
 */
function g(e) {
  const message = e.data;
  const worker = a[message.workerId];
  let aborted = false;

  if (message.error) {
    worker.userError(message.error, message.file);
  } else if (message.results && message.results.data) {
    // Handle passed to user callbacks; pause/resume are the "Not implemented." stub.
    const handle = {
      abort: function () {
        aborted = true;
        _(message.workerId, {
          data: [],
          errors: [],
          meta: { aborted: true },
        });
      },
      pause: v,
      resume: v,
    };

    if (x(worker.userStep)) {
      for (let row = 0; row < message.results.data.length; row++) {
        worker.userStep(
          {
            data: message.results.data[row],
            errors: message.results.errors,
            meta: message.results.meta,
          },
          handle,
        );
        // Stop delivering rows as soon as the callback aborted.
        if (aborted) {
          break;
        }
      }
      delete message.results;
    } else if (x(worker.userChunk)) {
      worker.userChunk(message.results, handle, message.file);
      delete message.results;
    }
  }

  if (message.finished && !aborted) {
    _(message.workerId, message.results);
  }
}
361
/**
 * Completes a worker: calls its `userComplete` callback (when it is a function)
 * with the results, terminates it and removes it from the registry `a`.
 *
 * @param {*} e - worker id (key into `a`)
 * @param {*} t - results handed to `userComplete`
 */
function _(e, t) {
  const worker = a[e];
  if (x(worker.userComplete)) {
    worker.userComplete(t);
  }
  worker.terminate();
  delete a[e];
}
365
/**
 * Placeholder for operations that are not available; always throws.
 *
 * @throws {Error} always, with the message "Not implemented."
 */
function v() {
  const notImplemented = new Error("Not implemented.");
  throw notImplemented;
}
368
/**
 * Recursively copies plain data.
 *
 * Non-objects and `null` are returned unchanged. Arrays become new arrays and
 * every other object becomes a new plain object; all enumerable keys reachable
 * through `for...in` (inherited ones included) are copied.
 *
 * @param {*} e - value to copy
 * @returns {*} the copy, or `e` itself when it is not an object
 */
function y(e) {
  const isObjectLike = typeof e === "object" && e !== null;
  if (!isObjectLike) {
    return e;
  }
  const clone = Array.isArray(e) ? [] : {};
  for (const key in e) {
    clone[key] = y(e[key]);
  }
  return clone;
}
374
/**
 * Wraps `e` so that it is always invoked with `t` as its `this` value.
 * The wrapper forwards every argument and discards the return value.
 *
 * @param {Function} e - function to call
 * @param {*} t - `this` value for the call
 * @returns {Function} wrapper that returns `undefined`
 */
function b(e, t) {
  return function (...forwarded) {
    e.apply(t, forwarded);
  };
}
379
/**
 * Type guard: is the value callable?
 *
 * @param {*} e - value to inspect
 * @returns {boolean} `true` when `typeof e` is "function"
 */
function x(e) {
  const kind = typeof e;
  return kind === "function";
}
382
+ return s.parse = function(n, r) {
383
+ var i = (r ||= {}).dynamicTyping || !1;
384
+ if (x(i) && (r.dynamicTypingFunction = i, i = {}), r.dynamicTyping = i, r.transform = !!x(r.transform) && r.transform, !r.worker || !s.WORKERS_SUPPORTED) return i = null, s.NODE_STREAM_INPUT, typeof n == "string" ? (n = ((e) => e.charCodeAt(0) === 65279 ? e.slice(1) : e)(n), i = new (r.download ? l : d)(r)) : !0 === n.readable && x(n.read) && x(n.on) ? i = new f(r) : (t.File && n instanceof File || n instanceof Object) && (i = new u(r)), i.stream(n);
385
+ (i = (() => {
386
+ var n;
387
+ return !!s.WORKERS_SUPPORTED && (n = (() => {
388
+ var n = t.URL || t.webkitURL || null, r = e.toString();
389
+ return s.BLOB_URL ||= n.createObjectURL(new Blob([
390
+ "var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ",
391
+ "(",
392
+ r,
393
+ ")();"
394
+ ], { type: "text/javascript" }));
395
+ })(), (n = new t.Worker(n)).onmessage = g, n.id = o++, a[n.id] = n);
396
+ })()).userStep = r.step, i.userChunk = r.chunk, i.userComplete = r.complete, i.userError = r.error, r.step = x(r.step), r.chunk = x(r.chunk), r.complete = x(r.complete), r.error = x(r.error), delete r.worker, i.postMessage({
397
+ input: n,
398
+ config: r,
399
+ workerId: i.id
400
+ });
401
+ }, s.unparse = function(e, t) {
402
+ var n = !1, r = !0, i = ",", a = "\r\n", o = "\"", c = o + o, l = !1, u = null, d = !1, f = ((() => {
403
+ if (typeof t == "object") {
404
+ if (typeof t.delimiter != "string" || s.BAD_DELIMITERS.filter(function(e) {
405
+ return t.delimiter.indexOf(e) !== -1;
406
+ }).length || (i = t.delimiter), typeof t.quotes != "boolean" && typeof t.quotes != "function" && !Array.isArray(t.quotes) || (n = t.quotes), typeof t.skipEmptyLines != "boolean" && typeof t.skipEmptyLines != "string" || (l = t.skipEmptyLines), typeof t.newline == "string" && (a = t.newline), typeof t.quoteChar == "string" && (o = t.quoteChar), typeof t.header == "boolean" && (r = t.header), Array.isArray(t.columns)) {
407
+ if (t.columns.length === 0) throw Error("Option columns is empty");
408
+ u = t.columns;
409
+ }
410
+ t.escapeChar !== void 0 && (c = t.escapeChar + o), t.escapeFormulae instanceof RegExp ? d = t.escapeFormulae : typeof t.escapeFormulae == "boolean" && t.escapeFormulae && (d = /^[=+\-@\t\r].*$/);
411
+ }
412
+ })(), new RegExp(m(o), "g"));
413
+ if (typeof e == "string" && (e = JSON.parse(e)), Array.isArray(e)) {
414
+ if (!e.length || Array.isArray(e[0])) return p(null, e, l);
415
+ if (typeof e[0] == "object") return p(u || Object.keys(e[0]), e, l);
416
+ } else if (typeof e == "object") return typeof e.data == "string" && (e.data = JSON.parse(e.data)), Array.isArray(e.data) && (e.fields ||= e.meta && e.meta.fields || u, e.fields ||= Array.isArray(e.data[0]) ? e.fields : typeof e.data[0] == "object" ? Object.keys(e.data[0]) : [], Array.isArray(e.data[0]) || typeof e.data[0] == "object" || (e.data = [e.data])), p(e.fields || [], e.data || [], l);
417
+ throw Error("Unable to serialize unrecognized input");
418
+ function p(e, t, n) {
419
+ var o = "", s = (typeof e == "string" && (e = JSON.parse(e)), typeof t == "string" && (t = JSON.parse(t)), Array.isArray(e) && 0 < e.length), c = !Array.isArray(t[0]);
420
+ if (s && r) {
421
+ for (var l = 0; l < e.length; l++) 0 < l && (o += i), o += h(e[l], l);
422
+ 0 < t.length && (o += a);
423
+ }
424
+ for (var u = 0; u < t.length; u++) {
425
+ var d = (s ? e : t[u]).length, f = !1, p = s ? Object.keys(t[u]).length === 0 : t[u].length === 0;
426
+ if (n && !s && (f = n === "greedy" ? t[u].join("").trim() === "" : t[u].length === 1 && t[u][0].length === 0), n === "greedy" && s) {
427
+ for (var m = [], g = 0; g < d; g++) {
428
+ var _ = c ? e[g] : g;
429
+ m.push(t[u][_]);
430
+ }
431
+ f = m.join("").trim() === "";
432
+ }
433
+ if (!f) {
434
+ for (var v = 0; v < d; v++) {
435
+ 0 < v && !p && (o += i);
436
+ var y = s && c ? e[v] : v;
437
+ o += h(t[u][y], v);
438
+ }
439
+ u < t.length - 1 && (!n || 0 < d && !p) && (o += a);
440
+ }
441
+ }
442
+ return o;
443
+ }
444
+ function h(e, t) {
445
+ var r, a;
446
+ return e == null ? "" : e.constructor === Date ? JSON.stringify(e).slice(1, 25) : (a = !1, d && typeof e == "string" && d.test(e) && (e = "'" + e, a = !0), r = e.toString().replace(f, c), (a = a || !0 === n || typeof n == "function" && n(e, t) || Array.isArray(n) && n[t] || ((e, t) => {
447
+ for (var n = 0; n < t.length; n++) if (-1 < e.indexOf(t[n])) return !0;
448
+ return !1;
449
+ })(r, s.BAD_DELIMITERS) || -1 < r.indexOf(i) || r.charAt(0) === " " || r.charAt(r.length - 1) === " ") ? o + r + o : r);
450
+ }
451
+ }, s.RECORD_SEP = "", s.UNIT_SEP = "", s.BYTE_ORDER_MARK = "", s.BAD_DELIMITERS = [
452
+ "\r",
453
+ "\n",
454
+ "\"",
455
+ s.BYTE_ORDER_MARK
456
+ ], s.WORKERS_SUPPORTED = !r && !!t.Worker, s.NODE_STREAM_INPUT = 1, s.LocalChunkSize = 10485760, s.RemoteChunkSize = 5242880, s.DefaultDelimiter = ",", s.Parser = h, s.ParserHandle = p, s.NetworkStreamer = l, s.FileStreamer = u, s.StringStreamer = d, s.ReadableStreamStreamer = f, t.jQuery && ((n = t.jQuery).fn.parse = function(e) {
457
+ var r = e.config || {}, i = [];
458
+ return this.each(function(e) {
459
+ if (!(n(this).prop("tagName").toUpperCase() === "INPUT" && n(this).attr("type").toLowerCase() === "file" && t.FileReader) || !this.files || this.files.length === 0) return !0;
460
+ for (var a = 0; a < this.files.length; a++) i.push({
461
+ file: this.files[a],
462
+ inputElem: this,
463
+ instanceConfig: n.extend({}, r)
464
+ });
465
+ }), a(), this;
466
+ function a() {
467
+ if (i.length === 0) x(e.complete) && e.complete();
468
+ else {
469
+ var t, r, a, c, l = i[0];
470
+ if (x(e.before)) {
471
+ var u = e.before(l.file, l.inputElem);
472
+ if (typeof u == "object") {
473
+ if (u.action === "abort") return t = "AbortError", r = l.file, a = l.inputElem, c = u.reason, void (x(e.error) && e.error({ name: t }, r, a, c));
474
+ if (u.action === "skip") return void o();
475
+ typeof u.config == "object" && (l.instanceConfig = n.extend(l.instanceConfig, u.config));
476
+ } else if (u === "skip") return void o();
477
+ }
478
+ var d = l.instanceConfig.complete;
479
+ l.instanceConfig.complete = function(e) {
480
+ x(d) && d(e, l.file, l.inputElem), o();
481
+ }, s.parse(l.file, l.instanceConfig);
482
+ }
483
+ }
484
+ function o() {
485
+ i.splice(0, 1), a();
486
+ }
487
+ }), i && (t.onmessage = function(e) {
488
+ e = e.data, s.WORKER_ID === void 0 && e && (s.WORKER_ID = e.workerId), typeof e.input == "string" ? t.postMessage({
489
+ workerId: s.WORKER_ID,
490
+ results: s.parse(e.input, e.config),
491
+ finished: !0
492
+ }) : (t.File && e.input instanceof File || e.input instanceof Object) && (e = s.parse(e.input, e.config)) && t.postMessage({
493
+ workerId: s.WORKER_ID,
494
+ results: e,
495
+ finished: !0
496
+ });
497
+ }), (l.prototype = Object.create(c.prototype)).constructor = l, (u.prototype = Object.create(c.prototype)).constructor = u, (d.prototype = Object.create(d.prototype)).constructor = d, (f.prototype = Object.create(c.prototype)).constructor = f, s;
498
+ });
499
+ })))(), 1), o = /* @__PURE__ */ e(i(), 1);
500
/**
 * Locates a column by name, ignoring letter case.
 *
 * @param {string[]} e - cells of the first row
 * @param {string} t - wanted column name
 * @returns {number} index of the first matching cell, or 0 when none matches
 */
function s(e, t) {
  for (const [position, cell] of e.entries()) {
    if (cell.toLowerCase() === t.toLowerCase()) {
      return position;
    }
  }
  // No match: fall back to the first column.
  return 0;
}
504
/**
 * Header heuristic: a row is taken for a header when every cell is shorter
 * than 64 characters.
 *
 * @param {string[]} e - cells of the first row
 * @returns {boolean} `true` when no cell fails the `length < 64` check
 */
function c(e) {
  const hasLongCell = e.some((cell) => !(cell.length < 64));
  return !hasLongCell;
}
507
/**
 * Extracts the extension of a file name.
 *
 * The extension is everything after the last dot, lower-cased so that
 * "DATA.CSV" and "data.csv" are treated alike. A name without any dot has no
 * extension (previously the whole name was returned, so a file literally
 * called "csv" was mistaken for a CSV file).
 *
 * @param {string} e - file name, e.g. "corpus.jsonl"
 * @returns {string} lower-case extension without the dot, or "" when there is none
 */
function l(e) {
  const lastDot = e.lastIndexOf(".");
  if (lastDot === -1) {
    return "";
  }
  return e.slice(lastDot + 1).toLowerCase();
}
510
/**
 * Guesses a MIME type from a file name.
 *
 * @param {string} e - file name; its extension is obtained through `l`
 * @returns {string} the MIME type for a known extension, otherwise "unknown"
 */
function u(e) {
  const mimeByExtension = new Map([
    ["json", "application/json"],
    ["jsonl", "application/jsonl"],
    ["parquet", "application/parquet"],
    ["csv", "text/csv"],
    ["txt", "text/plain"],
    ["pdf", "application/pdf"],
    ["docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
    ["zip", "application/zip"],
  ]);
  const extension = l(e);
  return mimeByExtension.has(extension) ? mimeByExtension.get(extension) : "unknown";
}
523
/**
 * Checks whether a value looks like a chat conversation: an array whose FIRST
 * element is a non-null object with string `role` and string `content`.
 * Only the first element is inspected.
 *
 * @param {*} e - value to inspect
 * @returns {boolean} `true` when the first element has the message shape
 */
function d(e) {
  if (!Array.isArray(e)) {
    return false;
  }
  const first = e[0];
  if (typeof first !== "object" || first === null) {
    return false;
  }
  if (!("role" in first) || !("content" in first)) {
    return false;
  }
  return typeof first.role === "string" && typeof first.content === "string";
}
528
/**
 * MIME types the loader dispatches on. Parquet is listed only so that it
 * reaches its dedicated "not supported" error.
 */
const SUPPORTED_DATASET_MIME_TYPES = new Set([
  "application/json",
  "application/jsonl",
  "application/parquet",
  "application/pdf",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/zip",
  "text/csv",
  "text/plain",
]);

/**
 * Chooses the MIME type used to pick a loader.
 *
 * A recognised `file.type` always wins. Otherwise the type is guessed from the
 * file name; this also rescues files whose browser-reported type is not one of
 * the supported strings (for example "application/x-zip-compressed" or
 * "application/vnd.ms-excel") but whose extension is. When neither is usable
 * the original non-empty type (or "unknown") is returned so the error message
 * stays the same as before.
 */
function resolveDatasetMimeType(file) {
  if (SUPPORTED_DATASET_MIME_TYPES.has(file.type)) {
    return file.type;
  }
  const guessed = u(file.name);
  if (guessed !== "unknown" || file.type === "") {
    return guessed;
  }
  return file.type;
}

/**
 * Turns one parsed JSON value into a conversation.
 *
 * Chat-style arrays (as recognised by `d`) are passed through; strings and
 * objects with a `text` key become a single "text" message; everything else
 * (numbers, null, other objects) is stored as its JSON text.
 */
function toTextConversation(value) {
  if (d(value)) {
    return value;
  }
  let content;
  if (typeof value === "string") {
    content = value;
  } else if (typeof value === "object" && value !== null && "text" in value) {
    content = value.text;
  } else {
    // Guarding the `in` operator above avoids a TypeError on primitives.
    content = JSON.stringify(value);
  }
  return [{ role: "text", content }];
}

/**
 * Parses CSV text with Papa Parse and maps every data row to a one-message
 * conversation taken from the selected column. An input without rows yields
 * an empty list; rows that lack the column yield an empty string.
 */
function parseCsvConversations(text, options) {
  return new Promise((resolve, reject) => {
    a.default.parse(text, {
      header: !1,
      skipEmptyLines: !0,
      delimiter: ",",
      complete: (results) => {
        if (results.errors.length > 0) {
          console.error(results.errors);
          reject(/* @__PURE__ */ Error("Error parsing file"));
          return;
        }
        const rows = results.data;
        if (rows.length === 0) {
          resolve([]);
          return;
        }
        const columnIndex = s(rows[0], options?.column || "text");
        // Explicit hasHeader wins; otherwise fall back to the short-cells heuristic.
        const hasHeader = options?.hasHeader ?? c(rows[0]);
        const body = hasHeader ? rows.slice(1) : rows;
        resolve(body.map((row) => [{
          role: "text",
          content: row[columnIndex] ?? "",
        }]));
      },
      error: (err) => {
        reject(err);
      },
    });
  });
}

/**
 * Loads a dataset file and converts it to a list of conversations, each being
 * an array of `{ role, content }` messages.
 *
 * Supported inputs: JSON arrays, JSONL, CSV, plain text, PDF, DOCX and ZIP
 * archives (whose entries are loaded recursively and concatenated).
 *
 * @param {File} e - file to load; its `type` is used when recognised, otherwise the extension of `name`
 * @param {{maxSize?: number, column?: string, hasHeader?: boolean}} [t] - loader options
 * @param {(fraction: number) => void} [i] - progress callback, called with values between 0.1 and 1
 * @returns {Promise<Array<Array<{role: string, content: string}>>>} the conversations
 * @throws {Error} for Parquet files, for JSON that is not an array, for CSV parse errors
 *   and for unsupported file types
 */
async function f(e, t, i) {
  const file = e;
  const options = t;
  // Truthiness guard (rather than `?.`) keeps falsy non-function values harmless.
  const report = (fraction) => {
    if (i) i(fraction);
  };
  const mimeType = resolveDatasetMimeType(file);

  switch (mimeType) {
    case "application/parquet":
      throw Error("Parquet loading is not currently supported in the browser. Please convert your data to JSONL format.");

    case "application/pdf":
      return n(file, options?.maxSize);

    case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
      return r(file);

    case "application/json": {
      const parsed = JSON.parse(await file.text());
      if (!Array.isArray(parsed)) {
        throw Error("Expected JSON array");
      }
      return parsed.map((item) => toTextConversation(item));
    }

    case "application/jsonl": {
      const text = await file.text();
      report(.1);
      // Accept both LF and CRLF line endings so no stray "\r" ends up in the content.
      const lines = text.split(/\r?\n/).filter((line) => line.trim() !== "");
      return lines.map((line, index) => {
        if (index % 1e3 === 0) {
          report(.1 + index / lines.length * .9);
        }
        let parsed;
        try {
          parsed = JSON.parse(line);
        } catch {
          // Not JSON: keep the raw line as plain text.
          return [{
            role: "text",
            content: line,
          }];
        }
        return toTextConversation(parsed);
      });
    }

    case "application/zip": {
      const archive = await o.default.loadAsync(file);
      const entryNames = Object.keys(archive.files);
      let conversations = [];
      for (let index = 0; index < entryNames.length; index++) {
        const entryName = entryNames[index];
        const entry = archive.file(entryName);
        // Entries for which file() returns nothing are skipped (presumably folders; see the JSZip docs).
        if (!entry) {
          continue;
        }
        const blob = await entry.async("blob", (metadata) => {
          report(.1 + (metadata.percent / 100 * .9 / entryNames.length + index / entryNames.length * .9));
        });
        const loaded = await f(new File([blob], entryName), options);
        report(.1 + (index + 1) / entryNames.length * .9);
        conversations = conversations.concat(loaded);
      }
      return conversations;
    }

    case "text/csv": {
      const text = await file.text();
      report(.1);
      return parseCsvConversations(text, options);
    }

    case "text/plain":
      return [[{
        role: "text",
        content: await file.text(),
      }]];

    default:
      throw Error(`Unsupported file type: ${mimeType}`);
  }
}
612
+ //#endregion
613
+ export { f as default };