@genai-fi/nanogpt 0.20.0 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/BaseTokeniser-DSg9zcYq.js +221 -0
- package/dist/DatasetBuilder-DgURD85T.js +712 -0
- package/dist/Generator.d.ts +82 -0
- package/dist/Generator.js +2 -0
- package/dist/RealDiv-DBu0FQqT.js +362 -0
- package/dist/Reshape-CABOPB9d.js +94 -0
- package/dist/Reshape-DqO3r8BC.js +17 -0
- package/dist/TeachableLLM.d.ts +70 -0
- package/dist/TeachableLLM.js +2 -0
- package/dist/Trainer.d.ts +43 -0
- package/dist/Trainer.js +2 -0
- package/dist/backend.d.ts +2 -0
- package/dist/backend.js +13 -0
- package/dist/backend_util-Cg-roD1p.js +399 -0
- package/dist/binary_op_util-CrYk9LXL.js +103 -0
- package/dist/checks/appendCache.d.ts +1 -0
- package/dist/checks/appendCache.js +55 -0
- package/dist/checks/attentionMask.d.ts +1 -0
- package/dist/checks/attentionMask.js +56 -0
- package/dist/checks/check.d.ts +9 -0
- package/dist/checks/check.js +32 -0
- package/dist/checks/gelu.d.ts +1 -0
- package/dist/checks/gelu.js +46 -0
- package/dist/checks/index.d.ts +26 -0
- package/dist/checks/index.js +28 -0
- package/dist/checks/matMulGelu.d.ts +1 -0
- package/dist/checks/matMulGelu.js +84 -0
- package/dist/checks/normRMS.d.ts +1 -0
- package/dist/checks/normRMS.js +28 -0
- package/dist/checks/normRMSGrad.d.ts +1 -0
- package/dist/checks/normRMSGrad.js +22 -0
- package/dist/checks/packUnpack.d.ts +1 -0
- package/dist/checks/packUnpack.js +46 -0
- package/dist/checks/qkv.d.ts +1 -0
- package/dist/checks/qkv.js +34 -0
- package/dist/checks/rope.d.ts +1 -0
- package/dist/checks/rope.js +30 -0
- package/dist/checks/weights.d.ts +14 -0
- package/dist/checks/weights.js +27 -0
- package/dist/chunk-BPntVaq0.js +23 -0
- package/dist/complex_util-CkazZsaH.js +60 -0
- package/dist/concat_util-CWDZCBlA.js +19 -0
- package/dist/data/docx.d.ts +2 -0
- package/dist/data/docx.js +3046 -0
- package/dist/data/pdf.d.ts +2 -0
- package/dist/data/pdf.js +17 -0
- package/dist/data/textLoader.d.ts +7 -0
- package/dist/data/textLoader.js +613 -0
- package/dist/dist-BewPQWjc.js +7572 -0
- package/dist/dist-DVmq73nz.js +8775 -0
- package/dist/dist-DXwIvKxl.js +896 -0
- package/dist/dist-VEU5mfO0.js +7545 -0
- package/dist/gelu-Bf1HW1RY.js +27 -0
- package/dist/gpgpu_math-DvLcCH6u.js +1612 -0
- package/dist/inference/types.d.ts +16 -0
- package/dist/inference/types.js +0 -0
- package/dist/kernel_funcs_utils-HiXOOx3f.js +229 -0
- package/dist/layers/BaseLayer.d.ts +44 -0
- package/dist/layers/BaseLayer.js +76 -0
- package/dist/layers/CausalSelfAttention.d.ts +39 -0
- package/dist/layers/CausalSelfAttention.js +99 -0
- package/dist/layers/LoRA.d.ts +14 -0
- package/dist/layers/LoRA.js +48 -0
- package/dist/layers/MLP.d.ts +17 -0
- package/dist/layers/MLP.js +34 -0
- package/dist/layers/PositionEmbedding.d.ts +8 -0
- package/dist/layers/PositionEmbedding.js +27 -0
- package/dist/layers/RMSNorm.d.ts +12 -0
- package/dist/layers/RMSNorm.js +20 -0
- package/dist/layers/RoPECache.d.ts +18 -0
- package/dist/layers/RoPECache.js +337 -0
- package/dist/layers/TiedEmbedding.d.ts +13 -0
- package/dist/layers/TiedEmbedding.js +32 -0
- package/dist/layers/TransformerBlock.d.ts +27 -0
- package/dist/layers/TransformerBlock.js +51 -0
- package/dist/layers/WeightStore.d.ts +20 -0
- package/dist/layers/WeightStore.js +69 -0
- package/dist/loader/load.d.ts +6 -0
- package/dist/loader/load.js +2 -0
- package/dist/loader/loadHF.d.ts +8 -0
- package/dist/loader/loadHF.js +2 -0
- package/dist/loader/loadTransformers.d.ts +4 -0
- package/dist/loader/loadTransformers.js +2 -0
- package/dist/loader/loadZipMeta.d.ts +3 -0
- package/dist/loader/loadZipMeta.js +16 -0
- package/dist/loader/newZipLoad.d.ts +3 -0
- package/dist/loader/newZipLoad.js +2 -0
- package/dist/loader/oldZipLoad.d.ts +9 -0
- package/dist/loader/oldZipLoad.js +2 -0
- package/dist/loader/save.d.ts +16 -0
- package/dist/loader/save.js +2 -0
- package/dist/loader/types.d.ts +68 -0
- package/dist/loader/types.js +0 -0
- package/dist/main-CPjeMv0G.js +13500 -0
- package/dist/main.d.ts +50 -0
- package/dist/main.js +16 -0
- package/dist/matMul16-BNfZSnNM.js +81 -0
- package/dist/matMulGelu-CPTntosE.js +162 -0
- package/dist/models/NanoGPTV1.d.ts +16 -0
- package/dist/models/NanoGPTV1.js +2 -0
- package/dist/models/NanoGPTV2.d.ts +16 -0
- package/dist/models/NanoGPTV2.js +2 -0
- package/dist/models/config.d.ts +27 -0
- package/dist/models/config.js +37 -0
- package/dist/models/factory.d.ts +3 -0
- package/dist/models/factory.js +2 -0
- package/dist/models/model.d.ts +44 -0
- package/dist/models/model.js +2 -0
- package/dist/ops/adamAdjust.d.ts +2 -0
- package/dist/ops/adamAdjust.js +18 -0
- package/dist/ops/adamMoments.d.ts +2 -0
- package/dist/ops/adamMoments.js +16 -0
- package/dist/ops/add16.d.ts +2 -0
- package/dist/ops/add16.js +12 -0
- package/dist/ops/appendCache.d.ts +2 -0
- package/dist/ops/appendCache.js +25 -0
- package/dist/ops/attentionMask.d.ts +2 -0
- package/dist/ops/attentionMask.js +16 -0
- package/dist/ops/concat16.d.ts +2 -0
- package/dist/ops/concat16.js +8 -0
- package/dist/ops/cpu/adamAdjust.d.ts +1 -0
- package/dist/ops/cpu/adamAdjust.js +16 -0
- package/dist/ops/cpu/adamMoments.d.ts +1 -0
- package/dist/ops/cpu/adamMoments.js +16 -0
- package/dist/ops/cpu/appendCache.d.ts +1 -0
- package/dist/ops/cpu/appendCache.js +65 -0
- package/dist/ops/cpu/attentionMask.d.ts +1 -0
- package/dist/ops/cpu/attentionMask.js +16 -0
- package/dist/ops/cpu/fusedSoftmax.d.ts +9 -0
- package/dist/ops/cpu/fusedSoftmax.js +22 -0
- package/dist/ops/cpu/gatherSub.d.ts +1 -0
- package/dist/ops/cpu/gatherSub.js +12 -0
- package/dist/ops/cpu/gelu.d.ts +1 -0
- package/dist/ops/cpu/gelu.js +36 -0
- package/dist/ops/cpu/matMul16.d.ts +1 -0
- package/dist/ops/cpu/matMul16.js +14 -0
- package/dist/ops/cpu/matMulGelu.d.ts +1 -0
- package/dist/ops/cpu/matMulGelu.js +41 -0
- package/dist/ops/cpu/matMulMul.d.ts +1 -0
- package/dist/ops/cpu/matMulMul.js +20 -0
- package/dist/ops/cpu/mulDropout.d.ts +1 -0
- package/dist/ops/cpu/mulDropout.js +20 -0
- package/dist/ops/cpu/normRMS.d.ts +1 -0
- package/dist/ops/cpu/normRMS.js +35 -0
- package/dist/ops/cpu/qkv.d.ts +5 -0
- package/dist/ops/cpu/qkv.js +73 -0
- package/dist/ops/cpu/rope.d.ts +6 -0
- package/dist/ops/cpu/rope.js +81 -0
- package/dist/ops/cpu/scatterSub.d.ts +1 -0
- package/dist/ops/cpu/scatterSub.js +12 -0
- package/dist/ops/dot16.d.ts +2 -0
- package/dist/ops/dot16.js +29 -0
- package/dist/ops/dropout.d.ts +2 -0
- package/dist/ops/dropout.js +11 -0
- package/dist/ops/dropout16.d.ts +2 -0
- package/dist/ops/dropout16.js +22 -0
- package/dist/ops/gatherSub.d.ts +2 -0
- package/dist/ops/gatherSub.js +13 -0
- package/dist/ops/gelu.d.ts +3 -0
- package/dist/ops/gelu.js +2 -0
- package/dist/ops/globalNorm.d.ts +2 -0
- package/dist/ops/globalNorm.js +19 -0
- package/dist/ops/grads/add16.d.ts +1 -0
- package/dist/ops/grads/add16.js +27 -0
- package/dist/ops/grads/attentionMask.d.ts +1 -0
- package/dist/ops/grads/attentionMask.js +26 -0
- package/dist/ops/grads/dropout16.d.ts +1 -0
- package/dist/ops/grads/dropout16.js +1 -0
- package/dist/ops/grads/gelu.d.ts +2 -0
- package/dist/ops/grads/gelu.js +2 -0
- package/dist/ops/grads/matMul16.d.ts +2 -0
- package/dist/ops/grads/matMul16.js +2 -0
- package/dist/ops/grads/matMulGelu.d.ts +1 -0
- package/dist/ops/grads/matMulGelu.js +22 -0
- package/dist/ops/grads/mul16.d.ts +1 -0
- package/dist/ops/grads/mul16.js +1 -0
- package/dist/ops/grads/normRMS.d.ts +3 -0
- package/dist/ops/grads/normRMS.js +37 -0
- package/dist/ops/grads/pack16.d.ts +2 -0
- package/dist/ops/grads/pack16.js +2 -0
- package/dist/ops/grads/qkv.d.ts +3 -0
- package/dist/ops/grads/qkv.js +46 -0
- package/dist/ops/grads/rope.d.ts +2 -0
- package/dist/ops/grads/rope.js +2 -0
- package/dist/ops/grads/softmax16.d.ts +2 -0
- package/dist/ops/grads/softmax16.js +23 -0
- package/dist/ops/grads/unpack16.d.ts +2 -0
- package/dist/ops/grads/unpack16.js +2 -0
- package/dist/ops/grads/utils.d.ts +4 -0
- package/dist/ops/grads/utils.js +12 -0
- package/dist/ops/log.d.ts +0 -0
- package/dist/ops/log.js +1 -0
- package/dist/ops/matMul16.d.ts +15 -0
- package/dist/ops/matMul16.js +2 -0
- package/dist/ops/matMulGelu.d.ts +3 -0
- package/dist/ops/matMulGelu.js +20 -0
- package/dist/ops/matMulMul.d.ts +2 -0
- package/dist/ops/matMulMul.js +16 -0
- package/dist/ops/mul16.d.ts +2 -0
- package/dist/ops/mul16.js +43 -0
- package/dist/ops/mulDrop.d.ts +2 -0
- package/dist/ops/mulDrop.js +15 -0
- package/dist/ops/normRMS.d.ts +2 -0
- package/dist/ops/normRMS.js +22 -0
- package/dist/ops/pack16.d.ts +2 -0
- package/dist/ops/pack16.js +2 -0
- package/dist/ops/qkv.d.ts +2 -0
- package/dist/ops/qkv.js +16 -0
- package/dist/ops/reshape16.d.ts +2 -0
- package/dist/ops/reshape16.js +33 -0
- package/dist/ops/rope.d.ts +3 -0
- package/dist/ops/rope.js +2 -0
- package/dist/ops/scatterSub.d.ts +2 -0
- package/dist/ops/scatterSub.js +13 -0
- package/dist/ops/slice16.d.ts +2 -0
- package/dist/ops/slice16.js +11 -0
- package/dist/ops/softmax16.d.ts +2 -0
- package/dist/ops/softmax16.js +9 -0
- package/dist/ops/sub16.d.ts +2 -0
- package/dist/ops/sub16.js +11 -0
- package/dist/ops/sum16.d.ts +2 -0
- package/dist/ops/sum16.js +13 -0
- package/dist/ops/transpose16.d.ts +3 -0
- package/dist/ops/transpose16.js +32 -0
- package/dist/ops/unpack16.d.ts +2 -0
- package/dist/ops/unpack16.js +2 -0
- package/dist/ops/webgl/adamAdjust.d.ts +1 -0
- package/dist/ops/webgl/adamAdjust.js +82 -0
- package/dist/ops/webgl/adamMoments.d.ts +1 -0
- package/dist/ops/webgl/adamMoments.js +44 -0
- package/dist/ops/webgl/appendCache.d.ts +1 -0
- package/dist/ops/webgl/appendCache.js +53 -0
- package/dist/ops/webgl/attentionMask.d.ts +1 -0
- package/dist/ops/webgl/attentionMask.js +64 -0
- package/dist/ops/webgl/dropout16.d.ts +1 -0
- package/dist/ops/webgl/dropout16.js +12 -0
- package/dist/ops/webgl/fusedSoftmax.d.ts +11 -0
- package/dist/ops/webgl/fusedSoftmax.js +70 -0
- package/dist/ops/webgl/gatherSub.d.ts +1 -0
- package/dist/ops/webgl/gatherSub.js +28 -0
- package/dist/ops/webgl/gelu.d.ts +2 -0
- package/dist/ops/webgl/gelu.js +48 -0
- package/dist/ops/webgl/log.d.ts +17 -0
- package/dist/ops/webgl/log.js +14 -0
- package/dist/ops/webgl/matMul16.d.ts +1 -0
- package/dist/ops/webgl/matMul16.js +37 -0
- package/dist/ops/webgl/matMulGelu.d.ts +21 -0
- package/dist/ops/webgl/matMulGelu.js +2 -0
- package/dist/ops/webgl/matMulMul.d.ts +14 -0
- package/dist/ops/webgl/matMulMul.js +24 -0
- package/dist/ops/webgl/mulDropout.d.ts +1 -0
- package/dist/ops/webgl/mulDropout.js +32 -0
- package/dist/ops/webgl/normRMS.d.ts +1 -0
- package/dist/ops/webgl/normRMS.js +114 -0
- package/dist/ops/webgl/qkv.d.ts +1 -0
- package/dist/ops/webgl/qkv.js +54 -0
- package/dist/ops/webgl/rope.d.ts +1 -0
- package/dist/ops/webgl/rope.js +72 -0
- package/dist/ops/webgl/scatterSub.d.ts +1 -0
- package/dist/ops/webgl/scatterSub.js +28 -0
- package/dist/ops/webgpu/adamAdjust.d.ts +1 -0
- package/dist/ops/webgpu/adamAdjust.js +77 -0
- package/dist/ops/webgpu/adamMoments.d.ts +1 -0
- package/dist/ops/webgpu/adamMoments.js +76 -0
- package/dist/ops/webgpu/add16.d.ts +1 -0
- package/dist/ops/webgpu/add16.js +14 -0
- package/dist/ops/webgpu/appendCache.d.ts +1 -0
- package/dist/ops/webgpu/appendCache.js +130 -0
- package/dist/ops/webgpu/attentionMask.d.ts +1 -0
- package/dist/ops/webgpu/attentionMask.js +42 -0
- package/dist/ops/webgpu/attentionMask32_program.d.ts +19 -0
- package/dist/ops/webgpu/attentionMask32_program.js +62 -0
- package/dist/ops/webgpu/clipScale.d.ts +1 -0
- package/dist/ops/webgpu/clipScale.js +45 -0
- package/dist/ops/webgpu/concat16.d.ts +19 -0
- package/dist/ops/webgpu/concat16.js +111 -0
- package/dist/ops/webgpu/dropout16.d.ts +1 -0
- package/dist/ops/webgpu/dropout16.js +59 -0
- package/dist/ops/webgpu/gatherSub.d.ts +1 -0
- package/dist/ops/webgpu/gatherSub.js +52 -0
- package/dist/ops/webgpu/gelu.d.ts +14 -0
- package/dist/ops/webgpu/gelu.js +147 -0
- package/dist/ops/webgpu/index.d.ts +0 -0
- package/dist/ops/webgpu/index.js +26 -0
- package/dist/ops/webgpu/matMul16.d.ts +1 -0
- package/dist/ops/webgpu/matMul16.js +70 -0
- package/dist/ops/webgpu/matMul16_program.d.ts +42 -0
- package/dist/ops/webgpu/matMul16_program.js +303 -0
- package/dist/ops/webgpu/mul16.d.ts +1 -0
- package/dist/ops/webgpu/mul16.js +14 -0
- package/dist/ops/webgpu/norm2.d.ts +1 -0
- package/dist/ops/webgpu/norm2.js +46 -0
- package/dist/ops/webgpu/normRMS.d.ts +1 -0
- package/dist/ops/webgpu/normRMS.js +26 -0
- package/dist/ops/webgpu/normRMS16_program.d.ts +10 -0
- package/dist/ops/webgpu/normRMS16_program.js +28 -0
- package/dist/ops/webgpu/normRMS32_program.d.ts +10 -0
- package/dist/ops/webgpu/normRMS32_program.js +28 -0
- package/dist/ops/webgpu/normRMSGrad.d.ts +1 -0
- package/dist/ops/webgpu/normRMSGrad.js +225 -0
- package/dist/ops/webgpu/pack16.d.ts +1 -0
- package/dist/ops/webgpu/pack16.js +21 -0
- package/dist/ops/webgpu/pack16_program.d.ts +19 -0
- package/dist/ops/webgpu/pack16_program.js +93 -0
- package/dist/ops/webgpu/qkv.d.ts +1 -0
- package/dist/ops/webgpu/qkv.js +64 -0
- package/dist/ops/webgpu/rope.d.ts +1 -0
- package/dist/ops/webgpu/rope.js +163 -0
- package/dist/ops/webgpu/scatterSub.d.ts +1 -0
- package/dist/ops/webgpu/scatterSub.js +53 -0
- package/dist/ops/webgpu/slice16.d.ts +7 -0
- package/dist/ops/webgpu/slice16.js +74 -0
- package/dist/ops/webgpu/softmax16.d.ts +17 -0
- package/dist/ops/webgpu/softmax16.js +18 -0
- package/dist/ops/webgpu/softmax16_program.d.ts +13 -0
- package/dist/ops/webgpu/softmax16_program.js +89 -0
- package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +17 -0
- package/dist/ops/webgpu/softmax16_subgroup_program.js +70 -0
- package/dist/ops/webgpu/softmax16grad.d.ts +1 -0
- package/dist/ops/webgpu/softmax16grad.js +31 -0
- package/dist/ops/webgpu/sub16.d.ts +1 -0
- package/dist/ops/webgpu/sub16.js +14 -0
- package/dist/ops/webgpu/sum16.d.ts +1 -0
- package/dist/ops/webgpu/sum16.js +29 -0
- package/dist/ops/webgpu/transpose16.d.ts +1 -0
- package/dist/ops/webgpu/transpose16.js +37 -0
- package/dist/ops/webgpu/transpose16_program.d.ts +16 -0
- package/dist/ops/webgpu/transpose16_program.js +51 -0
- package/dist/ops/webgpu/transpose16_shared_program.d.ts +15 -0
- package/dist/ops/webgpu/transpose16_shared_program.js +79 -0
- package/dist/ops/webgpu/unpack16.d.ts +1 -0
- package/dist/ops/webgpu/unpack16.js +60 -0
- package/dist/ops/webgpu/utils/binary_op.d.ts +35 -0
- package/dist/ops/webgpu/utils/binary_op.js +141 -0
- package/dist/ops/webgpu/utils/deviceInfo.d.ts +7 -0
- package/dist/ops/webgpu/utils/deviceInfo.js +11 -0
- package/dist/ops/webgpu/utils/reductions.d.ts +43 -0
- package/dist/ops/webgpu/utils/reductions.js +263 -0
- package/dist/pack16-Ck-spx_F.js +39 -0
- package/dist/patches/webgpu_backend.d.ts +18 -0
- package/dist/patches/webgpu_backend.js +43 -0
- package/dist/patches/webgpu_base.d.ts +21 -0
- package/dist/patches/webgpu_base.js +22 -0
- package/dist/patches/webgpu_program.d.ts +36 -0
- package/dist/patches/webgpu_program.js +293 -0
- package/dist/pdf-UoDqCYzz.js +16726 -0
- package/dist/picomatch-3tUnMMbd.js +1063 -0
- package/dist/rope-CbeGlsV8.js +25 -0
- package/dist/selu_util-zkAx5doH.js +24 -0
- package/dist/shared-D1coEFea.js +1314 -0
- package/dist/shared-DOgWaqvL.js +5 -0
- package/dist/slice_util-Dgb3ANWI.js +208 -0
- package/dist/tfjs_backend-BjuQ5FqB.js +614 -0
- package/dist/tokeniser/BaseTokeniser.d.ts +33 -0
- package/dist/tokeniser/BaseTokeniser.js +2 -0
- package/dist/tokeniser/CharTokeniser.d.ts +24 -0
- package/dist/tokeniser/CharTokeniser.js +92 -0
- package/dist/tokeniser/bpe.d.ts +28 -0
- package/dist/tokeniser/bpe.js +170 -0
- package/dist/tokeniser/messages.d.ts +61 -0
- package/dist/tokeniser/messages.js +0 -0
- package/dist/tokeniser/type.d.ts +34 -0
- package/dist/tokeniser/type.js +0 -0
- package/dist/training/AdamW.d.ts +36 -0
- package/dist/training/AdamW.js +128 -0
- package/dist/training/BasicTrainer.d.ts +63 -0
- package/dist/training/BasicTrainer.js +265 -0
- package/dist/training/DatasetBuilder.d.ts +26 -0
- package/dist/training/DatasetBuilder.js +2 -0
- package/dist/training/Evaluator.d.ts +19 -0
- package/dist/training/Evaluator.js +48 -0
- package/dist/training/LRScheduler.d.ts +12 -0
- package/dist/training/LRScheduler.js +38 -0
- package/dist/training/PreTrainer.d.ts +11 -0
- package/dist/training/PreTrainer.js +22 -0
- package/dist/training/SFTTrainer.d.ts +12 -0
- package/dist/training/SFTTrainer.js +24 -0
- package/dist/training/loss.d.ts +3 -0
- package/dist/training/loss.js +19 -0
- package/dist/training/orthoGrad.d.ts +2 -0
- package/dist/training/orthoGrad.js +10 -0
- package/dist/training/sparseCrossEntropy.d.ts +7 -0
- package/dist/training/sparseCrossEntropy.js +47 -0
- package/dist/training/tasks/ConversationTask.d.ts +18 -0
- package/dist/training/tasks/ConversationTask.js +38 -0
- package/dist/training/tasks/PretrainingTask.d.ts +17 -0
- package/dist/training/tasks/PretrainingTask.js +42 -0
- package/dist/training/tasks/StartSentenceTask.d.ts +18 -0
- package/dist/training/tasks/StartSentenceTask.js +45 -0
- package/dist/training/tasks/Task.d.ts +22 -0
- package/dist/training/tasks/Task.js +55 -0
- package/dist/training/tasks/splitter.d.ts +5 -0
- package/dist/training/tasks/splitter.js +18 -0
- package/dist/training/types.d.ts +78 -0
- package/dist/training/types.js +0 -0
- package/dist/training/validation.d.ts +17 -0
- package/dist/training/validation.js +2 -0
- package/dist/utilities/arrayClose.d.ts +1 -0
- package/dist/utilities/arrayClose.js +16 -0
- package/dist/utilities/datasetID.d.ts +2 -0
- package/dist/utilities/datasetID.js +18 -0
- package/dist/utilities/dummy.d.ts +9 -0
- package/dist/utilities/dummy.js +36 -0
- package/dist/utilities/multinomialCPU.d.ts +2 -0
- package/dist/utilities/multinomialCPU.js +9 -0
- package/dist/utilities/naming.d.ts +4 -0
- package/dist/utilities/naming.js +0 -0
- package/dist/utilities/packed.d.ts +4 -0
- package/dist/utilities/packed.js +13 -0
- package/dist/utilities/parameters.d.ts +11 -0
- package/dist/utilities/parameters.js +38 -0
- package/dist/utilities/performance.d.ts +2 -0
- package/dist/utilities/performance.js +16 -0
- package/dist/utilities/profile.d.ts +17 -0
- package/dist/utilities/profile.js +33 -0
- package/dist/utilities/safetensors.d.ts +3 -0
- package/dist/utilities/safetensors.js +53 -0
- package/dist/utilities/sentences.d.ts +5 -0
- package/dist/utilities/sentences.js +32 -0
- package/dist/utilities/tokenParse.d.ts +1 -0
- package/dist/utilities/tokenParse.js +17 -0
- package/dist/utilities/topP.d.ts +1 -0
- package/dist/utilities/topP.js +12 -0
- package/dist/utilities/waitForModel.d.ts +2 -0
- package/dist/utilities/waitForModel.js +12 -0
- package/dist/utilities/weights.d.ts +12 -0
- package/dist/utilities/weights.js +40 -0
- package/dist/utilities/yielder.d.ts +1 -0
- package/dist/utilities/yielder.js +7 -0
- package/dist/webgpu-Dt7BMzWz.js +525 -0
- package/dist/webgpu_program-WOyIVMlZ.js +392 -0
- package/dist/webgpu_util-B_F3SShA.js +106 -0
- package/package.json +1 -1
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { Conversation } from '../tokeniser/type';
|
|
2
|
+
/**
 * Optional settings for the text loader; this is the type of the `options`
 * argument of `loadTextData`. Every field is optional.
 */
export interface DataOptions {
|
|
3
|
+
/**
 * NOTE(review): meaning and unit (characters, bytes, rows?) are not visible
 * in this declaration — confirm against the textLoader implementation.
 */
maxSize?: number;
|
|
4
|
+
/**
 * Presumably names the column to read when the input is tabular (e.g. CSV) —
 * TODO confirm against the textLoader implementation.
 */
column?: string;
|
|
5
|
+
/**
 * Presumably states whether tabular input starts with a header row —
 * TODO confirm against the textLoader implementation.
 */
hasHeader?: boolean;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Default export of the text loader module.
 *
 * @param file - The `File` to load.
 * @param options - Optional `DataOptions` settings.
 * @param cb - Optional callback that takes a numeric `progress` value.
 *   NOTE(review): the scale of `progress` (0–1 vs 0–100) is not visible
 *   in this declaration — confirm against the implementation.
 * @returns A promise that resolves to `Conversation[][]`.
 */
export default function loadTextData(file: File, options?: DataOptions, cb?: (progress: number) => void): Promise<Conversation[][]>;
|
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
import { a as e, t } from "../chunk-BPntVaq0.js";
|
|
2
|
+
import { loadPDF as n } from "./pdf.js";
|
|
3
|
+
import { loadDOCX as r, t as i } from "./docx.js";
|
|
4
|
+
//#endregion
|
|
5
|
+
//#region lib/data/textLoader.ts
|
|
6
|
+
var a = /* @__PURE__ */ e((/* @__PURE__ */ t(((e, t) => {
|
|
7
|
+
((n, r) => {
|
|
8
|
+
typeof define == "function" && define.amd ? define([], r) : typeof t == "object" && e !== void 0 ? t.exports = r() : n.Papa = r();
|
|
9
|
+
})(e, function e() {
|
|
10
|
+
var t = typeof self < "u" ? self : typeof window < "u" ? window : t === void 0 ? {} : t, n, r = !t.document && !!t.postMessage, i = t.IS_PAPA_WORKER || !1, a = {}, o = 0, s = {};
|
|
11
|
+
/**
 * Base chunk streamer of the bundled Papa parser. The concrete streamers in
 * this file invoke it as `c.call(this, config)` to inherit its state and its
 * `parseChunk` / `_sendError` methods.
 *
 * It copies the config with `y`, creates the parser handle (`new p(config)`),
 * and keeps the bookkeeping needed to parse input chunk by chunk: the
 * unparsed tail of the previous chunk (`_partialLine`), the absolute cursor
 * (`_baseIndex`), the row count, and the accumulated results.
 *
 * @param {object} e - Parser configuration supplied by the caller.
 */
function c(e) {
  this._handle = null;
  this._finished = false;
  this._completed = false;
  this._halted = false;
  this._input = null;
  this._baseIndex = 0;
  this._partialLine = "";
  this._rowCount = 0;
  this._start = 0;
  this._nextChunk = null;
  this.isFirstChunk = true;
  this._completeResults = {
    data: [],
    errors: [],
    meta: {}
  };

  // Work on a copy of the caller's config. Chunking is only meaningful when
  // a `step` or `chunk` callback exists; otherwise chunkSize is cleared.
  const settings = y(e);
  settings.chunkSize = parseInt(settings.chunkSize);
  if (!e.step && !e.chunk) {
    settings.chunkSize = null;
  }
  this._handle = new p(settings);
  this._handle.streamer = this;
  this._config = settings;

  /**
   * Parses one chunk of text and drives the streaming state machine.
   *
   * @param {string} chunk - Raw text of the chunk.
   * @param {boolean} [skipChunkCallback] - When truthy, the user's `chunk`
   *   callback is not invoked for this call.
   * @returns {object|undefined} The parse results, or `undefined` when the
   *   parser was paused/aborted or the results were handed to `chunk`.
   */
  this.parseChunk = function(chunk, skipChunkCallback) {
    const linesToSkip = parseInt(this._config.skipFirstNLines) || 0;
    if (this.isFirstChunk && linesToSkip > 0) {
      let lineBreak = this._config.newline;
      if (!lineBreak) {
        const quote = this._config.quoteChar || "\"";
        lineBreak = this._handle.guessLineEndings(chunk, quote);
      }
      chunk = [...chunk.split(lineBreak).slice(linesToSkip)].join(lineBreak);
    }

    if (this.isFirstChunk && x(this._config.beforeFirstChunk)) {
      const replaced = this._config.beforeFirstChunk(chunk);
      if (replaced !== void 0) {
        chunk = replaced;
      }
    }
    this.isFirstChunk = false;
    this._halted = false;

    // Re-attach the tail left over from the previous chunk before parsing.
    const aggregate = this._partialLine + chunk;
    this._partialLine = "";
    let results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

    if (this._handle.paused() || this._handle.aborted()) {
      this._halted = true;
      return;
    }

    const cursor = results.meta.cursor;
    if (!this._finished) {
      // Keep whatever lies beyond the parser's cursor for the next chunk.
      this._partialLine = aggregate.substring(cursor - this._baseIndex);
      this._baseIndex = cursor;
    }
    if (results && results.data) {
      this._rowCount += results.data.length;
    }
    const done = this._finished || this._config.preview && this._rowCount >= this._config.preview;

    if (i) {
      // Worker mode: post the results instead of calling callbacks directly.
      t.postMessage({
        results: results,
        workerId: s.WORKER_ID,
        finished: done
      });
    } else if (x(this._config.chunk) && !skipChunkCallback) {
      this._config.chunk(results, this._handle);
      if (this._handle.paused() || this._handle.aborted()) {
        this._halted = true;
        return;
      }
      // Results were handed to the chunk callback; nothing is accumulated.
      results = void 0;
      this._completeResults = void 0;
    }

    if (!this._config.step && !this._config.chunk) {
      this._completeResults.data = this._completeResults.data.concat(results.data);
      this._completeResults.errors = this._completeResults.errors.concat(results.errors);
      this._completeResults.meta = results.meta;
    }

    const wasAborted = () => results && results.meta.aborted;
    if (!this._completed && done && x(this._config.complete) && !wasAborted()) {
      this._config.complete(this._completeResults, this._input);
      this._completed = true;
    }

    if (!done && !(results && results.meta.paused)) {
      this._nextChunk();
    }
    return results;
  };

  /**
   * Reports an error through the user's `error` callback, or by posting a
   * message when running as a worker with an `error` option set.
   *
   * @param {*} error - The error to report.
   */
  this._sendError = function(error) {
    if (x(this._config.error)) {
      this._config.error(error);
    } else if (i && this._config.error) {
      t.postMessage({
        workerId: s.WORKER_ID,
        error: error,
        finished: false
      });
    }
  };
}
|
|
48
|
+
/**
 * Streamer that downloads its input with XMLHttpRequest, optionally in
 * byte ranges of `chunkSize`, and feeds each response to `parseChunk`.
 *
 * Builds on the base streamer via `c.call(this, e)`. The factory-level flag
 * `r` selects synchronous requests (no onload/onerror handlers; the loaded
 * handler is called right after sending).
 *
 * @param {object} [e] - Parser configuration; `chunkSize` defaults to
 *   `s.RemoteChunkSize` when falsy.
 */
function l(e) {
  let xhr;

  e ||= {};
  if (!e.chunkSize) {
    e.chunkSize = s.RemoteChunkSize;
  }
  c.call(this, e);

  if (r) {
    // Synchronous mode: the response is available as soon as send() returns.
    this._nextChunk = function() {
      this._readChunk();
      this._chunkLoaded();
    };
  } else {
    this._nextChunk = function() {
      this._readChunk();
    };
  }

  /** Starts streaming from the given URL. */
  this.stream = function(url) {
    this._input = url;
    this._nextChunk();
  };

  /** Issues the request for the next chunk (or finishes if already done). */
  this._readChunk = function() {
    if (this._finished) {
      this._chunkLoaded();
      return;
    }

    xhr = new XMLHttpRequest();
    if (this._config.withCredentials) {
      xhr.withCredentials = this._config.withCredentials;
    }
    if (!r) {
      xhr.onload = b(this._chunkLoaded, this);
      xhr.onerror = b(this._chunkError, this);
    }
    xhr.open(this._config.downloadRequestBody ? "POST" : "GET", this._input, !r);

    if (this._config.downloadRequestHeaders) {
      const headers = this._config.downloadRequestHeaders;
      for (const headerName in headers) {
        xhr.setRequestHeader(headerName, headers[headerName]);
      }
    }

    if (this._config.chunkSize) {
      // Range end is inclusive, hence the -1.
      const rangeEnd = this._start + this._config.chunkSize - 1;
      xhr.setRequestHeader("Range", "bytes=" + this._start + "-" + rangeEnd);
    }

    try {
      xhr.send(this._config.downloadRequestBody);
    } catch (err) {
      this._chunkError(err.message);
    }

    if (r && xhr.status === 0) {
      this._chunkError();
    }
  };

  // Total size taken from the part after "/" in Content-Range; -1 if absent.
  const totalSizeOf = (request) => {
    const contentRange = request.getResponseHeader("Content-Range");
    if (contentRange === null) {
      return -1;
    }
    return parseInt(contentRange.substring(contentRange.lastIndexOf("/") + 1));
  };

  /** Handles a completed request: advances the offset and parses the text. */
  this._chunkLoaded = function() {
    if (xhr.readyState !== 4) {
      return;
    }
    if (xhr.status < 200 || 400 <= xhr.status) {
      this._chunkError();
      return;
    }
    this._start += this._config.chunkSize || xhr.responseText.length;
    this._finished = !this._config.chunkSize || this._start >= totalSizeOf(xhr);
    this.parseChunk(xhr.responseText);
  };

  /** Reports a request failure, preferring the XHR status text. */
  this._chunkError = function(message) {
    const reason = xhr.statusText || message;
    this._sendError(new Error(reason));
  };
}
|
|
78
|
+
/**
 * Streamer that reads a File/Blob in slices of `chunkSize` and feeds the
 * text of each slice to `parseChunk`.
 *
 * Builds on the base streamer via `c.call(this, e)`. Uses `FileReader` with
 * onload/onerror handlers when it exists, otherwise `FileReaderSync`.
 *
 * @param {object} [e] - Parser configuration; `chunkSize` defaults to
 *   `s.LocalChunkSize` when falsy.
 */
function u(e) {
  e ||= {};
  if (!e.chunkSize) {
    e.chunkSize = s.LocalChunkSize;
  }
  c.call(this, e);

  const hasAsyncReader = typeof FileReader !== "undefined";
  let reader;
  let sliceFn;

  /** Starts streaming from the given file. */
  this.stream = function(file) {
    this._input = file;
    sliceFn = file.slice || file.webkitSlice || file.mozSlice;
    if (hasAsyncReader) {
      reader = new FileReader();
      reader.onload = b(this._chunkLoaded, this);
      reader.onerror = b(this._chunkError, this);
    } else {
      reader = new FileReaderSync();
    }
    this._nextChunk();
  };

  /** Reads another chunk unless finished or the preview limit was reached. */
  this._nextChunk = function() {
    if (this._finished) {
      return;
    }
    if (this._config.preview && !(this._rowCount < this._config.preview)) {
      return;
    }
    this._readChunk();
  };

  /** Reads the next slice (or the whole input when chunkSize is falsy). */
  this._readChunk = function() {
    let blob = this._input;
    if (this._config.chunkSize) {
      const sliceEnd = Math.min(this._start + this._config.chunkSize, this._input.size);
      blob = sliceFn.call(blob, this._start, sliceEnd);
    }
    const text = reader.readAsText(blob, this._config.encoding);
    if (!hasAsyncReader) {
      // The synchronous reader returns the text directly; mimic a load event.
      this._chunkLoaded({ target: { result: text } });
    }
  };

  /** Advances the offset and parses the text delivered by the reader. */
  this._chunkLoaded = function(event) {
    this._start += this._config.chunkSize;
    this._finished = !this._config.chunkSize || this._start >= this._input.size;
    this.parseChunk(event.target.result);
  };

  /** Forwards the reader's error to the configured error handling. */
  this._chunkError = function() {
    this._sendError(reader.error);
  };
}
|
|
94
|
+
/**
 * Streamer for in-memory strings: hands the input to `parseChunk` in pieces
 * of `chunkSize` characters, or in one piece when `chunkSize` is falsy.
 *
 * Builds on the base streamer via `c.call(this, e)`.
 *
 * @param {object} [e] - Parser configuration.
 */
function d(e) {
  let remaining;

  e ||= {};
  c.call(this, e);

  /** Starts streaming the given string and returns the first parse result. */
  this.stream = function(text) {
    remaining = text;
    return this._nextChunk();
  };

  /** Parses the next piece; returns `undefined` once finished. */
  this._nextChunk = function() {
    if (this._finished) {
      return;
    }
    const size = this._config.chunkSize;
    let piece;
    if (size) {
      piece = remaining.substring(0, size);
      remaining = remaining.substring(size);
    } else {
      piece = remaining;
      remaining = "";
    }
    // Finished as soon as nothing is left to hand out.
    this._finished = !remaining;
    return this.parseChunk(piece);
  };
}
|
|
103
|
+
/**
 * Streamer for event-emitter style readable streams (objects exposing
 * `on`, `removeListener`, `pause` and `resume`). Incoming data is queued and
 * parsed one chunk at a time through `parseChunk`.
 *
 * Builds on the base streamer via `c.call(this, e)`.
 *
 * NOTE(review): `pause`/`resume` delegate to `c.prototype.pause` and
 * `c.prototype.resume`, which are not defined in the visible part of this
 * file — confirm they exist before relying on these methods.
 *
 * @param {object} [e] - Parser configuration.
 */
function f(e) {
  e ||= {};
  c.call(this, e);

  const queue = [];
  let parseOnData = true;
  let streamEnded = false;

  this.pause = function(...args) {
    c.prototype.pause.apply(this, args);
    this._input.pause();
  };

  this.resume = function(...args) {
    c.prototype.resume.apply(this, args);
    this._input.resume();
  };

  /** Subscribes to the stream's data, end and error events. */
  this.stream = function(stream) {
    this._input = stream;
    this._input.on("data", this._streamData);
    this._input.on("end", this._streamEnd);
    this._input.on("error", this._streamError);
  };

  /** Marks the streamer finished when the last queued chunk is up next. */
  this._checkIsFinished = function() {
    if (streamEnded && queue.length === 1) {
      this._finished = true;
    }
  };

  /** Parses the next queued chunk, or waits for more data to arrive. */
  this._nextChunk = function() {
    this._checkIsFinished();
    if (queue.length) {
      this.parseChunk(queue.shift());
    } else {
      parseOnData = true;
    }
  };

  this._streamData = b(function(chunk) {
    try {
      const text = typeof chunk === "string" ? chunk : chunk.toString(this._config.encoding);
      queue.push(text);
      if (parseOnData) {
        parseOnData = false;
        this._checkIsFinished();
        this.parseChunk(queue.shift());
      }
    } catch (err) {
      this._streamError(err);
    }
  }, this);

  this._streamError = b(function(err) {
    this._streamCleanUp();
    this._sendError(err);
  }, this);

  this._streamEnd = b(function() {
    this._streamCleanUp();
    streamEnded = true;
    // Push an empty chunk so the finished state gets evaluated.
    this._streamData("");
  }, this);

  this._streamCleanUp = b(function() {
    this._input.removeListener("data", this._streamData);
    this._input.removeListener("end", this._streamEnd);
    this._input.removeListener("error", this._streamError);
  }, this);
}
|
|
130
|
+
function p(e) {
|
|
131
|
+
var t, n, r, i, a = 2 ** 53, o = -a, c = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/, l = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/, u = this, d = 0, f = 0, p = !1, g = !1, _ = [], v = {
|
|
132
|
+
data: [],
|
|
133
|
+
errors: [],
|
|
134
|
+
meta: {}
|
|
135
|
+
};
|
|
136
|
+
function b(t) {
|
|
137
|
+
return e.skipEmptyLines === "greedy" ? t.join("").trim() === "" : t.length === 1 && t[0].length === 0;
|
|
138
|
+
}
|
|
139
|
+
function S() {
|
|
140
|
+
if (v && r && (w("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '" + s.DefaultDelimiter + "'"), r = !1), e.skipEmptyLines && (v.data = v.data.filter(function(e) {
|
|
141
|
+
return !b(e);
|
|
142
|
+
})), C()) {
|
|
143
|
+
if (v) if (Array.isArray(v.data[0])) {
|
|
144
|
+
for (var t = 0; C() && t < v.data.length; t++) v.data[t].forEach(n);
|
|
145
|
+
v.data.splice(0, 1);
|
|
146
|
+
} else v.data.forEach(n);
|
|
147
|
+
function n(t, n) {
|
|
148
|
+
x(e.transformHeader) && (t = e.transformHeader(t, n)), _.push(t);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
function n(t, n) {
|
|
152
|
+
for (var r = e.header ? {} : [], i = 0; i < t.length; i++) {
|
|
153
|
+
var s = i, u = t[i], u = ((t, n) => ((t) => (e.dynamicTypingFunction && e.dynamicTyping[t] === void 0 && (e.dynamicTyping[t] = e.dynamicTypingFunction(t)), !0 === (e.dynamicTyping[t] || e.dynamicTyping)))(t) ? n === "true" || n === "TRUE" || n !== "false" && n !== "FALSE" && (((e) => {
|
|
154
|
+
if (c.test(e) && (e = parseFloat(e), o < e && e < a)) return 1;
|
|
155
|
+
})(n) ? parseFloat(n) : l.test(n) ? new Date(n) : n === "" ? null : n) : n)(s = e.header ? i >= _.length ? "__parsed_extra" : _[i] : s, u = e.transform ? e.transform(u, s) : u);
|
|
156
|
+
s === "__parsed_extra" ? (r[s] = r[s] || [], r[s].push(u)) : r[s] = u;
|
|
157
|
+
}
|
|
158
|
+
return e.header && (i > _.length ? w("FieldMismatch", "TooManyFields", "Too many fields: expected " + _.length + " fields but parsed " + i, f + n) : i < _.length && w("FieldMismatch", "TooFewFields", "Too few fields: expected " + _.length + " fields but parsed " + i, f + n)), r;
|
|
159
|
+
}
|
|
160
|
+
var i;
|
|
161
|
+
v && (e.header || e.dynamicTyping || e.transform) && (i = 1, !v.data.length || Array.isArray(v.data[0]) ? (v.data = v.data.map(n), i = v.data.length) : v.data = n(v.data, 0), e.header && v.meta && (v.meta.fields = _), f += i);
|
|
162
|
+
}
|
|
163
|
+
function C() {
|
|
164
|
+
return e.header && _.length === 0;
|
|
165
|
+
}
|
|
166
|
+
function w(e, t, n, r) {
|
|
167
|
+
e = {
|
|
168
|
+
type: e,
|
|
169
|
+
code: t,
|
|
170
|
+
message: n
|
|
171
|
+
}, r !== void 0 && (e.row = r), v.errors.push(e);
|
|
172
|
+
}
|
|
173
|
+
x(e.step) && (i = e.step, e.step = function(t) {
|
|
174
|
+
v = t, C() ? S() : (S(), v.data.length !== 0 && (d += t.data.length, e.preview && d > e.preview ? n.abort() : (v.data = v.data[0], i(v, u))));
|
|
175
|
+
}), this.parse = function(i, a, o) {
|
|
176
|
+
var c = e.quoteChar || "\"", c = (e.newline ||= this.guessLineEndings(i, c), r = !1, e.delimiter ? x(e.delimiter) && (e.delimiter = e.delimiter(i), v.meta.delimiter = e.delimiter) : ((c = ((t, n, r, i, a) => {
|
|
177
|
+
var o, c, l, u;
|
|
178
|
+
a ||= [
|
|
179
|
+
",",
|
|
180
|
+
" ",
|
|
181
|
+
"|",
|
|
182
|
+
";",
|
|
183
|
+
s.RECORD_SEP,
|
|
184
|
+
s.UNIT_SEP
|
|
185
|
+
];
|
|
186
|
+
for (var d = 0; d < a.length; d++) {
|
|
187
|
+
for (var f, p = a[d], m = 0, g = 0, _ = 0, v = (l = void 0, new h({
|
|
188
|
+
comments: i,
|
|
189
|
+
delimiter: p,
|
|
190
|
+
newline: n,
|
|
191
|
+
preview: 10
|
|
192
|
+
}).parse(t)), y = 0; y < v.data.length; y++) r && b(v.data[y]) ? _++ : (f = v.data[y].length, g += f, l === void 0 ? l = f : 0 < f && (m += Math.abs(f - l), l = f));
|
|
193
|
+
0 < v.data.length && (g /= v.data.length - _), (c === void 0 || m <= c) && (u === void 0 || u < g) && 1.99 < g && (c = m, o = p, u = g);
|
|
194
|
+
}
|
|
195
|
+
return {
|
|
196
|
+
successful: !!(e.delimiter = o),
|
|
197
|
+
bestDelimiter: o
|
|
198
|
+
};
|
|
199
|
+
})(i, e.newline, e.skipEmptyLines, e.comments, e.delimitersToGuess)).successful ? e.delimiter = c.bestDelimiter : (r = !0, e.delimiter = s.DefaultDelimiter), v.meta.delimiter = e.delimiter), y(e));
|
|
200
|
+
return e.preview && e.header && c.preview++, t = i, n = new h(c), v = n.parse(t, a, o), S(), p ? { meta: { paused: !0 } } : v || { meta: { paused: !1 } };
|
|
201
|
+
}, this.paused = function() {
|
|
202
|
+
return p;
|
|
203
|
+
}, this.pause = function() {
|
|
204
|
+
p = !0, n.abort(), t = x(e.chunk) ? "" : t.substring(n.getCharIndex());
|
|
205
|
+
}, this.resume = function() {
|
|
206
|
+
u.streamer._halted ? (p = !1, u.streamer.parseChunk(t, !0)) : setTimeout(u.resume, 3);
|
|
207
|
+
}, this.aborted = function() {
|
|
208
|
+
return g;
|
|
209
|
+
}, this.abort = function() {
|
|
210
|
+
g = !0, n.abort(), v.meta.aborted = !0, x(e.complete) && e.complete(v), t = "";
|
|
211
|
+
}, this.guessLineEndings = function(e, t) {
|
|
212
|
+
e = e.substring(0, 1048576);
|
|
213
|
+
var t = RegExp(m(t) + "([^]*?)" + m(t), "gm"), n = (e = e.replace(t, "")).split("\r"), t = e.split("\n"), e = 1 < t.length && t[0].length < n[0].length;
|
|
214
|
+
if (n.length === 1 || e) return "\n";
|
|
215
|
+
for (var r = 0, i = 0; i < n.length; i++) n[i][0] === "\n" && r++;
|
|
216
|
+
return r >= n.length / 2 ? "\r\n" : "\r";
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
// Escapes every regular-expression metacharacter in `e` by prefixing it
// with a backslash, so the result can be embedded in a RegExp source.
function m(e) {
  return e.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => "\\" + metaChar);
}
|
|
222
|
+
// Low-level delimited-text parser; it is published on the export object as
// `Parser` further down in this file.
// Reads delimiter, newline, comments, step, preview, fastMode, quoteChar and
// escapeChar from the config object `e`. A delimiter that is not a string or
// is listed in BAD_DELIMITERS falls back to ","; a newline other than
// "\n", "\r" or "\r\n" falls back to "\n"; `comments === true` means "#".
// Throws if the comment string equals the delimiter.
// Instances expose parse(input, baseIndex, ignoreLastRow), abort() and
// getCharIndex().
function h(e) {
|
|
223
|
+
var t = (e ||= {}).delimiter, n = e.newline, r = e.comments, i = e.step, a = e.preview, o = e.fastMode, c = null, l = !1, u = e.quoteChar == null ? "\"" : e.quoteChar, d = u;
|
|
224
|
+
if (e.escapeChar !== void 0 && (d = e.escapeChar), (typeof t != "string" || -1 < s.BAD_DELIMITERS.indexOf(t)) && (t = ","), r === t) throw Error("Comment character same as delimiter");
|
|
225
|
+
!0 === r ? r = "#" : (typeof r != "string" || -1 < s.BAD_DELIMITERS.indexOf(r)) && (r = !1), n !== "\n" && n !== "\r" && n !== "\r\n" && (n = "\n");
|
|
226
|
+
// f is the character cursor returned by getCharIndex(); p is the abort flag.
var f = 0, p = !1;
|
|
227
|
+
// s: input string. h: offset added to the reported meta.cursor; header-row
// processing in R() only happens when h is falsy. g: when truthy, the final
// (possibly incomplete) row is not emitted.
this.parse = function(s, h, g) {
|
|
228
|
+
if (typeof s != "string") throw Error("Input must be a string");
|
|
229
|
+
// C = rows collected so far, w = errors, T = fields of the row being built,
// E = cursor position recorded after the last completed row.
var _ = s.length, v = t.length, y = n.length, b = r.length, S = x(i), C = [], w = [], T = [], E = f = 0;
|
|
230
|
+
if (!s) return R();
|
|
231
|
+
// Fast path: taken when fastMode is truthy, or when it is not explicitly
// false and the input contains no quote character. Rows and fields are
// produced with plain split() calls.
if (o || !1 !== o && s.indexOf(u) === -1) {
|
|
232
|
+
for (var D = s.split(n), O = 0; O < D.length; O++) {
|
|
233
|
+
if (T = D[O], f += T.length, O !== D.length - 1) f += n.length;
|
|
234
|
+
else if (g) return R();
|
|
235
|
+
// Lines that start with the comment string are skipped.
if (!r || T.substring(0, b) !== r) {
|
|
236
|
+
if (S) {
|
|
237
|
+
if (C = [], P(T.split(t)), z(), p) return R();
|
|
238
|
+
} else P(T.split(t));
|
|
239
|
+
if (a && a <= O) return C = C.slice(0, a), R(!0);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
return R();
|
|
243
|
+
}
|
|
244
|
+
// Quote-aware scan. k, A and M hold the index of the next delimiter, newline
// and quote character at or after the cursor f; j matches an escaped quote
// (escape char followed by quote char) so it can be replaced by a bare quote.
for (var k = s.indexOf(t, f), A = s.indexOf(n, f), j = new RegExp(m(d) + m(u), "g"), M = s.indexOf(u, f);;) if (s[f] === u) for (M = f, f++;;) {
|
|
245
|
+
// No closing quote found: record MissingQuotes (unless g) and finish.
if ((M = s.indexOf(u, M + 1)) === -1) return g || w.push({
|
|
246
|
+
type: "Quotes",
|
|
247
|
+
code: "MissingQuotes",
|
|
248
|
+
message: "Quoted field unterminated",
|
|
249
|
+
row: C.length,
|
|
250
|
+
index: f
|
|
251
|
+
}), I();
|
|
252
|
+
// Closing quote is the very last character of the input.
if (M === _ - 1) return I(s.substring(f, M).replace(j, u));
|
|
253
|
+
// Quote char doubles as escape char and is followed by another one:
// this is an escaped quote, skip over it.
if (u === d && s[M + 1] === d) M++;
|
|
254
|
+
else if (u === d || M === 0 || s[M - 1] !== d) {
|
|
255
|
+
k !== -1 && k < M + 1 && (k = s.indexOf(t, M + 1));
|
|
256
|
+
var N = F((A = A !== -1 && A < M + 1 ? s.indexOf(n, M + 1) : A) === -1 ? k : Math.min(k, A));
|
|
257
|
+
// Closing quote followed (after optional whitespace) by a delimiter.
if (s.substr(M + 1 + N, v) === t) {
|
|
258
|
+
T.push(s.substring(f, M).replace(j, u)), s[f = M + 1 + N + v] !== u && (M = s.indexOf(u, f)), k = s.indexOf(t, f), A = s.indexOf(n, f);
|
|
259
|
+
break;
|
|
260
|
+
}
|
|
261
|
+
// Closing quote followed (after optional whitespace) by a newline.
if (N = F(A), s.substring(M + 1 + N, M + 1 + N + y) === n) {
|
|
262
|
+
if (T.push(s.substring(f, M).replace(j, u)), L(M + 1 + N + y), k = s.indexOf(t, f), M = s.indexOf(u, f), S && (z(), p)) return R();
|
|
263
|
+
if (a && C.length >= a) return R(!0);
|
|
264
|
+
break;
|
|
265
|
+
}
|
|
266
|
+
// Anything else after the quote is reported and scanning continues.
w.push({
|
|
267
|
+
type: "Quotes",
|
|
268
|
+
code: "InvalidQuotes",
|
|
269
|
+
message: "Trailing quote on quoted field is malformed",
|
|
270
|
+
row: C.length,
|
|
271
|
+
index: f
|
|
272
|
+
}), M++;
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
// Comment line at the start of a row: jump to the next line.
else if (r && T.length === 0 && s.substring(f, f + b) === r) {
|
|
276
|
+
if (A === -1) return R();
|
|
277
|
+
f = A + y, A = s.indexOf(n, f), k = s.indexOf(t, f);
|
|
278
|
+
} else if (k !== -1 && (k < A || A === -1)) T.push(s.substring(f, k)), f = k + v, k = s.indexOf(t, f);
|
|
279
|
+
else {
|
|
280
|
+
if (A === -1) break;
|
|
281
|
+
if (T.push(s.substring(f, A)), L(A + y), S && (z(), p)) return R();
|
|
282
|
+
if (a && C.length >= a) return R(!0);
|
|
283
|
+
}
|
|
284
|
+
return I();
|
|
285
|
+
// Appends a finished row to C and records the cursor position in E.
function P(e) {
|
|
286
|
+
C.push(e), E = f;
|
|
287
|
+
}
|
|
288
|
+
// Returns the length of the text between the quote at M and index e when that
// text is non-empty and whitespace-only; otherwise returns 0.
function F(e) {
|
|
289
|
+
var t = 0;
|
|
290
|
+
return t = e !== -1 && (e = s.substring(M + 1, e)) && e.trim() === "" ? e.length : t;
|
|
291
|
+
}
|
|
292
|
+
// End of input: unless g is truthy, pushes e (default: the rest of the input)
// as the last field, closes the row and notifies the step callback.
function I(e) {
|
|
293
|
+
return g || (e === void 0 && (e = s.substring(f)), T.push(e), f = _, P(T), S && z()), R();
|
|
294
|
+
}
|
|
295
|
+
// Closes the current row, moves the cursor to e and looks up the next newline.
function L(e) {
|
|
296
|
+
f = e, P(T), T = [], A = s.indexOf(n, f);
|
|
297
|
+
}
|
|
298
|
+
// Builds the result object { data, errors, meta }; r marks it as truncated.
// Once per parser (flag l), in header mode with a falsy h, the first row is
// passed through transformHeader (if it is a function) and duplicate names
// are renamed to "<name>_<n>", with the renames recorded in c.
function R(r) {
|
|
299
|
+
if (e.header && !h && C.length && !l) {
|
|
300
|
+
var i = C[0], a = Object.create(null), o = new Set(i);
|
|
301
|
+
let t = !1;
|
|
302
|
+
for (let n = 0; n < i.length; n++) {
|
|
303
|
+
let r = i[n];
|
|
304
|
+
if (a[r = x(e.transformHeader) ? e.transformHeader(r, n) : r]) {
|
|
305
|
+
let e, s = a[r];
|
|
306
|
+
// Keep incrementing the suffix until the candidate name is unused.
for (; e = r + "_" + s, s++, o.has(e););
|
|
307
|
+
o.add(e), i[n] = e, a[r]++, t = !0, (c = c === null ? {} : c)[e] = r;
|
|
308
|
+
} else a[r] = 1, i[n] = r;
|
|
309
|
+
o.add(r);
|
|
310
|
+
}
|
|
311
|
+
t && console.warn("Duplicate headers found and renamed."), l = !0;
|
|
312
|
+
}
|
|
313
|
+
return {
|
|
314
|
+
data: C,
|
|
315
|
+
errors: w,
|
|
316
|
+
meta: {
|
|
317
|
+
delimiter: t,
|
|
318
|
+
linebreak: n,
|
|
319
|
+
aborted: p,
|
|
320
|
+
truncated: !!r,
|
|
321
|
+
cursor: E + (h || 0),
|
|
322
|
+
renamedHeaders: c
|
|
323
|
+
}
|
|
324
|
+
};
|
|
325
|
+
}
|
|
326
|
+
// Hands the current results to the step callback, then clears rows and errors.
function z() {
|
|
327
|
+
i(R()), C = [], w = [];
|
|
328
|
+
}
|
|
329
|
+
}, this.abort = function() {
|
|
330
|
+
p = !0;
|
|
331
|
+
}, this.getCharIndex = function() {
|
|
332
|
+
return f;
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
// Message handler attached to each spawned worker. Looks up the worker by
// the `workerId` carried in the message and forwards errors, step rows or
// whole chunks to the user callbacks stored on it, then completes the
// worker once the message is flagged `finished` (unless the user aborted).
function g(e) {
  const message = e.data;
  const worker = a[message.workerId];
  let aborted = false;

  if (message.error) {
    worker.userError(message.error, message.file);
  } else if (message.results && message.results.data) {
    // Handle passed to user callbacks; pause/resume are the "not implemented" stub.
    const handle = {
      abort: function() {
        aborted = true;
        _(message.workerId, {
          data: [],
          errors: [],
          meta: { aborted: true }
        });
      },
      pause: v,
      resume: v
    };

    if (x(worker.userStep)) {
      // Replay the rows one at a time, stopping as soon as the user aborts.
      for (let row = 0; row < message.results.data.length; row++) {
        worker.userStep({
          data: message.results.data[row],
          errors: message.results.errors,
          meta: message.results.meta
        }, handle);
        if (aborted) {
          break;
        }
      }
      delete message.results;
    } else if (x(worker.userChunk)) {
      worker.userChunk(message.results, handle, message.file);
      delete message.results;
    }
  }

  if (message.finished && !aborted) {
    _(message.workerId, message.results);
  }
}
|
|
361
|
+
// Finishes the worker registered under id `e`: calls its userComplete
// callback with `t` (when one is set), terminates the worker and removes it
// from the registry `a`.
function _(e, t) {
  const worker = a[e];
  if (x(worker.userComplete)) {
    worker.userComplete(t);
  }
  worker.terminate();
  delete a[e];
}
|
|
365
|
+
// Stub for operations that are not available; always throws.
function v() {
  const notImplemented = new Error("Not implemented.");
  throw notImplemented;
}
|
|
368
|
+
// Recursively copies arrays and objects; every other value (primitives,
// functions, null) is returned unchanged. Objects are copied into plain
// objects using their enumerable keys as seen by for...in.
function y(e) {
  if (e === null || typeof e !== "object") {
    return e;
  }
  const copy = Array.isArray(e) ? [] : {};
  for (const key in e) {
    copy[key] = y(e[key]);
  }
  return copy;
}
|
|
374
|
+
// Returns a wrapper that invokes `e` with `t` as `this`, forwarding all
// arguments. The wrapper discards the return value of `e`.
function b(e, t) {
  return function(...callArgs) {
    e.apply(t, callArgs);
  };
}
|
|
379
|
+
// True when `e` is callable (typeof reports "function").
function x(e) {
  const kind = typeof e;
  return kind === "function";
}
|
|
382
|
+
return s.parse = function(n, r) {
|
|
383
|
+
var i = (r ||= {}).dynamicTyping || !1;
|
|
384
|
+
if (x(i) && (r.dynamicTypingFunction = i, i = {}), r.dynamicTyping = i, r.transform = !!x(r.transform) && r.transform, !r.worker || !s.WORKERS_SUPPORTED) return i = null, s.NODE_STREAM_INPUT, typeof n == "string" ? (n = ((e) => e.charCodeAt(0) === 65279 ? e.slice(1) : e)(n), i = new (r.download ? l : d)(r)) : !0 === n.readable && x(n.read) && x(n.on) ? i = new f(r) : (t.File && n instanceof File || n instanceof Object) && (i = new u(r)), i.stream(n);
|
|
385
|
+
(i = (() => {
|
|
386
|
+
var n;
|
|
387
|
+
return !!s.WORKERS_SUPPORTED && (n = (() => {
|
|
388
|
+
var n = t.URL || t.webkitURL || null, r = e.toString();
|
|
389
|
+
return s.BLOB_URL ||= n.createObjectURL(new Blob([
|
|
390
|
+
"var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ",
|
|
391
|
+
"(",
|
|
392
|
+
r,
|
|
393
|
+
")();"
|
|
394
|
+
], { type: "text/javascript" }));
|
|
395
|
+
})(), (n = new t.Worker(n)).onmessage = g, n.id = o++, a[n.id] = n);
|
|
396
|
+
})()).userStep = r.step, i.userChunk = r.chunk, i.userComplete = r.complete, i.userError = r.error, r.step = x(r.step), r.chunk = x(r.chunk), r.complete = x(r.complete), r.error = x(r.error), delete r.worker, i.postMessage({
|
|
397
|
+
input: n,
|
|
398
|
+
config: r,
|
|
399
|
+
workerId: i.id
|
|
400
|
+
});
|
|
401
|
+
}, s.unparse = function(e, t) {
|
|
402
|
+
var n = !1, r = !0, i = ",", a = "\r\n", o = "\"", c = o + o, l = !1, u = null, d = !1, f = ((() => {
|
|
403
|
+
if (typeof t == "object") {
|
|
404
|
+
if (typeof t.delimiter != "string" || s.BAD_DELIMITERS.filter(function(e) {
|
|
405
|
+
return t.delimiter.indexOf(e) !== -1;
|
|
406
|
+
}).length || (i = t.delimiter), typeof t.quotes != "boolean" && typeof t.quotes != "function" && !Array.isArray(t.quotes) || (n = t.quotes), typeof t.skipEmptyLines != "boolean" && typeof t.skipEmptyLines != "string" || (l = t.skipEmptyLines), typeof t.newline == "string" && (a = t.newline), typeof t.quoteChar == "string" && (o = t.quoteChar), typeof t.header == "boolean" && (r = t.header), Array.isArray(t.columns)) {
|
|
407
|
+
if (t.columns.length === 0) throw Error("Option columns is empty");
|
|
408
|
+
u = t.columns;
|
|
409
|
+
}
|
|
410
|
+
t.escapeChar !== void 0 && (c = t.escapeChar + o), t.escapeFormulae instanceof RegExp ? d = t.escapeFormulae : typeof t.escapeFormulae == "boolean" && t.escapeFormulae && (d = /^[=+\-@\t\r].*$/);
|
|
411
|
+
}
|
|
412
|
+
})(), new RegExp(m(o), "g"));
|
|
413
|
+
if (typeof e == "string" && (e = JSON.parse(e)), Array.isArray(e)) {
|
|
414
|
+
if (!e.length || Array.isArray(e[0])) return p(null, e, l);
|
|
415
|
+
if (typeof e[0] == "object") return p(u || Object.keys(e[0]), e, l);
|
|
416
|
+
} else if (typeof e == "object") return typeof e.data == "string" && (e.data = JSON.parse(e.data)), Array.isArray(e.data) && (e.fields ||= e.meta && e.meta.fields || u, e.fields ||= Array.isArray(e.data[0]) ? e.fields : typeof e.data[0] == "object" ? Object.keys(e.data[0]) : [], Array.isArray(e.data[0]) || typeof e.data[0] == "object" || (e.data = [e.data])), p(e.fields || [], e.data || [], l);
|
|
417
|
+
throw Error("Unable to serialize unrecognized input");
|
|
418
|
+
function p(e, t, n) {
|
|
419
|
+
var o = "", s = (typeof e == "string" && (e = JSON.parse(e)), typeof t == "string" && (t = JSON.parse(t)), Array.isArray(e) && 0 < e.length), c = !Array.isArray(t[0]);
|
|
420
|
+
if (s && r) {
|
|
421
|
+
for (var l = 0; l < e.length; l++) 0 < l && (o += i), o += h(e[l], l);
|
|
422
|
+
0 < t.length && (o += a);
|
|
423
|
+
}
|
|
424
|
+
for (var u = 0; u < t.length; u++) {
|
|
425
|
+
var d = (s ? e : t[u]).length, f = !1, p = s ? Object.keys(t[u]).length === 0 : t[u].length === 0;
|
|
426
|
+
if (n && !s && (f = n === "greedy" ? t[u].join("").trim() === "" : t[u].length === 1 && t[u][0].length === 0), n === "greedy" && s) {
|
|
427
|
+
for (var m = [], g = 0; g < d; g++) {
|
|
428
|
+
var _ = c ? e[g] : g;
|
|
429
|
+
m.push(t[u][_]);
|
|
430
|
+
}
|
|
431
|
+
f = m.join("").trim() === "";
|
|
432
|
+
}
|
|
433
|
+
if (!f) {
|
|
434
|
+
for (var v = 0; v < d; v++) {
|
|
435
|
+
0 < v && !p && (o += i);
|
|
436
|
+
var y = s && c ? e[v] : v;
|
|
437
|
+
o += h(t[u][y], v);
|
|
438
|
+
}
|
|
439
|
+
u < t.length - 1 && (!n || 0 < d && !p) && (o += a);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
return o;
|
|
443
|
+
}
|
|
444
|
+
function h(e, t) {
|
|
445
|
+
var r, a;
|
|
446
|
+
return e == null ? "" : e.constructor === Date ? JSON.stringify(e).slice(1, 25) : (a = !1, d && typeof e == "string" && d.test(e) && (e = "'" + e, a = !0), r = e.toString().replace(f, c), (a = a || !0 === n || typeof n == "function" && n(e, t) || Array.isArray(n) && n[t] || ((e, t) => {
|
|
447
|
+
for (var n = 0; n < t.length; n++) if (-1 < e.indexOf(t[n])) return !0;
|
|
448
|
+
return !1;
|
|
449
|
+
})(r, s.BAD_DELIMITERS) || -1 < r.indexOf(i) || r.charAt(0) === " " || r.charAt(r.length - 1) === " ") ? o + r + o : r);
|
|
450
|
+
}
|
|
451
|
+
}, s.RECORD_SEP = "", s.UNIT_SEP = "", s.BYTE_ORDER_MARK = "", s.BAD_DELIMITERS = [
|
|
452
|
+
"\r",
|
|
453
|
+
"\n",
|
|
454
|
+
"\"",
|
|
455
|
+
s.BYTE_ORDER_MARK
|
|
456
|
+
], s.WORKERS_SUPPORTED = !r && !!t.Worker, s.NODE_STREAM_INPUT = 1, s.LocalChunkSize = 10485760, s.RemoteChunkSize = 5242880, s.DefaultDelimiter = ",", s.Parser = h, s.ParserHandle = p, s.NetworkStreamer = l, s.FileStreamer = u, s.StringStreamer = d, s.ReadableStreamStreamer = f, t.jQuery && ((n = t.jQuery).fn.parse = function(e) {
|
|
457
|
+
var r = e.config || {}, i = [];
|
|
458
|
+
return this.each(function(e) {
|
|
459
|
+
if (!(n(this).prop("tagName").toUpperCase() === "INPUT" && n(this).attr("type").toLowerCase() === "file" && t.FileReader) || !this.files || this.files.length === 0) return !0;
|
|
460
|
+
for (var a = 0; a < this.files.length; a++) i.push({
|
|
461
|
+
file: this.files[a],
|
|
462
|
+
inputElem: this,
|
|
463
|
+
instanceConfig: n.extend({}, r)
|
|
464
|
+
});
|
|
465
|
+
}), a(), this;
|
|
466
|
+
function a() {
|
|
467
|
+
if (i.length === 0) x(e.complete) && e.complete();
|
|
468
|
+
else {
|
|
469
|
+
var t, r, a, c, l = i[0];
|
|
470
|
+
if (x(e.before)) {
|
|
471
|
+
var u = e.before(l.file, l.inputElem);
|
|
472
|
+
if (typeof u == "object") {
|
|
473
|
+
if (u.action === "abort") return t = "AbortError", r = l.file, a = l.inputElem, c = u.reason, void (x(e.error) && e.error({ name: t }, r, a, c));
|
|
474
|
+
if (u.action === "skip") return void o();
|
|
475
|
+
typeof u.config == "object" && (l.instanceConfig = n.extend(l.instanceConfig, u.config));
|
|
476
|
+
} else if (u === "skip") return void o();
|
|
477
|
+
}
|
|
478
|
+
var d = l.instanceConfig.complete;
|
|
479
|
+
l.instanceConfig.complete = function(e) {
|
|
480
|
+
x(d) && d(e, l.file, l.inputElem), o();
|
|
481
|
+
}, s.parse(l.file, l.instanceConfig);
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
function o() {
|
|
485
|
+
i.splice(0, 1), a();
|
|
486
|
+
}
|
|
487
|
+
}), i && (t.onmessage = function(e) {
|
|
488
|
+
e = e.data, s.WORKER_ID === void 0 && e && (s.WORKER_ID = e.workerId), typeof e.input == "string" ? t.postMessage({
|
|
489
|
+
workerId: s.WORKER_ID,
|
|
490
|
+
results: s.parse(e.input, e.config),
|
|
491
|
+
finished: !0
|
|
492
|
+
}) : (t.File && e.input instanceof File || e.input instanceof Object) && (e = s.parse(e.input, e.config)) && t.postMessage({
|
|
493
|
+
workerId: s.WORKER_ID,
|
|
494
|
+
results: e,
|
|
495
|
+
finished: !0
|
|
496
|
+
});
|
|
497
|
+
}), (l.prototype = Object.create(c.prototype)).constructor = l, (u.prototype = Object.create(c.prototype)).constructor = u, (d.prototype = Object.create(d.prototype)).constructor = d, (f.prototype = Object.create(c.prototype)).constructor = f, s;
|
|
498
|
+
});
|
|
499
|
+
})))(), 1), o = /* @__PURE__ */ e(i(), 1);
|
|
500
|
+
/**
 * Find the position of column `t` in the row `e`, ignoring letter case.
 * Falls back to index 0 when no cell matches.
 */
function s(e, t) {
  for (let position = 0; position < e.length; position++) {
    if (e[position].toLowerCase() === t.toLowerCase()) {
      return position;
    }
  }
  return 0;
}
|
|
504
|
+
/**
 * Heuristic used to guess whether a row is a header row: true when every
 * cell is shorter than 64 characters.
 */
function c(e) {
  const hasLongCell = e.some((cell) => !(cell.length < 64));
  return !hasLongCell;
}
|
|
507
|
+
/**
 * Return the extension of a file name: the text after the last ".",
 * lower-cased so that "DATA.CSV" and "data.csv" are treated alike.
 *
 * A name without any "." has no extension and yields "" (previously the
 * whole name was returned, so a file literally named "csv" was mistaken
 * for a CSV file).
 *
 * @param {string} e - File name.
 * @returns {string} Lower-case extension without the dot, or "".
 */
function l(e) {
  const lastDot = e.lastIndexOf(".");
  if (lastDot === -1) {
    return "";
  }
  return e.slice(lastDot + 1).toLowerCase();
}
|
|
510
|
+
/**
 * Map a file name to a MIME type based on its extension (as reported by
 * `l`). Extensions that are not listed yield "unknown".
 */
function u(e) {
  const mimeByExtension = new Map([
    ["json", "application/json"],
    ["jsonl", "application/jsonl"],
    ["parquet", "application/parquet"],
    ["csv", "text/csv"],
    ["txt", "text/plain"],
    ["pdf", "application/pdf"],
    ["docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
    ["zip", "application/zip"]
  ]);
  return mimeByExtension.get(l(e)) ?? "unknown";
}
|
|
523
|
+
/**
 * Check whether `e` looks like a conversation: an array whose first element
 * is an object carrying string `role` and `content` properties. Only the
 * first element is inspected.
 */
function d(e) {
  if (!Array.isArray(e)) {
    return false;
  }
  const first = e[0];
  if (typeof first !== "object" || first === null) {
    return false;
  }
  if (!("role" in first) || !("content" in first)) {
    return false;
  }
  return typeof first.role === "string" && typeof first.content === "string";
}
|
|
528
|
+
/**
 * Load a file and convert it into a list of conversations, each conversation
 * being an array of `{ role, content }` messages.
 *
 * The format is chosen from `e.type`, or from the file name (via `u`) when
 * `e.type` is the empty string:
 * - JSON: must be an array; each item becomes one single-message
 *   conversation with role "text".
 * - JSONL: one conversation per non-blank line; a line that already parses
 *   to a conversation (per `d`) is used as is.
 * - ZIP: every file entry is loaded recursively and the results are
 *   concatenated.
 * - CSV: one conversation per row, taken from the column named
 *   `t.column` (default "text", falling back to the first column).
 * - plain text: one conversation holding the whole file.
 * - PDF / DOCX: delegated to the loaders `n` and `r` defined elsewhere in
 *   this module; their result is returned unchanged.
 *
 * @param {File} e - File (or File-like object with `type`, `name`, `text()`).
 * @param {object} [t] - Options; `maxSize`, `column` and `hasHeader` are read.
 * @param {(fraction: number) => void} [i] - Optional progress callback,
 *   called with values between 0.1 and 1.
 * @returns {Promise<Array>} The loaded conversations.
 * @throws {Error} For Parquet files, unsupported types, JSON that is not an
 *   array, and CSV input that the parser reports errors for.
 */
async function f(e, t, i) {
  // Wrap one parsed JSON value in a single-message "text" conversation.
  const toTextConversation = (value) => {
    let content;
    if (typeof value === "string") {
      content = value;
    } else if (typeof value === "object" && value !== null && "text" in value) {
      // The `in` operator throws on primitives, hence the object guard above.
      content = value.text;
    } else {
      // Numbers, booleans, null and objects without `text` are kept as JSON text.
      content = JSON.stringify(value);
    }
    return [{
      role: "text",
      content
    }];
  };

  // When the file carries no MIME type, derive one from its name.
  const mimeType = e.type === "" ? u(e.name) : e.type;

  if (mimeType === "application/parquet") {
    throw new Error("Parquet loading is not currently supported in the browser. Please convert your data to JSONL format.");
  }
  if (mimeType === "application/pdf") {
    return n(e, t?.maxSize);
  }
  if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
    return r(e);
  }

  if (mimeType === "application/json") {
    const parsed = JSON.parse(await e.text());
    if (!Array.isArray(parsed)) {
      throw new Error("Expected JSON array");
    }
    return parsed.map(toTextConversation);
  }

  if (mimeType === "application/jsonl") {
    const rawText = await e.text();
    if (i) {
      i(.1);
    }
    return rawText.split("\n").filter((line) => line.trim() !== "").map((line, index, lines) => {
      if (i && index % 1e3 === 0) {
        i(.1 + index / lines.length * .9);
      }
      const rawLine = [{
        role: "text",
        content: line
      }];
      try {
        const value = JSON.parse(line);
        if (d(value)) {
          return value;
        }
        // A line holding a bare number, boolean or null is kept verbatim,
        // exactly like a line that is not valid JSON.
        if (typeof value !== "string" && (typeof value !== "object" || value === null)) {
          return rawLine;
        }
        return toTextConversation(value);
      } catch {
        return rawLine;
      }
    });
  }

  if (mimeType === "application/zip") {
    const archive = await o.default.loadAsync(e);
    const entryNames = Object.keys(archive.files);
    let conversations = [];
    for (let index = 0; index < entryNames.length; index++) {
      const entryName = entryNames[index];
      const entry = archive.file(entryName);
      if (entry) {
        const blob = await entry.async("blob", (meta) => {
          // Progress combines the share of finished entries with this entry's own progress.
          if (i) {
            i(.1 + (meta.percent / 100 * .9 / entryNames.length + index / entryNames.length * .9));
          }
        });
        const loaded = await f(new File([blob], entryName), t);
        if (i) {
          i(.1 + (index + 1) / entryNames.length * .9);
        }
        conversations = conversations.concat(loaded);
      }
    }
    return conversations;
  }

  if (mimeType === "text/csv") {
    const csvText = await e.text();
    if (i) {
      i(.1);
    }
    return new Promise((resolve, reject) => {
      a.default.parse(csvText, {
        header: !1,
        skipEmptyLines: !0,
        delimiter: ",",
        complete: (result) => {
          if (result.errors.length > 0) {
            console.error(result.errors);
            reject(new Error("Error parsing file"));
            return;
          }
          // An empty file has no first row to inspect; report "no conversations"
          // instead of failing on the missing row.
          if (result.data.length === 0) {
            resolve([]);
            return;
          }
          const firstRow = result.data[0];
          const columnIndex = s(firstRow, t?.column || "text");
          // Without an explicit option, guess from the first row whether it is a header.
          const skipHeader = t?.hasHeader ?? c(firstRow);
          const rows = skipHeader ? result.data.slice(1) : result.data;
          resolve(rows.map((row) => [{
            role: "text",
            content: row[columnIndex]
          }]));
        },
        error: (parseError) => {
          reject(parseError);
        }
      });
    });
  }

  if (mimeType === "text/plain") {
    return [[{
      role: "text",
      content: await e.text()
    }]];
  }

  throw new Error(`Unsupported file type: ${mimeType}`);
}
|
|
612
|
+
//#endregion
|
|
613
|
+
export { f as default };
|