@genai-fi/nanogpt 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -10
- package/dist/Generator.d.ts +0 -82
- package/dist/Generator.js +0 -11941
- package/dist/RealDiv-CGwv0liw.js +0 -365
- package/dist/Reshape-BW__R4mZ.js +0 -79
- package/dist/Reshape-CPBkTIH2.js +0 -14
- package/dist/TeachableLLM.d.ts +0 -70
- package/dist/TeachableLLM.js +0 -273
- package/dist/Trainer.d.ts +0 -43
- package/dist/Trainer.js +0 -244
- package/dist/_commonjsHelpers-ByX85dGu.js +0 -33
- package/dist/axis_util-GTVlo58H.js +0 -55
- package/dist/backend.d.ts +0 -2
- package/dist/backend.js +0 -13
- package/dist/backend_util-GaFarB78.js +0 -425
- package/dist/backend_webgpu-BqASlsbV.js +0 -545
- package/dist/binary_op_util-pKXltfxI.js +0 -192
- package/dist/broadcast_to-eS93CCN_.js +0 -28
- package/dist/checks/appendCache.d.ts +0 -1
- package/dist/checks/appendCache.js +0 -22
- package/dist/checks/attentionMask.d.ts +0 -1
- package/dist/checks/attentionMask.js +0 -37
- package/dist/checks/check.d.ts +0 -9
- package/dist/checks/check.js +0 -20
- package/dist/checks/gelu.d.ts +0 -1
- package/dist/checks/gelu.js +0 -18
- package/dist/checks/index.d.ts +0 -26
- package/dist/checks/index.js +0 -28
- package/dist/checks/matMulGelu.d.ts +0 -1
- package/dist/checks/matMulGelu.js +0 -28
- package/dist/checks/normRMS.d.ts +0 -1
- package/dist/checks/normRMS.js +0 -16
- package/dist/checks/normRMSGrad.d.ts +0 -1
- package/dist/checks/normRMSGrad.js +0 -12
- package/dist/checks/packUnpack.d.ts +0 -1
- package/dist/checks/packUnpack.js +0 -18
- package/dist/checks/qkv.d.ts +0 -1
- package/dist/checks/qkv.js +0 -34
- package/dist/checks/rope.d.ts +0 -1
- package/dist/checks/rope.js +0 -36
- package/dist/checks/weights.d.ts +0 -14
- package/dist/checks/weights.js +0 -31
- package/dist/clip_by_value-DDA7rrcT.js +0 -12
- package/dist/complex-DI35Q-gW.js +0 -11
- package/dist/complex_util-Yc1A_gV1.js +0 -55
- package/dist/concat-CAQpCret.js +0 -17
- package/dist/concat_util-D18dJ4fD.js +0 -22
- package/dist/data/docx.d.ts +0 -2
- package/dist/data/docx.js +0 -15
- package/dist/data/parquet.d.ts +0 -2
- package/dist/data/parquet.js +0 -17
- package/dist/data/pdf.d.ts +0 -2
- package/dist/data/pdf.js +0 -14
- package/dist/data/textLoader.d.ts +0 -7
- package/dist/data/textLoader.js +0 -108
- package/dist/dataset-CGGp1z9P.js +0 -1124
- package/dist/dropout_util--NxWuYg2.js +0 -27
- package/dist/expand_dims-Bkd1YD5x.js +0 -11
- package/dist/exports_initializers-CYzKLjN7.js +0 -7
- package/dist/floor-BQtb-Azg.js +0 -9
- package/dist/gather-qIqEqaGn.js +0 -9
- package/dist/gelu-B220X1Go.js +0 -26
- package/dist/gpgpu_math-BwvV12df.js +0 -2022
- package/dist/index-CUXkjxiT.js +0 -3516
- package/dist/index-CieiGp4Y.js +0 -349
- package/dist/index-CjOWnMXP.js +0 -7308
- package/dist/index-Cp39cXWe.js +0 -1016
- package/dist/index-D5v913EJ.js +0 -4
- package/dist/index-DmeWGGmS.js +0 -1074
- package/dist/index-DvYrXKkX.js +0 -113
- package/dist/index-Ksja3su6.js +0 -151
- package/dist/index-xuotMAFm.js +0 -118
- package/dist/inference/types.d.ts +0 -16
- package/dist/inference/types.js +0 -1
- package/dist/jszip.min-BZhlzntC.js +0 -2313
- package/dist/kernel_funcs_utils-pq0CK9co.js +0 -306
- package/dist/layers/BaseLayer.d.ts +0 -44
- package/dist/layers/BaseLayer.js +0 -74
- package/dist/layers/CausalSelfAttention.d.ts +0 -39
- package/dist/layers/CausalSelfAttention.js +0 -86
- package/dist/layers/LoRA.d.ts +0 -14
- package/dist/layers/LoRA.js +0 -58
- package/dist/layers/MLP.d.ts +0 -17
- package/dist/layers/MLP.js +0 -44
- package/dist/layers/PositionEmbedding.d.ts +0 -8
- package/dist/layers/PositionEmbedding.js +0 -31
- package/dist/layers/RMSNorm.d.ts +0 -12
- package/dist/layers/RMSNorm.js +0 -22
- package/dist/layers/RoPECache.d.ts +0 -18
- package/dist/layers/RoPECache.js +0 -50
- package/dist/layers/TiedEmbedding.d.ts +0 -13
- package/dist/layers/TiedEmbedding.js +0 -36
- package/dist/layers/TransformerBlock.d.ts +0 -27
- package/dist/layers/TransformerBlock.js +0 -40
- package/dist/layers/WeightStore.d.ts +0 -20
- package/dist/layers/WeightStore.js +0 -76
- package/dist/loader/load.d.ts +0 -6
- package/dist/loader/load.js +0 -68
- package/dist/loader/loadHF.d.ts +0 -8
- package/dist/loader/loadHF.js +0 -22
- package/dist/loader/loadTransformers.d.ts +0 -4
- package/dist/loader/loadTransformers.js +0 -44
- package/dist/loader/loadZipMeta.d.ts +0 -3
- package/dist/loader/loadZipMeta.js +0 -16
- package/dist/loader/newZipLoad.d.ts +0 -3
- package/dist/loader/newZipLoad.js +0 -31
- package/dist/loader/oldZipLoad.d.ts +0 -9
- package/dist/loader/oldZipLoad.js +0 -80
- package/dist/loader/save.d.ts +0 -16
- package/dist/loader/save.js +0 -90
- package/dist/loader/types.d.ts +0 -67
- package/dist/loader/types.js +0 -1
- package/dist/main.d.ts +0 -50
- package/dist/main.js +0 -109
- package/dist/matMul16-BcVC_E62.js +0 -80
- package/dist/matMulGelu-JNLZqKQp.js +0 -163
- package/dist/mat_mul-DhG0Newp.js +0 -11
- package/dist/mod-CSdCpRjf.js +0 -11
- package/dist/models/NanoGPTV1.d.ts +0 -16
- package/dist/models/NanoGPTV1.js +0 -99
- package/dist/models/NanoGPTV2.d.ts +0 -16
- package/dist/models/NanoGPTV2.js +0 -90
- package/dist/models/config.d.ts +0 -27
- package/dist/models/config.js +0 -50
- package/dist/models/factory.d.ts +0 -3
- package/dist/models/factory.js +0 -16
- package/dist/models/model.d.ts +0 -44
- package/dist/models/model.js +0 -134
- package/dist/non_max_suppression_impl-B2W7YjZB.js +0 -102
- package/dist/not_equal-hurPF26l.js +0 -64
- package/dist/ones-BytntneX.js +0 -14
- package/dist/ops/adamAdjust.d.ts +0 -2
- package/dist/ops/adamAdjust.js +0 -9
- package/dist/ops/adamMoments.d.ts +0 -2
- package/dist/ops/adamMoments.js +0 -9
- package/dist/ops/add16.d.ts +0 -2
- package/dist/ops/add16.js +0 -9
- package/dist/ops/appendCache.d.ts +0 -2
- package/dist/ops/appendCache.js +0 -22
- package/dist/ops/attentionMask.d.ts +0 -2
- package/dist/ops/attentionMask.js +0 -10
- package/dist/ops/concat16.d.ts +0 -2
- package/dist/ops/concat16.js +0 -9
- package/dist/ops/cpu/adamAdjust.d.ts +0 -1
- package/dist/ops/cpu/adamAdjust.js +0 -18
- package/dist/ops/cpu/adamMoments.d.ts +0 -1
- package/dist/ops/cpu/adamMoments.js +0 -16
- package/dist/ops/cpu/appendCache.d.ts +0 -1
- package/dist/ops/cpu/appendCache.js +0 -23
- package/dist/ops/cpu/attentionMask.d.ts +0 -1
- package/dist/ops/cpu/attentionMask.js +0 -22
- package/dist/ops/cpu/fusedSoftmax.d.ts +0 -9
- package/dist/ops/cpu/fusedSoftmax.js +0 -29
- package/dist/ops/cpu/gatherSub.d.ts +0 -1
- package/dist/ops/cpu/gatherSub.js +0 -18
- package/dist/ops/cpu/gelu.d.ts +0 -1
- package/dist/ops/cpu/gelu.js +0 -40
- package/dist/ops/cpu/matMul16.d.ts +0 -1
- package/dist/ops/cpu/matMul16.js +0 -15
- package/dist/ops/cpu/matMulGelu.d.ts +0 -1
- package/dist/ops/cpu/matMulGelu.js +0 -53
- package/dist/ops/cpu/matMulMul.d.ts +0 -1
- package/dist/ops/cpu/matMulMul.js +0 -23
- package/dist/ops/cpu/mulDropout.d.ts +0 -1
- package/dist/ops/cpu/mulDropout.js +0 -23
- package/dist/ops/cpu/normRMS.d.ts +0 -1
- package/dist/ops/cpu/normRMS.js +0 -39
- package/dist/ops/cpu/qkv.d.ts +0 -5
- package/dist/ops/cpu/qkv.js +0 -41
- package/dist/ops/cpu/rope.d.ts +0 -6
- package/dist/ops/cpu/rope.js +0 -38
- package/dist/ops/cpu/scatterSub.d.ts +0 -1
- package/dist/ops/cpu/scatterSub.js +0 -23
- package/dist/ops/dot16.d.ts +0 -2
- package/dist/ops/dot16.js +0 -42
- package/dist/ops/dropout.d.ts +0 -2
- package/dist/ops/dropout.js +0 -14
- package/dist/ops/dropout16.d.ts +0 -2
- package/dist/ops/dropout16.js +0 -25
- package/dist/ops/gatherSub.d.ts +0 -2
- package/dist/ops/gatherSub.js +0 -9
- package/dist/ops/gelu.d.ts +0 -3
- package/dist/ops/gelu.js +0 -8
- package/dist/ops/globalNorm.d.ts +0 -2
- package/dist/ops/globalNorm.js +0 -13
- package/dist/ops/grads/add16.d.ts +0 -1
- package/dist/ops/grads/add16.js +0 -26
- package/dist/ops/grads/attentionMask.d.ts +0 -1
- package/dist/ops/grads/attentionMask.js +0 -21
- package/dist/ops/grads/dropout16.d.ts +0 -1
- package/dist/ops/grads/dropout16.js +0 -2
- package/dist/ops/grads/gelu.d.ts +0 -2
- package/dist/ops/grads/gelu.js +0 -5
- package/dist/ops/grads/matMul16.d.ts +0 -2
- package/dist/ops/grads/matMul16.js +0 -9
- package/dist/ops/grads/matMulGelu.d.ts +0 -1
- package/dist/ops/grads/matMulGelu.js +0 -17
- package/dist/ops/grads/mul16.d.ts +0 -1
- package/dist/ops/grads/mul16.js +0 -4
- package/dist/ops/grads/normRMS.d.ts +0 -3
- package/dist/ops/grads/normRMS.js +0 -33
- package/dist/ops/grads/pack16.d.ts +0 -2
- package/dist/ops/grads/pack16.js +0 -6
- package/dist/ops/grads/qkv.d.ts +0 -3
- package/dist/ops/grads/qkv.js +0 -34
- package/dist/ops/grads/rope.d.ts +0 -2
- package/dist/ops/grads/rope.js +0 -5
- package/dist/ops/grads/softmax16.d.ts +0 -2
- package/dist/ops/grads/softmax16.js +0 -25
- package/dist/ops/grads/unpack16.d.ts +0 -2
- package/dist/ops/grads/unpack16.js +0 -5
- package/dist/ops/grads/utils.d.ts +0 -4
- package/dist/ops/grads/utils.js +0 -14
- package/dist/ops/log.d.ts +0 -0
- package/dist/ops/log.js +0 -1
- package/dist/ops/matMul16.d.ts +0 -15
- package/dist/ops/matMul16.js +0 -13
- package/dist/ops/matMulGelu.d.ts +0 -3
- package/dist/ops/matMulGelu.js +0 -14
- package/dist/ops/matMulMul.d.ts +0 -2
- package/dist/ops/matMulMul.js +0 -9
- package/dist/ops/mul16.d.ts +0 -2
- package/dist/ops/mul16.js +0 -39
- package/dist/ops/mulDrop.d.ts +0 -2
- package/dist/ops/mulDrop.js +0 -9
- package/dist/ops/normRMS.d.ts +0 -2
- package/dist/ops/normRMS.js +0 -19
- package/dist/ops/pack16.d.ts +0 -2
- package/dist/ops/pack16.js +0 -5
- package/dist/ops/qkv.d.ts +0 -2
- package/dist/ops/qkv.js +0 -10
- package/dist/ops/reshape16.d.ts +0 -2
- package/dist/ops/reshape16.js +0 -41
- package/dist/ops/rope.d.ts +0 -3
- package/dist/ops/rope.js +0 -7
- package/dist/ops/scatterSub.d.ts +0 -2
- package/dist/ops/scatterSub.js +0 -9
- package/dist/ops/slice16.d.ts +0 -2
- package/dist/ops/slice16.js +0 -9
- package/dist/ops/softmax16.d.ts +0 -2
- package/dist/ops/softmax16.js +0 -9
- package/dist/ops/sub16.d.ts +0 -2
- package/dist/ops/sub16.js +0 -8
- package/dist/ops/sum16.d.ts +0 -2
- package/dist/ops/sum16.js +0 -13
- package/dist/ops/transpose16.d.ts +0 -3
- package/dist/ops/transpose16.js +0 -40
- package/dist/ops/unpack16.d.ts +0 -2
- package/dist/ops/unpack16.js +0 -6
- package/dist/ops/webgl/adamAdjust.d.ts +0 -1
- package/dist/ops/webgl/adamAdjust.js +0 -49
- package/dist/ops/webgl/adamMoments.d.ts +0 -1
- package/dist/ops/webgl/adamMoments.js +0 -40
- package/dist/ops/webgl/appendCache.d.ts +0 -1
- package/dist/ops/webgl/appendCache.js +0 -44
- package/dist/ops/webgl/attentionMask.d.ts +0 -1
- package/dist/ops/webgl/attentionMask.js +0 -45
- package/dist/ops/webgl/dropout16.d.ts +0 -1
- package/dist/ops/webgl/dropout16.js +0 -11
- package/dist/ops/webgl/fusedSoftmax.d.ts +0 -11
- package/dist/ops/webgl/fusedSoftmax.js +0 -80
- package/dist/ops/webgl/gatherSub.d.ts +0 -1
- package/dist/ops/webgl/gatherSub.js +0 -27
- package/dist/ops/webgl/gelu.d.ts +0 -2
- package/dist/ops/webgl/gelu.js +0 -50
- package/dist/ops/webgl/log.d.ts +0 -17
- package/dist/ops/webgl/log.js +0 -23
- package/dist/ops/webgl/matMul16.d.ts +0 -1
- package/dist/ops/webgl/matMul16.js +0 -45
- package/dist/ops/webgl/matMulGelu.d.ts +0 -21
- package/dist/ops/webgl/matMulGelu.js +0 -9
- package/dist/ops/webgl/matMulMul.d.ts +0 -14
- package/dist/ops/webgl/matMulMul.js +0 -28
- package/dist/ops/webgl/mulDropout.d.ts +0 -1
- package/dist/ops/webgl/mulDropout.js +0 -41
- package/dist/ops/webgl/normRMS.d.ts +0 -1
- package/dist/ops/webgl/normRMS.js +0 -93
- package/dist/ops/webgl/qkv.d.ts +0 -1
- package/dist/ops/webgl/qkv.js +0 -46
- package/dist/ops/webgl/rope.d.ts +0 -1
- package/dist/ops/webgl/rope.js +0 -56
- package/dist/ops/webgl/scatterSub.d.ts +0 -1
- package/dist/ops/webgl/scatterSub.js +0 -27
- package/dist/ops/webgpu/adamAdjust.d.ts +0 -1
- package/dist/ops/webgpu/adamAdjust.js +0 -57
- package/dist/ops/webgpu/adamMoments.d.ts +0 -1
- package/dist/ops/webgpu/adamMoments.js +0 -60
- package/dist/ops/webgpu/add16.d.ts +0 -1
- package/dist/ops/webgpu/add16.js +0 -13
- package/dist/ops/webgpu/appendCache.d.ts +0 -1
- package/dist/ops/webgpu/appendCache.js +0 -105
- package/dist/ops/webgpu/attentionMask.d.ts +0 -1
- package/dist/ops/webgpu/attentionMask.js +0 -26
- package/dist/ops/webgpu/attentionMask32_program.d.ts +0 -19
- package/dist/ops/webgpu/attentionMask32_program.js +0 -54
- package/dist/ops/webgpu/clipScale.d.ts +0 -1
- package/dist/ops/webgpu/clipScale.js +0 -58
- package/dist/ops/webgpu/concat16.d.ts +0 -19
- package/dist/ops/webgpu/concat16.js +0 -126
- package/dist/ops/webgpu/dropout16.d.ts +0 -1
- package/dist/ops/webgpu/dropout16.js +0 -51
- package/dist/ops/webgpu/gatherSub.d.ts +0 -1
- package/dist/ops/webgpu/gatherSub.js +0 -39
- package/dist/ops/webgpu/gelu.d.ts +0 -14
- package/dist/ops/webgpu/gelu.js +0 -141
- package/dist/ops/webgpu/index.d.ts +0 -0
- package/dist/ops/webgpu/index.js +0 -26
- package/dist/ops/webgpu/matMul16.d.ts +0 -1
- package/dist/ops/webgpu/matMul16.js +0 -65
- package/dist/ops/webgpu/matMul16_program.d.ts +0 -42
- package/dist/ops/webgpu/matMul16_program.js +0 -343
- package/dist/ops/webgpu/mul16.d.ts +0 -1
- package/dist/ops/webgpu/mul16.js +0 -13
- package/dist/ops/webgpu/norm2.d.ts +0 -1
- package/dist/ops/webgpu/norm2.js +0 -76
- package/dist/ops/webgpu/normRMS.d.ts +0 -1
- package/dist/ops/webgpu/normRMS.js +0 -34
- package/dist/ops/webgpu/normRMS16_program.d.ts +0 -10
- package/dist/ops/webgpu/normRMS16_program.js +0 -25
- package/dist/ops/webgpu/normRMS32_program.d.ts +0 -10
- package/dist/ops/webgpu/normRMS32_program.js +0 -25
- package/dist/ops/webgpu/normRMSGrad.d.ts +0 -1
- package/dist/ops/webgpu/normRMSGrad.js +0 -284
- package/dist/ops/webgpu/pack16.d.ts +0 -1
- package/dist/ops/webgpu/pack16.js +0 -18
- package/dist/ops/webgpu/pack16_program.d.ts +0 -19
- package/dist/ops/webgpu/pack16_program.js +0 -92
- package/dist/ops/webgpu/qkv.d.ts +0 -1
- package/dist/ops/webgpu/qkv.js +0 -24
- package/dist/ops/webgpu/rope.d.ts +0 -1
- package/dist/ops/webgpu/rope.js +0 -135
- package/dist/ops/webgpu/scatterSub.d.ts +0 -1
- package/dist/ops/webgpu/scatterSub.js +0 -40
- package/dist/ops/webgpu/slice16.d.ts +0 -7
- package/dist/ops/webgpu/slice16.js +0 -69
- package/dist/ops/webgpu/softmax16.d.ts +0 -17
- package/dist/ops/webgpu/softmax16.js +0 -21
- package/dist/ops/webgpu/softmax16_program.d.ts +0 -13
- package/dist/ops/webgpu/softmax16_program.js +0 -73
- package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +0 -17
- package/dist/ops/webgpu/softmax16_subgroup_program.js +0 -75
- package/dist/ops/webgpu/softmax16grad.d.ts +0 -1
- package/dist/ops/webgpu/softmax16grad.js +0 -37
- package/dist/ops/webgpu/sub16.d.ts +0 -1
- package/dist/ops/webgpu/sub16.js +0 -13
- package/dist/ops/webgpu/sum16.d.ts +0 -1
- package/dist/ops/webgpu/sum16.js +0 -38
- package/dist/ops/webgpu/transpose16.d.ts +0 -1
- package/dist/ops/webgpu/transpose16.js +0 -34
- package/dist/ops/webgpu/transpose16_program.d.ts +0 -16
- package/dist/ops/webgpu/transpose16_program.js +0 -50
- package/dist/ops/webgpu/transpose16_shared_program.d.ts +0 -15
- package/dist/ops/webgpu/transpose16_shared_program.js +0 -70
- package/dist/ops/webgpu/unpack16.d.ts +0 -1
- package/dist/ops/webgpu/unpack16.js +0 -48
- package/dist/ops/webgpu/utils/binary_op.d.ts +0 -35
- package/dist/ops/webgpu/utils/binary_op.js +0 -139
- package/dist/ops/webgpu/utils/deviceInfo.d.ts +0 -7
- package/dist/ops/webgpu/utils/deviceInfo.js +0 -11
- package/dist/ops/webgpu/utils/reductions.d.ts +0 -43
- package/dist/ops/webgpu/utils/reductions.js +0 -275
- package/dist/ops-CsXeTq1P.js +0 -476
- package/dist/pack16-bqltoUlR.js +0 -39
- package/dist/papaparse.min-C0cScC2i.js +0 -418
- package/dist/parquet-Bqjmp2vo.js +0 -44231
- package/dist/patches/webgpu_backend.d.ts +0 -18
- package/dist/patches/webgpu_backend.js +0 -56
- package/dist/patches/webgpu_base.d.ts +0 -21
- package/dist/patches/webgpu_base.js +0 -34
- package/dist/patches/webgpu_program.d.ts +0 -36
- package/dist/patches/webgpu_program.js +0 -400
- package/dist/pdf-NIhmP3sq.js +0 -19477
- package/dist/rand_util-CZ7yLoUm.js +0 -50
- package/dist/random_normal-IBRrha8a.js +0 -14
- package/dist/random_width-DN5ZtQkM.js +0 -9796
- package/dist/range-C-CjF-LI.js +0 -10
- package/dist/relu-J_X6MUzx.js +0 -9
- package/dist/reshape-BDOuCSNW.js +0 -9
- package/dist/resize_nearest_neighbor-BojqlfRe.js +0 -150
- package/dist/rope-DcrZM_e6.js +0 -24
- package/dist/scatter_nd_util-ByNJaL6I.js +0 -46
- package/dist/segment_util-Dasb2Zaf.js +0 -43
- package/dist/selu_util-BLhIqRkw.js +0 -44
- package/dist/shared-3agzAqQ_.js +0 -53
- package/dist/shared-CagdqkLh.js +0 -2143
- package/dist/slice-BzS11Qh0.js +0 -12
- package/dist/slice_util-CC35pLmT.js +0 -153
- package/dist/softmax-D4q1LJN7.js +0 -12
- package/dist/split-C2Sj255c.js +0 -9
- package/dist/squeeze-ho4wLUek.js +0 -10
- package/dist/stack-DudVrtmG.js +0 -11
- package/dist/step-BTxPtq1r.js +0 -261
- package/dist/sum-BpiwSWvg.js +0 -11
- package/dist/tensor-BWFldCso.js +0 -8
- package/dist/tensor1d-LMGMIUlr.js +0 -11
- package/dist/tensor2d-BnXMKScO.js +0 -14
- package/dist/tensor4d-C6UCG_u8.js +0 -14
- package/dist/tfjs_backend-BGnG-ppu.js +0 -654
- package/dist/tile-CFy-xTO6.js +0 -11
- package/dist/tokeniser/BaseTokeniser.d.ts +0 -33
- package/dist/tokeniser/BaseTokeniser.js +0 -124
- package/dist/tokeniser/CharTokeniser.d.ts +0 -24
- package/dist/tokeniser/CharTokeniser.js +0 -107
- package/dist/tokeniser/bpe.d.ts +0 -28
- package/dist/tokeniser/bpe.js +0 -173
- package/dist/tokeniser/messages.d.ts +0 -61
- package/dist/tokeniser/messages.js +0 -1
- package/dist/tokeniser/type.d.ts +0 -34
- package/dist/tokeniser/type.js +0 -1
- package/dist/training/AdamW.d.ts +0 -36
- package/dist/training/AdamW.js +0 -138
- package/dist/training/BasicTrainer.d.ts +0 -63
- package/dist/training/BasicTrainer.js +0 -265
- package/dist/training/DatasetBuilder.d.ts +0 -26
- package/dist/training/DatasetBuilder.js +0 -86
- package/dist/training/Evaluator.d.ts +0 -19
- package/dist/training/Evaluator.js +0 -39
- package/dist/training/LRScheduler.d.ts +0 -12
- package/dist/training/LRScheduler.js +0 -34
- package/dist/training/PreTrainer.d.ts +0 -11
- package/dist/training/PreTrainer.js +0 -20
- package/dist/training/SFTTrainer.d.ts +0 -12
- package/dist/training/SFTTrainer.js +0 -22
- package/dist/training/loss.d.ts +0 -3
- package/dist/training/loss.js +0 -24
- package/dist/training/orthoGrad.d.ts +0 -2
- package/dist/training/orthoGrad.js +0 -10
- package/dist/training/sparseCrossEntropy.d.ts +0 -7
- package/dist/training/sparseCrossEntropy.js +0 -69
- package/dist/training/tasks/ConversationTask.d.ts +0 -18
- package/dist/training/tasks/ConversationTask.js +0 -40
- package/dist/training/tasks/PretrainingTask.d.ts +0 -17
- package/dist/training/tasks/PretrainingTask.js +0 -47
- package/dist/training/tasks/StartSentenceTask.d.ts +0 -18
- package/dist/training/tasks/StartSentenceTask.js +0 -49
- package/dist/training/tasks/Task.d.ts +0 -22
- package/dist/training/tasks/Task.js +0 -68
- package/dist/training/tasks/splitter.d.ts +0 -5
- package/dist/training/tasks/splitter.js +0 -21
- package/dist/training/types.d.ts +0 -78
- package/dist/training/types.js +0 -1
- package/dist/training/validation.d.ts +0 -17
- package/dist/training/validation.js +0 -84
- package/dist/transpose-9kRxIXWR.js +0 -36
- package/dist/unsorted_segment_sum-DJvk5xnh.js +0 -277
- package/dist/utilities/arrayClose.d.ts +0 -1
- package/dist/utilities/arrayClose.js +0 -20
- package/dist/utilities/datasetID.d.ts +0 -2
- package/dist/utilities/datasetID.js +0 -21
- package/dist/utilities/dummy.d.ts +0 -9
- package/dist/utilities/dummy.js +0 -43
- package/dist/utilities/multinomialCPU.d.ts +0 -2
- package/dist/utilities/multinomialCPU.js +0 -13
- package/dist/utilities/naming.d.ts +0 -4
- package/dist/utilities/naming.js +0 -1
- package/dist/utilities/packed.d.ts +0 -4
- package/dist/utilities/packed.js +0 -15
- package/dist/utilities/parameters.d.ts +0 -11
- package/dist/utilities/parameters.js +0 -57
- package/dist/utilities/performance.d.ts +0 -2
- package/dist/utilities/performance.js +0 -16
- package/dist/utilities/profile.d.ts +0 -17
- package/dist/utilities/profile.js +0 -38
- package/dist/utilities/safetensors.d.ts +0 -3
- package/dist/utilities/safetensors.js +0 -83
- package/dist/utilities/sentences.d.ts +0 -5
- package/dist/utilities/sentences.js +0 -41
- package/dist/utilities/tokenParse.d.ts +0 -1
- package/dist/utilities/tokenParse.js +0 -21
- package/dist/utilities/topP.d.ts +0 -1
- package/dist/utilities/topP.js +0 -13
- package/dist/utilities/waitForModel.d.ts +0 -2
- package/dist/utilities/waitForModel.js +0 -12
- package/dist/utilities/weights.d.ts +0 -12
- package/dist/utilities/weights.js +0 -45
- package/dist/utilities/yielder.d.ts +0 -1
- package/dist/utilities/yielder.js +0 -7
- package/dist/variable-Ck482e3n.js +0 -7
- package/dist/webgpu_program-B4HmApL1.js +0 -525
- package/dist/webgpu_util-DYlGSwOJ.js +0 -64
- package/dist/zeros-DvZpK8s6.js +0 -13
- package/dist/zeros_like-CWjDdwr-.js +0 -721
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
import { aa as y } from "../index-CUXkjxiT.js";
|
|
2
|
-
import "../random_width-DN5ZtQkM.js";
|
|
3
|
-
import "../zeros_like-CWjDdwr-.js";
|
|
4
|
-
import "../Generator.js";
|
|
5
|
-
import "../index-Cp39cXWe.js";
|
|
6
|
-
import "../dataset-CGGp1z9P.js";
|
|
7
|
-
import "../ops/cpu/attentionMask.js";
|
|
8
|
-
import "../ops/webgl/attentionMask.js";
|
|
9
|
-
import "../ops/grads/attentionMask.js";
|
|
10
|
-
import "../ops/cpu/rope.js";
|
|
11
|
-
import "../ops/webgl/rope.js";
|
|
12
|
-
import "../rope-DcrZM_e6.js";
|
|
13
|
-
import "../ops/cpu/appendCache.js";
|
|
14
|
-
import "../ops/webgl/appendCache.js";
|
|
15
|
-
import "../ops/grads/softmax16.js";
|
|
16
|
-
import "../matMul16-BcVC_E62.js";
|
|
17
|
-
import "../ops/webgl/matMul16.js";
|
|
18
|
-
import "../ops/cpu/matMul16.js";
|
|
19
|
-
import "../pack16-bqltoUlR.js";
|
|
20
|
-
import "../ops/transpose16.js";
|
|
21
|
-
import "../ops/reshape16.js";
|
|
22
|
-
import "../ops/cpu/qkv.js";
|
|
23
|
-
import "../ops/webgl/qkv.js";
|
|
24
|
-
import "../ops/grads/qkv.js";
|
|
25
|
-
import "../ops/cpu/normRMS.js";
|
|
26
|
-
import "../ops/webgl/normRMS.js";
|
|
27
|
-
import "../ops/grads/normRMS.js";
|
|
28
|
-
import "../ops/dropout16.js";
|
|
29
|
-
import "../ops/webgl/dropout16.js";
|
|
30
|
-
import "../ops/grads/add16.js";
|
|
31
|
-
import "../jszip.min-BZhlzntC.js";
|
|
32
|
-
import g from "../tokeniser/CharTokeniser.js";
|
|
33
|
-
import k from "../tokeniser/bpe.js";
|
|
34
|
-
import { dummyPassAsync as u } from "../utilities/dummy.js";
|
|
35
|
-
import b from "../models/factory.js";
|
|
36
|
-
import "../ops/cpu/adamAdjust.js";
|
|
37
|
-
import "../ops/webgl/adamAdjust.js";
|
|
38
|
-
import "../ops/cpu/adamMoments.js";
|
|
39
|
-
import "../ops/webgl/adamMoments.js";
|
|
40
|
-
import "../index-DvYrXKkX.js";
|
|
41
|
-
import "../ops/cpu/gatherSub.js";
|
|
42
|
-
import "../ops/webgl/gatherSub.js";
|
|
43
|
-
import "../ops/cpu/scatterSub.js";
|
|
44
|
-
import "../ops/webgl/scatterSub.js";
|
|
45
|
-
import "../papaparse.min-C0cScC2i.js";
|
|
46
|
-
import "../ops/cpu/matMulGelu.js";
|
|
47
|
-
import "../matMulGelu-JNLZqKQp.js";
|
|
48
|
-
import "../ops/grads/matMulGelu.js";
|
|
49
|
-
import "../ops/cpu/gelu.js";
|
|
50
|
-
import "../ops/webgl/gelu.js";
|
|
51
|
-
import "../gelu-B220X1Go.js";
|
|
52
|
-
import "../ops/webgl/log.js";
|
|
53
|
-
import "../checks/normRMS.js";
|
|
54
|
-
import "../checks/normRMSGrad.js";
|
|
55
|
-
import { importWeights as M } from "../utilities/weights.js";
|
|
56
|
-
/**
 * Load a model, its tokeniser and its weights from a zip archive that stores
 * a `manifest.json`, a `tokeniser.json` and one `<weight name>.bin` entry per
 * weight.
 *
 * Fix over the previous version: the weight name is now derived by removing
 * only the trailing `.bin` suffix. The old `replace(".bin", "")` removed the
 * FIRST occurrence, so an entry such as `block.binary.bin` was registered
 * under the wrong name (`blockary.bin`).
 *
 * @param r Zip archive object; this function uses `r.file(name)?.async(kind)`
 *   and `Object.keys(r.files)` (JSZip-style API — TODO confirm with the caller).
 * @param m Metadata object. It is attached to the model as `metaData`, its
 *   `url` property is coerced to a boolean for `loadWeights`, and it is
 *   returned unchanged.
 * @returns {Promise<{model: object, tokeniser: object, metaData: object}>}
 * @throws {Error} If `manifest.json` or `tokeniser.json` is missing or empty.
 */
async function jt(r, m) {
  const BIN_SUFFIX = ".bin";

  const manifestJson = await r.file("manifest.json")?.async("string");
  if (!manifestJson) {
    throw new Error("Manifest file not found in the zip archive");
  }
  const manifest = JSON.parse(manifestJson);

  // One placeholder per weight declared in the manifest; the data is filled
  // in below when the matching `.bin` entry is read.
  const entries = new Map();
  for (const [name, spec] of Object.entries(manifest.weightSpec)) {
    entries.set(name, { spec, data: new Float32Array() });
  }

  const tokeniserJson = await r.file("tokeniser.json")?.async("string");
  if (!tokeniserJson) {
    throw new Error("Tokeniser file not found in the zip archive");
  }
  const tokeniserSpec = JSON.parse(tokeniserJson);
  // A missing `type` is treated as "char".
  const tokeniser =
    (tokeniserSpec.type ?? "char") === "char"
      ? new g(tokeniserSpec.vocab)
      : new k(tokeniserSpec.vocab, tokeniserSpec.merges);

  const weights = new Map();
  for (const fileName of Object.keys(r.files)) {
    if (!fileName.endsWith(BIN_SUFFIX)) {
      continue;
    }
    // Strip the suffix only, never an inner ".bin" fragment.
    const weightName = fileName.slice(0, -BIN_SUFFIX.length);
    const buffer = await r.file(fileName).async("arraybuffer");
    // Entries not listed in the manifest fall back to an empty spec.
    const entry = entries.get(weightName) || { spec: [], data: new Float32Array() };
    entry.data = new Float32Array(buffer);
    entries.set(weightName, entry);
    weights.set(weightName, await M(entry));
  }

  // `y` is an imported zero-argument helper; its effect is not visible here.
  y();

  const model = b(manifest.config);
  model.metaData = m;
  // The dummy pass runs before the weights are loaded (order kept from the original).
  await u(model);
  model.weightStore.loadWeights(weights, !!m.url);
  return { model, tokeniser, metaData: m };
}
|
|
78
|
-
export {
|
|
79
|
-
jt as default
|
|
80
|
-
};
|
package/dist/loader/save.d.ts
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import { ITokeniser } from '../tokeniser/type';
|
|
2
|
-
import { default as Model, ModelForwardAttributes } from '../models/model';
|
|
3
|
-
import { AdamWOptimizer } from '../training/AdamW';
|
|
4
|
-
import { TrainingLogEntry } from '../training/types';
|
|
5
|
-
import { GPTConfig } from '../models/config';
|
|
6
|
-
/** Optional settings accepted as the third argument of `saveModel`. */
export interface SaveOptions {
|
|
7
|
-
/** Written as `name` in the archive's `meta.json`. */
name?: string;
|
|
8
|
-
/** Written as `meta` in the archive's `meta.json`. */
metadata?: Record<string, unknown>;
|
|
9
|
-
/** Extra archive entries: each key is used as a file name and its value is stored JSON-stringified. */
files?: Record<string, unknown>;
|
|
10
|
-
/** NOTE(review): not read by the `saveModel` implementation shown in this diff, which checks `extraItems.optimizer` instead — confirm intended use. */
includeOptimizer?: boolean;
|
|
11
|
-
}
|
|
12
|
-
/** Optional extra state accepted as the fourth argument of `saveModel`. */
export interface ExtraSaveItems {
|
|
13
|
-
/** When set, its moments are stored as `optimizer.safetensors` and its serialized config as `optimizer_config.json`. */
optimizer?: AdamWOptimizer;
|
|
14
|
-
/** When set, stored as `training_log.json`; logs longer than 1000 entries are thinned before writing. */
trainingLog?: TrainingLogEntry[];
|
|
15
|
-
}
|
|
16
|
-
/**
 * Package a model and its tokeniser into a zip archive.
 *
 * The implementation in this diff writes `model.safetensors`, `config.json`,
 * `meta.json` and `tokeniser.json`, plus optional optimizer, training-log and
 * caller-supplied files, and resolves with the archive generated as a Blob.
 */
export declare function saveModel(model: Model<ModelForwardAttributes, GPTConfig>, tokeniser: ITokeniser, options?: SaveOptions, extraItems?: ExtraSaveItems): Promise<Blob>;
|
package/dist/loader/save.js
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
import { z as y } from "../jszip.min-BZhlzntC.js";
|
|
2
|
-
import _ from "../tokeniser/CharTokeniser.js";
|
|
3
|
-
import { save_safetensors as b } from "../utilities/safetensors.js";
|
|
4
|
-
import { VERSION as h } from "./load.js";
|
|
5
|
-
/**
 * Thin out a long list by keeping evenly spaced entries.
 *
 * Lists with at most `limit` entries are returned as-is (same array
 * instance). Longer lists are filtered with a stride of
 * `ceil(length / limit)`; the first and the last entry are always kept, so
 * the result can hold up to `limit + 1` entries.
 *
 * @param {Array} i Entries to thin out (not mutated).
 * @param {number} [limit=1000] Size threshold, also used to derive the
 *   stride. The default matches the previously hard-coded value.
 * @returns {Array} `i` itself when short enough, otherwise a new array.
 */
function m(i, limit = 1e3) {
  if (i.length <= limit) {
    return i;
  }
  const stride = Math.ceil(i.length / limit);
  const lastIndex = i.length - 1;
  return i.filter((_, index) => index % stride === 0 || index === lastIndex);
}
|
|
12
|
-
/**
 * Package a model and its tokeniser into a zip archive (exported as `saveModel`).
 *
 * Entries are added in this order: optional `optimizer.safetensors` and
 * `optimizer_config.json`, optional `training_log.json`, `model.safetensors`,
 * `config.json`, `meta.json`, `tokeniser.json`, then any caller-supplied files.
 *
 * @param i Model; `weightStore`, `config` and `metaData` are read.
 * @param n Tokeniser; `getVocab()`, `getMerges()`, `datasetID` and `id` are read.
 * @param o Optional save options (`name`, `metadata`, `files`).
 * @param a Optional extra items (`optimizer`, `trainingLog`).
 * @returns Promise of the archive generated with `type: "blob"`.
 */
async function u(i, n, o, a) {
  const weightsByName = new Map();
  i.weightStore.saveWeights(weightsByName);

  const archive = new y();

  if (a?.optimizer) {
    const moments = await a.optimizer.saveMoments();
    archive.file("optimizer.safetensors", moments, { binary: true });
    archive.file("optimizer_config.json", JSON.stringify(a.optimizer.serializeConfig()), {
      binary: false,
    });
  }

  if (a?.trainingLog) {
    archive.file("training_log.json", JSON.stringify(m(a.trainingLog), undefined, 4), {
      binary: false,
    });
  }

  // Only weights stored as a single-element list are exported.
  const tensors = {};
  for (const [weightName, list] of weightsByName) {
    if (list.length === 1) {
      tensors[weightName] = list[0];
    }
  }
  const modelBytes = await b(tensors);
  archive.file("model.safetensors", modelBytes, { binary: true });

  const modelConfig = i.config;
  const isV1 = modelConfig.modelType === "GenAI_NanoGPT_v1";
  // Fields shared by both variants, in the key order used in config.json.
  const sharedFields = {
    vocab_size: n.getVocab().length,
    hidden_size: modelConfig.nEmbed,
    num_hidden_layers: modelConfig.nLayer,
    num_attention_heads: modelConfig.nHead,
    block_size: modelConfig.blockSize,
    mlpFactor: modelConfig.mlpFactor,
  };
  let transformersConfig;
  if (isV1) {
    transformersConfig = {
      model_type: "GenAI_NanoGPT_v1",
      ...sharedFields,
      useRope: modelConfig.useRope,
    };
  } else {
    // Any model type other than v1 is written out as v2.
    transformersConfig = {
      model_type: "GenAI_NanoGPT_v2",
      ...sharedFields,
      loraConfig: modelConfig.loraConfig ? Object.fromEntries(modelConfig.loraConfig) : undefined,
      loraName: modelConfig.loraName,
      windowSize: modelConfig.windowSize,
    };
  }
  archive.file("config.json", JSON.stringify(transformersConfig, undefined, 4), {
    binary: false,
  });

  const modelMeta = i.metaData;
  const meta = {
    version: h,
    application: "@genai-fi/nanogpt",
    meta: o?.metadata,
    name: o?.name,
    training: modelMeta?.training || undefined,
    reference: modelMeta?.url || undefined,
    phase: modelMeta?.phase || undefined,
    pretrainingData: modelMeta?.pretrainingData || undefined,
    pretrainingSettings: modelMeta?.pretrainingSettings || undefined,
    generationSettings: modelMeta?.generationSettings || undefined,
    actionLog: modelMeta?.actionLog || undefined,
  };
  archive.file("meta.json", JSON.stringify(meta, undefined, 4), {
    binary: false,
  });

  const tokeniserDump = {
    type: n instanceof _ ? "char" : "bpe",
    vocab: n.getVocab(),
    merges: n.getMerges(),
    datasetID: n.datasetID,
    id: n.id,
  };
  archive.file("tokeniser.json", JSON.stringify(tokeniserDump), {
    binary: false,
  });

  if (o?.files) {
    for (const [fileName, content] of Object.entries(o.files)) {
      archive.file(fileName, JSON.stringify(content), { binary: false });
    }
  }

  return archive.generateAsync({ type: "blob" });
}
|
|
88
|
-
export {
|
|
89
|
-
u as saveModel
|
|
90
|
-
};
|
package/dist/loader/types.d.ts
DELETED
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
import { GenerateOptions } from '../inference/types';
|
|
2
|
-
import { LoRAConfig } from '../models/config';
|
|
3
|
-
import { default as Model, ModelForwardAttributes, TrainingState } from '../models/model';
|
|
4
|
-
import { ITokeniser } from '../tokeniser/type';
|
|
5
|
-
import { AdamWOptimizer } from '../training/AdamW';
|
|
6
|
-
import { TrainingLogEntry, TrainingOptions } from '../training/types';
|
|
7
|
-
/** Fields shared by both model config variants; `saveModel` writes an object of this shape to `config.json`. */
export interface TransformersConfigBase {
|
|
8
|
-
/** Discriminator between the v1 and v2 config variants. */
model_type: 'GenAI_NanoGPT_v1' | 'GenAI_NanoGPT_v2';
|
|
9
|
-
/** `saveModel` stores the length of the tokeniser's vocabulary here. */
vocab_size: number;
|
|
10
|
-
/** `saveModel` stores the model config's `nEmbed` here. */
hidden_size: number;
|
|
11
|
-
/** `saveModel` stores the model config's `nLayer` here. */
num_hidden_layers: number;
|
|
12
|
-
/** `saveModel` stores the model config's `nHead` here. */
num_attention_heads: number;
|
|
13
|
-
/** `saveModel` stores the model config's `blockSize` here. */
block_size: number;
|
|
14
|
-
/** `saveModel` stores the model config's `mlpFactor` here. */
mlpFactor: number;
|
|
15
|
-
/** LoRA settings keyed by name; `saveModel` only writes this for the v2 variant. */
loraConfig?: Record<string, LoRAConfig>;
|
|
16
|
-
/** `saveModel` stores the model config's `loraName` here (v2 variant only). */
loraName?: string;
|
|
17
|
-
}
|
|
18
|
-
/** Config variant for `GenAI_NanoGPT_v1` models. */
export interface TransformersConfigV1 extends TransformersConfigBase {
|
|
19
|
-
model_type: 'GenAI_NanoGPT_v1';
|
|
20
|
-
/** `saveModel` stores the model config's `useRope` here. */
useRope: boolean;
|
|
21
|
-
}
|
|
22
|
-
/** Config variant for `GenAI_NanoGPT_v2` models. */
export interface TransformersConfigV2 extends TransformersConfigBase {
|
|
23
|
-
model_type: 'GenAI_NanoGPT_v2';
|
|
24
|
-
/** `saveModel` stores the model config's `windowSize` here. NOTE(review): typed as string — confirm the expected format. */
windowSize?: string;
|
|
25
|
-
}
|
|
26
|
-
/** Either config variant; `model_type` tells them apart. */
export type TransformersConfig = TransformersConfigV1 | TransformersConfigV2;
|
|
27
|
-
/** Shape of the `tokeniser.json` entry written by `saveModel`. */
export interface TransformersTokeniser {
|
|
28
|
-
/** `saveModel` writes 'char' for `CharTokeniser` instances and 'bpe' otherwise. */
type: 'char' | 'bpe';
|
|
29
|
-
/** Result of the tokeniser's `getVocab()`. */
vocab: string[];
|
|
30
|
-
/** Result of the tokeniser's `getMerges()`. */
merges: [string, string][];
|
|
31
|
-
/** Copied from the tokeniser's `datasetID` property. */
datasetID?: string;
|
|
32
|
-
/** Copied from the tokeniser's `id` property. */
id?: string;
|
|
33
|
-
}
|
|
34
|
-
/** Allowed values of `TransformersMetadata.phase`. */
export type ModelPhase = 'untrained' | 'pretrained' | 'finetuned';
|
|
35
|
-
/** Identifier and name of a dataset; element type of `TransformersMetadata.pretrainingData`. */
export interface DatasetMetadata {
|
|
36
|
-
id: string;
|
|
37
|
-
name: string;
|
|
38
|
-
}
|
|
39
|
-
/** One record in `TransformersMetadata.actionLog`. */
export interface ActionLogEntry {
|
|
40
|
-
/** Kind of action the entry records. */
action: 'pretrain' | 'generate' | 'finetune';
|
|
41
|
-
/** NOTE(review): presumably an epoch timestamp — unit not visible here, confirm. */
timestamp: number;
|
|
42
|
-
/** NOTE(review): unit not visible here — confirm. */
duration: number;
|
|
43
|
-
tokensProcessed: number;
|
|
44
|
-
/** Options of the recorded action: training options or generation options. */
options: TrainingOptions | GenerateOptions;
|
|
45
|
-
}
|
|
46
|
-
export interface TransformersMetadata {
|
|
47
|
-
name?: string;
|
|
48
|
-
version: number;
|
|
49
|
-
application: string;
|
|
50
|
-
training?: TrainingState;
|
|
51
|
-
reference?: string;
|
|
52
|
-
id?: string;
|
|
53
|
-
url?: string;
|
|
54
|
-
phase?: ModelPhase;
|
|
55
|
-
pretrainingData?: DatasetMetadata[];
|
|
56
|
-
pretrainingSettings?: TrainingOptions;
|
|
57
|
-
generationSettings?: GenerateOptions;
|
|
58
|
-
actionLog?: ActionLogEntry[];
|
|
59
|
-
[key: string]: unknown;
|
|
60
|
-
}
|
|
61
|
-
export interface LoadResult {
|
|
62
|
-
model: Model<ModelForwardAttributes>;
|
|
63
|
-
tokeniser: ITokeniser;
|
|
64
|
-
metaData: TransformersMetadata;
|
|
65
|
-
optimizer?: AdamWOptimizer;
|
|
66
|
-
log?: TrainingLogEntry[];
|
|
67
|
-
}
|
package/dist/loader/types.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
package/dist/main.d.ts
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import { default as PretrainingTask } from './training/tasks/PretrainingTask';
|
|
2
|
-
import { default as StartSentenceTask } from './training/tasks/StartSentenceTask';
|
|
3
|
-
import { default as ConversationTask } from './training/tasks/ConversationTask';
|
|
4
|
-
import { pack16 } from './ops/pack16';
|
|
5
|
-
import { unpack16 } from './ops/unpack16';
|
|
6
|
-
import { default as CausalSelfAttention } from './layers/CausalSelfAttention';
|
|
7
|
-
import { default as MLP } from './layers/MLP';
|
|
8
|
-
import { default as TransformerBlock } from './layers/TransformerBlock';
|
|
9
|
-
import { default as RoPECache } from './layers/RoPECache';
|
|
10
|
-
export { default as NanoGPT } from './models/NanoGPTV1';
|
|
11
|
-
export { default as TeachableLLM } from './TeachableLLM';
|
|
12
|
-
export { default as CharTokeniser } from './tokeniser/CharTokeniser';
|
|
13
|
-
export { default as BPETokeniser } from './tokeniser/bpe';
|
|
14
|
-
export { default as waitForModel } from './utilities/waitForModel';
|
|
15
|
-
export { default as generateDatasetID } from './utilities/datasetID';
|
|
16
|
-
export { default as loadTextData } from './data/textLoader';
|
|
17
|
-
export type { DatasetMetadata } from './loader/types';
|
|
18
|
-
export { default as Generator, type IGenerator } from './Generator';
|
|
19
|
-
export { default as Evaluator } from './training/Evaluator';
|
|
20
|
-
export { default as Trainer } from './Trainer';
|
|
21
|
-
export type { IGenerateOptions } from './Generator';
|
|
22
|
-
export { type ModelForwardAttributes, default as Model } from './models/model';
|
|
23
|
-
export type { ITokeniser, Conversation, Roles } from './tokeniser/type';
|
|
24
|
-
export type { TrainingOptions, TrainingLogEntry } from './training/types';
|
|
25
|
-
export type { GPTConfig } from './models/config';
|
|
26
|
-
export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
|
|
27
|
-
export { default as topP } from './utilities/topP';
|
|
28
|
-
export { Task, tokensFromTasks } from './training/tasks/Task';
|
|
29
|
-
export declare const tasks: {
|
|
30
|
-
PretrainingTask: typeof PretrainingTask;
|
|
31
|
-
StartSentenceTask: typeof StartSentenceTask;
|
|
32
|
-
ConversationTask: typeof ConversationTask;
|
|
33
|
-
};
|
|
34
|
-
declare const ops: {
|
|
35
|
-
pack16: typeof pack16;
|
|
36
|
-
unpack16: typeof unpack16;
|
|
37
|
-
};
|
|
38
|
-
export { ops };
|
|
39
|
-
export { selectBackend } from './backend';
|
|
40
|
-
export { default as performanceTest } from './utilities/performance';
|
|
41
|
-
export declare const layers: {
|
|
42
|
-
CausalSelfAttention: typeof CausalSelfAttention;
|
|
43
|
-
MLP: typeof MLP;
|
|
44
|
-
TransformerBlock: typeof TransformerBlock;
|
|
45
|
-
RoPECache: typeof RoPECache;
|
|
46
|
-
};
|
|
47
|
-
export { AdamWOptimizer } from './training/AdamW';
|
|
48
|
-
export { default as checks } from './checks';
|
|
49
|
-
export type { TensorStatistics } from './checks/weights';
|
|
50
|
-
export { sentenceEmbeddings, sentenceEmbeddingsTensor } from './utilities/sentences';
|
package/dist/main.js
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
import "./index-CUXkjxiT.js";
|
|
2
|
-
import "./random_width-DN5ZtQkM.js";
|
|
3
|
-
import "./zeros_like-CWjDdwr-.js";
|
|
4
|
-
import { default as io } from "./Generator.js";
|
|
5
|
-
import "./index-Cp39cXWe.js";
|
|
6
|
-
import "./dataset-CGGp1z9P.js";
|
|
7
|
-
import { default as fo } from "./models/NanoGPTV1.js";
|
|
8
|
-
import { default as lo } from "./TeachableLLM.js";
|
|
9
|
-
import { default as uo } from "./tokeniser/CharTokeniser.js";
|
|
10
|
-
import { default as ko } from "./tokeniser/bpe.js";
|
|
11
|
-
import { default as go } from "./utilities/waitForModel.js";
|
|
12
|
-
import { default as Co } from "./utilities/datasetID.js";
|
|
13
|
-
import { default as Eo } from "./data/textLoader.js";
|
|
14
|
-
import { default as Bo } from "./training/Evaluator.js";
|
|
15
|
-
import { default as vo } from "./Trainer.js";
|
|
16
|
-
import { default as Do } from "./models/model.js";
|
|
17
|
-
import { estimateMemoryUsage as So, estimateParameterCount as Ao, estimateResources as Fo, estimateTrainingMemoryUsage as Go, validateConfig as Ro } from "./utilities/parameters.js";
|
|
18
|
-
import { default as wo } from "./utilities/topP.js";
|
|
19
|
-
import { Task as Io, tokensFromTasks as No } from "./training/tasks/Task.js";
|
|
20
|
-
import o from "./training/tasks/PretrainingTask.js";
|
|
21
|
-
import r from "./training/tasks/StartSentenceTask.js";
|
|
22
|
-
import t from "./training/tasks/ConversationTask.js";
|
|
23
|
-
import "./ops/cpu/scatterSub.js";
|
|
24
|
-
import "./ops/webgl/scatterSub.js";
|
|
25
|
-
import "./ops/cpu/gatherSub.js";
|
|
26
|
-
import "./ops/webgl/gatherSub.js";
|
|
27
|
-
import "./ops/cpu/attentionMask.js";
|
|
28
|
-
import "./ops/webgl/attentionMask.js";
|
|
29
|
-
import "./ops/grads/attentionMask.js";
|
|
30
|
-
import "./ops/cpu/qkv.js";
|
|
31
|
-
import "./ops/webgl/qkv.js";
|
|
32
|
-
import "./ops/grads/qkv.js";
|
|
33
|
-
import "./ops/cpu/rope.js";
|
|
34
|
-
import "./ops/webgl/rope.js";
|
|
35
|
-
import "./rope-DcrZM_e6.js";
|
|
36
|
-
import "./ops/cpu/appendCache.js";
|
|
37
|
-
import "./ops/webgl/appendCache.js";
|
|
38
|
-
import "./ops/cpu/matMulGelu.js";
|
|
39
|
-
import "./matMulGelu-JNLZqKQp.js";
|
|
40
|
-
import "./ops/grads/matMulGelu.js";
|
|
41
|
-
import "./ops/cpu/gelu.js";
|
|
42
|
-
import "./ops/webgl/gelu.js";
|
|
43
|
-
import "./gelu-B220X1Go.js";
|
|
44
|
-
import "./ops/cpu/normRMS.js";
|
|
45
|
-
import "./ops/webgl/normRMS.js";
|
|
46
|
-
import "./ops/grads/normRMS.js";
|
|
47
|
-
import "./ops/webgl/log.js";
|
|
48
|
-
import "./ops/cpu/adamMoments.js";
|
|
49
|
-
import "./ops/webgl/adamMoments.js";
|
|
50
|
-
import "./ops/cpu/adamAdjust.js";
|
|
51
|
-
import "./ops/webgl/adamAdjust.js";
|
|
52
|
-
import { u as e, p as m } from "./pack16-bqltoUlR.js";
|
|
53
|
-
import "./ops/grads/softmax16.js";
|
|
54
|
-
import "./matMul16-BcVC_E62.js";
|
|
55
|
-
import "./ops/webgl/matMul16.js";
|
|
56
|
-
import "./ops/cpu/matMul16.js";
|
|
57
|
-
import "./ops/transpose16.js";
|
|
58
|
-
import { selectBackend as Wo } from "./backend.js";
|
|
59
|
-
import { default as qo } from "./utilities/performance.js";
|
|
60
|
-
import a from "./layers/CausalSelfAttention.js";
|
|
61
|
-
import p from "./layers/MLP.js";
|
|
62
|
-
import i from "./layers/TransformerBlock.js";
|
|
63
|
-
import s from "./layers/RoPECache.js";
|
|
64
|
-
import { AdamWOptimizer as Jo } from "./training/AdamW.js";
|
|
65
|
-
import { default as Qo } from "./checks/index.js";
|
|
66
|
-
import { sentenceEmbeddings as Xo, sentenceEmbeddingsTensor as Yo } from "./utilities/sentences.js";
|
|
67
|
-
const to = {
|
|
68
|
-
PretrainingTask: o,
|
|
69
|
-
StartSentenceTask: r,
|
|
70
|
-
ConversationTask: t
|
|
71
|
-
}, eo = {
|
|
72
|
-
pack16: m,
|
|
73
|
-
unpack16: e
|
|
74
|
-
}, mo = {
|
|
75
|
-
CausalSelfAttention: a,
|
|
76
|
-
MLP: p,
|
|
77
|
-
TransformerBlock: i,
|
|
78
|
-
RoPECache: s
|
|
79
|
-
};
|
|
80
|
-
export {
|
|
81
|
-
Jo as AdamWOptimizer,
|
|
82
|
-
ko as BPETokeniser,
|
|
83
|
-
uo as CharTokeniser,
|
|
84
|
-
Bo as Evaluator,
|
|
85
|
-
io as Generator,
|
|
86
|
-
Do as Model,
|
|
87
|
-
fo as NanoGPT,
|
|
88
|
-
Io as Task,
|
|
89
|
-
lo as TeachableLLM,
|
|
90
|
-
vo as Trainer,
|
|
91
|
-
Qo as checks,
|
|
92
|
-
So as estimateMemoryUsage,
|
|
93
|
-
Ao as estimateParameterCount,
|
|
94
|
-
Fo as estimateResources,
|
|
95
|
-
Go as estimateTrainingMemoryUsage,
|
|
96
|
-
Co as generateDatasetID,
|
|
97
|
-
mo as layers,
|
|
98
|
-
Eo as loadTextData,
|
|
99
|
-
eo as ops,
|
|
100
|
-
qo as performanceTest,
|
|
101
|
-
Wo as selectBackend,
|
|
102
|
-
Xo as sentenceEmbeddings,
|
|
103
|
-
Yo as sentenceEmbeddingsTensor,
|
|
104
|
-
to as tasks,
|
|
105
|
-
No as tokensFromTasks,
|
|
106
|
-
wo as topP,
|
|
107
|
-
Ro as validateConfig,
|
|
108
|
-
go as waitForModel
|
|
109
|
-
};
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
import { e as y, h } from "./index-CUXkjxiT.js";
|
|
2
|
-
import "./ops/webgl/matMul16.js";
|
|
3
|
-
import "./ops/cpu/matMul16.js";
|
|
4
|
-
import { isPackedTensor as v } from "./utilities/packed.js";
|
|
5
|
-
import { p as g } from "./pack16-bqltoUlR.js";
|
|
6
|
-
import { d as k } from "./gelu-B220X1Go.js";
|
|
7
|
-
import { transpose16 as S } from "./ops/transpose16.js";
|
|
8
|
-
import { reshape16 as w } from "./ops/reshape16.js";
|
|
9
|
-
import { mul16 as D } from "./ops/mul16.js";
|
|
10
|
-
const G = {
|
|
11
|
-
kernelName: "MatMul16",
|
|
12
|
-
inputsToSave: ["A", "B"],
|
|
13
|
-
outputsToSave: [],
|
|
14
|
-
gradFunc: (r, o, l) => {
|
|
15
|
-
const [s, t] = o;
|
|
16
|
-
if (Array.isArray(r))
|
|
17
|
-
throw new Error("Expected dy to be a single Tensor");
|
|
18
|
-
let e = r;
|
|
19
|
-
const { transposeA: u, transposeB: i, scale: a, activation: p, originalShape: d, perm: m } = l;
|
|
20
|
-
if (m && d) {
|
|
21
|
-
const f = new Array(m.length);
|
|
22
|
-
for (let M = 0; M < m.length; ++M)
|
|
23
|
-
f[m[M]] = M;
|
|
24
|
-
const c = e;
|
|
25
|
-
e = S(e, f), c.dispose();
|
|
26
|
-
}
|
|
27
|
-
if (d) {
|
|
28
|
-
const f = e;
|
|
29
|
-
e = w(e, d), f.dispose();
|
|
30
|
-
}
|
|
31
|
-
if (p === "gelu") {
|
|
32
|
-
const f = e, c = n(s, t, u, i);
|
|
33
|
-
e = k(f, c), f.dispose(), c.dispose();
|
|
34
|
-
} else if (p === "relu2") {
|
|
35
|
-
const f = e, c = n(s, t, u, i, { activation: "relu", scale: 2 });
|
|
36
|
-
e = D(f, c), f.dispose(), c.dispose();
|
|
37
|
-
}
|
|
38
|
-
if (!u && !i)
|
|
39
|
-
return {
|
|
40
|
-
A: () => a !== void 0 ? B(e, t, a, !1, !0) : n(e, t, !1, !0),
|
|
41
|
-
B: () => a !== void 0 ? A(s, e, a, !0, !1) : n(s, e, !0, !1)
|
|
42
|
-
};
|
|
43
|
-
if (!u && i)
|
|
44
|
-
return {
|
|
45
|
-
A: () => a !== void 0 ? B(e, t, a, !1, !1) : n(e, t, !1, !1),
|
|
46
|
-
B: () => a !== void 0 ? A(s, e, a, !0, !1) : n(s, e, !0, !1)
|
|
47
|
-
};
|
|
48
|
-
if (u && !i)
|
|
49
|
-
return {
|
|
50
|
-
A: () => a !== void 0 ? A(t, e, a, !1, !0) : n(t, e, !1, !0),
|
|
51
|
-
B: () => a !== void 0 ? A(s, e, a, !1, !1) : n(s, e, !1, !1)
|
|
52
|
-
};
|
|
53
|
-
throw new Error("Gradient for transposeA=true and transposeB=true is not supported yet.");
|
|
54
|
-
}
|
|
55
|
-
};
|
|
56
|
-
y(G);
|
|
57
|
-
function n(r, o, l = !1, s = !1, t = {}) {
|
|
58
|
-
const e = v(r), u = v(o), i = e || u, a = !i || e ? r : g(r), p = !i || u ? o : g(o), d = h().runKernel("MatMul16", { A: a, B: p }, { transposeA: l, transposeB: s, ...t });
|
|
59
|
-
return i && !e && a.dispose(), i && !u && p.dispose(), d;
|
|
60
|
-
}
|
|
61
|
-
function j(r, o, l, s = !1, t = !1) {
|
|
62
|
-
return n(r, o, s, t, { scale: l });
|
|
63
|
-
}
|
|
64
|
-
function B(r, o, l, s = !1, t = !1) {
|
|
65
|
-
return n(r, o, s, t, { scaleA: l });
|
|
66
|
-
}
|
|
67
|
-
function A(r, o, l, s = !1, t = !1) {
|
|
68
|
-
return n(r, o, s, t, { scaleB: l });
|
|
69
|
-
}
|
|
70
|
-
function q(r, o, l = !1, s = !1) {
|
|
71
|
-
return n(r, o, l, s, { activation: "gelu" });
|
|
72
|
-
}
|
|
73
|
-
export {
|
|
74
|
-
G as a,
|
|
75
|
-
n as b,
|
|
76
|
-
q as c,
|
|
77
|
-
B as d,
|
|
78
|
-
A as e,
|
|
79
|
-
j as m
|
|
80
|
-
};
|
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
import { c as C, t as R, h as I, U as G, _ as L, x as U, am as F } from "./index-CUXkjxiT.js";
|
|
2
|
-
import { r as M } from "./Reshape-BW__R4mZ.js";
|
|
3
|
-
import { u as H } from "./gpgpu_math-BwvV12df.js";
|
|
4
|
-
import { m as B } from "./mat_mul-DhG0Newp.js";
|
|
5
|
-
class W {
|
|
6
|
-
constructor(e, s, a, n = !1, o = !1, r = !1, i = null, u = !1, l = !1) {
|
|
7
|
-
this.variableNames = ["matrixA", "matrixB"], this.packedInputs = !0, this.packedOutput = !0, this.outputShape = a, this.enableShapeUniforms = H(this.outputShape.length);
|
|
8
|
-
const p = n ? e[1] : e[2], h = Math.ceil(p / 2), d = n ? "i * 2, rc.y" : "rc.y, i * 2", b = o ? "rc.z, i * 2" : "i * 2, rc.z", x = n ? ["a.xxyy", "a.zzww"] : ["a.xxzz", "a.yyww"], m = o ? ["b.xzxz", "b.ywyw"] : ["b.xyxy", "b.zwzw"];
|
|
9
|
-
let c = "", g = "";
|
|
10
|
-
i && (u ? c = `vec4 activation(vec4 a) {
|
|
11
|
-
vec4 b = getPreluActivationWeightsAtOutCoords();
|
|
12
|
-
${i}
|
|
13
|
-
}` : l ? c = `vec4 activation(vec4 a) {
|
|
14
|
-
vec4 b = getLeakyreluAlphaAtOutCoords();
|
|
15
|
-
${i}
|
|
16
|
-
}` : c = `vec4 activation(vec4 x) {
|
|
17
|
-
${i}
|
|
18
|
-
}`, g = "result = activation(result);");
|
|
19
|
-
const $ = r ? "result += getBiasAtOutCoords();" : "";
|
|
20
|
-
r && this.variableNames.push("bias"), u && this.variableNames.push("preluActivationWeights"), l && this.variableNames.push("leakyreluAlpha");
|
|
21
|
-
let f = "rc.x", v = "rc.x";
|
|
22
|
-
e[0] < s[0] ? f = `imod(rc.x, ${e[0]})` : s[0] < e[0] && (v = `imod(rc.x, ${s[0]})`), this.userCode = `
|
|
23
|
-
${c}
|
|
24
|
-
// Don't use uniform for sharedDimensionPacked for performance.
|
|
25
|
-
const float sharedDimension = ${h}.0;
|
|
26
|
-
|
|
27
|
-
vec4 dot2x2ARowBCol(ivec3 rc) {
|
|
28
|
-
vec4 result = vec4(0);
|
|
29
|
-
int batchA = ${f};
|
|
30
|
-
int batchB = ${v};
|
|
31
|
-
for (int i = 0; i < ${h}; i++) {
|
|
32
|
-
vec4 a = getMatrixA(batchA, ${d});
|
|
33
|
-
vec4 b = getMatrixB(batchB, ${b});
|
|
34
|
-
|
|
35
|
-
// These swizzled products need to be separately added.
|
|
36
|
-
// See: https://github.com/tensorflow/tfjs/issues/1735
|
|
37
|
-
result += (${x[0]} * ${m[0]});
|
|
38
|
-
result += (${x[1]} * ${m[1]});
|
|
39
|
-
}
|
|
40
|
-
return result;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
void main() {
|
|
44
|
-
ivec3 rc = getOutputCoords();
|
|
45
|
-
vec4 result = dot2x2ARowBCol(rc);
|
|
46
|
-
|
|
47
|
-
${$}
|
|
48
|
-
|
|
49
|
-
${g}
|
|
50
|
-
|
|
51
|
-
setOutput(result);
|
|
52
|
-
}
|
|
53
|
-
`;
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
const S = 0.7978845608028654, w = 0.044715, j = `
|
|
57
|
-
vec4 x3 = x * x * x;
|
|
58
|
-
vec4 inner = x + ${w} * x3;
|
|
59
|
-
inner = ${S} * inner;
|
|
60
|
-
inner = vec4(
|
|
61
|
-
abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
|
|
62
|
-
abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
|
|
63
|
-
abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
|
|
64
|
-
abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
|
|
65
|
-
);
|
|
66
|
-
inner = 0.5 * (1.0 + inner);
|
|
67
|
-
vec4 result = x * inner;
|
|
68
|
-
return result;
|
|
69
|
-
`, q = `
|
|
70
|
-
vec4 a2 = a * a;
|
|
71
|
-
vec4 a3 = a2 * a;
|
|
72
|
-
vec4 u = ${S} * (a + ${w} * a3);
|
|
73
|
-
vec4 t = vec4(
|
|
74
|
-
abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
|
|
75
|
-
abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
|
|
76
|
-
abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
|
|
77
|
-
abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
|
|
78
|
-
);
|
|
79
|
-
vec4 sech2 = 1.0 - t * t;
|
|
80
|
-
vec4 du_dx = ${S} * (1.0 + 3.0 * ${w} * a2);
|
|
81
|
-
vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
|
|
82
|
-
return dgelu * b;
|
|
83
|
-
`, se = 1e3;
|
|
84
|
-
function O({
|
|
85
|
-
a: t,
|
|
86
|
-
b: e,
|
|
87
|
-
transposeA: s,
|
|
88
|
-
transposeB: a,
|
|
89
|
-
backend: n,
|
|
90
|
-
activationSnippet: o,
|
|
91
|
-
multiplier: r
|
|
92
|
-
}) {
|
|
93
|
-
const i = t.shape.length, u = e.shape.length, l = s ? t.shape[i - 2] : t.shape[i - 1], p = a ? e.shape[u - 1] : e.shape[u - 2], h = s ? t.shape[i - 1] : t.shape[i - 2], d = a ? e.shape[u - 2] : e.shape[u - 1], b = t.shape.slice(0, -2), x = e.shape.slice(0, -2), m = G(b), c = G(x), $ = L(t.shape.slice(0, -2), e.shape.slice(0, -2)).concat([h, d]);
|
|
94
|
-
U(
|
|
95
|
-
l === p,
|
|
96
|
-
() => `Error in matMul: inner shapes (${l}) and (${p}) of Tensors with shapes ${t.shape} and ${e.shape} and transposeA=${s} and transposeB=${a} must match.`
|
|
97
|
-
);
|
|
98
|
-
const f = s ? [m, l, h] : [m, h, l], v = a ? [c, d, p] : [c, p, d], A = M({ inputs: { x: t }, backend: n, attrs: { shape: f } }), y = M({ inputs: { x: e }, backend: n, attrs: { shape: v } }), D = [A, y], _ = Math.max(m, c), E = o, N = F(t.dtype, e.dtype), T = new W(
|
|
99
|
-
f,
|
|
100
|
-
v,
|
|
101
|
-
[_, h, d],
|
|
102
|
-
s,
|
|
103
|
-
a,
|
|
104
|
-
!1,
|
|
105
|
-
E,
|
|
106
|
-
!!r,
|
|
107
|
-
!1
|
|
108
|
-
), k = [A, y];
|
|
109
|
-
r && k.push(r);
|
|
110
|
-
const z = n.runWebGLProgram(T, k, N), K = M({ inputs: { x: z }, backend: n, attrs: { shape: $ } });
|
|
111
|
-
D.push(z);
|
|
112
|
-
for (const P of D)
|
|
113
|
-
n.disposeIntermediateTensorInfo(P);
|
|
114
|
-
return K;
|
|
115
|
-
}
|
|
116
|
-
function J(t) {
|
|
117
|
-
const { inputs: e, backend: s } = t, { x: a, kernel: n } = e;
|
|
118
|
-
if (a === void 0 || n === void 0)
|
|
119
|
-
throw new Error("BatchMatMul requires two input tensors.");
|
|
120
|
-
return O({
|
|
121
|
-
a,
|
|
122
|
-
b: n,
|
|
123
|
-
transposeA: !1,
|
|
124
|
-
transposeB: !1,
|
|
125
|
-
backend: s,
|
|
126
|
-
activationSnippet: j
|
|
127
|
-
});
|
|
128
|
-
}
|
|
129
|
-
const Q = {
|
|
130
|
-
kernelName: "MatMulGelu",
|
|
131
|
-
backendName: "webgl",
|
|
132
|
-
kernelFunc: J
|
|
133
|
-
};
|
|
134
|
-
C(Q);
|
|
135
|
-
function V(t) {
|
|
136
|
-
const { dy: e, x: s, kernel: a } = t.inputs, n = t.backend;
|
|
137
|
-
return R(() => {
|
|
138
|
-
const o = I().makeTensorFromTensorInfo(
|
|
139
|
-
O({
|
|
140
|
-
a: s,
|
|
141
|
-
b: a,
|
|
142
|
-
transposeA: !1,
|
|
143
|
-
transposeB: !1,
|
|
144
|
-
backend: n,
|
|
145
|
-
activationSnippet: q,
|
|
146
|
-
multiplier: e
|
|
147
|
-
})
|
|
148
|
-
), r = B(o, a, !1, !0), i = B(s, o, !0, !1);
|
|
149
|
-
return [r, i];
|
|
150
|
-
});
|
|
151
|
-
}
|
|
152
|
-
const X = {
|
|
153
|
-
kernelName: "MatMulGeluGrad",
|
|
154
|
-
backendName: "webgl",
|
|
155
|
-
kernelFunc: V
|
|
156
|
-
};
|
|
157
|
-
C(X);
|
|
158
|
-
export {
|
|
159
|
-
W as M,
|
|
160
|
-
se as a,
|
|
161
|
-
O as b,
|
|
162
|
-
J as c
|
|
163
|
-
};
|
package/dist/mat_mul-DhG0Newp.js
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import { o as m, q as s, B as c, E as M, D as p } from "./index-CUXkjxiT.js";
|
|
2
|
-
function f(e, o, n = !1, l = !1) {
|
|
3
|
-
let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
|
|
4
|
-
[a, t] = c(a, t);
|
|
5
|
-
const r = { a, b: t }, u = { transposeA: n, transposeB: l };
|
|
6
|
-
return M.runKernel(p, r, u);
|
|
7
|
-
}
|
|
8
|
-
const i = /* @__PURE__ */ m({ matMul_: f });
|
|
9
|
-
export {
|
|
10
|
-
i as m
|
|
11
|
-
};
|