npm - @genai-fi/nanogpt - Versions diffs - 0.9.0 → 0.10.0 - Mend

@genai-fi/nanogpt 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (343) hide show

package/README.md +352 -14
package/dist/Generator.js +69 -78
package/dist/{RealDiv-D4EzDsC0.js → RealDiv-DgA3z9oO.js} +32 -206
package/dist/Reshape-CF6odzV4.js +16 -0
package/dist/Reshape-_kILl6tK.js +81 -0
package/dist/TeachableLLM.js +28 -22
package/dist/Trainer.d.ts +2 -0
package/dist/Trainer.js +3 -2
package/dist/{axis_util-TbGYJ208.js → axis_util-BvHEw88j.js} +7 -23
package/dist/backend.d.ts +2 -1
package/dist/backend.js +10 -4
package/dist/backend_util-D-rUb2ty.js +474 -0
package/dist/backend_webgpu-B0u2ndUn.js +547 -0
package/dist/binary_op_util-pKXltfxI.js +192 -0
package/dist/broadcast_to-CwF7XIeu.js +30 -0
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +3 -3
package/dist/checks/check.d.ts +1 -1
package/dist/checks/check.js +8 -8
package/dist/checks/gelu.js +2 -2
package/dist/checks/index.d.ts +2 -0
package/dist/checks/index.js +7 -5
package/dist/checks/matMulGelu.js +6 -6
package/dist/checks/normRMS.js +7 -7
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/packUnpack.d.ts +1 -0
package/dist/checks/packUnpack.js +18 -0
package/dist/checks/qkv.js +12 -27
package/dist/checks/rope.js +2 -2
package/dist/checks/weights.js +18 -16
package/dist/complex-CSlYz-2T.js +13 -0
package/dist/complex_util-Yc1A_gV1.js +55 -0
package/dist/concat-BHlIJeyT.js +19 -0
package/dist/concat_util-DcJk7YHS.js +22 -0
package/dist/data/docx.js +1 -1
package/dist/data/parquet.js +2 -2
package/dist/data/pdf.js +1 -1
package/dist/data/textLoader.js +1 -1
package/dist/{dataset-DlZtKmBq.js → dataset-0xP8GjwI.js} +136 -236
package/dist/dropout-C1pM3f11.js +99 -0
package/dist/expand_dims-BPG4fwBP.js +13 -0
package/dist/exports_initializers-xuidcwI4.js +7 -0
package/dist/gather-DykLGqmW.js +10 -0
package/dist/{gelu-Bp_-935b.js → gelu-CNLFZWea.js} +11 -10
package/dist/{gpgpu_math-CDaYiyE_.js → gpgpu_math-DDVJCn6-.js} +90 -265
package/dist/{index-C4L8Cm77.js → index-CieiGp4Y.js} +14 -14
package/dist/index-CjOj7j-u.js +7308 -0
package/dist/{index-Tf7vU29b.js → index-Cp39cXWe.js} +3 -10
package/dist/{index-Dwqa6Zy2.js → index-DvYrXKkX.js} +2 -2
package/dist/index-ZyQhjEPo.js +2157 -0
package/dist/{jszip.min-CjP2V1VV.js → jszip.min-Bz5-11Bk.js} +56 -57
package/dist/kernel_funcs_utils-Dg_-E44D.js +308 -0
package/dist/layers/BaseLayer.d.ts +1 -0
package/dist/layers/BaseLayer.js +7 -6
package/dist/layers/CausalSelfAttention.d.ts +0 -1
package/dist/layers/CausalSelfAttention.js +56 -55
package/dist/layers/MLP.js +15 -16
package/dist/layers/PositionEmbedding.js +5 -14
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.d.ts +2 -0
package/dist/layers/RoPECache.js +22 -17
package/dist/layers/TiedEmbedding.js +22 -17
package/dist/layers/TransformerBlock.js +21 -20
package/dist/loader/load.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +39 -33
package/dist/loader/save.js +1 -1
package/dist/log_sum_exp-DWI-76TI.js +41 -0
package/dist/main.d.ts +8 -0
package/dist/main.js +63 -52
package/dist/matMul16--R5hOwDG.js +77 -0
package/dist/mat_mul-DeAh4uTH.js +12 -0
package/dist/mod-Gt1rMB4n.js +12 -0
package/dist/models/NanoGPTV1.js +40 -31
package/dist/models/model.d.ts +2 -0
package/dist/models/model.js +37 -29
package/dist/{mulmat_packed_gpu-BT60jmzP.js → mulmat_packed_gpu-BMFhLwta.js} +1 -17
package/dist/{non_max_suppression_impl-CsEgBuMA.js → non_max_suppression_impl-B2W7YjZB.js} +0 -32
package/dist/ones-CAMiP4I2.js +15 -0
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.d.ts +1 -1
package/dist/ops/adamMoments.js +4 -4
package/dist/ops/add16.d.ts +2 -0
package/dist/ops/add16.js +9 -0
package/dist/ops/appendCache.js +16 -9
package/dist/ops/attentionMask.js +4 -4
package/dist/ops/concat16.d.ts +2 -0
package/dist/ops/concat16.js +9 -0
package/dist/ops/cpu/adamAdjust.js +14 -13
package/dist/ops/cpu/adamMoments.js +10 -9
package/dist/ops/cpu/appendCache.js +9 -8
package/dist/ops/cpu/attentionMask.js +15 -14
package/dist/ops/cpu/fusedSoftmax.js +13 -12
package/dist/ops/cpu/gatherSub.js +9 -24
package/dist/ops/cpu/gelu.js +13 -12
package/dist/ops/cpu/matMul16.d.ts +1 -0
package/dist/ops/cpu/matMul16.js +16 -0
package/dist/ops/cpu/matMulGelu.js +18 -16
package/dist/ops/cpu/matMulMul.js +8 -7
package/dist/ops/cpu/mulDropout.js +4 -3
package/dist/ops/cpu/normRMS.js +11 -10
package/dist/ops/cpu/qkv.js +17 -13
package/dist/ops/cpu/rope.js +23 -22
package/dist/ops/cpu/scatterSub.js +16 -30
package/dist/ops/dot16.d.ts +2 -0
package/dist/ops/dot16.js +42 -0
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/add16.d.ts +1 -0
package/dist/ops/grads/add16.js +27 -0
package/dist/ops/grads/attentionMask.js +12 -19
package/dist/ops/grads/gelu.js +4 -3
package/dist/ops/grads/matMul16.d.ts +2 -0
package/dist/ops/grads/matMul16.js +9 -0
package/dist/ops/grads/matMulGelu.js +8 -7
package/dist/ops/grads/normRMS.js +8 -7
package/dist/ops/grads/{fusedSoftmax.d.ts → pack16.d.ts} +1 -1
package/dist/ops/grads/pack16.js +7 -0
package/dist/ops/grads/qkv.d.ts +3 -1
package/dist/ops/grads/qkv.js +28 -22
package/dist/ops/grads/rope.d.ts +2 -1
package/dist/ops/grads/rope.js +6 -13
package/dist/ops/grads/softmax16.d.ts +2 -0
package/dist/ops/grads/softmax16.js +26 -0
package/dist/ops/grads/unpack16.d.ts +2 -0
package/dist/ops/grads/unpack16.js +6 -0
package/dist/ops/grads/utils.d.ts +3 -0
package/dist/ops/grads/utils.js +10 -0
package/dist/ops/matMul16.d.ts +15 -0
package/dist/ops/matMul16.js +13 -0
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mul16.d.ts +2 -0
package/dist/ops/mul16.js +8 -0
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/pack16.d.ts +2 -0
package/dist/ops/pack16.js +6 -0
package/dist/ops/qkv.d.ts +1 -1
package/dist/ops/qkv.js +8 -4
package/dist/ops/reshape16.d.ts +2 -0
package/dist/ops/reshape16.js +43 -0
package/dist/ops/rope.d.ts +1 -1
package/dist/ops/rope.js +8 -10
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/slice16.d.ts +2 -0
package/dist/ops/slice16.js +9 -0
package/dist/ops/softmax16.d.ts +2 -0
package/dist/ops/softmax16.js +12 -0
package/dist/ops/sub16.d.ts +2 -0
package/dist/ops/sub16.js +8 -0
package/dist/ops/sum16.d.ts +2 -0
package/dist/ops/sum16.js +13 -0
package/dist/ops/transpose16.d.ts +3 -0
package/dist/ops/transpose16.js +41 -0
package/dist/ops/unpack16.d.ts +2 -0
package/dist/ops/unpack16.js +6 -0
package/dist/ops/webgl/adamAdjust.js +3 -2
package/dist/ops/webgl/adamMoments.js +2 -1
package/dist/ops/webgl/appendCache.js +2 -1
package/dist/ops/webgl/attentionMask.js +5 -4
package/dist/ops/webgl/fusedSoftmax.js +6 -4
package/dist/ops/webgl/gatherSub.js +7 -6
package/dist/ops/webgl/gelu.js +3 -2
package/dist/ops/webgl/log.js +12 -27
package/dist/ops/webgl/matMul16.d.ts +1 -0
package/dist/ops/webgl/matMul16.js +37 -0
package/dist/ops/webgl/matMulGelu.js +17 -15
package/dist/ops/webgl/matMulMul.js +13 -12
package/dist/ops/webgl/mulDropout.js +9 -8
package/dist/ops/webgl/normRMS.js +8 -7
package/dist/ops/webgl/qkv.js +6 -5
package/dist/ops/webgl/rope.js +11 -10
package/dist/ops/webgl/scatterSub.js +6 -5
package/dist/ops/webgpu/adamAdjust.js +12 -10
package/dist/ops/webgpu/adamMoments.js +27 -22
package/dist/ops/webgpu/add16.d.ts +1 -0
package/dist/ops/webgpu/add16.js +14 -0
package/dist/ops/webgpu/appendCache.js +64 -17
package/dist/ops/webgpu/attentionMask.js +19 -62
package/dist/ops/webgpu/attentionMask32_program.d.ts +19 -0
package/dist/ops/webgpu/attentionMask32_program.js +54 -0
package/dist/ops/webgpu/concat16.d.ts +19 -0
package/dist/ops/webgpu/concat16.js +128 -0
package/dist/ops/webgpu/gatherSub.js +9 -7
package/dist/ops/webgpu/gelu.js +78 -31
package/dist/ops/webgpu/index.js +12 -0
package/dist/ops/webgpu/matMul16.d.ts +1 -0
package/dist/ops/webgpu/matMul16.js +58 -0
package/dist/ops/webgpu/matMul16_program.d.ts +42 -0
package/dist/ops/webgpu/matMul16_program.js +336 -0
package/dist/ops/webgpu/mul16.d.ts +1 -0
package/dist/ops/webgpu/mul16.js +14 -0
package/dist/ops/webgpu/normRMS.js +21 -40
package/dist/ops/webgpu/normRMS16_program.d.ts +9 -0
package/dist/ops/webgpu/normRMS16_program.js +24 -0
package/dist/ops/webgpu/normRMS32_program.d.ts +9 -0
package/dist/ops/webgpu/normRMS32_program.js +24 -0
package/dist/ops/webgpu/normRMSGrad.js +113 -64
package/dist/ops/webgpu/pack16.d.ts +1 -0
package/dist/ops/webgpu/pack16.js +19 -0
package/dist/ops/webgpu/pack16_program.d.ts +19 -0
package/dist/ops/webgpu/pack16_program.js +92 -0
package/dist/ops/webgpu/qkv.js +20 -55
package/dist/ops/webgpu/rope.js +77 -22
package/dist/ops/webgpu/scatterSub.js +9 -7
package/dist/ops/webgpu/slice16.d.ts +7 -0
package/dist/ops/webgpu/slice16.js +71 -0
package/dist/{variable-Bm2OFwGI.js → ops/webgpu/softmax16.d.ts} +2 -8
package/dist/ops/webgpu/softmax16.js +23 -0
package/dist/ops/webgpu/softmax16_program.d.ts +13 -0
package/dist/ops/webgpu/softmax16_program.js +73 -0
package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +17 -0
package/dist/ops/webgpu/softmax16_subgroup_program.js +75 -0
package/dist/ops/webgpu/softmax16grad.d.ts +1 -0
package/dist/ops/webgpu/softmax16grad.js +38 -0
package/dist/ops/webgpu/sub16.d.ts +1 -0
package/dist/ops/webgpu/sub16.js +14 -0
package/dist/ops/webgpu/sum16.d.ts +1 -0
package/dist/ops/webgpu/sum16.js +40 -0
package/dist/ops/webgpu/transpose16.d.ts +1 -0
package/dist/ops/webgpu/transpose16.js +35 -0
package/dist/ops/webgpu/transpose16_program.d.ts +16 -0
package/dist/ops/webgpu/transpose16_program.js +50 -0
package/dist/ops/webgpu/transpose16_shared_program.d.ts +15 -0
package/dist/ops/webgpu/transpose16_shared_program.js +71 -0
package/dist/ops/webgpu/unpack16.d.ts +1 -0
package/dist/ops/webgpu/unpack16.js +49 -0
package/dist/ops/webgpu/utils/binary_op.d.ts +19 -0
package/dist/ops/webgpu/utils/binary_op.js +79 -0
package/dist/ops/webgpu/utils/deviceInfo.d.ts +7 -0
package/dist/ops/webgpu/utils/deviceInfo.js +11 -0
package/dist/ops/webgpu/utils/reductions.d.ts +32 -4
package/dist/ops/webgpu/utils/reductions.js +236 -45
package/dist/ops-CNI3TwqM.js +645 -0
package/dist/pack16-CFUqumar.js +41 -0
package/dist/{papaparse.min-C8l2Kvo1.js → papaparse.min-C0cScC2i.js} +2 -8
package/dist/{parquet-C0Tlmv9c.js → parquet-BE8MU_ge.js} +201 -278
package/dist/patches/PackedTensor.d.ts +12 -0
package/dist/patches/PackedTensor.js +11 -0
package/dist/patches/engine.d.ts +261 -0
package/dist/patches/engine.js +10 -0
package/dist/patches/tape.d.ts +12 -0
package/dist/patches/tape.js +5 -0
package/dist/patches/webgpu_backend.d.ts +18 -0
package/dist/patches/webgpu_backend.js +57 -0
package/dist/{tensor-CZr4dh61.js → patches/webgpu_base.d.ts} +5 -8
package/dist/patches/webgpu_base.js +34 -0
package/dist/patches/webgpu_program.d.ts +36 -0
package/dist/patches/webgpu_program.js +401 -0
package/dist/{pdf-kJD-f258.js → pdf-NIhmP3sq.js} +424 -428
package/dist/random_width-DY6Kk2Dl.js +10051 -0
package/dist/range-BMS52eQi.js +11 -0
package/dist/reciprocal-CTmshQ9J.js +10 -0
package/dist/{register_all_kernels-DIGpEwcf.js → register_all_kernels-Bwu1PTuU.js} +719 -9766
package/dist/relu-yZ2-7WxU.js +10 -0
package/dist/reshape-DevtBWtf.js +10 -0
package/dist/rope-B5UUMsPi.js +32 -0
package/dist/{scatter_nd_util-BQdz--Gn.js → scatter_nd_util-5EL-8VAQ.js} +1 -1
package/dist/selu_util-D1w6yyTO.js +303 -0
package/dist/{shared-DuP7ue-R.js → shared-BRksrJb3.js} +1 -17
package/dist/shared-BuAXb4CI.js +2145 -0
package/dist/sin-BGfy2HZo.js +16 -0
package/dist/slice-D_gkkqZK.js +13 -0
package/dist/slice_util-DtEldBfK.js +261 -0
package/dist/softmax-ZHVebtR1.js +13 -0
package/dist/split-DrfihRpZ.js +10 -0
package/dist/squeeze-DZEpeblb.js +11 -0
package/dist/stack-yOIAalTq.js +13 -0
package/dist/sum-_fzj5ZTB.js +12 -0
package/dist/tensor-DdQUJZlz.js +909 -0
package/dist/tensor-f35l8Odg.js +8 -0
package/dist/tensor1d-CeZuc-Rv.js +12 -0
package/dist/tensor2d-G4Ys2GxX.js +15 -0
package/dist/tensor4d-B8roDgtc.js +15 -0
package/dist/tensor_util-DV-FP5Q3.js +523 -0
package/dist/tfjs_backend-kNyO5L2d.js +653 -0
package/dist/tile-BzyEiF-F.js +13 -0
package/dist/tokeniser/CharTokeniser.js +1 -1
package/dist/tokeniser/bpe.js +1 -1
package/dist/training/Adam.d.ts +2 -1
package/dist/training/Adam.js +12 -28
package/dist/training/AdamExt.d.ts +1 -0
package/dist/training/AdamExt.js +2 -2
package/dist/training/DatasetBuilder.js +3 -20
package/dist/training/FullTrainer.js +82 -64
package/dist/training/Trainer.d.ts +11 -6
package/dist/training/Trainer.js +51 -39
package/dist/training/sparseCrossEntropy.js +3 -3
package/dist/transpose-DKELTqhe.js +38 -0
package/dist/utilities/arrayClose.js +7 -7
package/dist/utilities/dummy.js +35 -27
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/packed.d.ts +7 -0
package/dist/utilities/packed.js +716 -0
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/sentences.d.ts +5 -0
package/dist/utilities/sentences.js +41 -0
package/dist/utilities/weights.js +2 -2
package/dist/variable-Bhn5bHYv.js +7 -0
package/dist/{webgpu_program-DkQJOJSd.js → webgpu_program-Cigz-7RF.js} +15 -44
package/dist/webgpu_util-BBCnKm2X.js +65 -0
package/dist/zeros-2gldETuK.js +14 -0
package/package.json +4 -3
package/dist/Reshape-Bowtk9BP.js +0 -127
package/dist/Reshape-DUqYftGC.js +0 -30
package/dist/backend_util-CJIiDoV1.js +0 -749
package/dist/broadcast_to-DzlNweb8.js +0 -44
package/dist/concat-B912vBbo.js +0 -33
package/dist/dropout-C-csYCLj.js +0 -193
package/dist/exports_initializers-B8iZMgQ0.js +0 -16
package/dist/gather-Dnpgw-YQ.js +0 -25
package/dist/index-BzFyqcy-.js +0 -4457
package/dist/index-C1rx_Ajs.js +0 -12076
package/dist/kernel_funcs_utils-DKLK0Mg3.js +0 -466
package/dist/log_sum_exp-DO6z8tSE.js +0 -103
package/dist/mat_mul-DzjTFx-u.js +0 -27
package/dist/mod-Dobti4j4.js +0 -27
package/dist/ones-tIJeHlq-.js +0 -29
package/dist/ops/fusedSoftmax.d.ts +0 -2
package/dist/ops/fusedSoftmax.js +0 -10
package/dist/ops/grads/fusedSoftmax.js +0 -22
package/dist/ops-LuCMAnmM.js +0 -1525
package/dist/random_width-CXVRloNK.js +0 -13670
package/dist/range-CWcz7xFA.js +0 -26
package/dist/reciprocal-C4rNcM-S.js +0 -25
package/dist/relu-BjCh_SYb.js +0 -25
package/dist/reshape-CnIwVG1c.js +0 -25
package/dist/selu_util-OtRzVwW5.js +0 -719
package/dist/shared-DmRsFyaJ.js +0 -3134
package/dist/sin-gpDNRxE0.js +0 -47
package/dist/slice-d0Vo9XTN.js +0 -28
package/dist/softmax-D7Jj3p_P.js +0 -28
package/dist/split-DK2k5eHf.js +0 -25
package/dist/stack-DFatutCx.js +0 -27
package/dist/sum-CJ0ULhmt.js +0 -27
package/dist/tensor1d-vML0r3q6.js +0 -27
package/dist/tensor2d-D76QGjF3.js +0 -30
package/dist/tensor4d-Df1WlVDY.js +0 -30
package/dist/webgpu_util-pLEV9tks.js +0 -80
package/dist/zeros-Bj5rMYA7.js +0 -52

package/dist/tfjs_backend-kNyO5L2d.js ADDED Viewed

@@ -0,0 +1,653 @@
+import { A as g, B as $, i as _e, E as M, n as x, j as ie, w as ue, R as Te, t as A, G as ge, m as ke, l as Ee, S as Ie } from "./index-ZyQhjEPo.js";
+import { a as y, s as ae, x as le } from "./tensor-DdQUJZlz.js";
+import { t as Le } from "./tensor1d-CeZuc-Rv.js";
+import { r as Ne, d as be } from "./dropout-C1pM3f11.js";
+import { s as C } from "./slice-D_gkkqZK.js";
+import { r as c } from "./reshape-DevtBWtf.js";
+import { g as Ce } from "./gather-DykLGqmW.js";
+import { s as Fe, b as Pe, m as ve, l as je, o as Be } from "./selu_util-D1w6yyTO.js";
+import { a1 as Me, m as fe, a2 as he } from "./tensor_util-DV-FP5Q3.js";
+import { t as Ue } from "./tile-BzyEiF-F.js";
+import { m as w } from "./mat_mul-DeAh4uTH.js";
+import { t as xe } from "./transpose-DKELTqhe.js";
+import { c as j } from "./concat-BHlIJeyT.js";
+function Ge(e, n, t) {
+  const s = $(e, "x", "clipByValue");
+  if (y(n <= t, () => `Error in clip: min (${n}) must be less than or equal to max (${t}).`), n === t)
+    return _e(s.shape, n, s.dtype);
+  const r = { x: s }, o = { clipValueMin: n, clipValueMax: t };
+  return M.runKernel(Me, r, o);
+}
+const Ve = /* @__PURE__ */ g({ clipByValue_: Ge });
+function qe(e) {
+  return j(
+    e,
+    0
+    /* axis */
+  );
+}
+const Je = /* @__PURE__ */ g({ concat1d_: qe });
+function Ke(e, n) {
+  return j(e, n);
+}
+const Re = /* @__PURE__ */ g({ concat2d_: Ke });
+function Ze(e, n) {
+  return j(e, n);
+}
+const We = /* @__PURE__ */ g({ concat3d_: Ze });
+function Ye(e, n) {
+  return j(e, n);
+}
+const He = /* @__PURE__ */ g({ concat4d_: Ye });
+function Qe(e, n, t) {
+  const s = $(e, "x", "slice1d");
+  return y(s.rank === 1, () => `slice1d expects a rank-1 tensor, but got a rank-${s.rank} tensor`), C(s, [n], [t]);
+}
+const Q = /* @__PURE__ */ g({ slice1d_: Qe });
+function Xe(e, n, t) {
+  const s = $(e, "x", "slice2d");
+  return y(s.rank === 2, () => `slice2d expects a rank-2 tensor, but got a rank-${s.rank} tensor`), C(s, n, t);
+}
+const we = /* @__PURE__ */ g({ slice2d_: Xe });
+function ze(e, n, t) {
+  const s = $(e, "x", "slice3d");
+  return y(s.rank === 3, () => `slice3d expects a rank-3 tensor, but got a rank-${s.rank} tensor`), C(s, n, t);
+}
+const X = /* @__PURE__ */ g({ slice3d_: ze });
+function en(e, n, t) {
+  const s = $(e, "x", "slice4d");
+  return y(s.rank === 4, () => `slice4d expects a rank-4 tensor, but got a rank-${s.rank} tensor`), C(s, n, t);
+}
+const U = /* @__PURE__ */ g({ slice4d_: en });
+/*
+ * Fused matMul: multiplies `a` by `b` and hands the optional bias and the
+ * activation to the same kernel call (`he`).
+ *
+ * Options: a, b (operands), transposeA / transposeB (default false), bias,
+ * activation (default "linear"), preluActivationWeights, leakyreluAlpha
+ * (default 0.2).
+ *
+ * Both operands are collapsed to rank 3 ([batch, rows, cols]) before the
+ * kernel runs; the kernel output is reshaped to the batch dims combined by
+ * `ie` followed by the two outer matrix dims. Throws via `y` when the inner
+ * dimensions of the operands differ.
+ */ function nn({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activation: o = "linear", preluActivationWeights: a, leakyreluAlpha: f = 0.2 }) {
+  if (Fe(M.state.gradientDepth, o) === !1) { // Fe rejected fusion for this gradient depth / activation: run the steps separately.
+    let D = w(e, n, t, s);
+    return r != null && (D = x(D, r)), Pe(D, o, a, f);
+  }
+  let i = $(e, "a", "fused matMul"), u = $(n, "b", "fused matMul");
+  [i, u] = fe(i, u); // NOTE(review): fe presumably brings both operands to a common dtype - confirm.
+  const m = t ? i.shape[i.rank - 2] : i.shape[i.rank - 1], d = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], T = t ? i.shape[i.rank - 1] : i.shape[i.rank - 2], h = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], ee = i.shape.slice(0, -2), E = u.shape.slice(0, -2), ne = ae(ee), te = ae(E);
+  y(m === d, () => `Error in fused matMul: inner shapes (${m}) and (${d}) of Tensors with shapes ${i.shape} and ${u.shape} and transposeA=${t} and transposeB=${s} must match.`);
+  const V = ie(i.shape.slice(0, -2), u.shape.slice(0, -2)).concat([T, h]), q = t ? c(i, [ne, m, T]) : c(i, [ne, T, m]), J = s ? c(u, [te, h, d]) : c(u, [te, d, h]);
+  let I; // bias after conversion; stays undefined when no bias was given
+  r != null && (I = $(r, "bias", "fused matMul"), [I] = fe(I, i), ie(V, I.shape));
+  let se; // prelu weights after conversion; stays undefined when none were given
+  a != null && (se = $(a, "prelu weights", "fused matMul"));
+  const re = (D, F) => { // Gradient function: D is the incoming gradient, F the tensors saved by the forward pass.
+    const [S, O, _, B] = F, k = ve(c(D, _.shape), _, o);
+    let L, N; // gradients for `a` and `b`; the formula depends on the transpose flags
+    if (!t && !s ? (L = w(k, O, !1, !0), N = w(S, k, !0, !1)) : !t && s ? (L = w(k, O, !1, !1), N = w(k, S, !0, !1)) : t && !s ? (L = w(O, k, !1, !0), N = w(S, k, !1, !1)) : (L = w(O, k, !0, !0), N = w(k, S, !0, !0)), r != null) {
+      const De = je(B, k); // third gradient, only produced when a bias was supplied
+      return [L, N, De];
+    } else
+      return [L, N];
+  }, oe = {
+    a: q,
+    b: J,
+    bias: I,
+    preluActivationWeights: se
+  }, ce = { transposeA: t, transposeB: s, activation: o, leakyreluAlpha: f };
+  return r == null ? ue((F, S, O) => { // no-bias variant: O is the save callback and receives [a, b, output]
+    const _ = (
+      // tslint:disable-next-line: no-unnecessary-type-assertion
+      M.runKernel(he, oe, ce)
+    );
+    return O([F, S, _]), { value: c(_, V), gradFunc: re };
+  })(q, J) : ue((F, S, O, _) => { // bias variant: _ is the save callback and receives [a, b, output, bias]
+    const B = (
+      // tslint:disable-next-line: no-unnecessary-type-assertion
+      M.runKernel(he, oe, ce)
+    );
+    return _([F, S, B, O]), { value: c(B, V), gradFunc: re };
+  })(q, J, I);
+}
+const pe = /* @__PURE__ */ g({ fusedMatMul_: nn });
+class $e extends Error {
+  constructor(n) {
+    super(n), Object.setPrototypeOf(this, $e.prototype);
+  }
+}
+class Ae extends Error {
+  constructor(n) {
+    super(n), Object.setPrototypeOf(this, Ae.prototype);
+  }
+}
+class l extends Error {
+  constructor(n) {
+    super(n), Object.setPrototypeOf(this, l.prototype);
+  }
+}
+class v extends Error {
+  constructor(n) {
+    super(n), Object.setPrototypeOf(this, v.prototype);
+  }
+}
+class z extends Error {
+  constructor(n) {
+    super(n), Object.setPrototypeOf(this, z.prototype);
+  }
+}
+function In(e, n) {
+  if (Array.isArray(e)) {
+    let t = [];
+    for (let s = 0; s < n; s++)
+      t = t.concat(e);
+    return t;
+  } else {
+    const t = new Array(n);
+    return t.fill(e), t;
+  }
+}
+function de(e, n) {
+  if (!e)
+    throw new z(n);
+}
+function Ln(e, n) {
+  let t = 0;
+  for (const s of e)
+    s === n && t++;
+  return t;
+}
+function Nn(e) {
+  return e.length === 1 ? e[0] : e;
+}
+function bn(e) {
+  return Array.isArray(e) ? e : [e];
+}
+function Cn(e) {
+  const t = e.replace(/(.)([A-Z][a-z0-9]+)/g, "$1_$2").replace(/([a-z])([A-Z])/g, "$1_$2").toLowerCase();
+  return t[0] !== "_" ? t : "private" + t;
+}
+function Fn(e) {
+  return e.length <= 1 || e.indexOf("_") === -1 ? e : e.replace(/[_]+(\w|$)/g, (n, t) => t.toUpperCase());
+}
+let p = {};
+function Pn(e) {
+  if (e == null)
+    return null;
+  const n = {};
+  return n.className = e.getClassName(), n.config = e.getConfig(), n;
+}
+function W(e) {
+  if (!(e == null || typeof e != "object"))
+    if (Array.isArray(e))
+      e.forEach((n) => W(n));
+    else {
+      const n = Object.keys(e);
+      for (const t of n) {
+        const s = e[t];
+        s != null && typeof s == "object" && (!Array.isArray(s) && s.type === "ndarray" && typeof s.value == "number" ? e[t] = s.value : W(s));
+      }
+    }
+}
+function vn(e, n = {}, t = {}, s = "object", r = !1) {
+  if (typeof e == "string") {
+    const o = e;
+    let a;
+    if (o in t)
+      a = t[o];
+    else if (o in p)
+      a = p[o];
+    else if (a = n[o], a == null)
+      throw new l(`Unknown ${s}: ${e}. This may be due to one of the following reasons:
+1. The ${s} is defined in Python, in which case it needs to be ported to TensorFlow.js or your JavaScript code.
+2. The custom ${s} is defined in JavaScript, but is not registered properly with tf.serialization.registerClass().`);
+    return a;
+  } else {
+    const o = e;
+    if (o.className == null || o.config == null)
+      throw new l(`${s}: Improper config format: ${JSON.stringify(o)}.
+'className' and 'config' must set.`);
+    const a = o.className;
+    let f, i;
+    if (a in t ? [f, i] = t[a] : a in p ? [f, i] = p.className : a in n && ([f, i] = n[a]), f == null)
+      throw new l(`Unknown ${s}: ${a}. This may be due to one of the following reasons:
+1. The ${s} is defined in Python, in which case it needs to be ported to TensorFlow.js or your JavaScript code.
+2. The custom ${s} is defined in JavaScript, but is not registered properly with tf.serialization.registerClass().`);
+    if (i != null) {
+      const u = {};
+      for (const h of Object.keys(p))
+        u[h] = p[h];
+      for (const h of Object.keys(t))
+        u[h] = t[h];
+      const m = o.config;
+      m.customObjects = u;
+      const d = Object.assign({}, p);
+      for (const h of Object.keys(t))
+        p[h] = t[h];
+      W(o.config);
+      const T = i(f, o.config, t, r);
+      return p = Object.assign({}, d), T;
+    } else {
+      const u = Object.assign({}, p);
+      for (const d of Object.keys(t))
+        p[d] = t[d];
+      const m = new f(o.config);
+      return p = Object.assign({}, u), m;
+    }
+  }
+}
+function tn(e, n) {
+  return e < n ? -1 : e > n ? 1 : 0;
+}
+function jn(e, n) {
+  return -1 * tn(e, n);
+}
+function Bn(e) {
+  if (e == null)
+    return e;
+  const n = [];
+  for (const t of e)
+    n.indexOf(t) === -1 && n.push(t);
+  return n;
+}
+function Mn(e) {
+  if (e == null)
+    throw new l(`Invalid value in obj: ${JSON.stringify(e)}`);
+  for (const n in e)
+    if (e.hasOwnProperty(n))
+      return !1;
+  return !0;
+}
+function G(e, n, t) {
+  if (t != null && e.indexOf(t) < 0)
+    throw new l(`${t} is not a valid ${n}.  Valid values are ${e} or null/undefined.`);
+}
+function Un(e, n, t = 0, s = 1 / 0) {
+  return de(t >= 0), de(s >= t), Array.isArray(e) && e.length >= t && e.length <= s && e.every((r) => typeof r === n);
+}
+function sn(e, n) {
+  Array.isArray(e) ? (y(e.length > 0, () => `${n} is unexpectedly an empty array.`), e.forEach((t, s) => sn(t, `element ${s + 1} of ${n}`))) : y(Number.isInteger(e) && e > 0, () => `Expected ${n} to be a positive integer, but got ${Se(e)}.`);
+}
+function Se(e) {
+  return e === null ? "null" : Array.isArray(e) ? "[" + e.map((n) => Se(n)).join(",") + "]" : typeof e == "string" ? `"${e}"` : `${e}`;
+}
+function xn(e, n, t) {
+  let s = t != null ? t() : le(), r;
+  return (...a) => {
+    const f = t != null ? t() : le();
+    return f - s < n || (s = f, r = e(...a)), r;
+  };
+}
+function Gn(e) {
+  return e === "relu" ? "relu" : e === "linear" ? "linear" : e === "elu" ? "elu" : null;
+}
+const rn = ["channelsFirst", "channelsLast"], on = ["nearest", "bilinear"], cn = ["valid", "same", "causal"], un = ["max", "avg"], Vn = ["sum", "mul", "concat", "ave"];
+const b = /* @__PURE__ */ new Map();
+function an(e) {
+  G(rn, "DataFormat", e);
+}
+function qn(e) {
+  G(on, "InterpolationFormat", e);
+}
+function Jn(e) {
+  G(cn, "PaddingMode", e);
+}
+function Kn(e) {
+  G(un, "PoolMode", e);
+}
+const P = [], me = "/";
+function Rn(e, n) {
+  P.push(e);
+  try {
+    const t = n();
+    return P.pop(), t;
+  } catch (t) {
+    throw P.pop(), t;
+  }
+}
+function ln() {
+  return P.length === 0 ? "" : P.join(me) + me;
+}
+function Zn(e) {
+  if (!Oe(e))
+    throw new Error("Not a valid tensor name: '" + e + "'");
+  return ln() + e;
+}
+function Wn(e) {
+  if (!Oe(e))
+    throw new Error("Not a valid tensor name: '" + e + "'");
+  b.has(e) || b.set(e, 0);
+  const n = b.get(e);
+  if (b.set(e, b.get(e) + 1), n > 0) {
+    const t = `${e}_${n}`;
+    return b.set(t, 1), t;
+  } else
+    return e;
+}
+const fn = new RegExp(/^[A-Za-z0-9][-A-Za-z0-9\._\/]*$/);
+function Oe(e) {
+  return !!e.match(fn);
+}
+function Yn(e) {
+  return e === parseInt(e.toString(), 10);
+}
+function ye(e, n, t) {
+  n == null && (n = 0), t == null && (t = e.length);
+  let s = 1;
+  for (let r = n; r < t; ++r)
+    s *= e[r];
+  return s;
+}
+function Hn(e) {
+  if (e.length === 0)
+    return Number.NaN;
+  let n = Number.POSITIVE_INFINITY;
+  for (let t = 0; t < e.length; t++) {
+    const s = e[t];
+    s < n && (n = s);
+  }
+  return n;
+}
+function Qn(e) {
+  if (e.length === 0)
+    return Number.NaN;
+  let n = Number.NEGATIVE_INFINITY;
+  for (let t = 0; t < e.length; t++) {
+    const s = e[t];
+    s > n && (n = s);
+  }
+  return n;
+}
+function Xn(e, n) {
+  if (n < e)
+    throw new l(`end (${n}) < begin (${e}) is forbidden.`);
+  const t = [];
+  for (let s = e; s < n; ++s)
+    t.push(s);
+  return t;
+}
+let K;
+function zn() {
+  return K == null && (K = Te().epsilon()), K;
+}
+function Y() {
+  return "channelsLast";
+}
+function et(e, n) {
+  return ge(e, n);
+}
+function hn(e, n = -1) {
+  const t = e.shape.slice();
+  return n < 0 && (n = t.length + n + 1), t.splice(n, 0, 1), c(e, t);
+}
+function nt(e, n) {
+  return A(() => {
+    if (e.shape.length !== 2)
+      throw new l(`repeat() expects a rank-2 tensor, but received a rank-${e.shape.length} tensor.`);
+    const t = hn(e, 1);
+    return pn(t, [1, n, 1]);
+  });
+}
+function tt(e) {
+  const n = [ye(e.shape)];
+  return c(e, n);
+}
+function st(e) {
+  if (e.rank <= 1)
+    throw new l(`batchFlatten requires a minimum rank of 2. Got rank: ${e.rank}.`);
+  const n = [e.shape[0], ye(e.shape, 1)];
+  return c(e, n);
+}
+function R(e, n, t) {
+  return A(() => {
+    switch (e.rank) {
+      case 1:
+        return Q(e, n, t);
+      case 2:
+        return we(e, [n, 0], [t, e.shape[1]]);
+      case 3:
+        return X(e, [n, 0, 0], [t, e.shape[1], e.shape[2]]);
+      case 4:
+        return U(e, [n, 0, 0, 0], [t, e.shape[1], e.shape[2], e.shape[3]]);
+      case 5:
+        return C(e, [n, 0, 0, 0, 0], [
+          t,
+          e.shape[1],
+          e.shape[2],
+          e.shape[3],
+          e.shape[4]
+        ]);
+      case 6:
+        return C(e, [n, 0, 0, 0, 0, 0], [
+          t,
+          e.shape[1],
+          e.shape[2],
+          e.shape[3],
+          e.shape[4],
+          e.shape[5]
+        ]);
+      default:
+        throw new l(`sliceAlongFirstAxis() received an unsupported tensor rank: ${e.rank}`);
+    }
+  });
+}
+function Z(e, n, t) {
+  return A(() => {
+    switch (e.rank) {
+      case 1:
+        return Q(e, n, t);
+      case 2:
+        return we(e, [0, n], [e.shape[0], t]);
+      case 3:
+        return X(e, [0, 0, n], [e.shape[0], e.shape[1], t]);
+      case 4:
+        return U(e, [0, 0, 0, n], [e.shape[0], e.shape[1], e.shape[2], t]);
+      default:
+        throw new l(`sliceAlongLastAxis() received an unsupported tensor rank: ${e.rank}`);
+    }
+  });
+}
+function rt(e, n, t, s) {
+  return A(() => {
+    switch (e.rank) {
+      case 1:
+        return Q(e, n, t);
+      case 2:
+        switch (s) {
+          case 1:
+            return R(e, n, t);
+          case 2:
+            return Z(e, n, t);
+          default:
+            throw new l(`The axis is not within the rank of the tensor ${s}`);
+        }
+      case 3:
+        switch (s) {
+          case 1:
+            return R(e, n, t);
+          case 2:
+            return X(e, [0, n, 0], [e.shape[0], t, e.shape[2]]);
+          case 3:
+            return Z(e, n, t);
+          default:
+            throw new l(`The axis is not within the rank of the tensor ${s}`);
+        }
+      case 4:
+        switch (s) {
+          case 1:
+            return R(e, n, t);
+          case 2:
+            return U(e, [0, n, 0, 0], [e.shape[0], t, e.shape[2], e.shape[3]]);
+          case 3:
+            return U(e, [0, 0, n, 0], [e.shape[0], e.shape[1], t, e.shape[3]]);
+          case 4:
+            return Z(e, n, t);
+          default:
+            throw new l(`The axis is not within the rank of the tensor ${s}`);
+        }
+      default:
+        throw new l(`sliceAlongLastAxis() received an unsupported tensor rank: ${e.rank}`);
+    }
+  });
+}
+function ot(e, n = -1) {
+  let t;
+  return n < 0 && (t = e[0].rank, t !== 0 ? n = t : n = 0), n === e[0].rank && (n = -1), j(e, n);
+}
+function ct(e, n) {
+  switch (e.rank) {
+    case 1:
+      return Je([e, n]);
+    case 2:
+      return Re([e, n], 0);
+    case 3:
+      return We([e, n], 0);
+    case 4:
+      return He([e, n], 0);
+    default:
+      throw new l(`concatAlongFirstAxis() received an unsupported tensor rank: ${e.rank}`);
+  }
+}
+function pn(e, n) {
+  if (Array.isArray(n) || (n = [n]), e.rank !== n.length)
+    throw new l(`The length of input n (${n.length}) does not match the number of dimensions in input x (${e.rank})`);
+  return Ue(e, n);
+}
+function it(e, n = 0, t = 1, s, r) {
+  return Ne(e, n, t, s, r);
+}
+/*
+ * dot: multiplies `e` (x) by `n` (y) through the fused matMul `pe`, with an
+ * optional activation `t` and an optional bias `s`.
+ *
+ * Both inputs must have rank >= 2. When y has rank >= 3, the second-to-last
+ * dimension of y must equal the last dimension of x. Two rank-2 inputs are
+ * multiplied directly; otherwise both are flattened to 2-D, multiplied, and
+ * the result is reshaped to [...x dims without the last, ...y dims without
+ * the second-to-last].
+ *
+ * Throws `v` when a rank or dimension requirement is not met.
+ */ function ut(e, n, t, s) {
+  if (e.rank < 2 || n.rank < 2)
+    throw new v(`dot requires both inputs to be rank >= 2 but got x shape = ${e.shape} and y shape = ${n.shape}`);
+  if (n.rank >= 3) {
+    const r = e.shape.slice(-1)[0], o = n.shape.slice(-2)[0];
+    if (r !== o)
+      throw new v(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${e.shape} and  y shape = ${n.shape}`);
+  }
+  if (e.rank === 2 && n.rank === 2) // plain 2-D case: no reshaping needed
+    return pe({
+      a: e,
+      b: n,
+      transposeA: !1,
+      transposeB: !1,
+      bias: s ? H(e.rank, s, Y()) : null,
+      activation: t
+    });
+  {
+    const r = e.shape.slice(), o = r.pop(); // r: x dims without the last; o: last x dim
+    e = c(e, [-1, o]); // flatten x to 2-D; `e` is the reshaped tensor from here on
+    const a = n.shape.slice(), f = a.pop(), i = a.pop(), u = [...a, f], m = Array.from({ length: n.rank }, (ee, E) => E === 0 ? n.rank - 2 : E <= n.rank - 2 ? E - 1 : E);
+    n = c(xe(n, m), [i, -1]); // permutation m moves y's second-to-last axis to the front before flattening
+    const d = [...r, ...u]; // final output shape
+    return c(pe({
+      a: e,
+      b: n,
+      transposeA: !1,
+      transposeB: !1,
+      bias: s ? H(e.rank, s, Y()) : null,
+      activation: t
+    }), d);
+  }
+}
+function at(e, n, t) {
+  return A(() => (Array.isArray(n) ? n = Le(n, "int32") : n = ge(n, "int32"), Ce(e, n, t)));
+}
+function lt(e) {
+  return ke(e, e);
+}
+function H(e, n, t) {
+  const s = n.shape;
+  if (n.rank !== 1 && n.rank !== e)
+    throw new l(`Unexpected bias dimensions: ${n.rank}; expected it to be 1 or ${e}`);
+  if (e === 5) {
+    if (t === "channelsFirst")
+      return s.length === 1 ? c(n, [1, s[0], 1, 1, 1]) : c(n, [1, s[3], s[0], s[1], s[2]]);
+    if (t === "channelsLast")
+      return s.length === 1 ? c(n, [1, 1, 1, 1, s[0]]) : c(n, [1].concat(s));
+  } else if (e === 4) {
+    if (t === "channelsFirst")
+      return s.length === 1 ? c(n, [1, s[0], 1, 1]) : c(n, [1, s[2], s[0], s[1]]);
+    if (t === "channelsLast")
+      return s.length === 1 ? c(n, [1, 1, 1, s[0]]) : c(n, [1].concat(s));
+  } else if (e === 3) {
+    if (t === "channelsFirst")
+      return s.length === 1 ? c(n, [1, s[0], 1]) : c(n, [1, s[1], s[0]]);
+    if (t === "channelsLast")
+      return s.length === 1 ? c(n, [1, 1, s[0]]) : c(n, [1].concat(s));
+  } else if (e < 3)
+    return n;
+  throw new l(`Unsupported input rank by biasAdd: ${n.rank}`);
+}
+function ft(e, n, t) {
+  return A(() => (t == null && (t = Y()), an(t), x(e, H(e.rank, n, t))));
+}
+function ht(e, n = 1) {
+  if (n !== 1)
+    throw new v(`Support for alpha values other than 1 (${n}) is not implemented yet.`);
+  return Be(e);
+}
+function pt(e) {
+  return A(() => Ee(e, x(Ie(e), 1)));
+}
+function dt(e, n, t, s) {
+  return A(() => be(e, n, t, s));
+}
+function mt(e) {
+  return A(() => {
+    const n = x(0.5, ke(0.2, e));
+    return Ve(n, 0, 1);
+  });
+}
+function gt(e, n, t = !1) {
+  return t ? e() : n();
+}
+export {
+  st as $,
+  $e as A,
+  hn as B,
+  at as C,
+  Xn as D,
+  Ln as E,
+  ht as F,
+  mt as G,
+  pt as H,
+  Qn as I,
+  Yn as J,
+  Un as K,
+  ft as L,
+  rt as M,
+  v as N,
+  qn as O,
+  sn as P,
+  Gn as Q,
+  Ae as R,
+  Jn as S,
+  Y as T,
+  pn as U,
+  l as V,
+  Hn as W,
+  ct as X,
+  gt as Y,
+  dt as Z,
+  ot as _,
+  an as a,
+  nt as a0,
+  Kn as a1,
+  Vn as a2,
+  ye as b,
+  G as c,
+  ut as d,
+  vn as e,
+  Wn as f,
+  Zn as g,
+  Nn as h,
+  bn as i,
+  Ve as j,
+  zn as k,
+  xn as l,
+  tt as m,
+  Rn as n,
+  lt as o,
+  de as p,
+  et as q,
+  it as r,
+  Pn as s,
+  Cn as t,
+  Fn as u,
+  Bn as v,
+  jn as w,
+  In as x,
+  Mn as y,
+  R as z
+};

package/dist/tile-BzyEiF-F.js ADDED Viewed

@@ -0,0 +1,13 @@
+import { A as a, B as e, E as i } from "./index-ZyQhjEPo.js";
+import { T as m } from "./tensor_util-DV-FP5Q3.js";
+import { a as c } from "./tensor-DdQUJZlz.js";
+function l(n, t) {
+  const r = e(n, "x", "tile", "string_or_numeric");
+  c(r.rank === t.length, () => `Error in transpose: rank of input ${r.rank} must match length of reps ${t}.`);
+  const o = { x: r }, s = { reps: t };
+  return i.runKernel(m, o, s);
+}
+const x = /* @__PURE__ */ a({ tile_: l });
+export {
+  x as t
+};

package/dist/tokeniser/CharTokeniser.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { E as k } from "../index-Dwqa6Zy2.js";
+import { E as k } from "../index-DvYrXKkX.js";
 const u = ["<eos>", "<unk>"];
 class b extends k {
   vocabSize = 0;

package/dist/tokeniser/bpe.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import l from "../utilities/tokenParse.js";
-import { E as f } from "../index-Dwqa6Zy2.js";
+import { E as f } from "../index-DvYrXKkX.js";
 function u(o, e) {
   return `${o}-::-${e}`;
 }

package/dist/training/Adam.d.ts CHANGED Viewed

@@ -5,13 +5,14 @@ export declare class AdamOptimizer extends Optimizer {
     protected learningRate: number;
     protected beta1: number;
     protected beta2: number;
+    protected lossScaling: number;
     protected epsilon: number | null;
     /** @nocollapse */
     static get className(): string;
     private accBeta1;
     private accBeta2;
     private accumulatedMoments;
-    constructor(learningRate: number, beta1: number, beta2: number, epsilon?: number | null);
+    constructor(learningRate: number, beta1: number, beta2: number, lossScaling: number, epsilon?: number | null);
     applyGradients(variableGradients: NamedVariableMap | NamedTensor[]): void;
     dispose(): void;
     getWeights(): Promise<NamedTensor[]>;