npm - @genai-fi/nanogpt - Versions diffs - 0.17.4 → 0.18.0 - Mend

@genai-fi/nanogpt 0.17.4 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (258) hide show

package/dist/Generator.d.ts +2 -15
package/dist/Generator.js +45 -34
package/dist/{RealDiv-CGwv0liw.js → RealDiv-ioj6Z-ox.js} +9 -9
package/dist/{Reshape-BW__R4mZ.js → Reshape-BZC-ebeR.js} +7 -7
package/dist/{Reshape-CPBkTIH2.js → Reshape-pwprEaej.js} +1 -1
package/dist/TeachableLLM.d.ts +3 -8
package/dist/TeachableLLM.js +61 -44
package/dist/Trainer.d.ts +6 -4
package/dist/Trainer.js +107 -92
package/dist/{axis_util-GTVlo58H.js → axis_util-QWWgLjut.js} +1 -1
package/dist/backend.js +2 -2
package/dist/{backend_util-GaFarB78.js → backend_util-qwSFfxYx.js} +21 -21
package/dist/{backend_webgpu-BqASlsbV.js → backend_webgpu-DI2wXEC2.js} +8 -8
package/dist/{broadcast_to-eS93CCN_.js → broadcast_to-C_EJTVTZ.js} +2 -2
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +5 -5
package/dist/checks/gelu.js +2 -2
package/dist/checks/matMulGelu.js +2 -2
package/dist/checks/normRMS.js +6 -6
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/packUnpack.js +6 -6
package/dist/checks/qkv.js +2 -2
package/dist/checks/rope.js +2 -2
package/dist/{clip_by_value-DDA7rrcT.js → clip_by_value-CLAD4h_I.js} +1 -1
package/dist/complex-3DpPEG9B.js +11 -0
package/dist/{concat-CAQpCret.js → concat-Dqk7Xk7h.js} +5 -5
package/dist/{concat_util-D18dJ4fD.js → concat_util-C1Mxe27t.js} +1 -1
package/dist/{dataset-CGGp1z9P.js → dataset-DlqAN81i.js} +3 -3
package/dist/{dropout_util--NxWuYg2.js → dropout_util-N0z8Os-K.js} +1 -1
package/dist/{expand_dims-Bkd1YD5x.js → expand_dims-D0rBtgT1.js} +4 -4
package/dist/{exports_initializers-CYzKLjN7.js → exports_initializers-DIOZQt_L.js} +1 -1
package/dist/{floor-BQtb-Azg.js → floor-CymuCmTO.js} +1 -1
package/dist/{gather-qIqEqaGn.js → gather-DEyjXNb1.js} +1 -1
package/dist/{gelu-B220X1Go.js → gelu-DpTCC3eB.js} +1 -1
package/dist/{gpgpu_math-BwvV12df.js → gpgpu_math-3bCb5ooU.js} +25 -25
package/dist/{index-CjOWnMXP.js → index-BQvB7LCC.js} +15 -15
package/dist/{index-CUXkjxiT.js → index-DSGwv2Yx.js} +33 -33
package/dist/inference/types.d.ts +16 -0
package/dist/inference/types.js +1 -0
package/dist/{kernel_funcs_utils-pq0CK9co.js → kernel_funcs_utils-DGqzNlHT.js} +6 -6
package/dist/layers/BaseLayer.js +4 -4
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/LoRA.js +4 -4
package/dist/layers/MLP.js +4 -4
package/dist/layers/PositionEmbedding.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +6 -6
package/dist/layers/TransformerBlock.js +1 -1
package/dist/layers/WeightStore.js +2 -2
package/dist/loader/load.d.ts +2 -8
package/dist/loader/loadTransformers.d.ts +2 -8
package/dist/loader/loadTransformers.js +13 -11
package/dist/loader/newZipLoad.d.ts +2 -8
package/dist/loader/newZipLoad.js +25 -10
package/dist/loader/oldZipLoad.js +13 -13
package/dist/loader/save.d.ts +9 -2
package/dist/loader/save.js +64 -55
package/dist/loader/types.d.ts +29 -1
package/dist/main.d.ts +2 -0
package/dist/main.js +45 -43
package/dist/{matMul16-BcVC_E62.js → matMul16-BIT70Vya.js} +3 -3
package/dist/{matMulGelu-JNLZqKQp.js → matMulGelu-CsZnh18H.js} +18 -18
package/dist/mat_mul-DP86qZtZ.js +11 -0
package/dist/mod-BXjLYwvM.js +11 -0
package/dist/models/NanoGPTV1.js +2 -2
package/dist/models/NanoGPTV2.js +2 -2
package/dist/models/model.d.ts +3 -2
package/dist/models/model.js +13 -13
package/dist/{not_equal-hurPF26l.js → not_equal-CkQKkKZy.js} +15 -15
package/dist/{ones-BytntneX.js → ones-DbVB5N58.js} +3 -3
package/dist/ops/adamAdjust.js +3 -3
package/dist/ops/adamMoments.js +3 -3
package/dist/ops/add16.js +1 -1
package/dist/ops/appendCache.js +6 -6
package/dist/ops/attentionMask.js +3 -3
package/dist/ops/concat16.js +3 -3
package/dist/ops/cpu/adamAdjust.js +9 -9
package/dist/ops/cpu/adamMoments.js +5 -5
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +6 -6
package/dist/ops/cpu/fusedSoftmax.js +4 -4
package/dist/ops/cpu/gatherSub.js +5 -5
package/dist/ops/cpu/gelu.js +4 -4
package/dist/ops/cpu/matMul16.js +2 -2
package/dist/ops/cpu/matMulGelu.js +7 -7
package/dist/ops/cpu/matMulMul.js +2 -2
package/dist/ops/cpu/mulDropout.js +5 -5
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +5 -5
package/dist/ops/dot16.js +2 -2
package/dist/ops/dropout.js +6 -6
package/dist/ops/dropout16.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/globalNorm.js +7 -7
package/dist/ops/grads/add16.js +1 -1
package/dist/ops/grads/attentionMask.js +2 -2
package/dist/ops/grads/dropout16.js +1 -1
package/dist/ops/grads/gelu.js +2 -2
package/dist/ops/grads/matMul16.js +3 -3
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/mul16.js +1 -1
package/dist/ops/grads/normRMS.js +7 -7
package/dist/ops/grads/pack16.js +3 -3
package/dist/ops/grads/qkv.js +11 -11
package/dist/ops/grads/rope.js +2 -2
package/dist/ops/grads/softmax16.js +1 -1
package/dist/ops/grads/unpack16.js +2 -2
package/dist/ops/matMul16.js +3 -3
package/dist/ops/matMulGelu.js +6 -6
package/dist/ops/matMulMul.js +3 -3
package/dist/ops/mul16.js +1 -1
package/dist/ops/mulDrop.js +3 -3
package/dist/ops/normRMS.js +4 -4
package/dist/ops/pack16.js +2 -2
package/dist/ops/qkv.js +3 -3
package/dist/ops/reshape16.js +6 -6
package/dist/ops/rope.js +2 -2
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/slice16.js +2 -2
package/dist/ops/softmax16.js +1 -1
package/dist/ops/sub16.js +1 -1
package/dist/ops/sum16.js +6 -6
package/dist/ops/transpose16.js +3 -3
package/dist/ops/unpack16.js +2 -2
package/dist/ops/webgl/adamAdjust.js +2 -2
package/dist/ops/webgl/adamMoments.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/dropout16.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +7 -7
package/dist/ops/webgl/gatherSub.js +3 -3
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMul16.js +13 -13
package/dist/ops/webgl/matMulGelu.js +4 -4
package/dist/ops/webgl/matMulMul.js +2 -2
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +2 -2
package/dist/ops/webgpu/adamAdjust.js +3 -3
package/dist/ops/webgpu/adamMoments.js +3 -3
package/dist/ops/webgpu/add16.js +6 -6
package/dist/ops/webgpu/appendCache.js +3 -3
package/dist/ops/webgpu/attentionMask.js +2 -2
package/dist/ops/webgpu/attentionMask32_program.js +2 -2
package/dist/ops/webgpu/clipScale.js +7 -7
package/dist/ops/webgpu/concat16.js +5 -5
package/dist/ops/webgpu/dropout16.js +6 -6
package/dist/ops/webgpu/gatherSub.js +3 -3
package/dist/ops/webgpu/gelu.js +8 -8
package/dist/ops/webgpu/matMul16.js +16 -16
package/dist/ops/webgpu/matMul16_program.js +2 -2
package/dist/ops/webgpu/mul16.js +5 -5
package/dist/ops/webgpu/norm2.js +1 -1
package/dist/ops/webgpu/normRMS.js +2 -2
package/dist/ops/webgpu/normRMSGrad.js +4 -4
package/dist/ops/webgpu/pack16.js +4 -4
package/dist/ops/webgpu/pack16_program.js +2 -2
package/dist/ops/webgpu/qkv.js +2 -2
package/dist/ops/webgpu/rope.js +3 -3
package/dist/ops/webgpu/scatterSub.js +3 -3
package/dist/ops/webgpu/slice16.js +4 -4
package/dist/ops/webgpu/softmax16.js +4 -4
package/dist/ops/webgpu/softmax16_program.js +2 -2
package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
package/dist/ops/webgpu/softmax16grad.js +4 -4
package/dist/ops/webgpu/sub16.js +6 -6
package/dist/ops/webgpu/sum16.js +3 -3
package/dist/ops/webgpu/transpose16.js +8 -8
package/dist/ops/webgpu/transpose16_program.js +2 -2
package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
package/dist/ops/webgpu/unpack16.js +3 -3
package/dist/ops/webgpu/utils/binary_op.js +3 -3
package/dist/ops/webgpu/utils/reductions.js +5 -5
package/dist/{ops-CsXeTq1P.js → ops-CURIZSVt.js} +100 -100
package/dist/{pack16-bqltoUlR.js → pack16-WlOSOuZA.js} +2 -2
package/dist/patches/webgpu_backend.js +6 -6
package/dist/patches/webgpu_base.js +1 -1
package/dist/patches/webgpu_program.js +2 -2
package/dist/{random_normal-IBRrha8a.js → random_normal-CIm8lk2-.js} +1 -1
package/dist/{random_width-DN5ZtQkM.js → random_width-B_fVXhGx.js} +131 -131
package/dist/{range-C-CjF-LI.js → range-BDxO73mk.js} +1 -1
package/dist/{readers-iz5u3HBo.js → readers-17HLdxVM.js} +2 -2
package/dist/relu-DTvZKBsZ.js +9 -0
package/dist/{reshape-BDOuCSNW.js → reshape-BIN71H3p.js} +1 -1
package/dist/{resize_nearest_neighbor-BojqlfRe.js → resize_nearest_neighbor-C6_0dAnK.js} +41 -41
package/dist/{rope-0j_f1TPm.js → rope-CC5RjmKU.js} +4 -4
package/dist/{scatter_nd_util-ByNJaL6I.js → scatter_nd_util-C-x73Cj6.js} +1 -1
package/dist/{segment_util-Dasb2Zaf.js → segment_util-4zuHV5IG.js} +2 -2
package/dist/{selu_util-BLhIqRkw.js → selu_util-BXdhy_W6.js} +5 -5
package/dist/{shared-CagdqkLh.js → shared-DRWDyk9w.js} +6 -6
package/dist/{shared-3agzAqQ_.js → shared-zTaJ5siv.js} +1 -1
package/dist/slice-BvItlgXu.js +12 -0
package/dist/{slice_util-CC35pLmT.js → slice_util-DPY56GzQ.js} +5 -5
package/dist/{softmax-D4q1LJN7.js → softmax-BLGJqdwx.js} +1 -1
package/dist/split-BN9LkEgS.js +9 -0
package/dist/{squeeze-ho4wLUek.js → squeeze-O_YWJpw_.js} +2 -2
package/dist/{stack-DudVrtmG.js → stack-z6QE7kmP.js} +1 -1
package/dist/{step-BTxPtq1r.js → step-DQY6_ABw.js} +4 -4
package/dist/{sum-BpiwSWvg.js → sum-D39FeU5h.js} +3 -3
package/dist/{tensor-BWFldCso.js → tensor-D8e0Gd7c.js} +1 -1
package/dist/{tensor1d-LMGMIUlr.js → tensor1d-BMl0eZYV.js} +1 -1
package/dist/{tensor2d-BnXMKScO.js → tensor2d-DTtQ1QcT.js} +1 -1
package/dist/{tensor4d-C6UCG_u8.js → tensor4d-Dj4rDssL.js} +1 -1
package/dist/{tfjs_backend-BGnG-ppu.js → tfjs_backend-Bk3PmK91.js} +65 -65
package/dist/{tile-CFy-xTO6.js → tile-CsWlVKKz.js} +1 -1
package/dist/tokeniser/BaseTokeniser.d.ts +4 -1
package/dist/tokeniser/BaseTokeniser.js +21 -5
package/dist/tokeniser/CharTokeniser.d.ts +1 -1
package/dist/tokeniser/CharTokeniser.js +62 -50
package/dist/tokeniser/bpe.d.ts +1 -1
package/dist/tokeniser/bpe.js +41 -35
package/dist/tokeniser/type.d.ts +3 -1
package/dist/training/AdamW.d.ts +3 -0
package/dist/training/AdamW.js +59 -30
package/dist/training/BasicTrainer.d.ts +1 -0
package/dist/training/BasicTrainer.js +112 -92
package/dist/training/DatasetBuilder.js +3 -3
package/dist/training/Evaluator.js +2 -2
package/dist/training/LRScheduler.d.ts +1 -0
package/dist/training/LRScheduler.js +18 -12
package/dist/training/PreTrainer.js +3 -3
package/dist/training/SFTDatasetBuilder.js +3 -3
package/dist/training/SFTTrainer.js +1 -1
package/dist/training/orthoGrad.js +1 -1
package/dist/training/sparseCrossEntropy.js +30 -30
package/dist/training/types.d.ts +5 -3
package/dist/training/validation.js +13 -13
package/dist/{transpose-9kRxIXWR.js → transpose-Qxz-4os3.js} +7 -7
package/dist/{unsorted_segment_sum-DJvk5xnh.js → unsorted_segment_sum-BfFVV9Zm.js} +20 -20
package/dist/utilities/datasetID.d.ts +2 -0
package/dist/utilities/datasetID.js +21 -0
package/dist/utilities/dummy.js +6 -6
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/packed.js +1 -1
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/sentences.js +5 -5
package/dist/utilities/weights.js +2 -2
package/dist/{variable-Ck482e3n.js → variable-SSATClyt.js} +1 -1
package/dist/{webgpu_program-B4HmApL1.js → webgpu_program-CbjdYLYk.js} +1 -1
package/dist/{webgpu_util-DYlGSwOJ.js → webgpu_util-DuofJBMo.js} +7 -7
package/dist/{zeros-DvZpK8s6.js → zeros-Bw0puq_w.js} +2 -2
package/dist/{zeros_like-CWjDdwr-.js → zeros_like-rOHr54NY.js} +69 -69
package/package.json +3 -3
package/dist/complex-DI35Q-gW.js +0 -11
package/dist/mat_mul-DhG0Newp.js +0 -11
package/dist/mod-CSdCpRjf.js +0 -11
package/dist/relu-J_X6MUzx.js +0 -9
package/dist/slice-BzS11Qh0.js +0 -12
package/dist/split-C2Sj255c.js +0 -9

package/dist/Trainer.js CHANGED Viewed

@@ -1,35 +1,36 @@
-import { E as f } from "./index-DvYrXKkX.js";
-import l from "./training/PreTrainer.js";
-import { createTrainValidationSplit as p } from "./training/validation.js";
-import g from "./training/SFTTrainer.js";
-import m from "./training/tasks/splitter.js";
-const n = [];
-for (let a = 0; a < 256; ++a)
-  n.push((a + 256).toString(16).slice(1));
-function u(a, t = 0) {
-  return (n[a[t + 0]] + n[a[t + 1]] + n[a[t + 2]] + n[a[t + 3]] + "-" + n[a[t + 4]] + n[a[t + 5]] + "-" + n[a[t + 6]] + n[a[t + 7]] + "-" + n[a[t + 8]] + n[a[t + 9]] + "-" + n[a[t + 10]] + n[a[t + 11]] + n[a[t + 12]] + n[a[t + 13]] + n[a[t + 14]] + n[a[t + 15]]).toLowerCase();
+import { E as m } from "./index-DvYrXKkX.js";
+import g from "./training/PreTrainer.js";
+import { createTrainValidationSplit as u } from "./training/validation.js";
+import c from "./training/SFTTrainer.js";
+import p from "./training/tasks/splitter.js";
+const r = [];
+for (let n = 0; n < 256; ++n)
+  r.push((n + 256).toString(16).slice(1));
+function w(n, t = 0) {
+  return (r[n[t + 0]] + r[n[t + 1]] + r[n[t + 2]] + r[n[t + 3]] + "-" + r[n[t + 4]] + r[n[t + 5]] + "-" + r[n[t + 6]] + r[n[t + 7]] + "-" + r[n[t + 8]] + r[n[t + 9]] + "-" + r[n[t + 10]] + r[n[t + 11]] + r[n[t + 12]] + r[n[t + 13]] + r[n[t + 14]] + r[n[t + 15]]).toLowerCase();
 }
-const w = new Uint8Array(16);
-function S() {
-  return crypto.getRandomValues(w);
+const T = new Uint8Array(16);
+function D() {
+  return crypto.getRandomValues(T);
 }
-function c(a, t, i) {
-  return crypto.randomUUID ? crypto.randomUUID() : D(a);
+function d(n, t, a) {
+  return crypto.randomUUID ? crypto.randomUUID() : k(n);
 }
-function D(a, t, i) {
-  a = a || {};
-  const e = a.random ?? a.rng?.() ?? S();
-  if (e.length < 16)
+function k(n, t, a) {
+  n = n || {};
+  const i = n.random ?? n.rng?.() ?? D();
+  if (i.length < 16)
     throw new Error("Random bytes length must be >= 16");
-  return e[6] = e[6] & 15 | 64, e[8] = e[8] & 63 | 128, u(e);
+  return i[6] = i[6] & 15 | 64, i[8] = i[8] & 63 | 128, w(i);
 }
-class d extends f {
+class f extends m {
   trainer;
   trainingType = "pretraining";
   hasTrained = !1;
   trainDataset;
   validationDataset;
-  totalSamples = 0;
+  totalTokens = 0;
+  tokensProcessed = 0;
   log = [];
   progress = null;
   options = {
@@ -38,21 +39,21 @@ class d extends f {
     logInterval: 10
   };
   tokenizer;
-  constructor(t, i, e = "pretraining", r) {
-    if (super(), t instanceof d) {
-      const s = i || t.options, h = t.options;
-      let o = !1;
-      t.trainingType === "sft" && s.sftMode !== h.sftMode && (o = !0), e !== t.trainingType && (o = !0), o ? (t.trainingType === "sft" ? this.trainer = new g(t.model, t.tokenizer, s) : this.trainer = new l(t.model, t.tokenizer, s), this.trainingType = e, this.options = s, this.tokenizer = t.tokenizer) : (this.trainer = t.trainer, this.trainingType = e, this.options = s, this.trainer.updateOptimizer(this.options), this.log = t.log, this.progress = t.progress, this.totalSamples = t.totalSamples, this.tokenizer = t.tokenizer, s.batchSize === h.batchSize && (this.trainDataset = t.trainDataset, this.validationDataset = t.validationDataset));
+  constructor(t, a, i = "pretraining", e, s) {
+    if (super(), t instanceof f) {
+      const o = a || t.options, h = t.options;
+      let l = !1;
+      t.trainingType === "sft" && o.sftMode !== h.sftMode && (l = !0), i !== t.trainingType && (l = !0), l ? (t.trainingType === "sft" ? this.trainer = new c(t.model, t.tokenizer, o) : this.trainer = new g(t.model, t.tokenizer, o), this.trainingType = i, this.options = o, this.tokenizer = t.tokenizer) : (this.trainer = t.trainer, this.trainingType = i, this.options = o, this.trainer.updateOptimizer(this.options), this.log = t.log, this.progress = t.progress, this.totalTokens = t.totalTokens, this.tokenizer = t.tokenizer, o.batchSize === h.batchSize && (this.trainDataset = t.trainDataset, this.validationDataset = t.validationDataset));
       return;
     }
-    if (!i)
+    if (!a)
       throw new Error("Tokeniser must be provided when initializing Trainer with a model");
     if (!t)
       throw new Error("Model must be provided when initializing Trainer");
-    this.options = r || {
+    this.options = e || {
       batchSize: 32,
       sftMode: "full"
-    }, e === "sft" ? this.trainer = new g(t, i, r) : this.trainer = new l(t, i, r), this.trainingType = e, this.tokenizer = i;
+    }, i === "sft" ? this.trainer = new c(t, a, e, s) : this.trainer = new g(t, a, e, s), this.trainingType = i, this.tokenizer = a;
   }
   get model() {
     return this.trainer.model;
@@ -69,61 +70,66 @@ class d extends f {
   dispose() {
     this.trainer.dispose(), this.removeAllListeners();
   }
-  getTotalSamples() {
-    return this.totalSamples;
+  getTotalTokens() {
+    return this.totalTokens;
   }
   setOptions(t) {
-    const i = new Set(
+    const a = new Set(
       Object.keys(t).filter(
-        (e) => t[e] !== this.options[e]
+        (i) => t[i] !== this.options[i]
       )
     );
     if (this.trainer.isRunning) {
-      if (i.has("batchSize"))
+      if (a.has("batchSize"))
         throw new Error("Cannot change batch size during training");
-      if (i.has("sftMode"))
+      if (a.has("sftMode"))
         throw new Error("Cannot change SFT mode during training");
-      if (i.has("loraConfig"))
+      if (a.has("loraConfig"))
         throw new Error("Cannot change LoRA configuration during training");
-      if (i.has("validationSplit"))
+      if (a.has("validationSplit"))
         throw new Error("Cannot change validation split during training");
-      if (i.has("trainableWeights"))
+      if (a.has("trainableWeights"))
         throw new Error("Cannot change trainable weights during training");
-      if (i.has("mixedPrecision"))
+      if (a.has("mixedPrecision"))
         throw new Error("Cannot change mixed precision setting during training");
-      if (i.has("gradientCheckpointing"))
+      if (a.has("gradientCheckpointing"))
         throw new Error("Cannot change gradient checkpointing setting during training");
     }
     this.options = {
       ...this.options,
       ...t
-    }, this.trainer.updateOptimizer(this.options), i.has("metrics") && this.trainer.setMetrics(t.metrics || []);
+    }, this.trainer.updateOptimizer(this.options), a.has("metrics") && this.trainer.setMetrics(t.metrics || []);
   }
-  async prepare(t = []) {
+  async prepare(t = [], a) {
     const i = this.options;
-    if (this.trainingType === "pretraining" && this.trainer instanceof l) {
-      const { trainDataset: e, validationDataset: r, size: s, trainState: h } = await p(
+    if (a && (this.model.metaData.pretrainingData = a.map((e) => ({
+      id: e.id,
+      name: e.name
+    }))), this.trainingType === "pretraining" && this.trainer instanceof g) {
+      const { trainDataset: e, validationDataset: s, size: o } = await u(
         t,
         this.trainer.tokenizer,
         this.trainer.datasetBuilder,
         i?.batchSize || 32,
         i?.validationSplit || 0.1
-      ), o = s * (1 - (i?.validationSplit || 0));
-      this.trainDataset = e, this.validationDataset = r, this.totalSamples = o, this.options.epochSteps = Math.ceil(h.shuffledIndexes.length / (i?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
-    } else if (this.trainingType === "sft" && this.trainer instanceof g) {
+      ), h = o * (1 - (i?.validationSplit || 0));
+      this.trainDataset = e, this.validationDataset = s, this.totalTokens = h, this.options.epochSteps = Math.ceil(
+        this.totalTokens / ((i?.batchSize || 32) * this.model.config.blockSize)
+      ), this.trainer.updateOptimizer(this.options);
+    } else if (this.trainingType === "sft" && this.trainer instanceof c) {
       if (t instanceof Uint16Array)
         throw new Error("SFT training requires Task[] input");
       if (i?.validationSplit && i.validationSplit > 0) {
-        const e = m(t, i?.validationSplit), r = await this.trainer.datasetBuilder.createSFTDataset(
+        const e = p(t, i?.validationSplit), s = await this.trainer.datasetBuilder.createSFTDataset(
           [e.training],
           i?.batchSize || 32,
           -100
-        ), s = await this.trainer.datasetBuilder.createSFTDataset(
+        ), o = await this.trainer.datasetBuilder.createSFTDataset(
           [e.validation],
           i?.batchSize || 32,
           -100
         );
-        this.validationDataset = s, this.trainDataset = r;
+        this.validationDataset = o, this.trainDataset = s;
       } else {
         const e = await this.trainer.datasetBuilder.createSFTDataset(
           t,
@@ -132,45 +138,47 @@ class d extends f {
         );
         this.trainDataset = e;
       }
-      this.totalSamples = t.reduce((e, r) => e + r.length, 0), this.options.epochSteps = Math.ceil(this.totalSamples / (i?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
+      this.totalTokens = t.reduce((e, s) => e + s.length, 0), this.options.epochSteps = Math.ceil(
+        this.totalTokens / ((i?.batchSize || 32) * this.model.config.blockSize)
+      ), this.trainer.updateOptimizer(this.options);
     }
   }
   configureModel(t) {
-    const i = t?.sftMode || "full";
+    const a = t?.sftMode || "full";
     if (this.trainingType === "pretraining" && (this.trainer.model.hasLoRA() && this.trainer.model.detachLoRA(), this.trainer.model.weightStore.setTrainable(["*"])), this.trainingType === "sft") {
-      if (i === "lora") {
-        const e = this.trainer.model;
+      if (a === "lora") {
+        const i = this.trainer.model;
         if (t?.loraName)
-          if (e.hasLoRA(t.loraName)) {
-            if (e.attachLoRA(t.loraName), t.loraConfig) {
-              const r = e.lora;
-              (r.alpha !== t.loraConfig.alpha || r.rank !== t.loraConfig.rank) && (e.detachLoRA(), e.deleteLoRA(t.loraName), e.createLoRA(t.loraName, t.loraConfig), e.attachLoRA(t.loraName), console.warn("Resetting LoRA with new configuration."));
+          if (i.hasLoRA(t.loraName)) {
+            if (i.attachLoRA(t.loraName), t.loraConfig) {
+              const e = i.lora;
+              (e.alpha !== t.loraConfig.alpha || e.rank !== t.loraConfig.rank) && (i.detachLoRA(), i.deleteLoRA(t.loraName), i.createLoRA(t.loraName, t.loraConfig), i.attachLoRA(t.loraName), console.warn("Resetting LoRA with new configuration."));
             }
           } else if (t.loraConfig)
-            e.createLoRA(t.loraName, t.loraConfig), e.attachLoRA(t.loraName);
+            i.createLoRA(t.loraName, t.loraConfig), i.attachLoRA(t.loraName);
           else
             throw new Error(
               `LoRA configuration must be provided to create LoRA with name ${t.loraName}`
             );
         else if (t?.loraConfig)
-          if (e.hasLoRA()) {
-            const r = e.lora;
-            if (r.alpha !== t.loraConfig.alpha || r.rank !== t.loraConfig.rank) {
-              e.detachLoRA();
-              const s = t.loraName || c();
-              e.createLoRA(s, t.loraConfig), e.attachLoRA(s);
+          if (i.hasLoRA()) {
+            const e = i.lora;
+            if (e.alpha !== t.loraConfig.alpha || e.rank !== t.loraConfig.rank) {
+              i.detachLoRA();
+              const s = t.loraName || d();
+              i.createLoRA(s, t.loraConfig), i.attachLoRA(s);
             }
           } else {
-            const r = t.loraName || c();
-            e.createLoRA(r, t.loraConfig), e.attachLoRA(r);
+            const e = t.loraName || d();
+            i.createLoRA(e, t.loraConfig), i.attachLoRA(e);
           }
-        else if (!e.hasLoRA()) throw new Error("LoRA configuration must be provided for lora SFT mode");
+        else if (!i.hasLoRA()) throw new Error("LoRA configuration must be provided for lora SFT mode");
       } else
         this.trainer.model.hasLoRA() && this.trainer.model.detachLoRA();
-      i === "last-layer" ? this.trainer.model.weightStore.setTrainable([
+      a === "last-layer" ? this.trainer.model.weightStore.setTrainable([
         `block_${this.trainer.model.config.nLayer - 1}_*`,
         "token_embedding"
-      ]) : i === "full" && this.trainer.model.weightStore.setTrainable(["*"]);
+      ]) : a === "full" && this.trainer.model.weightStore.setTrainable(["*"]);
     }
     t?.trainableWeights && this.trainer.model.weightStore.setTrainable(t.trainableWeights);
   }
@@ -178,37 +186,44 @@ class d extends f {
     const t = this.options;
     if (!this.trainDataset)
       throw new Error("Dataset not prepared");
-    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), this.trainer.setGradientCheckpointing(t?.gradientCheckpointing || !1), this.trainer.setMixedPrecision(t?.mixedPrecision || !1), this.trainer.setLabelSmoothing(t?.labelSmoothing || 0), this.trainer.setDropout(t?.dropout || 0), this.trainer.setLayerDrop(t?.layerDrop || 0), this.configureModel(t), await this.trainer.trainOnDataset(
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), this.model.metaData.pretrainingSettings = t;
+    const a = Date.now();
+    this.log.length > 0 && this.trainer.resumeFromLog(this.log[this.log.length - 1]), this.trainer.setGradientCheckpointing(t?.gradientCheckpointing || !1), this.trainer.setMixedPrecision(t?.mixedPrecision || !1), this.trainer.setLabelSmoothing(t?.labelSmoothing || 0), this.trainer.setDropout(t?.dropout || 0), this.trainer.setLayerDrop(t?.layerDrop || 0), this.configureModel(t), await this.trainer.trainOnDataset(
       this.trainDataset,
       {
         ...t,
-        onStep: async (i) => {
-          this.log.push(i), this.progress = {
-            lastLog: i,
-            progress: i.totalSamples / this.totalSamples,
-            remaining: Math.max(
-              0,
-              (this.totalSamples - i.totalSamples) / i.totalSamples * i.duration
-            )
-          };
-          const e = this.listeners("log");
-          for (const r of e)
-            await r(i, this.progress);
+        onStep: async (e) => {
+          this.log.push(e), this.progress = {
+            lastLog: e,
+            progress: e.totalTokens / this.totalTokens,
+            remaining: Math.max(0, (this.totalTokens - e.totalTokens) / e.totalTokens * e.duration)
+          }, this.tokensProcessed = e.totalTokens;
+          const s = this.listeners("log");
+          for (const o of s)
+            await o(e, this.progress);
         }
       },
       this.validationDataset
-    ), this.emit("stop");
+    ), this.model.metaData.actionLog = this.model.metaData.actionLog || [];
+    const i = Date.now();
+    this.model.metaData.actionLog.push({
+      action: "pretrain",
+      timestamp: i,
+      duration: i - a,
+      tokensProcessed: this.tokensProcessed,
+      options: t
+    }), this.emit("stop");
   }
   async step(t) {
     if (!this.trainDataset)
       throw new Error("Dataset not prepared");
     this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start");
-    const { log: i } = await this.trainer.stepDataset(this.trainDataset, t || {}, this.validationDataset), e = this.listeners("log");
-    for (const r of e)
-      await r(i, {
-        lastLog: i,
-        progress: i.totalSamples / this.totalSamples,
-        remaining: Math.max(0, (this.totalSamples - i.totalSamples) / i.totalSamples * i.duration)
+    const { log: a } = await this.trainer.stepDataset(this.trainDataset, t || {}, this.validationDataset), i = this.listeners("log");
+    for (const e of i)
+      await e(a, {
+        lastLog: a,
+        progress: a.totalTokens / this.totalTokens,
+        remaining: Math.max(0, (this.totalTokens - a.totalTokens) / a.totalTokens * a.duration)
       });
     this.emit("stop");
   }
@@ -223,5 +238,5 @@ class d extends f {
   }
 }
 export {
-  d as default
+  f as default
 };

package/dist/{axis_util-GTVlo58H.js → axis_util-QWWgLjut.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { x as c } from "./index-CUXkjxiT.js";
+import { v as c } from "./index-DSGwv2Yx.js";
 function i(e, n) {
   for (let t = 0; t < e.length; ++t)
     if (e[e.length - t - 1] !== n - 1 - t)

package/dist/backend.js CHANGED Viewed

@@ -1,9 +1,9 @@
-import { g as o, s as e, r as s } from "./index-CUXkjxiT.js";
+import { g as o, s as e, r as s } from "./index-DSGwv2Yx.js";
 async function c(t, a) {
   if (o() !== t) {
     if (t === "webgpu") {
       const { registerWebGPUBackend: i } = await import("./patches/webgpu_base.js");
-      i(a), await import("./index-CjOWnMXP.js"), await import("./ops/webgpu/index.js");
+      i(a), await import("./index-BQvB7LCC.js"), await import("./ops/webgpu/index.js");
     }
     await e(t), await s(), console.log(`Backend set to ${t}`);
   }

package/dist/{backend_util-GaFarB78.js → backend_util-qwSFfxYx.js} RENAMED Viewed

@@ -1,9 +1,9 @@
-import { U as d, a9 as A, a8 as O, x as g, av as _, az as w, ad as D, _ as x, $ as b, am as y, aY as M } from "./index-CUXkjxiT.js";
-import { d as T, f as L, h as W, c as v, e as F, a as N, b as C, g as P } from "./axis_util-GTVlo58H.js";
-import { a as z, c as U } from "./concat_util-D18dJ4fD.js";
-import { c as B, b as H, d as V, f as G, g as Z, h as j, i as q, j as J, k as K, m as X, t as Y } from "./step-BTxPtq1r.js";
-import { S as k, a as Q, b as ee, g as te, c as se, s as ne } from "./selu_util-BLhIqRkw.js";
-import { c as re, v as oe, a as ie } from "./scatter_nd_util-ByNJaL6I.js";
+import { N as d, a9 as A, a8 as O, v as g, av as _, az as w, ad as D, _ as x, $ as b, am as y, aY as M } from "./index-DSGwv2Yx.js";
+import { d as T, f as L, h as v, c as W, e as F, a as N, b as C, g as P } from "./axis_util-QWWgLjut.js";
+import { a as z, c as B } from "./concat_util-C1Mxe27t.js";
+import { c as U, b as H, d as V, f as G, g as Z, h as j, i as q, j as J, k as K, m as X, t as Y } from "./step-DQY6_ABw.js";
+import { S as k, a as Q, b as ee, g as te, c as se, s as ne } from "./selu_util-BXdhy_W6.js";
+import { c as re, v as oe, a as ie } from "./scatter_nd_util-C-x73Cj6.js";
 import { a as ae, c as ue, b as ce, e as pe, d as le, g as fe, m as he, s as ge } from "./complex_util-Yc1A_gV1.js";
 function de(e, t) {
   const r = e.shape.length, n = t.shape.length;
@@ -146,10 +146,10 @@ function De(e, t, r) {
   return n;
 }
 const xe = 0.3275911, be = 0.254829592, ye = -0.284496736, Me = 1.421413741, Te = -1.453152027, Le = 1.061405429;
-const I = "->", We = /->/g, E = ",", $ = "...";
-function ve(e, t) {
+const I = "->", ve = /->/g, E = ",", $ = "...";
+function We(e, t) {
   e = e.replace(/\s/g, "");
-  const r = (e.length - e.replace(We, "").length) / I.length;
+  const r = (e.length - e.replace(ve, "").length) / I.length;
   if (r < 1)
     throw new Error("Equations without an arrow are not supported.");
   if (r > 1)
@@ -226,7 +226,7 @@ function ze(e, t) {
     (e[n].length === 0 || e[n].indexOf(t) !== -1 || t === -1) && r.push(n);
   return r;
 }
-function Ue(e, t, r = 0) {
+function Be(e, t, r = 0) {
   let n = [];
   if (typeof t == "number")
     g(e.shape[r] % t === 0, () => "Number of splits must evenly divide the axis."), n = new Array(t).fill(e.shape[r] / t);
@@ -242,7 +242,7 @@ function Ue(e, t, r = 0) {
   }
   return n;
 }
-function Be(e) {
+function Ue(e) {
   return `Received SparseTensor with denseShape[0] = 0 but
   indices.shape[0] = ${e}`;
 }
@@ -314,8 +314,8 @@ const ut = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   axesAreInnerMostDims: L,
   calculateShapes: re,
   checkEinsumDimSizes: Ne,
-  checkPadOnDimRoundingMode: B,
-  combineLocations: W,
+  checkPadOnDimRoundingMode: U,
+  combineLocations: v,
   combineRaggedTensorToTensorShapes: me,
   complexWithEvenIndex: ue,
   complexWithOddIndex: ce,
@@ -324,12 +324,12 @@ const ut = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   computeDefaultPad: G,
   computeDilation2DInfo: Z,
   computeOptimalWindowSize: Se,
-  computeOutAndReduceShapes: v,
-  computeOutShape: U,
+  computeOutAndReduceShapes: W,
+  computeOutShape: B,
   computePool2DInfo: j,
   computePool3DInfo: q,
   convertConv2DDataFormat: J,
-  decodeEinsumEquation: ve,
+  decodeEinsumEquation: We,
   eitherStridesOrDilationsAreOne: K,
   expandShapeToKeepDim: F,
   exponent: pe,
@@ -353,7 +353,7 @@ const ut = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   getRowPartitionTypesHelper: Ie,
   getSliceBeginCoords: we,
   getSliceSize: De,
-  getSparseFillEmptyRowsIndicesDenseShapeMismatch: Be,
+  getSparseFillEmptyRowsIndicesDenseShapeMismatch: Ue,
   getSparseFillEmptyRowsNegativeIndexErrorMessage: He,
   getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: Ve,
   getSparseReshapeEmptyTensorZeroOutputDimErrorMessage: je,
@@ -369,7 +369,7 @@ const ut = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   isIdentityPermutation: Pe,
   mergeRealAndImagArrays: he,
   prepareAndValidate: de,
-  prepareSplitSize: Ue,
+  prepareSplitSize: Be,
   shouldFuse: ne,
   splitRealAndImagArrays: ge,
   stridesOrDilationsArePositive: X,
@@ -386,14 +386,14 @@ export {
   we as C,
   De as D,
   xe as E,
-  ve as F,
+  We as F,
   Ne as G,
   Ce as H,
   Fe as I,
   Pe as J,
   de as K,
   Re as L,
-  Ue as M,
+  Be as M,
   S as P,
   f as R,
   Ee as a,
@@ -403,7 +403,7 @@ export {
   et as e,
   Qe as f,
   Ie as g,
-  Be as h,
+  Ue as h,
   He as i,
   Ve as j,
   Ge as k,

package/dist/{backend_webgpu-BqASlsbV.js → backend_webgpu-DI2wXEC2.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { ab as g, as as $, at as K, h as D, x as _, au as O, U as x, av as Z, a5 as W, aw as F, ax as j, ay as X, az as J, af as ee, a9 as k } from "./index-CUXkjxiT.js";
-import { m as te, f as se, P as re } from "./webgpu_program-B4HmApL1.js";
-import { i as ne, G as q } from "./webgpu_util-DYlGSwOJ.js";
-import { m as N } from "./complex_util-Yc1A_gV1.js";
+import { ab as g, as as $, at as K, e as D, v as _, au as O, N as x, av as Z, a5 as W, aw as F, ax as j, ay as X, az as J, af as ee, a9 as k } from "./index-DSGwv2Yx.js";
+import { m as te, f as se, P as re } from "./webgpu_program-CbjdYLYk.js";
+import { i as ne, G as N } from "./webgpu_util-DuofJBMo.js";
+import { m as q } from "./complex_util-Yc1A_gV1.js";
 const d = g();
 d.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
 d.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
@@ -248,7 +248,7 @@ class R extends $ {
     if (s != null || t.dtype === "string")
       return s;
     if (t.dtype === "complex64") {
-      const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(N(E, B).buffer, "float32");
+      const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(q(E, B).buffer, "float32");
       return this.convertAndCacheOnCPU(e, y), y;
     }
     this.hasReadSyncWarned || (this.hasReadSyncWarned = !0, console.warn("The performance of synchronously reading data from GPU to CPU is poor on the webgpu backend, please use asynchronous APIs instead."));
@@ -309,7 +309,7 @@ class R extends $ {
         this.read(t.complexTensorInfos.real.dataId),
         this.read(t.complexTensorInfos.imag.dataId)
       ]), a = r[0], i = r[1];
-      n = N(a, i);
+      n = q(a, i);
     } else {
       const r = await this.getBufferData(t.resource);
       n = O(r, t.dtype);
@@ -337,7 +337,7 @@ class R extends $ {
       refCount: 1,
       external: e.zeroCopy
     });
-    const a = this.tensorMap.get(r), i = q(a.dtype) * x(a.shape);
+    const a = this.tensorMap.get(r), i = N(a.dtype) * x(a.shape);
     if (e.buffer.size < i)
       throw new Error(`GPUBuffer size(${e.buffer.size}) is smaller than tensor size(${i})!`);
     if ((e.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC))
@@ -398,7 +398,7 @@ class R extends $ {
     const t = this.tensorMap.get(e);
     if (t.resource != null)
       return;
-    const s = q(t.dtype) * x(t.shape);
+    const s = N(t.dtype) * x(t.shape);
     let n;
     const r = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
     if (t.values) {

package/dist/{broadcast_to-eS93CCN_.js → broadcast_to-C_EJTVTZ.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { o as h, q as f, u as p, w as g, E as u, T } from "./index-CUXkjxiT.js";
-import { r as b } from "./reshape-BDOuCSNW.js";
+import { o as h, n as f, q as p, u as g, E as u, T } from "./index-DSGwv2Yx.js";
+import { r as b } from "./reshape-BIN71H3p.js";
 function m(e, r) {
   let n = f(e, "broadcastTo", "x");
   const a = n.shape;

package/dist/checks/appendCache.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s, h as a } from "../index-CUXkjxiT.js";
-import { t } from "../tensor4d-C6UCG_u8.js";
+import { s, e as a } from "../index-DSGwv2Yx.js";
+import { t } from "../tensor4d-Dj4rDssL.js";
 async function u(e) {
   await s(e);
   const n = t(

package/dist/checks/attentionMask.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { s as i, h as a } from "../index-CUXkjxiT.js";
-import { t } from "../tensor4d-C6UCG_u8.js";
-import { t as e } from "../tensor2d-BnXMKScO.js";
+import { s as i, e } from "../index-DSGwv2Yx.js";
+import { t } from "../tensor4d-Dj4rDssL.js";
+import { t as a } from "../tensor2d-DTtQ1QcT.js";
 async function k(n) {
   await i(n);
   const s = t(
@@ -23,14 +23,14 @@ async function k(n) {
       ]
     ],
     [1, 1, 2, 4]
-  ), r = e(
+  ), r = a(
     [
       [0, -1 / 0, -1 / 0, -1 / 0],
       [0, 0, 0, -1 / 0]
     ],
     [2, 4]
   );
-  return await a().runKernel("AttentionMask", { q: s, k: o, mask: r }, { divisor: 0.5, pastLen: 0 }).array();
+  return await e().runKernel("AttentionMask", { q: s, k: o, mask: r }, { divisor: 0.5, pastLen: 0 }).array();
 }
 export {
   k as execute

package/dist/checks/gelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s as e, h as o } from "../index-CUXkjxiT.js";
-import { t as s } from "../tensor2d-BnXMKScO.js";
+import { s as e, e as o } from "../index-DSGwv2Yx.js";
+import { t as s } from "../tensor2d-DTtQ1QcT.js";
 async function m(t) {
   await e(t);
   const r = s(

package/dist/checks/matMulGelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s as o, h as s } from "../index-CUXkjxiT.js";
-import { t as e } from "../tensor2d-BnXMKScO.js";
+import { s as o, e as s } from "../index-DSGwv2Yx.js";
+import { t as e } from "../tensor2d-DTtQ1QcT.js";
 async function i(t) {
   await o(t);
   const r = e(

package/dist/checks/normRMS.js CHANGED Viewed

@@ -1,13 +1,13 @@
-import { s as u, a0 as A, h } from "../index-CUXkjxiT.js";
-import { a as y } from "../ops-CsXeTq1P.js";
-import { t as p } from "../tensor1d-LMGMIUlr.js";
-import { t as r } from "../tensor-BWFldCso.js";
+import { s as u, a0 as A, e as y } from "../index-DSGwv2Yx.js";
+import { a as h } from "../ops-CURIZSVt.js";
+import { t as p } from "../tensor1d-BMl0eZYV.js";
+import { t as r } from "../tensor-D8e0Gd7c.js";
 const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function k(t) {
   await u(t);
   const o = p(x, "float32"), n = r(w, [16, 128, 192], "float32"), s = r(M, [16, 128, 192], "float32"), e = (d, g) => {
-    const i = h().runKernel("RMSNorm", { x: d, gamma: g });
-    return y.meanSquaredError(i, s);
+    const i = y().runKernel("RMSNorm", { x: d, gamma: g });
+    return h.meanSquaredError(i, s);
   }, { value: m, grads: a } = A(e)([n, o]), c = await m.array(), f = await a[0].array(), l = await a[1].array();
   return [c, f, l];
 }

package/dist/checks/normRMSGrad.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { s as c, h as d } from "../index-CUXkjxiT.js";
-import { t as f } from "../tensor1d-LMGMIUlr.js";
-import { t as r } from "../tensor-BWFldCso.js";
+import { s as c, e as d } from "../index-DSGwv2Yx.js";
+import { t as f } from "../tensor1d-BMl0eZYV.js";
+import { t as r } from "../tensor-D8e0Gd7c.js";
 const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function x(t) {
   await c(t);

package/dist/checks/packUnpack.js CHANGED Viewed

@@ -1,7 +1,7 @@
-import { s as a, h as n } from "../index-CUXkjxiT.js";
-import { t as c } from "../tensor2d-BnXMKScO.js";
-async function i(e) {
-  await a(e);
+import { s as a, e } from "../index-DSGwv2Yx.js";
+import { t as c } from "../tensor2d-DTtQ1QcT.js";
+async function i(n) {
+  await a(n);
   const r = c(
     [
       [0.1, 0.2, 0, 0, 1230, 1232331234, -12234234],
@@ -10,8 +10,8 @@ async function i(e) {
       [0, 0, 0, 0, -0.1, 1e-3, 0]
     ],
     [4, 7]
-  ), t = n().runKernel("Pack16", { x: r });
-  return await n().runKernel("Unpack16", { x: t }).array();
+  ), t = e().runKernel("Pack16", { x: r });
+  return await e().runKernel("Unpack16", { x: t }).array();
 }
 export {
   i as execute

package/dist/checks/qkv.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { W as i, X as u, Y as c, s as l, h } from "../index-CUXkjxiT.js";
-import { t as f } from "../tensor2d-BnXMKScO.js";
+import { U as i, V as u, W as c, s as l, e as h } from "../index-DSGwv2Yx.js";
+import { t as f } from "../tensor2d-DTtQ1QcT.js";
 function m(t, e, n) {
   if (i(t), e != null && e.length !== 3)
     throw new Error("tensor3d() requires shape to have three numbers");

package/dist/checks/rope.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import s from "../layers/RoPECache.js";
-import { s as c, h as i } from "../index-CUXkjxiT.js";
-import { t as p } from "../tensor4d-C6UCG_u8.js";
+import { s as c, e as i } from "../index-DSGwv2Yx.js";
+import { t as p } from "../tensor4d-Dj4rDssL.js";
 async function f(r) {
   await c(r);
   const n = p(