@genai-fi/nanogpt 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +13 -9
- package/dist/NanoGPTModel.js +10 -10
- package/dist/{RealDiv-C4hOvYOZ.js → RealDiv-CVYNbZxu.js} +11 -11
- package/dist/{Reshape-BLijOA8h.js → Reshape-CEsEp0AI.js} +2 -2
- package/dist/Reshape-Do18N3gO.js +30 -0
- package/dist/TeachableLLM.js +9 -5
- package/dist/{TiedEmbedding-BLltddza.js → TiedEmbedding-ccLBFiZi.js} +4 -4
- package/dist/{axis_util-DaAl5MER.js → axis_util-5DTW2tFV.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-DWiwsi2N.js → backend_util-C9Ut8n0Q.js} +40 -40
- package/dist/{broadcast_to-C4v-j9yA.js → broadcast_to-Ba9h_8DO.js} +2 -2
- package/dist/{concat-CsHeR4zV.js → concat-CbXTetof.js} +1 -1
- package/dist/{dataset-JDyjG3QR.js → dataset-U3PrjwgU.js} +7 -7
- package/dist/{dropout-hpDwECTe.js → dropout-DPfPgWWe.js} +11 -11
- package/dist/{gather-D0_gPiBz.js → gather-Bbh8DHhM.js} +4 -4
- package/dist/{gelu-uyHP1x1f.js → gelu-BFwVnd1r.js} +1 -1
- package/dist/{gpgpu_math-DJm3ZTAf.js → gpgpu_math-DffelNS-.js} +2 -2
- package/dist/{index-BPPzKVdR.js → index-DYD_yPa-.js} +1083 -1106
- package/dist/{index-C0dhsYom.js → index-UdZhlibC.js} +126 -126
- package/dist/{kernel_funcs_utils-CwRTFqrc.js → kernel_funcs_utils-CXDy3EN7.js} +3 -3
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +8 -8
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +11 -7
- package/dist/{log_sum_exp-D086OgZJ.js → log_sum_exp-BnmCkHWl.js} +8 -8
- package/dist/main.d.ts +11 -0
- package/dist/main.js +44 -27
- package/dist/{mat_mul-1nwdPkQ_.js → mat_mul-dwmZz69e.js} +1 -1
- package/dist/{max-BQc2Aj-I.js → max-ByjEGoFx.js} +3 -3
- package/dist/{mulmat_packed_gpu-Gzf3I9UV.js → mulmat_packed_gpu-IGPBp6h9.js} +1 -1
- package/dist/{ones-D63HpSF_.js → ones-C8Mfln6-.js} +2 -2
- package/dist/ops/adamAdjust.d.ts +2 -0
- package/dist/ops/adamAdjust.js +9 -0
- package/dist/ops/adamMoments.d.ts +2 -0
- package/dist/ops/adamMoments.js +9 -0
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/adamAdjust.d.ts +1 -0
- package/dist/ops/cpu/adamAdjust.js +18 -0
- package/dist/ops/cpu/adamMoments.d.ts +1 -0
- package/dist/ops/cpu/adamMoments.js +16 -0
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +2 -2
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +11 -11
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/adamAdjust.d.ts +1 -0
- package/dist/ops/webgl/adamAdjust.js +50 -0
- package/dist/ops/webgl/adamMoments.d.ts +1 -0
- package/dist/ops/webgl/adamMoments.js +38 -0
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +8 -8
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.d.ts +1 -0
- package/dist/ops/webgpu/adamAdjust.js +52 -0
- package/dist/ops/webgpu/adamMoments.d.ts +1 -0
- package/dist/ops/webgpu/adamMoments.js +51 -0
- package/dist/ops/webgpu/appendCache.js +13 -12
- package/dist/ops/webgpu/attentionMask.js +11 -10
- package/dist/ops/webgpu/gatherSub.js +26 -11
- package/dist/ops/webgpu/gelu.js +7 -6
- package/dist/ops/webgpu/index.js +3 -0
- package/dist/ops/webgpu/normRMS.js +27 -101
- package/dist/ops/webgpu/normRMSGrad.d.ts +1 -0
- package/dist/ops/webgpu/normRMSGrad.js +128 -0
- package/dist/ops/webgpu/qkv.js +9 -8
- package/dist/ops/webgpu/rope.js +8 -7
- package/dist/ops/webgpu/scatterSub.js +8 -7
- package/dist/ops/webgpu/utils/reductions.d.ts +9 -0
- package/dist/ops/webgpu/utils/reductions.js +68 -0
- package/dist/{ops-CIQLNshk.js → ops-aRTXR2Sr.js} +195 -219
- package/dist/{random_width-DkYP8W8N.js → random_width-DbSpgl4o.js} +22 -21
- package/dist/{range-CYzpQY53.js → range-D9CZhVlR.js} +1 -1
- package/dist/{reciprocal-_A9yv27J.js → reciprocal-CGB48wZB.js} +1 -1
- package/dist/{register_all_kernels-guvSxp7M.js → register_all_kernels-DnbAyBXt.js} +30 -29
- package/dist/{reshape-BMUzc1UY.js → reshape-BR0eoLYN.js} +3 -3
- package/dist/{scatter_nd_util-IRBqKz_b.js → scatter_nd_util-OjyAxku2.js} +1 -1
- package/dist/{selu_util-Dt_iuXaq.js → selu_util-Ce6pu9IM.js} +41 -41
- package/dist/{shared-CDu9S76h.js → shared-Czipaeb6.js} +6 -6
- package/dist/{shared-BNa2q6jD.js → shared-DS5waSIY.js} +1 -1
- package/dist/{sin-Cocju-BY.js → sin-CiBxrDqX.js} +6 -6
- package/dist/slice-BHbDHObE.js +28 -0
- package/dist/{softmax-GPNK3o-U.js → softmax-JMEIUo2J.js} +3 -3
- package/dist/{split-CHzJjxDv.js → split-CRU0PjVV.js} +1 -1
- package/dist/{stack-Dpgg_1W1.js → stack-ikk2Y8_P.js} +1 -1
- package/dist/{sum-B8wEpKsg.js → sum-NLYbiDag.js} +3 -3
- package/dist/{tensor-RvZVNmg0.js → tensor-Do9PKbIE.js} +1 -1
- package/dist/{tensor2d-B_kyod7_.js → tensor2d-CWHxHpLh.js} +1 -1
- package/dist/training/Adam.d.ts +22 -0
- package/dist/training/Adam.js +93 -0
- package/dist/training/AdamExt.d.ts +1 -1
- package/dist/training/AdamExt.js +13 -12
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +22 -22
- package/dist/training/Trainer.d.ts +1 -1
- package/dist/training/Trainer.js +32 -32
- package/dist/training/sparseCrossEntropy.d.ts +0 -4
- package/dist/training/sparseCrossEntropy.js +7 -7
- package/dist/utilities/arrayClose.d.ts +1 -0
- package/dist/utilities/arrayClose.js +11 -0
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/performance.d.ts +1 -1
- package/dist/utilities/performance.js +11 -11
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DXEUOwew.js → variable-BTBkayv_.js} +1 -1
- package/dist/{webgpu_util-g13LvDIv.js → webgpu_program-WaoMq-WD.js} +138 -215
- package/dist/webgpu_util-DhSeP4b6.js +80 -0
- package/dist/{zeros-DCPCdFGq.js → zeros-DnPT2nD4.js} +4 -4
- package/package.json +1 -1
package/dist/training/Adam.js
ADDED
```diff
@@ -0,0 +1,93 @@
+import { adamAdjust as b } from "../ops/adamAdjust.js";
+import { adamMoments as d } from "../ops/adamMoments.js";
+import { O as g, e as h, t as o, d as B } from "../index-UdZhlibC.js";
+import { z as M } from "../zeros-DnPT2nD4.js";
+/**
+ * @license
+ * Copyright 2018 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+class R extends g {
+  constructor(t, a, e, s = null) {
+    super(), this.learningRate = t, this.beta1 = a, this.beta2 = e, this.epsilon = s, this.accBeta1 = a, this.accBeta2 = e, s === null && (this.epsilon = h().backend.epsilon());
+  }
+  /** @nocollapse */
+  static get className() {
+    return "Adam";
+  }
+  accBeta1 = 0;
+  accBeta2 = 0;
+  accumulatedMoments = [];
+  applyGradients(t) {
+    const a = Array.isArray(t) ? t.map((e) => e.name) : Object.keys(t);
+    o(() => {
+      const e = 1 - this.accBeta1, s = 1 - this.accBeta2;
+      a.forEach((n, i) => {
+        const c = h().registeredVariables[n], u = !1;
+        this.accumulatedMoments[i] == null && (this.accumulatedMoments[i] = {
+          originalName: `${n}/m`,
+          variable: o(() => M([...c.shape, 2]).variable(u))
+        });
+        const r = Array.isArray(t) ? t[i].tensor : t[n];
+        if (r == null)
+          return;
+        const m = this.accumulatedMoments[i].variable, l = d(m, r, this.beta1, this.beta2);
+        m.assign(l);
+        const p = b(
+          l,
+          c,
+          e,
+          s,
+          this.epsilon ?? 1e-8,
+          this.learningRate
+        );
+        c.assign(p);
+      }), this.accBeta1 = this.accBeta1 * this.beta1, this.accBeta2 = this.accBeta2 * this.beta2;
+    }), this.incrementIterations();
+  }
+  dispose() {
+    this.accumulatedMoments != null && B(this.accumulatedMoments.map((t) => t.variable));
+  }
+  async getWeights() {
+    const t = [...this.accumulatedMoments];
+    return [await this.saveIterations()].concat(
+      t.map((a) => ({ name: a.originalName, tensor: a.variable }))
+    );
+  }
+  async setWeights(t) {
+    t = await this.extractIterations(t), o(() => {
+      this.accBeta1 = Math.pow(this.beta1, this.iterations_ + 1), this.accBeta2 = Math.pow(this.beta2, this.iterations_ + 1);
+    });
+    const a = t.length / 2, e = !1;
+    this.accumulatedMoments = t.slice(0, a).map((s) => ({
+      originalName: s.name,
+      variable: s.tensor.variable(e)
+    }));
+  }
+  getConfig() {
+    return {
+      learningRate: this.learningRate,
+      beta1: this.beta1,
+      beta2: this.beta2,
+      epsilon: this.epsilon
+    };
+  }
+  /** @nocollapse */
+  static fromConfig(t, a) {
+    return new t(a.learningRate, a.beta1, a.beta2, a.epsilon);
+  }
+}
+export {
+  R as AdamOptimizer
+};
```
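The new optimizer keeps both Adam moments in a single `[...shape, 2]` tensor per variable and defers the arithmetic to the fused `adamMoments`/`adamAdjust` ops registered per backend (the cpu/webgl/webgpu entries in the file list above). The kernels' internals are not part of this diff; assuming they implement the standard Adam update, the reference semantics would be:

```ts
// Reference semantics assumed for the fused kernels (standard Adam).
// Names here are illustrative, not the package's actual op signatures.
interface Moments { m: number; v: number }

// adamMoments(moments, grad, beta1, beta2): exponential moving averages.
function adamMomentsRef(prev: Moments, grad: number, beta1: number, beta2: number): Moments {
  return {
    m: beta1 * prev.m + (1 - beta1) * grad,        // first moment (mean)
    v: beta2 * prev.v + (1 - beta2) * grad * grad, // second moment (uncentred variance)
  };
}

// adamAdjust(moments, param, 1-beta1^t, 1-beta2^t, epsilon, lr): bias-corrected step.
function adamAdjustRef(
  mom: Moments,
  param: number,
  oneMinusAccBeta1: number,
  oneMinusAccBeta2: number,
  epsilon: number,
  lr: number,
): number {
  const mHat = mom.m / oneMinusAccBeta1;
  const vHat = mom.v / oneMinusAccBeta2;
  return param - (lr * mHat) / (Math.sqrt(vHat) + epsilon);
}
```

Passing `1 - accBeta1` and `1 - accBeta2` (computed once per step in `applyGradients`) matches the usual bias-correction terms, and storing m and v interleaved in one tensor halves the number of variables the optimizer tracks.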
package/dist/training/AdamExt.js
CHANGED
```diff
@@ -1,7 +1,8 @@
-import {
-
-
-
+import { a as r, b as c, c as h, e as o } from "../index-UdZhlibC.js";
+import { AdamOptimizer as g } from "./Adam.js";
+class y extends g {
+  constructor(t, e, s, i, a) {
+    super(t, e, s, i), this.config = a, this.startLearningRate = t;
   }
   step = 0;
   startLearningRate;
@@ -23,21 +24,21 @@ class u extends r {
   }
   decayVariable(t, e, s) {
     if (t && t.shape.length >= 2) {
-      const
-      t.assign(
+      const i = r(t, c(s * e));
+      t.assign(h(t, i)), i.dispose();
     }
   }
   applyWeightDecay(t) {
-    const e = this.config.weightDecay, s = this.learningRate,
-    Array.isArray(t) ? t.forEach(({ name:
-      const n = a
+    const e = this.config.weightDecay, s = this.learningRate, i = o().registeredVariables;
+    Array.isArray(t) ? t.forEach(({ name: a }) => {
+      const n = i[a];
       this.decayVariable(n, e, s);
-    }) : Object.keys(t).forEach((
-      const n = a
+    }) : Object.keys(t).forEach((a) => {
+      const n = i[a];
       this.decayVariable(n, e, s);
     });
   }
 }
 export {
-
+  y as default
 };
```
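Reading the minified aliases (`r`, `c`, `h` imported from the core bundle, plausibly `mul`, `scalar` and `sub`), `decayVariable` now applies decoupled, AdamW-style weight decay directly to the variable, and only for tensors of rank ≥ 2, so biases and norm gains are left alone. A reference version under that assumption:

```ts
// Reference for decayVariable, assuming r = mul, c = scalar, h = sub in the
// minified bundle: param -= param * (learningRate * weightDecay).
function decayVariableRef(param: Float32Array, learningRate: number, weightDecay: number): void {
  const k = learningRate * weightDecay;
  for (let j = 0; j < param.length; j++) {
    param[j] -= param[j] * k; // decoupled decay: no interaction with the Adam moments
  }
}
```

The added `i.dispose()` frees the intermediate product explicitly, one of several tensor-leak fixes in this release.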
package/dist/training/FullTrainer.js
CHANGED
```diff
@@ -1,21 +1,21 @@
-import { generateText as
-import
-import
-import { d as h } from "../index-
-import
-const
+import { generateText as w } from "../utilities/generate.js";
+import T from "./Trainer.js";
+import L from "./Evaluator.js";
+import { d as h } from "../index-UdZhlibC.js";
+import x from "../utilities/profile.js";
+const y = {
   desiredLoss: 0.01,
   logInterval: 1,
   maxSteps: 1e3
 };
-class
+class E extends T {
   constructor(i, e, r = 3e-4) {
     super(i, e, r);
   }
   // Train for multiple epochs using Dataset API - FIXED memory leaks
   async trainOnDataset(i, e, r) {
-    const {
-    ...
+    const { logInterval: g, onStep: l, prompt: c, maxSteps: u } = {
+      ...y,
       ...e
     }, n = Date.now(), t = {
       step: 0,
@@ -27,13 +27,13 @@ class I extends L {
       trainingDuration: 0,
       ...this.lastState || {}
     };
-    this.lastState = t, await this.dummyPass(), this.model.trainable = !0, e?.advancedMetrics && (this.model.getProfiler() || (this.model.config.layerConfig.profiler = new
-    const
+    this.lastState = t, await this.dummyPass(), this.model.trainable = !0, e?.advancedMetrics && (this.model.getProfiler() || (this.model.config.layerConfig.profiler = new x())), this.running = !0, t.logStartTime = n;
+    const m = r ? new L(this.model, r) : void 0, f = await i.iterator();
     try {
-      for (; this.running
+      for (; this.running; ) {
         const o = await f.next();
         if (o.done) break;
-        const d = o.value,
+        const d = o.value, p = this.trainBatch(t, d), s = {
           loss: t.lastLoss,
           step: t.step,
           time: Date.now() - n,
@@ -42,21 +42,21 @@ class I extends L {
           //gradientNorm: options?.advancedMetrics ? await state.gradientNorm : undefined,
         };
         if (this.model.log.push(s), t.step % g === 0) {
-          await
-          const
-          if (t.trainingDuration +=
+          await p.data();
+          const S = Date.now();
+          if (t.trainingDuration += S - t.logStartTime, m)
             try {
-              const a = await
+              const a = await m.evaluate(5);
               t.validationLosses.push(a), s.valLoss = a;
             } catch (a) {
               console.error("Validation error:", a);
             }
           if (l) {
-            if (
-            const
+            if (c) {
+              const v = await w(this.tokenizer, this.model, c, 100, {
                 temperature: 0.8
               });
-              s.example =
+              s.example = v;
             }
             const a = {
               duration: t.trainingDuration,
@@ -68,7 +68,7 @@ class I extends L {
           }
           t.logStartTime = Date.now();
         }
-        t.step >= u && this.stop();
+        p.dispose(), t.step >= u && this.stop();
       }
     } catch (o) {
       throw console.error("Training error:", o), h(), o;
@@ -77,5 +77,5 @@ class I extends L {
   }
 }
 export {
-
+  E as default
 };
```
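The `FullTrainer` changes are chiefly tensor-lifecycle fixes: the loss returned by `trainBatch` is awaited only on log steps and is now explicitly disposed (`p.dispose()`), and the evaluator/profiler are created lazily. A sketch of how the class appears to be driven, with option names taken from this diff and everything else (import path, model/tokenizer/dataset types, the exact `onStep` signature) assumed:

```ts
// Hypothetical driver for the API visible in this diff; not the package's documented usage.
import FullTrainer from "@genai-fi/nanogpt/dist/training/FullTrainer.js";

async function train(model: any, tokenizer: any, dataset: any, validationData?: unknown) {
  // Constructor order (model, tokenizer, learningRate) follows Trainer's constructor below.
  const trainer = new FullTrainer(model, tokenizer, 3e-4);
  await trainer.trainOnDataset(
    dataset,
    {
      maxSteps: 1000,   // defaults in the diff: desiredLoss 0.01, logInterval 1, maxSteps 1e3
      logInterval: 10,
      prompt: "Once upon a time", // triggers a 100-token sample at each log step
      onStep: (stats: any) => console.log(stats), // receives step, loss, valLoss, example, ...
    },
    validationData, // when present, an Evaluator reports valLoss every log interval
  );
}
```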
package/dist/training/Trainer.js
CHANGED
```diff
@@ -1,10 +1,10 @@
-import { DatasetBuilder as
-import
-import { t as f, v as y, d as c } from "../index-
-import { z as
+import { DatasetBuilder as h, flattenTokens as p, PAGE_FACTOR as g } from "./DatasetBuilder.js";
+import u from "./AdamExt.js";
+import { t as f, v as y, d as c } from "../index-UdZhlibC.js";
+import { z as m } from "../zeros-DnPT2nD4.js";
 class x {
-  constructor(t,
-    this.tokenizer =
+  constructor(t, e, a = 1e-3) {
+    this.tokenizer = e, this.model = t, this.learningRate = a, this.resetOptimizer(), this.datasetBuilder = new h(e, t.config.gpt.blockSize);
   }
   model;
   optimizer;
@@ -26,7 +26,7 @@ class x {
   }
   resetOptimizer(t = { learningRateFactor: 1, beta1: 0.9, beta2: 0.99, epsilon: 1e-8 }) {
     this.optimizer && this.optimizer.dispose();
-    const
+    const e = new u(
       t.learningRateFactor * this.learningRate,
       t.beta1,
       t.beta2,
@@ -38,7 +38,7 @@ class x {
         weightDecay: 0
       }
     );
-    this.optimizer =
+    this.optimizer = e;
   }
   /*private async maxGradNorm(grads: NamedVariableMap): Promise<number> {
     let maxNorm = 0;
@@ -56,55 +56,55 @@ class x {
     );
     return maxNorm;
   }*/
-  trainStep(t,
+  trainStep(t, e, a = !1) {
     return f(() => {
       this.model.getProfiler()?.startMemory();
-      const { xs:
-      const [l, d] = this.model.forward({ training: !0 },
+      const { xs: s, ys: i } = e, o = () => {
+        const [l, d] = this.model.forward({ training: !0 }, s, i);
         return l.dispose(), d;
       }, { value: n, grads: r } = y(o);
-      return
+      return a ? this.model.getProfiler()?.endMemory("Training") : (this.optimizer.applyGradients(r), this.model.getProfiler()?.endMemory("Training"), c(r)), n;
     });
   }
   async dummyPass() {
-    const t =
+    const t = m([1, this.model.config.gpt.blockSize], "int32"), e = m([1, this.model.config.gpt.blockSize], "int32");
     try {
-      const
-      await
-    } catch (
-      console.error("Error during dummy pass:",
+      const a = this.trainStep({}, { xs: t, ys: e }, !0);
+      await a.data(), a.dispose();
+    } catch (a) {
+      console.error("Error during dummy pass:", a);
     } finally {
-      t.dispose(),
+      t.dispose(), e.dispose();
     }
   }
-
+  trainBatch(t, e) {
     try {
-      const
-      return
-    } catch (
-      throw console.error(`Error processing batch at step ${t.step}:`,
+      const a = this.trainStep(t, e, !1);
+      return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, a;
+    } catch (a) {
+      throw console.error(`Error processing batch at step ${t.step}:`, a), c(), a;
     }
   }
-  async createTrainValidationSplit(t,
-    const
-    if (
-      const r = Math.floor(
+  async createTrainValidationSplit(t, e = 32, a = 0.1) {
+    const s = await p(t, this.tokenizer), i = /* @__PURE__ */ new Set();
+    if (a > 0) {
+      const r = Math.floor(s.length / (this.datasetBuilder.blockSize * g)), l = Math.max(1, Math.floor(r * a));
       for (; i.size < l; ) {
         const d = Math.floor(Math.random() * r);
         i.add(d);
       }
     }
-    const o = await this.datasetBuilder.createTextDataset(
-      a,
+    const o = await this.datasetBuilder.createTextDataset(s, e, i, !1), n = await this.datasetBuilder.createTextDataset(
       s,
+      e,
       i,
       !0
     );
     return { trainDataset: o, validationDataset: n };
   }
-  async createDataset(t,
-    const
-    return await this.datasetBuilder.createTextDataset(
+  async createDataset(t, e = 32) {
+    const a = await p(t, this.tokenizer);
+    return await this.datasetBuilder.createTextDataset(a, e);
   }
   dispose() {
     this.optimizer && this.optimizer.dispose();
```
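`trainStep` gains a third flag (here `a`) that computes gradients without applying them; `dummyPass` uses it to push zero-filled int32 batches through the model once, so backend kernels are compiled before real training runs. In plain tfjs terms the warm-up amounts to something like:

```ts
// Spirit of dummyPass: one throwaway training step on zero tensors so that
// backend kernels/shaders compile before real steps are run or timed. Sketch only.
import * as tf from "@tensorflow/tfjs-core";

async function warmup(step: (xs: tf.Tensor, ys: tf.Tensor) => tf.Tensor, blockSize: number) {
  const xs = tf.zeros([1, blockSize], "int32");
  const ys = tf.zeros([1, blockSize], "int32");
  try {
    const loss = step(xs, ys); // gradients are computed but not applied (skip flag)
    await loss.data();         // forces execution of the whole graph
    loss.dispose();
  } catch (err) {
    console.error("Error during dummy pass:", err);
  } finally {
    xs.dispose();
    ys.dispose();
  }
}
```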
package/dist/training/sparseCrossEntropy.d.ts
CHANGED
```diff
@@ -4,8 +4,4 @@ import * as tf from '@tensorflow/tfjs-core';
  * This version handles potential numerical issues better
  */
 export declare function sparseSoftmaxCrossEntropy(logits: tf.Tensor, labels: tf.Tensor): tf.Tensor;
-/**
- * Custom gradient implementation for sparse cross-entropy
- * This ensures proper backpropagation
- */
 export declare function createSoftmaxCrossEntropyWithGrad(): (...args: tf.Tensor[]) => tf.Tensor<tf.Rank>;
```
package/dist/training/sparseCrossEntropy.js
CHANGED
```diff
@@ -1,22 +1,22 @@
 import { gatherSub as x } from "../ops/gatherSub.js";
 import { scatterSub as L } from "../ops/scatterSub.js";
-import {
-import { s as
-import { m as z } from "../max-
-import { l as v } from "../log_sum_exp-
+import { y, t as u, z as C, c as E } from "../index-UdZhlibC.js";
+import { s as G } from "../softmax-JMEIUo2J.js";
+import { m as z } from "../max-ByjEGoFx.js";
+import { l as v } from "../log_sum_exp-BnmCkHWl.js";
 function k(t, s) {
   return u(() => {
-    const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a =
+    const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a = E(h, r), m = v(a, -1);
     return x(m, p, a);
   });
 }
 function A() {
-  return
+  return y(
     // @ts-expect-error Invalid params
     (s, n, d) => {
       const c = s.shape[s.shape.length - 1], p = s.shape.slice(0, -1).reduce((o, e) => o * e, 1), r = s.reshape([p, c]), a = n.reshape([p]).cast("int32"), m = k(r, a);
       return d([r, a]), r.dispose(), a.dispose(), { value: m, gradFunc: (o, e) => u(() => {
-        const S = e[0], f = e[1], b =
+        const S = e[0], f = e[1], b = G(S), l = L(b, f, o), g = C(n);
         return [l.reshape(s.shape), g];
       }) };
     }
```
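The custom gradient uses `scatterSub` to realise the textbook identity for sparse softmax cross-entropy: the gradient with respect to the logits is softmax(logits) minus the one-hot labels, scaled by the upstream gradient. A standalone tfjs-core rendering of that backward rule (my construction, not the package's code):

```ts
import * as tf from "@tensorflow/tfjs-core";

// d(loss)/d(logits) for sparse softmax cross-entropy:
// softmax(logits) with 1 subtracted at each label index, times the upstream gradient dy.
function sparseCrossEntropyGradRef(
  logits: tf.Tensor2D,   // [batch, vocab]
  labels: tf.Tensor1D,   // [batch], int32 class indices
  dy: tf.Tensor1D,       // [batch], upstream gradient of the per-example losses
): tf.Tensor2D {
  return tf.tidy(() => {
    const probs = tf.softmax(logits);                  // [batch, vocab]
    const oneHot = tf.oneHot(labels, logits.shape[1]); // [batch, vocab]
    return tf.mul(tf.sub(probs, oneHot), dy.reshape([-1, 1])) as tf.Tensor2D;
  });
}
```

The fused `scatterSub` op in the diff performs the same subtract-at-index without materialising the one-hot tensor.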
package/dist/utilities/arrayClose.d.ts
ADDED
```diff
@@ -0,0 +1 @@
+export declare function arraysClose(a: unknown, b: unknown, epsilon?: number): boolean;
```
package/dist/utilities/arrayClose.js
ADDED
```diff
@@ -0,0 +1,11 @@
+function f(r, e, n = 1e-5) {
+  if (Array.isArray(r) && Array.isArray(e)) {
+    if (r.length !== e.length) return !1;
+    for (let t = 0; t < r.length; ++t)
+      if (!f(r[t], e[t], n)) return !1;
+    return !0;
+  } else return typeof r == "number" && typeof e == "number" ? r === -1 / 0 && e === -1 / 0 ? !0 : Math.abs(r - e) < n : !1;
+}
+export {
+  f as arraysClose
+};
```
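`arraysClose` is a small recursive epsilon-comparison for nested numeric arrays, with an explicit case so that two `-Infinity` entries compare equal (handy when checking masked attention logits). Usage, assuming the dist path is importable as published:

```ts
// Usage sketch; the deep-import path mirrors the dist layout in this diff.
import { arraysClose } from "@genai-fi/nanogpt/dist/utilities/arrayClose.js";

arraysClose([[1, 2], [3, -Infinity]], [[1.000001, 2], [3, -Infinity]]); // true (default epsilon 1e-5)
arraysClose([1, 2], [1, 2, 3]); // false: length mismatch
arraysClose(1, [1]);            // false: mixed shapes are not coerced
```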
package/dist/utilities/dummy.js
CHANGED
```diff
@@ -1,5 +1,5 @@
-import { m as y, v as P, e as S } from "../index-
-import { z as i } from "../zeros-
+import { m as y, v as P, e as S } from "../index-UdZhlibC.js";
+import { z as i } from "../zeros-DnPT2nD4.js";
 async function w(s) {
   const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
   await e.data(), e.dispose(), n && n.dispose(), t.dispose();
```
package/dist/utilities/generate.js
CHANGED
```diff
@@ -1,6 +1,6 @@
-import "../index-
-import { t as m } from "../tensor2d-
-import { c as u } from "../concat-
+import "../index-UdZhlibC.js";
+import { t as m } from "../tensor2d-CWHxHpLh.js";
+import { c as u } from "../concat-CbXTetof.js";
 async function v(o, r, a, c, f) {
   if (c <= 0)
     throw new Error("Length must be a positive integer");
```
package/dist/utilities/performance.d.ts
CHANGED
```diff
@@ -1,2 +1,2 @@
 import { Tensor } from '@tensorflow/tfjs-core';
-export default function performanceTest(fn: () => Tensor, iterations?: number): Promise<number>;
+export default function performanceTest(fn: () => Tensor, iterations?: number, allowPromise?: boolean): Promise<number>;
```
package/dist/utilities/performance.js
CHANGED
```diff
@@ -1,16 +1,16 @@
-import { t as
-async function
-for (let
-const
-await
+import { t as s } from "../index-UdZhlibC.js";
+async function f(e, o = 10, r = !1) {
+  for (let t = 0; t < 100; t++) {
+    const a = r ? await e() : s(e);
+    t === 99 && await a.data(), a.dispose();
   }
-  const
-  for (let
-  const
-
+  const n = performance.now();
+  for (let t = 0; t < o; t++) {
+    const a = r ? await e() : s(e);
+    t === o - 1 && await a.data(), a.dispose();
   }
-  return (performance.now() -
+  return (performance.now() - n) / o;
 }
 export {
-
+  f as default
 };
```
```diff
@@ -1,5 +1,5 @@
-import "../index-
-import { t as p } from "../tensor-
+import "../index-UdZhlibC.js";
+import { t as p } from "../tensor-Do9PKbIE.js";
 function h(n) {
   const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
   let t = 0;
```
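`performanceTest` picks up an `allowPromise` flag: when set, the callback may be async and is awaited directly rather than run through `tidy`. The harness does 100 warm-up runs, then times `iterations` runs (default 10) and returns the mean milliseconds per run. A hedged usage sketch, with the import path taken from the dist layout in this diff:

```ts
import * as tf from "@tensorflow/tfjs-core";
// Dist path as published; adjust to however the package actually exposes it.
import performanceTest from "@genai-fi/nanogpt/dist/utilities/performance.js";

async function benchmark() {
  const a = tf.randomNormal([256, 256]);
  const b = tf.randomNormal([256, 256]);
  // Synchronous callback: executed inside tidy by the harness.
  const ms = await performanceTest(() => tf.matMul(a, b), 20);
  // Async callback: the new allowPromise flag awaits the promise instead.
  const msAsync = await performanceTest(async () => tf.matMul(a, b), 20, true);
  console.log(`matMul: ${ms.toFixed(3)} ms (sync), ${msAsync.toFixed(3)} ms (async)`);
  a.dispose();
  b.dispose();
}
```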
|