npm - @genai-fi/nanogpt - Versions diffs - 0.7.2 → 0.8.0 - Mend

@genai-fi/nanogpt 0.7.2 → 0.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (173)

package/dist/Generator.d.ts +36 -4
package/dist/Generator.js +183 -69
package/dist/{RealDiv-Dy0p8Bvo.js → RealDiv-N8TpOMYv.js} +14 -14
package/dist/{Reshape-DvudQDvJ.js → Reshape-B-lWQRnF.js} +1 -1
package/dist/{Reshape-DH5srBP0.js → Reshape-Bo8HzP8V.js} +5 -5
package/dist/TeachableLLM.d.ts +6 -6
package/dist/TeachableLLM.js +51 -50
package/dist/Trainer.d.ts +19 -3
package/dist/Trainer.js +71 -28
package/dist/{axis_util-BzbKo31C.js → axis_util-DubwyOhW.js} +3 -3
package/dist/backend.js +2 -2
package/dist/{backend_util-TE7aTPhZ.js → backend_util-BJ-_jSeK.js} +46 -46
package/dist/{broadcast_to-CdbwV-Dj.js → broadcast_to-BYfCp5iL.js} +2 -2
package/dist/{concat-CsxrgovM.js → concat-BmDqqFsa.js} +1 -1
package/dist/{dataset-CtdBYwjo.js → dataset-CJmEGu6D.js} +5 -5
package/dist/{dropout-DYs5QFGQ.js → dropout-sx0sjVAT.js} +8 -8
package/dist/exports_initializers-DAKM8UO9.js +16 -0
package/dist/{gather-CMMy2KEG.js → gather-C1siEkdp.js} +1 -1
package/dist/{gelu-C-dPj6Ku.js → gelu-Bd3UBBxg.js} +1 -1
package/dist/{gpgpu_math-DGNLNL4I.js → gpgpu_math-TFLxaLkw.js} +26 -26
package/dist/{index-CLthM0TO.js → index-BaPo_0H8.js} +185 -185
package/dist/{index-BoWRt-10.js → index-CUQrfsw_.js} +266 -265
package/dist/{kernel_funcs_utils-BYKWV8Aa.js → kernel_funcs_utils-P9aFa232.js} +9 -9
package/dist/layers/BaseLayer.d.ts +8 -13
package/dist/layers/BaseLayer.js +25 -13
package/dist/layers/CausalSelfAttention.d.ts +3 -2
package/dist/layers/CausalSelfAttention.js +28 -28
package/dist/layers/MLP.d.ts +3 -2
package/dist/layers/MLP.js +16 -20
package/dist/layers/PositionEmbedding.d.ts +9 -0
package/dist/layers/PositionEmbedding.js +45 -0
package/dist/layers/RMSNorm.d.ts +3 -2
package/dist/layers/RMSNorm.js +6 -6
package/dist/layers/RoPECache.d.ts +1 -1
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.d.ts +3 -2
package/dist/layers/TiedEmbedding.js +29 -7
package/dist/layers/TransformerBlock.d.ts +3 -2
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/load.d.ts +2 -2
package/dist/loader/loadHF.d.ts +2 -2
package/dist/loader/loadTransformers.d.ts +4 -2
package/dist/loader/loadTransformers.js +10 -9
package/dist/loader/newZipLoad.d.ts +2 -2
package/dist/loader/oldZipLoad.d.ts +2 -2
package/dist/loader/oldZipLoad.js +42 -51
package/dist/loader/save.d.ts +8 -0
package/dist/loader/save.js +62 -0
package/dist/{log_sum_exp-DbjkV734.js → log_sum_exp-C142qZqY.js} +14 -14
package/dist/main.d.ts +5 -4
package/dist/main.js +22 -18
package/dist/{mat_mul-8m8pfdcx.js → mat_mul-DMkduNJu.js} +1 -1
package/dist/{max-Ddnnb5xe.js → max-B3JOcNGb.js} +1 -1
package/dist/mod-uUuj4gSb.js +27 -0
package/dist/models/NanoGPTV1.d.ts +15 -0
package/dist/models/NanoGPTV1.js +71 -0
package/dist/{config.d.ts → models/config.d.ts} +1 -0
package/dist/{config.js → models/config.js} +1 -0
package/dist/models/factory.d.ts +3 -0
package/dist/models/factory.js +14 -0
package/dist/models/model.d.ts +26 -0
package/dist/models/model.js +68 -0
package/dist/{mulmat_packed_gpu-VSekgsNv.js → mulmat_packed_gpu-Cm2gw-c8.js} +1 -1
package/dist/{ones-Dj0SDhHf.js → ones-ZdgQGBCP.js} +2 -2
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/adamAdjust.js +9 -9
package/dist/ops/cpu/adamMoments.js +2 -2
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +5 -5
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +3 -3
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +2 -2
package/dist/ops/cpu/matMulMul.js +1 -1
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +11 -11
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +2 -2
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.js +1 -1
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +4 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/adamAdjust.js +2 -2
package/dist/ops/webgl/adamMoments.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMulGelu.js +10 -10
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/adamAdjust.js +3 -3
package/dist/ops/webgpu/adamMoments.js +3 -3
package/dist/ops/webgpu/appendCache.js +3 -3
package/dist/ops/webgpu/attentionMask.js +3 -3
package/dist/ops/webgpu/gatherSub.js +3 -3
package/dist/ops/webgpu/gelu.js +3 -3
package/dist/ops/webgpu/normRMS.js +2 -2
package/dist/ops/webgpu/normRMSGrad.js +5 -5
package/dist/ops/webgpu/qkv.js +3 -3
package/dist/ops/webgpu/rope.js +3 -3
package/dist/ops/webgpu/scatterSub.js +3 -3
package/dist/ops/webgpu/utils/reductions.js +4 -4
package/dist/{ops-BFGCx8Ri.js → ops-C_1K_-35.js} +103 -103
package/dist/{random_width-sZORGo5k.js → random_width-D8Pwy_na.js} +136 -136
package/dist/{range-CRuAh-gd.js → range-LVHrSLdi.js} +1 -1
package/dist/{reciprocal-BvGAyKyu.js → reciprocal-CaR9e67G.js} +1 -1
package/dist/{register_all_kernels-BwDSRN-f.js → register_all_kernels-DUshvVWP.js} +2026 -2049
package/dist/{reshape-CdBq1WJ6.js → reshape-DEfQGSin.js} +1 -1
package/dist/{scatter_nd_util-DUstGbU1.js → scatter_nd_util-CUPPNLaA.js} +1 -1
package/dist/{selu_util-BJEXVvjX.js → selu_util-8vv5JxQV.js} +3 -3
package/dist/{shared-B8ztnyEk.js → shared-CkNorDcU.js} +83 -83
package/dist/{shared-wS99K7_n.js → shared-D1elLckx.js} +1 -1
package/dist/{sin-BeA3tsEd.js → sin-D2CKKmyR.js} +1 -1
package/dist/{slice-BiOsknYS.js → slice-BnyE-M_7.js} +1 -1
package/dist/{softmax-Bv_6lyMX.js → softmax-DLoZWYBx.js} +1 -1
package/dist/{split-B-dikLRw.js → split-By_n4TKP.js} +1 -1
package/dist/{stack-B17UN2nn.js → stack-DkdFLq37.js} +1 -1
package/dist/{sum-66ew2byf.js → sum-l_0SqM4h.js} +3 -3
package/dist/{tensor-JwS7ZYY6.js → tensor-BAQdLqoU.js} +1 -1
package/dist/{tensor2d-wxPAnDQy.js → tensor2d-BHy261cI.js} +1 -1
package/dist/training/Adam.js +2 -2
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/Evaluator.d.ts +2 -2
package/dist/training/FullTrainer.d.ts +16 -3
package/dist/training/FullTrainer.js +91 -53
package/dist/training/Trainer.d.ts +25 -3
package/dist/training/Trainer.js +39 -47
package/dist/training/sparseCrossEntropy.js +9 -9
package/dist/utilities/dummy.d.ts +4 -4
package/dist/utilities/dummy.js +13 -13
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/parameters.d.ts +1 -1
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/weights.js +2 -2
package/dist/{variable-BuddVFLa.js → variable-C9hihzDB.js} +1 -1
package/dist/{webgpu_program-PFzf1hAQ.js → webgpu_program-dFEVbDPL.js} +1 -1
package/dist/{webgpu_util-D____QpY.js → webgpu_util-DLImlSc6.js} +27 -27
package/dist/{zeros--BdLQ3oG.js → zeros-VZ72lWXM.js} +1 -1
package/package.json +2 -3
package/dist/NanoGPTModel.d.ts +0 -52
package/dist/NanoGPTModel.js +0 -203
package/dist/TiedEmbedding-BxOerUmB.js +0 -43
package/dist/utilities/generate.d.ts +0 -3
package/dist/utilities/generate.js +0 -22
package/dist/utilities/save.d.ts +0 -9
package/dist/utilities/save.js +0 -61

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,30 +1,21 @@
-import { defaultConfig as _ } from "./config.js";
-import f from "./NanoGPTModel.js";
-import { saveModel as u } from "./utilities/save.js";
-import { loadModel as d } from "./loader/load.js";
-import l from "./Generator.js";
-import p from "./Trainer.js";
-import { E as g } from "./index-Dwqa6Zy2.js";
+import { defaultConfig as d } from "./models/config.js";
+import { saveModel as l } from "./loader/save.js";
+import { loadModel as _ } from "./loader/load.js";
+import u from "./Generator.js";
+import f from "./Trainer.js";
+import { E as p } from "./index-Dwqa6Zy2.js";
 import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
-import c from "./tokeniser/CharTokeniser.js";
-import k from "./tokeniser/bpe.js";
-import "./papaparse.min-C8l2Kvo1.js";
-import "./index-Tf7vU29b.js";
-import "./jszip.min-CjP2V1VV.js";
-import "./index-BoWRt-10.js";
-import "./ops/cpu/scatterSub.js";
-import "./ops/webgl/scatterSub.js";
-import "./ops/cpu/gatherSub.js";
-import "./ops/webgl/gatherSub.js";
+import "./index-CUQrfsw_.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "./random_width-sZORGo5k.js";
-import "./register_all_kernels-BwDSRN-f.js";
-import "./dataset-CtdBYwjo.js";
+import "./random_width-D8Pwy_na.js";
+import "./register_all_kernels-DUshvVWP.js";
+import "./index-Tf7vU29b.js";
+import "./dataset-CJmEGu6D.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -36,20 +27,29 @@ import "./ops/grads/fusedSoftmax.js";
 import "./ops/cpu/matMulGelu.js";
 import "./ops/webgl/matMulGelu.js";
 import "./ops/grads/matMulGelu.js";
-import "./ops/cpu/gelu.js";
-import "./ops/webgl/gelu.js";
-import "./gelu-C-dPj6Ku.js";
 import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
+import "./ops/cpu/gatherSub.js";
+import "./ops/webgl/gatherSub.js";
+import "./ops/cpu/scatterSub.js";
+import "./ops/webgl/scatterSub.js";
+import c from "./tokeniser/CharTokeniser.js";
+import g from "./tokeniser/bpe.js";
+import "./papaparse.min-C8l2Kvo1.js";
+import "./jszip.min-CjP2V1VV.js";
+import "./ops/cpu/gelu.js";
+import "./ops/webgl/gelu.js";
+import "./gelu-Bd3UBBxg.js";
 import "./ops/webgl/log.js";
 import "./ops/cpu/adamMoments.js";
 import "./ops/webgl/adamMoments.js";
 import "./ops/cpu/adamAdjust.js";
 import "./ops/webgl/adamAdjust.js";
-import w from "./utilities/profile.js";
+import k from "./utilities/profile.js";
+import w from "./models/factory.js";
 class a {
-  ee = new g();
+  ee = new p();
   _config;
   _model;
   _tokeniser;
@@ -69,7 +69,7 @@ class a {
   get config() {
     if (!this._config)
       throw new Error("configuration_not_initialized.");
-    return this._config.gpt;
+    return this._config;
   }
   get model() {
     if (!this._model)
@@ -92,8 +92,8 @@ class a {
     return this._status === "busy" || this._status === "training";
   }
   estimateTrainingMemoryUsage(t) {
-    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, i = e.perBatch * t, o = e.gradients;
-    return i * 0.66 + o * 4;
+    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, r = e.perBatch * t, o = e.gradients;
+    return r * 0.66 + o * 4;
   }
   setStatus(t) {
     this._status !== t && (this._status = t, this.ee.emit("status", t));
@@ -101,32 +101,32 @@ class a {
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    return u(this._model, this._tokeniser, {
+    return l(this._model, this._tokeniser, {
       ...t,
       name: t?.name || this.meta.name
     });
   }
   static loadModel(t) {
     const e = new a();
-    return d(t).then(({ model: i, tokeniser: o, name: s }) => {
-      e._model = i, e._tokeniser = o, e._config = i.config, s && (e.meta.name = s), e.setStatus("warmup"), m(i).then((r) => {
-        e._memoryRequirements = r, e.setStatus("ready"), e.ee.emit("loaded");
-      }).catch((r) => {
-        e.setStatus("error"), e.ee.emit("error", r);
+    return _(t).then(({ model: r, tokeniser: o, name: s }) => {
+      e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
+        e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
+      }).catch((i) => {
+        e.setStatus("error"), e.ee.emit("error", i);
       });
-    }).catch((i) => {
-      e.setStatus("error"), e.ee.emit("error", i);
+    }).catch((r) => {
+      e.setStatus("error"), e.ee.emit("error", r);
     }), e;
   }
   static create(t, e = {}) {
-    const i = { ..._, ...e }, o = t === "char" ? new c(i.vocabSize) : new k(i.vocabSize), s = new f(i), r = new a(o, s);
-    return r.setStatus("warmup"), m(s).then((n) => {
-      r._memoryRequirements = n, r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (h) => {
-        h === "trained" && r.setStatus("ready");
+    const r = { ...d, ...e }, o = t === "char" ? new c(r.vocabSize) : new g(r.vocabSize), s = w(r), i = new a(o, s);
+    return i.setStatus("warmup"), m(s).then((n) => {
+      i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
+        h === "trained" && i.setStatus("ready");
       }));
     }).catch((n) => {
-      r.setStatus("error"), r.ee.emit("error", n);
-    }), r;
+      i.setStatus("error"), i.ee.emit("error", n);
+    }), i;
   }
   getProfiler() {
     return this._model?.getProfiler();
@@ -138,9 +138,9 @@ class a {
     if (t) {
       if (!this._config)
         return;
-      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
+      this.model.getProfiler() || this.model.setProfiler(new k());
     } else
-      this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
+      this.model.getProfiler() && this.model.setProfiler(null);
   }
   getNumParams() {
     return this._model ? this._model.getNumParams() : 0;
@@ -148,15 +148,16 @@ class a {
   trainer() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new p(this._model, this._tokeniser);
-    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, i) => {
+    const t = new f(this._model, this._tokeniser);
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
       const o = this.ee.listeners("trainStep");
       for (const s of o)
-        await s(e, i);
+        await s(e, r);
     }), t;
   }
-  train(t, e) {
-    return this.trainer().train(t, e);
+  async train(t, e) {
+    const r = this.trainer();
+    await r.prepare(t, e), await r.train(e);
   }
   async trainTokeniser(t) {
     if (!this._tokeniser)
@@ -167,7 +168,7 @@ class a {
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new l(this._model, this._tokeniser);
+    const t = new u(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {

package/dist/Trainer.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
-import { default as NanoGPT } from './NanoGPTModel';
 import { ITokeniser } from './tokeniser/type';
 import { default as EE } from 'eventemitter3';
+import { TrainingLogEntry, TrainingProgress } from './training/Trainer';
+import { default as Model, ModelForwardAttributes } from './models/model';
 export interface ITrainerOptions {
     batchSize?: number;
     learningRate?: number;
@@ -10,12 +11,27 @@ export interface ITrainerOptions {
     prompt?: string;
     validationSplit?: number;
     advancedMetrics?: boolean;
+    gradientCheckpointing?: boolean;
+}
+interface ExtendedTrainingProgress extends TrainingProgress {
+    progress: number;
+    remaining: number;
 }
 export default class Trainer extends EE<'start' | 'stop' | 'log'> {
     private trainer;
     private hasTrained;
-    constructor(model: NanoGPT, tokeniser: ITokeniser);
+    private trainDataset?;
+    private validationDataset?;
+    private totalSamples;
+    private log;
+    private progress;
+    constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser);
     stop(): void;
     reset(): void;
-    train(text: string[], options?: ITrainerOptions): Promise<void>;
+    prepare(text: string[], options?: ITrainerOptions): Promise<void>;
+    train(options?: ITrainerOptions): Promise<void>;
+    step(options?: ITrainerOptions): Promise<void>;
+    getLog(): TrainingLogEntry[];
+    getProgress(): ExtendedTrainingProgress | null;
 }
+export {};

package/dist/Trainer.js CHANGED Viewed

@@ -1,48 +1,91 @@
-import { E as h } from "./index-Dwqa6Zy2.js";
-import m from "./training/FullTrainer.js";
-class p extends h {
+import { E as l } from "./index-Dwqa6Zy2.js";
+import h from "./training/FullTrainer.js";
+class m extends l {
   trainer;
   hasTrained = !1;
-  constructor(e, t) {
-    super(), this.trainer = new m(e, t, 1e-3);
+  trainDataset;
+  validationDataset;
+  totalSamples = 0;
+  log = [];
+  progress = null;
+  constructor(t, e) {
+    super(), this.trainer = new h(t, e, 1e-3);
   }
   stop() {
     this.trainer.stop();
   }
   reset() {
-    this.hasTrained = !1, this.trainer.reset();
-  }
-  async train(e, t) {
-    const { trainDataset: s, validationDataset: n } = await this.trainer.createTrainValidationSplit(
-      e,
-      t?.batchSize || 32,
-      t?.validationSplit || 0.1
-    ), r = e.reduce((i, a) => i + a.length, 0) * (1 - (t?.validationSplit || 0));
-    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), await this.trainer.trainOnDataset(
-      s,
+    this.hasTrained = !1, this.log = [], this.trainer.reset();
+  }
+  async prepare(t, e) {
+    const { trainDataset: a, validationDataset: s } = await this.trainer.createTrainValidationSplit(
+      t,
+      e?.batchSize || 32,
+      e?.validationSplit || 0.1
+    ), i = t.reduce((r, n) => r + n.length, 0) * (1 - (e?.validationSplit || 0));
+    this.trainDataset = a, this.validationDataset = s, this.totalSamples = i;
+  }
+  async train(t) {
+    if (!this.trainDataset || !this.validationDataset)
+      throw new Error("Datasets not prepared");
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), this.trainer.setGradientCheckpointing(t?.gradientCheckpointing || !1), await this.trainer.trainOnDataset(
+      this.trainDataset,
       {
         prompt: t?.prompt,
         logInterval: t?.logInterval || 10,
         desiredLoss: t?.desiredLoss || 0.01,
         maxSteps: t?.maxSteps || 1e3,
         advancedMetrics: t?.advancedMetrics || !1,
-        onStep: async (i, a) => {
-          const l = this.listeners("log");
-          for (const d of l)
-            await d(i, {
-              ...a,
-              progress: a.totalSamples / r,
-              remaining: Math.max(
-                0,
-                (r - a.totalSamples) / a.totalSamples * a.duration
-              )
-            });
+        onStep: async (e, a) => {
+          this.log.push(e), this.progress = {
+            ...a,
+            progress: a.totalSamples / this.totalSamples,
+            remaining: Math.max(
+              0,
+              (this.totalSamples - a.totalSamples) / a.totalSamples * a.duration
+            )
+          };
+          const s = this.listeners("log");
+          for (const i of s)
+            await i(e, this.progress);
         }
       },
-      n
+      this.validationDataset
     ), this.emit("stop");
   }
+  async step(t) {
+    if (!this.trainDataset || !this.validationDataset)
+      throw new Error("Datasets not prepared");
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start");
+    const { log: e, progress: a } = await this.trainer.stepDataset(
+      this.trainDataset,
+      {
+        prompt: t?.prompt,
+        logInterval: t?.logInterval || 10,
+        desiredLoss: t?.desiredLoss || 0.01,
+        maxSteps: t?.maxSteps || 1e3,
+        advancedMetrics: t?.advancedMetrics || !1
+      },
+      this.validationDataset
+    ), s = this.listeners("log");
+    for (const i of s)
+      await i(e, {
+        ...a,
+        progress: a.totalSamples / this.totalSamples,
+        remaining: Math.max(
+          0,
+          (this.totalSamples - a.totalSamples) / a.totalSamples * a.duration
+        )
+      });
+    this.emit("stop");
+  }
+  getLog() {
+    return this.log;
+  }
+  getProgress() {
+    return this.progress;
+  }
 }
 export {
-  p as default
+  m as default
 };

package/dist/{axis_util-BzbKo31C.js → axis_util-DubwyOhW.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { l as c } from "./index-BoWRt-10.js";
+import { n as c } from "./index-CUQrfsw_.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -28,7 +28,7 @@ function a(e, n, t) {
     t.indexOf(u) === -1 ? s.push(e[o++]) : s.push(n[f++]);
   return s;
 }
-function p(e, n) {
+function l(e, n) {
   const t = [], r = e.length;
   for (let o = 0; o < r; o++)
     n.indexOf(o) === -1 && t.push(e[o]);
@@ -62,7 +62,7 @@ function x(e, n) {
 export {
   x as a,
   m as b,
-  p as c,
+  l as c,
   i as d,
   h as e,
   a as f,

package/dist/backend.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { g as a, s as i, r as o } from "./index-BoWRt-10.js";
+import { g as a, s as i, r as o } from "./index-CUQrfsw_.js";
 async function e(t) {
-  a() !== t && (t === "webgpu" && (await import("./index-CLthM0TO.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
+  a() !== t && (t === "webgpu" && (await import("./index-BaPo_0H8.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
 }
 export {
   e as selectBackend

package/dist/{backend_util-TE7aTPhZ.js → backend_util-BJ-_jSeK.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { j as m, a1 as O, l as g, aK as $, aL as R, aM as M, k as _, aa as y, aw as D, aN as T, u as b, aO as F } from "./index-BoWRt-10.js";
-import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-BzbKo31C.js";
-import { S as U, a as B, b as V, c as j, d as k, e as G, f as H, g as q, h as Z, i as K, j as X, k as J, l as Y, m as Q, s as ee, n as te, o as ne, t as se } from "./selu_util-BJEXVvjX.js";
-import { c as re, v as oe, a as ae } from "./scatter_nd_util-DUstGbU1.js";
+import { j as m, a2 as O, n as g, aM as $, aN as R, aO as M, l as _, ad as y, ay as D, aP as T, u as b, aQ as F } from "./index-CUQrfsw_.js";
+import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-DubwyOhW.js";
+import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-8vv5JxQV.js";
+import { c as re, v as oe, a as ae } from "./scatter_nd_util-CUPPNLaA.js";
 function ie(e, n) {
   const r = e.shape.length, t = n.shape.length;
   if (r < 1)
@@ -233,7 +233,7 @@ function Ie(e, n) {
     r.push(e[t][0]);
   return r;
 }
-function we(e, n, r) {
+function Se(e, n, r) {
   const t = e.slice(0, 1);
   for (let s = 0; s < r; ++s)
     t.push(e[s + 1] - n[s][0] - n[s][1]);
@@ -255,7 +255,7 @@ function we(e, n, r) {
  * limitations under the License.
  * =============================================================================
  */
-const Se = 0.3275911, Ae = 0.254829592, Oe = -0.284496736, Re = 1.421413741, Me = -1.453152027, _e = 1.061405429;
+const we = 0.3275911, Ae = 0.254829592, Oe = -0.284496736, Re = 1.421413741, Me = -1.453152027, _e = 1.061405429;
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -333,7 +333,7 @@ function ve(e, n, r) {
  * limitations under the License.
  * =============================================================================
  */
-const E = "->", Ne = /->/g, w = ",", S = "...";
+const E = "->", Ne = /->/g, S = ",", w = "...";
 function xe(e, n) {
   e = e.replace(/\s/g, "");
   const r = (e.length - e.replace(Ne, "").length) / E.length;
@@ -342,8 +342,8 @@ function xe(e, n) {
   if (r > 1)
     throw new Error(`Equation must contain exactly one arrow ("${E}").`);
   const [t, s] = e.split(E);
-  g(t.indexOf(S) === -1, () => `The ellipsis notation ("${S}") is not supported yet.`);
-  const o = t.split(w), a = o.length;
+  g(t.indexOf(w) === -1, () => `The ellipsis notation ("${w}") is not supported yet.`);
+  const o = t.split(S), a = o.length;
   if (n !== a)
     throw new Error(`Expected ${a} input tensors, received ${n}`);
   if (a > 2)
@@ -357,7 +357,7 @@ function xe(e, n) {
   }
   for (let l = 0; l < t.length; ++l) {
     const f = t[l];
-    u.indexOf(f) === -1 && f !== w && u.push(f);
+    u.indexOf(f) === -1 && f !== S && u.push(f);
   }
   const c = new Array(o.length);
   for (let l = 0; l < a; ++l) {
@@ -449,10 +449,10 @@ function je(e) {
   return `Received SparseTensor with denseShape[0] = 0 but
   indices.shape[0] = ${e}`;
 }
-function ke(e, n) {
+function Ge(e, n) {
   return `indices(${e}, 0) is invalid: ${n} < 0`;
 }
-function Ge(e, n, r) {
+function He(e, n, r) {
   return `indices(${e}, 0) is invalid: ${n} >= ${r}`;
 }
 /**
@@ -471,7 +471,7 @@ function Ge(e, n, r) {
  * limitations under the License.
  * =============================================================================
  */
-function He(e, n) {
+function ke(e, n) {
   return `only one output dimension may be -1, not both ${e} and ${n}`;
 }
 function qe(e, n) {
@@ -480,12 +480,12 @@ function qe(e, n) {
 function Ze() {
   return "reshape cannot infer the missing input size for an empty tensor unless all specified input sizes are non-zero";
 }
-function Ke(e, n) {
+function Xe(e, n) {
   const r = m(e), t = m(n);
   return `Input to reshape is a SparseTensor with ${r}
   dense values, but the requested shape requires a multiple of ${t}. inputShape=${e} outputShape= ${n}`;
 }
-function Xe(e, n) {
+function Je(e, n) {
   const r = m(e), t = m(n);
   return `Input to reshape is a tensor with ${r} dense values, but the requested shape has ${t}. inputShape=${e} outputShape=${n}`;
 }
@@ -505,13 +505,13 @@ function Xe(e, n) {
  * limitations under the License.
  * =============================================================================
  */
-function Je() {
+function Ke() {
   return "segment ids must be >= 0";
 }
-function Ye() {
+function Qe() {
   return "segment ids are not increasing";
 }
-function Qe(e, n) {
+function Ye(e, n) {
   return `Segment id ${e} out of range [0, ${n}), possibly because segmentIds input is not sorted.`;
 }
 function et(e, n, r) {
@@ -608,7 +608,7 @@ const ht = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   ERF_A3: Re,
   ERF_A4: Me,
   ERF_A5: _e,
-  ERF_P: Se,
+  ERF_P: we,
   PARALLELIZE_THRESHOLD: I,
   get RowPartitionType() {
     return p;
@@ -628,18 +628,18 @@ const ht = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   combineRaggedTensorToTensorShapes: ce,
   complexWithEvenIndex: Te,
   complexWithOddIndex: be,
-  computeConv2DInfo: k,
-  computeConv3DInfo: G,
-  computeDefaultPad: H,
+  computeConv2DInfo: G,
+  computeConv3DInfo: H,
+  computeDefaultPad: k,
   computeDilation2DInfo: q,
   computeOptimalWindowSize: ge,
   computeOutAndReduceShapes: N,
   computeOutShape: le,
   computePool2DInfo: Z,
-  computePool3DInfo: K,
-  convertConv2DDataFormat: X,
+  computePool3DInfo: X,
+  convertConv2DDataFormat: J,
   decodeEinsumEquation: xe,
-  eitherStridesOrDilationsAreOne: J,
+  eitherStridesOrDilationsAreOne: K,
   expandShapeToKeepDim: x,
   exponent: ve,
   exponents: We,
@@ -650,8 +650,8 @@ const ht = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   getComplexWithIndex: Fe,
   getEinsumComputePath: ze,
   getEinsumPermutation: Pe,
-  getFusedBiasGradient: Y,
-  getFusedDyActivation: Q,
+  getFusedBiasGradient: Q,
+  getFusedDyActivation: Y,
   getImageCenter: de,
   getInnerMostAxes: C,
   getPermuted: Ee,
@@ -661,19 +661,19 @@ const ht = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   getReshapedPermuted: $e,
   getRowPartitionTypesHelper: he,
   getSliceBeginCoords: Ie,
-  getSliceSize: we,
+  getSliceSize: Se,
   getSparseFillEmptyRowsIndicesDenseShapeMismatch: je,
-  getSparseFillEmptyRowsNegativeIndexErrorMessage: ke,
-  getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: Ge,
+  getSparseFillEmptyRowsNegativeIndexErrorMessage: Ge,
+  getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: He,
   getSparseReshapeEmptyTensorZeroOutputDimErrorMessage: Ze,
-  getSparseReshapeInputOutputMismatchErrorMessage: Xe,
-  getSparseReshapeInputOutputMultipleErrorMessage: Ke,
-  getSparseReshapeMultipleNegativeOneOutputDimErrorMessage: He,
+  getSparseReshapeInputOutputMismatchErrorMessage: Je,
+  getSparseReshapeInputOutputMultipleErrorMessage: Xe,
+  getSparseReshapeMultipleNegativeOneOutputDimErrorMessage: ke,
   getSparseReshapeNegativeOutputDimErrorMessage: qe,
   getSparseSegmentReductionIndicesOutOfRangeErrorMessage: et,
-  getSparseSegmentReductionNegativeSegmentIdsErrorMessage: Je,
-  getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage: Ye,
-  getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage: Qe,
+  getSparseSegmentReductionNegativeSegmentIdsErrorMessage: Ke,
+  getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage: Qe,
+  getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage: Ye,
   getUndoAxesPermutation: z,
   isIdentityPermutation: Ue,
   log: T,
@@ -697,8 +697,8 @@ export {
   Ee as B,
   $e as C,
   Ie as D,
-  Se as E,
-  we as F,
+  we as E,
+  Se as F,
   le as G,
   ue as H,
   xe as I,
@@ -728,17 +728,17 @@ export {
   ot as f,
   he as g,
   je as h,
-  ke as i,
-  Ge as j,
-  He as k,
+  Ge as i,
+  He as j,
+  ke as k,
   qe as l,
   ye as m,
   Ze as n,
-  Ke as o,
-  Xe as p,
-  Je as q,
-  Ye as r,
-  Qe as s,
+  Xe as o,
+  Je as p,
+  Ke as q,
+  Qe as r,
+  Ye as s,
   et as t,
   Ae as u,
   pe as v,

package/dist/{broadcast_to-CdbwV-Dj.js → broadcast_to-BYfCp5iL.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { B as h, C as f, F as p, M as g, E as u, N as b } from "./index-BoWRt-10.js";
-import { r as T } from "./reshape-CdBq1WJ6.js";
+import { B as h, C as f, L as p, F as g, E as u, W as b } from "./index-CUQrfsw_.js";
+import { r as T } from "./reshape-DEfQGSin.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{concat-CsxrgovM.js → concat-BmDqqFsa.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { B as s, l as a, D as p, M as i, E as l, Q as f } from "./index-BoWRt-10.js";
+import { B as s, n as a, D as p, F as i, E as l, H as f } from "./index-CUQrfsw_.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{dataset-CtdBYwjo.js → dataset-CJmEGu6D.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { ag as S, T as h, ac as N, d as v, ah as o, ai as p, aj as g, l as k, t as y } from "./index-BoWRt-10.js";
+import { ai as S, T as h, af as k, d as v, aj as o, ak as p, al as g, n as N, t as y } from "./index-CUQrfsw_.js";
 import { s as R } from "./index-C4L8Cm77.js";
-import { s as $ } from "./stack-B17UN2nn.js";
-import { t as B } from "./tensor-JwS7ZYY6.js";
+import { s as $ } from "./stack-DkdFLq37.js";
+import { t as B } from "./tensor-BAQdLqoU.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -75,7 +75,7 @@ function I(s) {
 }
 function c(s) {
   let t = !1;
-  if (N().get("IS_BROWSER"))
+  if (k().get("IS_BROWSER"))
     t = s instanceof TextDecoder;
   else {
     const { StringDecoder: e } = require("string_decoder");
@@ -930,7 +930,7 @@ class T {
    */
   batch(t, e = !0) {
     const r = this;
-    k(t > 0, () => `batchSize needs to be positive, but it is
+    N(t > 0, () => `batchSize needs to be positive, but it is
       ${t}`);
     let n;
     return this.size === 1 / 0 || this.size == null ? n = this.size : e ? n = Math.ceil(this.size / t) : n = Math.floor(this.size / t), u(async () => (await r.iterator()).columnMajorBatch(t, e, st), n);