npm - @genai-fi/nanogpt - Versions diffs - 0.7.1 → 0.7.3 - Mend

@genai-fi/nanogpt 0.7.1 → 0.7.3

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (138)

package/dist/Generator.d.ts +11 -2
package/dist/Generator.js +81 -68
package/dist/NanoGPTModel.js +8 -8
package/dist/{RealDiv-CVYNbZxu.js → RealDiv-Dy0p8Bvo.js} +7 -7
package/dist/{Reshape-CEsEp0AI.js → Reshape-DH5srBP0.js} +2 -2
package/dist/{Reshape-Do18N3gO.js → Reshape-DvudQDvJ.js} +1 -1
package/dist/TeachableLLM.js +33 -32
package/dist/{TiedEmbedding-ccLBFiZi.js → TiedEmbedding-BxOerUmB.js} +4 -4
package/dist/Trainer.d.ts +6 -1
package/dist/Trainer.js +53 -19
package/dist/{axis_util-5DTW2tFV.js → axis_util-BzbKo31C.js} +1 -1
package/dist/backend.js +2 -2
package/dist/{backend_util-C9Ut8n0Q.js → backend_util-TE7aTPhZ.js} +4 -4
package/dist/{broadcast_to-Ba9h_8DO.js → broadcast_to-CdbwV-Dj.js} +2 -2
package/dist/{concat-CbXTetof.js → concat-CsxrgovM.js} +1 -1
package/dist/{dataset-U3PrjwgU.js → dataset-CtdBYwjo.js} +3 -3
package/dist/{dropout-DPfPgWWe.js → dropout-DYs5QFGQ.js} +1 -1
package/dist/{gather-Bbh8DHhM.js → gather-CMMy2KEG.js} +1 -1
package/dist/{gelu-BFwVnd1r.js → gelu-C-dPj6Ku.js} +1 -1
package/dist/{gpgpu_math-DffelNS-.js → gpgpu_math-DGNLNL4I.js} +2 -2
package/dist/{index-UdZhlibC.js → index-BoWRt-10.js} +4 -4
package/dist/{index-DYD_yPa-.js → index-CLthM0TO.js} +10 -10
package/dist/{kernel_funcs_utils-CXDy3EN7.js → kernel_funcs_utils-BYKWV8Aa.js} +3 -3
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +5 -5
package/dist/{log_sum_exp-BnmCkHWl.js → log_sum_exp-DbjkV734.js} +5 -5
package/dist/main.js +5 -5
package/dist/{mat_mul-dwmZz69e.js → mat_mul-8m8pfdcx.js} +1 -1
package/dist/{max-ByjEGoFx.js → max-Ddnnb5xe.js} +1 -1
package/dist/{mulmat_packed_gpu-IGPBp6h9.js → mulmat_packed_gpu-VSekgsNv.js} +1 -1
package/dist/{ones-C8Mfln6-.js → ones-Dj0SDhHf.js} +2 -2
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/adamAdjust.js +1 -1
package/dist/ops/cpu/adamMoments.js +2 -2
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +5 -5
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +3 -3
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +2 -2
package/dist/ops/cpu/matMulMul.js +1 -1
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +5 -5
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +2 -2
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.js +1 -1
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +4 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/adamAdjust.js +2 -2
package/dist/ops/webgl/adamMoments.js +7 -5
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMulGelu.js +4 -4
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/adamAdjust.js +15 -13
package/dist/ops/webgpu/adamMoments.js +18 -11
package/dist/ops/webgpu/appendCache.js +18 -15
package/dist/ops/webgpu/attentionMask.js +24 -18
package/dist/ops/webgpu/gatherSub.js +17 -30
package/dist/ops/webgpu/gelu.js +3 -3
package/dist/ops/webgpu/normRMS.js +16 -8
package/dist/ops/webgpu/normRMSGrad.js +25 -20
package/dist/ops/webgpu/qkv.js +23 -19
package/dist/ops/webgpu/rope.js +37 -24
package/dist/ops/webgpu/scatterSub.js +16 -14
package/dist/ops/webgpu/utils/reductions.js +4 -4
package/dist/{ops-aRTXR2Sr.js → ops-BFGCx8Ri.js} +15 -15
package/dist/{random_width-DbSpgl4o.js → random_width-sZORGo5k.js} +22 -22
package/dist/{range-D9CZhVlR.js → range-CRuAh-gd.js} +1 -1
package/dist/{reciprocal-CGB48wZB.js → reciprocal-BvGAyKyu.js} +1 -1
package/dist/{register_all_kernels-DnbAyBXt.js → register_all_kernels-BwDSRN-f.js} +30 -30
package/dist/{reshape-BR0eoLYN.js → reshape-CdBq1WJ6.js} +1 -1
package/dist/{scatter_nd_util-OjyAxku2.js → scatter_nd_util-DUstGbU1.js} +1 -1
package/dist/{selu_util-Ce6pu9IM.js → selu_util-BJEXVvjX.js} +3 -3
package/dist/{shared-Czipaeb6.js → shared-B8ztnyEk.js} +6 -6
package/dist/{shared-DS5waSIY.js → shared-wS99K7_n.js} +1 -1
package/dist/{sin-CiBxrDqX.js → sin-BeA3tsEd.js} +1 -1
package/dist/{slice-BHbDHObE.js → slice-BiOsknYS.js} +1 -1
package/dist/{softmax-JMEIUo2J.js → softmax-Bv_6lyMX.js} +1 -1
package/dist/{split-CRU0PjVV.js → split-B-dikLRw.js} +1 -1
package/dist/{stack-ikk2Y8_P.js → stack-B17UN2nn.js} +1 -1
package/dist/{sum-NLYbiDag.js → sum-66ew2byf.js} +1 -1
package/dist/{tensor-Do9PKbIE.js → tensor-JwS7ZYY6.js} +1 -1
package/dist/{tensor2d-CWHxHpLh.js → tensor2d-wxPAnDQy.js} +1 -1
package/dist/training/Adam.js +2 -2
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +35 -32
package/dist/training/FullTrainer.d.ts +15 -2
package/dist/training/FullTrainer.js +97 -51
package/dist/training/Trainer.d.ts +10 -0
package/dist/training/Trainer.js +2 -2
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +2 -2
package/dist/utilities/generate.js +3 -3
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/weights.js +2 -2
package/dist/{variable-BTBkayv_.js → variable-BuddVFLa.js} +1 -1
package/dist/{webgpu_program-WaoMq-WD.js → webgpu_program-PFzf1hAQ.js} +1 -1
package/dist/{webgpu_util-DhSeP4b6.js → webgpu_util-D____QpY.js} +1 -1
package/dist/{zeros-DnPT2nD4.js → zeros--BdLQ3oG.js} +1 -1
package/package.json +1 -1

package/dist/Generator.d.ts CHANGED Viewed

@@ -9,11 +9,20 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
     private readonly model;
     private readonly tokeniser;
     private active;
+    private cache;
+    private initialPrompt;
+    private outputText;
+    private actualTokeniser;
+    private lastToken;
     constructor(model: NanoGPT, tokeniser: ITokeniser);
     private tokenisePrompt;
-    private generateNoCache;
     private processResponse;
-    private generateCache;
+    private _generate;
+    reset(): void;
+    dispose(): void;
+    private initialise;
+    step(prompt?: string, options?: IGenerateOptions): Promise<string>;
     generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
     stop(): void;
+    getText(): string;
 }

package/dist/Generator.js CHANGED Viewed

@@ -1,15 +1,15 @@
-import { E as u } from "./index-Dwqa6Zy2.js";
-import "./index-UdZhlibC.js";
+import { E as l } from "./index-Dwqa6Zy2.js";
+import "./index-BoWRt-10.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "./random_width-DbSpgl4o.js";
-import "./register_all_kernels-DnbAyBXt.js";
+import "./random_width-sZORGo5k.js";
+import "./register_all_kernels-BwDSRN-f.js";
 import "./index-Tf7vU29b.js";
-import "./dataset-U3PrjwgU.js";
+import "./dataset-CtdBYwjo.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -29,7 +29,7 @@ import "./ops/webgl/gatherSub.js";
 import "./ops/cpu/scatterSub.js";
 import "./ops/webgl/scatterSub.js";
 import "./jszip.min-CjP2V1VV.js";
-import f from "./tokeniser/CharTokeniser.js";
+import u from "./tokeniser/CharTokeniser.js";
 import "./ops/cpu/adamAdjust.js";
 import "./ops/webgl/adamAdjust.js";
 import "./ops/cpu/adamMoments.js";
@@ -37,12 +37,12 @@ import "./ops/webgl/adamMoments.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./gelu-BFwVnd1r.js";
+import "./gelu-C-dPj6Ku.js";
 import "./ops/webgl/log.js";
-import { t as d } from "./tensor2d-CWHxHpLh.js";
-import { c as g } from "./concat-CbXTetof.js";
+import { t as p } from "./tensor2d-wxPAnDQy.js";
+import { c as f } from "./concat-CsxrgovM.js";
 const k = [
-  ...Array.from({ length: 95 }, (a, t) => String.fromCharCode(t + 32)),
+  ...Array.from({ length: 95 }, (r, t) => String.fromCharCode(t + 32)),
   // ASCII
   // Spanish accented letters and punctuation
   ..."áéíóúüñ¿¡",
@@ -53,80 +53,93 @@ const k = [
   // Cyrillic letters
   ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
 ];
-function w(a, t) {
-  return a.length === t ? a : a.length > t ? a.slice(0, t) : a.concat(Array(t - a.length).fill(""));
+function d(r, t) {
+  return r.length === t ? r : r.length > t ? r.slice(0, t) : r.concat(Array(t - r.length).fill(""));
 }
-class pt extends u {
-  constructor(t, o) {
-    super(), this.model = t, this.tokeniser = o;
+class nt extends l {
+  constructor(t, i) {
+    super(), this.model = t, this.tokeniser = i, this.actualTokeniser = i;
   }
   active = !1;
-  async tokenisePrompt(t, o) {
-    const r = o ? await t.tokenise([o], !0) : [[t.eosToken]];
-    return d(r, [1, r[0].length], "int32");
+  cache = null;
+  initialPrompt = null;
+  outputText = "";
+  actualTokeniser;
+  lastToken = -1;
+  async tokenisePrompt(t, i) {
+    const e = i ? await t.tokenise([i], !0) : [[t.eosToken]];
+    return p(e, [1, e[0].length], "int32");
   }
-  async generateNoCache(t, o, r) {
-    let i = await this.tokenisePrompt(t, o), s = o || "";
-    const n = r?.maxLength ?? 1e3;
-    for (let m = 0; m < n && this.active; m++) {
-      const {
-        output: e,
-        attention: p,
-        probabilities: c
-      } = await this.model.generate(i, void 0, r), h = i;
-      i = g([i, e], 1), h.dispose();
-      const l = await this.processResponse(t, e, p, c);
-      if (e.dispose(), l === null)
-        break;
-      s += l;
-    }
-    return i.dispose(), s;
-  }
-  async processResponse(t, o, r, i) {
-    const s = (await o.array())[0][0];
-    if (s === this.tokeniser.eosToken)
+  async processResponse(t, i, e, o) {
+    const s = (await i.array())[0][0];
+    if (this.lastToken = s, s === this.tokeniser.eosToken)
       return null;
     const n = await t.decode([s]);
-    let m;
-    r && (m = await Promise.all(r.map((p) => p.array().then((c) => c))), r.forEach((p) => p.dispose()));
-    let e;
-    return i && (e = await i.array(), i.dispose()), this.emit("tokens", [s], n, m, e), n;
+    let c;
+    e && (c = await Promise.all(e.map((h) => h.array().then((m) => m))), e.forEach((h) => h.dispose()));
+    let a;
+    return o && (a = await o.array(), o.dispose()), this.emit("tokens", [s], n, c, a), n;
   }
-  async generateCache(t, o, r) {
-    let i = await this.tokenisePrompt(t, o), s = o || "";
-    const n = new Array(this.model.config.gpt.nLayer);
-    for (let e = 0; e < this.model.config.gpt.nLayer; e++)
-      n[e] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
-    const m = r?.maxLength ?? 1e3;
-    for (let e = 0; e < m && this.active; e++) {
+  async _generate(t) {
+    let i = this.lastToken >= 0 && this.cache ? p([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
+    const e = t?.maxLength ?? 1e3;
+    for (let o = 0; o < e && this.active; o++) {
       const {
-        output: p,
-        probabilities: c,
-        attention: h
-      } = await this.model.generate(i, n, {
-        ...r,
-        usePadding: !1
+        output: s,
+        probabilities: n,
+        attention: c
+      } = await this.model.generate(i, this.cache ? this.cache : void 0, {
+        ...t,
+        usePadding: !this.cache
       });
-      i.dispose(), i = p;
-      const l = await this.processResponse(t, p, h, c);
-      if (l === null)
+      if (this.cache)
+        i.dispose(), i = s;
+      else {
+        const h = i;
+        i = f([i, s], 1), h.dispose();
+      }
+      const a = await this.processResponse(this.actualTokeniser, s, c, n);
+      if (this.cache || s.dispose(), a === null)
         break;
-      s += l;
+      this.outputText += a;
+    }
+    return i.dispose(), this.outputText;
+  }
+  reset() {
+    this.cache && (this.cache.forEach((t) => {
+      t && (t.k && t.k.dispose(), t.v && t.v.dispose());
+    }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1;
+  }
+  dispose() {
+    this.reset();
+  }
+  initialise(t, i) {
+    const e = t && t.length > this.model.config.gpt.blockSize ? t.slice(-this.model.config.gpt.blockSize) : t ?? null;
+    if (this.cache && i?.noCache && this.reset(), this.initialPrompt = e || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !i?.noCache && this.model.config.gpt.useRope) {
+      const s = new Array(this.model.config.gpt.nLayer);
+      for (let n = 0; n < this.model.config.gpt.nLayer; n++)
+        s[n] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
+      this.cache = s, this.lastToken = -1;
     }
-    return n.forEach((e) => {
-      e && (e.k && e.k.dispose(), e.v && e.v.dispose());
-    }), i.dispose(), s;
+    const o = this.tokeniser.trained ? this.tokeniser : new u(d(k, this.tokeniser.vocabSize));
+    this.actualTokeniser = o;
   }
-  async generate(t, o) {
-    const r = t && t.length > this.model.config.gpt.blockSize ? t.slice(-this.model.config.gpt.blockSize) : t;
-    this.active = !0, this.emit("start");
-    const i = this.tokeniser.trained ? this.tokeniser : new f(w(k, this.tokeniser.vocabSize)), n = await (this.model.config.gpt.useRope && !o?.noCache ? this.generateCache(i, r, o) : this.generateNoCache(i, r, o));
-    return this.active = !1, this.emit("stop"), n;
+  async step(t, i) {
+    const e = { ...i, maxLength: 1 };
+    return this.generate(t, e);
+  }
+  async generate(t, i) {
+    this.initialise(t, i), this.active = !0, this.emit("start");
+    const o = await this._generate(i);
+    return this.active = !1, this.emit("stop"), o;
   }
   stop() {
     this.active = !1;
   }
+  getText() {
+    return this.outputText;
+  }
 }
 export {
-  pt as default
+  nt as default
 };

package/dist/NanoGPTModel.js CHANGED Viewed

@@ -1,19 +1,19 @@
 import { defaultConfig as M } from "./config.js";
 import v from "./layers/TransformerBlock.js";
-import { T as x, r as T } from "./TiedEmbedding-ccLBFiZi.js";
+import { T as x, r as T } from "./TiedEmbedding-BxOerUmB.js";
 import F from "./layers/RoPECache.js";
 import O from "./layers/RMSNorm.js";
 import { estimateParameterCount as _ } from "./utilities/parameters.js";
 import { createSoftmaxCrossEntropyWithGrad as D } from "./training/sparseCrossEntropy.js";
 import K from "./layers/BaseLayer.js";
-import { E as N, D as R, p as q } from "./random_width-DbSpgl4o.js";
-import { B as A, C as B, E as G, ad as V, t as C, o as j, b as z, w as U } from "./index-UdZhlibC.js";
+import { E as N, D as R, p as q } from "./random_width-sZORGo5k.js";
+import { B as A, C as B, E as G, ad as V, t as C, o as j, b as z, w as U } from "./index-BoWRt-10.js";
 import W from "./utilities/multinomialCPU.js";
-import { m as H, t as J } from "./register_all_kernels-DnbAyBXt.js";
-import { r as P } from "./reshape-BR0eoLYN.js";
-import { r as Q } from "./range-D9CZhVlR.js";
-import { s as $ } from "./softmax-JMEIUo2J.js";
-import { g as X } from "./gather-Bbh8DHhM.js";
+import { m as H, t as J } from "./register_all_kernels-BwDSRN-f.js";
+import { r as P } from "./reshape-CdBq1WJ6.js";
+import { r as Q } from "./range-CRuAh-gd.js";
+import { s as $ } from "./softmax-Bv_6lyMX.js";
+import { g as X } from "./gather-CMMy2KEG.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{RealDiv-CVYNbZxu.js → RealDiv-Dy0p8Bvo.js} RENAMED Viewed

@@ -1,10 +1,10 @@
-import { aq as T, ac as E, p as O, j as V, ay as B, Y as F, U, az as j } from "./index-UdZhlibC.js";
-import { r as $ } from "./Reshape-CEsEp0AI.js";
-import { g as A, a as k, b as C, c as N, e as R } from "./axis_util-5DTW2tFV.js";
-import { t as K, m as W } from "./shared-DS5waSIY.js";
-import { c as _ } from "./backend_util-C9Ut8n0Q.js";
-import { f as y } from "./gpgpu_math-DffelNS-.js";
-import { g as G, b as L } from "./kernel_funcs_utils-CXDy3EN7.js";
+import { aq as T, ac as E, p as O, j as V, ay as B, Y as F, U, az as j } from "./index-BoWRt-10.js";
+import { r as $ } from "./Reshape-DH5srBP0.js";
+import { g as A, a as k, b as C, c as N, e as R } from "./axis_util-BzbKo31C.js";
+import { t as K, m as W } from "./shared-wS99K7_n.js";
+import { c as _ } from "./backend_util-TE7aTPhZ.js";
+import { f as y } from "./gpgpu_math-DGNLNL4I.js";
+import { g as G, b as L } from "./kernel_funcs_utils-BYKWV8Aa.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{Reshape-CEsEp0AI.js → Reshape-DH5srBP0.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { j as c, a2 as C, l as f, K as R } from "./index-UdZhlibC.js";
-import { u as g, g as I, a as x, b as F, c as $, d as u, e as l, i as m } from "./gpgpu_math-DffelNS-.js";
+import { j as c, a3 as C, l as f, K as R } from "./index-BoWRt-10.js";
+import { u as g, g as I, a as x, b as F, c as $, d as u, e as l, i as m } from "./gpgpu_math-DGNLNL4I.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{Reshape-Do18N3gO.js → Reshape-DvudQDvJ.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { j as h, a2 as d, l as c, K as m } from "./index-UdZhlibC.js";
+import { j as h, a3 as d, l as c, K as m } from "./index-BoWRt-10.js";
 /**
  * @license
  * Copyright 2021 Google LLC. All Rights Reserved.

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,17 +1,17 @@
 import { defaultConfig as _ } from "./config.js";
 import f from "./NanoGPTModel.js";
-import { saveModel as u } from "./utilities/save.js";
-import { loadModel as d } from "./loader/load.js";
-import l from "./Generator.js";
+import { saveModel as d } from "./utilities/save.js";
+import { loadModel as l } from "./loader/load.js";
+import u from "./Generator.js";
 import p from "./Trainer.js";
-import { E as g } from "./index-Dwqa6Zy2.js";
+import { E as c } from "./index-Dwqa6Zy2.js";
 import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
-import c from "./tokeniser/CharTokeniser.js";
+import g from "./tokeniser/CharTokeniser.js";
 import k from "./tokeniser/bpe.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
 import "./jszip.min-CjP2V1VV.js";
-import "./index-UdZhlibC.js";
+import "./index-BoWRt-10.js";
 import "./ops/cpu/scatterSub.js";
 import "./ops/webgl/scatterSub.js";
 import "./ops/cpu/gatherSub.js";
@@ -22,9 +22,9 @@ import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "./random_width-DbSpgl4o.js";
-import "./register_all_kernels-DnbAyBXt.js";
-import "./dataset-U3PrjwgU.js";
+import "./random_width-sZORGo5k.js";
+import "./register_all_kernels-BwDSRN-f.js";
+import "./dataset-CtdBYwjo.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -38,7 +38,7 @@ import "./ops/webgl/matMulGelu.js";
 import "./ops/grads/matMulGelu.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./gelu-BFwVnd1r.js";
+import "./gelu-C-dPj6Ku.js";
 import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
@@ -49,7 +49,7 @@ import "./ops/cpu/adamAdjust.js";
 import "./ops/webgl/adamAdjust.js";
 import w from "./utilities/profile.js";
 class a {
-  ee = new g();
+  ee = new c();
   _config;
   _model;
   _tokeniser;
@@ -92,8 +92,8 @@ class a {
     return this._status === "busy" || this._status === "training";
   }
   estimateTrainingMemoryUsage(t) {
-    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, i = e.perBatch * t, o = e.gradients;
-    return i * 0.66 + o * 4;
+    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, r = e.perBatch * t, o = e.gradients;
+    return r * 0.66 + o * 4;
   }
   setStatus(t) {
     this._status !== t && (this._status = t, this.ee.emit("status", t));
@@ -101,32 +101,32 @@ class a {
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    return u(this._model, this._tokeniser, {
+    return d(this._model, this._tokeniser, {
       ...t,
       name: t?.name || this.meta.name
     });
   }
   static loadModel(t) {
     const e = new a();
-    return d(t).then(({ model: i, tokeniser: o, name: s }) => {
-      e._model = i, e._tokeniser = o, e._config = i.config, s && (e.meta.name = s), e.setStatus("warmup"), m(i).then((r) => {
-        e._memoryRequirements = r, e.setStatus("ready"), e.ee.emit("loaded");
-      }).catch((r) => {
-        e.setStatus("error"), e.ee.emit("error", r);
+    return l(t).then(({ model: r, tokeniser: o, name: s }) => {
+      e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
+        e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
+      }).catch((i) => {
+        e.setStatus("error"), e.ee.emit("error", i);
       });
-    }).catch((i) => {
-      e.setStatus("error"), e.ee.emit("error", i);
+    }).catch((r) => {
+      e.setStatus("error"), e.ee.emit("error", r);
     }), e;
   }
   static create(t, e = {}) {
-    const i = { ..._, ...e }, o = t === "char" ? new c(i.vocabSize) : new k(i.vocabSize), s = new f(i), r = new a(o, s);
-    return r.setStatus("warmup"), m(s).then((n) => {
-      r._memoryRequirements = n, r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (h) => {
-        h === "trained" && r.setStatus("ready");
+    const r = { ..._, ...e }, o = t === "char" ? new g(r.vocabSize) : new k(r.vocabSize), s = new f(r), i = new a(o, s);
+    return i.setStatus("warmup"), m(s).then((n) => {
+      i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
+        h === "trained" && i.setStatus("ready");
       }));
     }).catch((n) => {
-      r.setStatus("error"), r.ee.emit("error", n);
-    }), r;
+      i.setStatus("error"), i.ee.emit("error", n);
+    }), i;
   }
   getProfiler() {
     return this._model?.getProfiler();
@@ -149,14 +149,15 @@ class a {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
     const t = new p(this._model, this._tokeniser);
-    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, i) => {
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
       const o = this.ee.listeners("trainStep");
       for (const s of o)
-        await s(e, i);
+        await s(e, r);
     }), t;
   }
-  train(t, e) {
-    return this.trainer().train(t, e);
+  async train(t, e) {
+    const r = this.trainer();
+    await r.prepare(t, e), await r.train(e);
   }
   async trainTokeniser(t) {
     if (!this._tokeniser)
@@ -167,7 +168,7 @@ class a {
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new l(this._model, this._tokeniser);
+    const t = new u(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {

package/dist/{TiedEmbedding-ccLBFiZi.js → TiedEmbedding-BxOerUmB.js} RENAMED Viewed

@@ -1,8 +1,8 @@
-import { R as a, d as s } from "./random_width-DbSpgl4o.js";
-import "./index-UdZhlibC.js";
+import { R as a, d as s } from "./random_width-sZORGo5k.js";
+import "./index-BoWRt-10.js";
 import o from "./layers/BaseLayer.js";
-import { v as m } from "./variable-BTBkayv_.js";
-import { g as d } from "./gather-Bbh8DHhM.js";
+import { v as m } from "./variable-BuddVFLa.js";
+import { g as d } from "./gather-CMMy2KEG.js";
 /**
  * @license
  * Copyright 2018 Google LLC

package/dist/Trainer.d.ts CHANGED Viewed

@@ -14,8 +14,13 @@ export interface ITrainerOptions {
 export default class Trainer extends EE<'start' | 'stop' | 'log'> {
     private trainer;
     private hasTrained;
+    private trainDataset?;
+    private validationDataset?;
+    private totalSamples;
     constructor(model: NanoGPT, tokeniser: ITokeniser);
     stop(): void;
     reset(): void;
-    train(text: string[], options?: ITrainerOptions): Promise<void>;
+    prepare(text: string[], options?: ITrainerOptions): Promise<void>;
+    train(options?: ITrainerOptions): Promise<void>;
+    step(options?: ITrainerOptions): Promise<void>;
 }

package/dist/Trainer.js CHANGED Viewed

@@ -1,10 +1,13 @@
-import { E as h } from "./index-Dwqa6Zy2.js";
-import m from "./training/FullTrainer.js";
-class p extends h {
+import { E as l } from "./index-Dwqa6Zy2.js";
+import h from "./training/FullTrainer.js";
+class p extends l {
   trainer;
   hasTrained = !1;
-  constructor(e, t) {
-    super(), this.trainer = new m(e, t, 1e-3);
+  trainDataset;
+  validationDataset;
+  totalSamples = 0;
+  constructor(t, e) {
+    super(), this.trainer = new h(t, e, 1e-3);
   }
   stop() {
     this.trainer.stop();
@@ -12,36 +15,67 @@ class p extends h {
   reset() {
     this.hasTrained = !1, this.trainer.reset();
   }
-  async train(e, t) {
-    const { trainDataset: s, validationDataset: n } = await this.trainer.createTrainValidationSplit(
-      e,
-      t?.batchSize || 32,
-      t?.validationSplit || 0.1
-    ), r = e.reduce((i, a) => i + a.length, 0) * (1 - (t?.validationSplit || 0));
+  async prepare(t, e) {
+    const { trainDataset: a, validationDataset: s } = await this.trainer.createTrainValidationSplit(
+      t,
+      e?.batchSize || 32,
+      e?.validationSplit || 0.1
+    ), i = t.reduce((r, n) => r + n.length, 0) * (1 - (e?.validationSplit || 0));
+    this.trainDataset = a, this.validationDataset = s, this.totalSamples = i;
+  }
+  async train(t) {
+    if (!this.trainDataset || !this.validationDataset)
+      throw new Error("Datasets not prepared");
     this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), await this.trainer.trainOnDataset(
-      s,
+      this.trainDataset,
       {
         prompt: t?.prompt,
         logInterval: t?.logInterval || 10,
         desiredLoss: t?.desiredLoss || 0.01,
         maxSteps: t?.maxSteps || 1e3,
         advancedMetrics: t?.advancedMetrics || !1,
-        onStep: async (i, a) => {
-          const l = this.listeners("log");
-          for (const d of l)
-            await d(i, {
+        onStep: async (e, a) => {
+          const s = this.listeners("log");
+          for (const i of s)
+            await i(e, {
               ...a,
-              progress: a.totalSamples / r,
+              progress: a.totalSamples / this.totalSamples,
               remaining: Math.max(
                 0,
-                (r - a.totalSamples) / a.totalSamples * a.duration
+                (this.totalSamples - a.totalSamples) / a.totalSamples * a.duration
               )
             });
         }
       },
-      n
+      this.validationDataset
     ), this.emit("stop");
   }
+  async step(t) {
+    if (!this.trainDataset || !this.validationDataset)
+      throw new Error("Datasets not prepared");
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start");
+    const { log: e, progress: a } = await this.trainer.stepDataset(
+      this.trainDataset,
+      {
+        prompt: t?.prompt,
+        logInterval: t?.logInterval || 10,
+        desiredLoss: t?.desiredLoss || 0.01,
+        maxSteps: t?.maxSteps || 1e3,
+        advancedMetrics: t?.advancedMetrics || !1
+      },
+      this.validationDataset
+    ), s = this.listeners("log");
+    for (const i of s)
+      await i(e, {
+        ...a,
+        progress: a.totalSamples / this.totalSamples,
+        remaining: Math.max(
+          0,
+          (this.totalSamples - a.totalSamples) / a.totalSamples * a.duration
+        )
+      });
+    this.emit("stop");
+  }
 }
 export {
   p as default

package/dist/{axis_util-5DTW2tFV.js → axis_util-BzbKo31C.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { l as c } from "./index-UdZhlibC.js";
+import { l as c } from "./index-BoWRt-10.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.

package/dist/backend.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { g as a, s as i, r as o } from "./index-UdZhlibC.js";
+import { g as a, s as i, r as o } from "./index-BoWRt-10.js";
 async function e(t) {
-  a() !== t && (t === "webgpu" && (await import("./index-DYD_yPa-.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
+  a() !== t && (t === "webgpu" && (await import("./index-CLthM0TO.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
 }
 export {
   e as selectBackend

package/dist/{backend_util-C9Ut8n0Q.js → backend_util-TE7aTPhZ.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { j as m, a1 as O, l as g, aK as $, aL as R, aM as M, k as _, aa as y, aw as D, aN as T, u as b, aO as F } from "./index-UdZhlibC.js";
-import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-5DTW2tFV.js";
-import { S as U, a as B, b as V, c as j, d as k, e as G, f as H, g as q, h as Z, i as K, j as X, k as J, l as Y, m as Q, s as ee, n as te, o as ne, t as se } from "./selu_util-Ce6pu9IM.js";
-import { c as re, v as oe, a as ae } from "./scatter_nd_util-OjyAxku2.js";
+import { j as m, a1 as O, l as g, aK as $, aL as R, aM as M, k as _, aa as y, aw as D, aN as T, u as b, aO as F } from "./index-BoWRt-10.js";
+import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-BzbKo31C.js";
+import { S as U, a as B, b as V, c as j, d as k, e as G, f as H, g as q, h as Z, i as K, j as X, k as J, l as Y, m as Q, s as ee, n as te, o as ne, t as se } from "./selu_util-BJEXVvjX.js";
+import { c as re, v as oe, a as ae } from "./scatter_nd_util-DUstGbU1.js";
 function ie(e, n) {
   const r = e.shape.length, t = n.shape.length;
   if (r < 1)

package/dist/{broadcast_to-Ba9h_8DO.js → broadcast_to-CdbwV-Dj.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { B as h, C as f, F as p, M as g, E as u, N as b } from "./index-UdZhlibC.js";
-import { r as T } from "./reshape-BR0eoLYN.js";
+import { B as h, C as f, F as p, M as g, E as u, N as b } from "./index-BoWRt-10.js";
+import { r as T } from "./reshape-CdBq1WJ6.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{concat-CbXTetof.js → concat-CsxrgovM.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { B as s, l as a, D as p, M as i, E as l, Q as f } from "./index-UdZhlibC.js";
+import { B as s, l as a, D as p, M as i, E as l, Q as f } from "./index-BoWRt-10.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{dataset-U3PrjwgU.js → dataset-CtdBYwjo.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { ag as S, T as h, ac as N, d as v, ah as o, ai as p, aj as g, l as k, t as y } from "./index-UdZhlibC.js";
+import { ag as S, T as h, ac as N, d as v, ah as o, ai as p, aj as g, l as k, t as y } from "./index-BoWRt-10.js";
 import { s as R } from "./index-C4L8Cm77.js";
-import { s as $ } from "./stack-ikk2Y8_P.js";
-import { t as B } from "./tensor-Do9PKbIE.js";
+import { s as $ } from "./stack-B17UN2nn.js";
+import { t as B } from "./tensor-JwS7ZYY6.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{dropout-DPfPgWWe.js → dropout-DYs5QFGQ.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { B as l, C as h, E as m, ak as p, F as c, al as d, ab as g, l as u, T as V, n as v, o as N, a as w } from "./index-UdZhlibC.js";
+import { B as l, C as h, E as m, ak as p, F as c, al as d, ab as g, l as u, T as V, n as v, o as N, a as w } from "./index-BoWRt-10.js";
 import { s as f } from "./index-C4L8Cm77.js";
 /**
  * @license

package/dist/{gather-Bbh8DHhM.js → gather-CMMy2KEG.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { B as g, C as t, E as h, G as p } from "./index-UdZhlibC.js";
+import { B as g, C as t, E as h, G as p } from "./index-BoWRt-10.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{gelu-BFwVnd1r.js → gelu-C-dPj6Ku.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { i as t, e as n } from "./index-UdZhlibC.js";
+import { i as t, e as n } from "./index-BoWRt-10.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
 const a = {