npm - @genai-fi/nanogpt - Versions diffs - 0.8.5 → 0.9.0 - Mend

@genai-fi/nanogpt 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

package/dist/Generator.d.ts +4 -1
package/dist/Generator.js +144 -124
package/dist/{RealDiv-D_q39E3A.js → RealDiv-D4EzDsC0.js} +7 -7
package/dist/{Reshape-Bh_jzKzV.js → Reshape-Bowtk9BP.js} +2 -2
package/dist/{Reshape-41YpQqEo.js → Reshape-DUqYftGC.js} +1 -1
package/dist/TeachableLLM.js +5 -5
package/dist/Trainer.d.ts +1 -0
package/dist/Trainer.js +3 -0
package/dist/{axis_util-Did9235A.js → axis_util-TbGYJ208.js} +1 -1
package/dist/backend.js +2 -2
package/dist/{backend_util-yC3YH1jo.js → backend_util-CJIiDoV1.js} +4 -4
package/dist/{broadcast_to-CUvOdOT5.js → broadcast_to-DzlNweb8.js} +2 -2
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +3 -3
package/dist/checks/gelu.js +2 -2
package/dist/checks/matMulGelu.js +5 -5
package/dist/checks/normRMS.js +4 -4
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/qkv.js +2 -2
package/dist/checks/rope.js +2 -2
package/dist/{concat-pHiVqR3L.js → concat-B912vBbo.js} +1 -1
package/dist/{dataset-DPPl-iLT.js → dataset-DlZtKmBq.js} +3 -3
package/dist/{dropout-CcKSfOYE.js → dropout-C-csYCLj.js} +6 -6
package/dist/{exports_initializers-DKk7-bsx.js → exports_initializers-B8iZMgQ0.js} +1 -1
package/dist/{gather-CPg6ZlQA.js → gather-Dnpgw-YQ.js} +1 -1
package/dist/{gelu-BkcmEEyD.js → gelu-Bp_-935b.js} +1 -1
package/dist/{gpgpu_math-D_ODOLix.js → gpgpu_math-CDaYiyE_.js} +2 -2
package/dist/{index-DdmHGZjq.js → index-BzFyqcy-.js} +13 -13
package/dist/{index-evZ57wr4.js → index-C1rx_Ajs.js} +10 -10
package/dist/{kernel_funcs_utils-CDfFpUab.js → kernel_funcs_utils-DKLK0Mg3.js} +3 -3
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +5 -5
package/dist/layers/PositionEmbedding.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +5 -5
package/dist/{log_sum_exp-C8yFJfZz.js → log_sum_exp-DO6z8tSE.js} +9 -9
package/dist/main.d.ts +1 -0
package/dist/main.js +18 -16
package/dist/{mat_mul-Dpy2mMRu.js → mat_mul-DzjTFx-u.js} +1 -1
package/dist/{mod-CbibJi3D.js → mod-Dobti4j4.js} +1 -1
package/dist/models/NanoGPTV1.d.ts +1 -0
package/dist/models/NanoGPTV1.js +12 -9
package/dist/models/model.d.ts +1 -0
package/dist/models/model.js +5 -5
package/dist/{mulmat_packed_gpu-q_Gmwyld.js → mulmat_packed_gpu-BT60jmzP.js} +1 -1
package/dist/{ones-BAqVh-eA.js → ones-tIJeHlq-.js} +2 -2
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/adamAdjust.js +1 -1
package/dist/ops/cpu/adamMoments.js +2 -2
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +5 -5
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +5 -5
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +2 -2
package/dist/ops/cpu/matMulMul.js +1 -1
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +13 -13
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +2 -2
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.js +1 -1
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +4 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/adamAdjust.js +2 -2
package/dist/ops/webgl/adamMoments.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMulGelu.js +4 -4
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/adamAdjust.js +3 -3
package/dist/ops/webgpu/adamMoments.js +3 -3
package/dist/ops/webgpu/appendCache.js +3 -3
package/dist/ops/webgpu/attentionMask.js +3 -3
package/dist/ops/webgpu/gatherSub.js +3 -3
package/dist/ops/webgpu/gelu.js +37 -35
package/dist/ops/webgpu/normRMS.js +2 -2
package/dist/ops/webgpu/normRMSGrad.js +5 -5
package/dist/ops/webgpu/qkv.js +3 -3
package/dist/ops/webgpu/rope.js +3 -3
package/dist/ops/webgpu/scatterSub.js +3 -3
package/dist/ops/webgpu/utils/reductions.js +4 -4
package/dist/{ops-542ai2vG.js → ops-LuCMAnmM.js} +65 -65
package/dist/{random_width-DKGeiFuR.js → random_width-CXVRloNK.js} +23 -23
package/dist/{range-BcUvLuf5.js → range-CWcz7xFA.js} +3 -3
package/dist/{reciprocal-DhDWSKiD.js → reciprocal-C4rNcM-S.js} +1 -1
package/dist/{register_all_kernels-Do9VvZmo.js → register_all_kernels-DIGpEwcf.js} +31 -31
package/dist/{relu-B1AXs7p5.js → relu-BjCh_SYb.js} +1 -1
package/dist/{reshape-WeJkT3ja.js → reshape-CnIwVG1c.js} +1 -1
package/dist/{scatter_nd_util-B7yDhiQr.js → scatter_nd_util-BQdz--Gn.js} +1 -1
package/dist/{selu_util-BgUO9gHY.js → selu_util-OtRzVwW5.js} +23 -23
package/dist/{shared-V6D_md-c.js → shared-DmRsFyaJ.js} +6 -6
package/dist/{shared-CZiWmQCI.js → shared-DuP7ue-R.js} +1 -1
package/dist/{sin-CPxad7Am.js → sin-gpDNRxE0.js} +1 -1
package/dist/{slice-B7jXtPnp.js → slice-d0Vo9XTN.js} +1 -1
package/dist/{softmax-BfsyI4As.js → softmax-D7Jj3p_P.js} +1 -1
package/dist/{split-BPxr8_8m.js → split-DK2k5eHf.js} +1 -1
package/dist/{stack-BNwLzE43.js → stack-DFatutCx.js} +1 -1
package/dist/{sum-ByFINZgi.js → sum-CJ0ULhmt.js} +1 -1
package/dist/{tensor-DbqgIV9B.js → tensor-CZr4dh61.js} +1 -1
package/dist/{tensor1d-CtJq5BOv.js → tensor1d-vML0r3q6.js} +1 -1
package/dist/{tensor2d-CObBWBkW.js → tensor2d-D76QGjF3.js} +1 -1
package/dist/{tensor4d-DLtk7Nxh.js → tensor4d-Df1WlVDY.js} +1 -1
package/dist/training/Adam.js +2 -2
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +2 -2
package/dist/training/sparseCrossEntropy.js +3 -3
package/dist/utilities/dummy.js +2 -2
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/topP.d.ts +1 -0
package/dist/utilities/topP.js +13 -0
package/dist/utilities/weights.js +2 -2
package/dist/{variable-DPFOJyRG.js → variable-Bm2OFwGI.js} +1 -1
package/dist/{webgpu_program-Dhk9R5aG.js → webgpu_program-DkQJOJSd.js} +1 -1
package/dist/{webgpu_util-BqGnZg8t.js → webgpu_util-pLEV9tks.js} +1 -1
package/dist/{zeros-Dnwix0p4.js → zeros-Bj5rMYA7.js} +1 -1
package/package.json +1 -1

package/dist/Generator.d.ts CHANGED Viewed

@@ -8,7 +8,8 @@ export interface GenerateOptions {
     usePadding?: boolean;
     attentionScores?: boolean;
     includeProbabilities?: boolean;
-    embeddings?: boolean;
+    embeddings?: 'embedding' | 'logits' | 'softmax' | 'all';
+    targets?: number[];
 }
 export interface IGenerateOptions extends GenerateOptions {
     maxLength?: number;
@@ -31,6 +32,7 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
     private probabilitiesData;
     private embeddingsData;
     private tokens;
+    private lastLoss;
     constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser);
     private tokenisePrompt;
     private processResponse;
@@ -52,4 +54,5 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
         tensor: number[][];
     }[][];
     getTokens(): number[];
+    getLastLoss(): number | null;
 }

package/dist/Generator.js CHANGED Viewed

@@ -1,15 +1,15 @@
-import { E as z } from "./index-Dwqa6Zy2.js";
-import { C as A, D as L, E as C, a6 as I, t as O, k as R } from "./index-DdmHGZjq.js";
+import { E as C } from "./index-Dwqa6Zy2.js";
+import { E as _, F as I, G as O, a6 as R, t as q, k as K } from "./index-BzFyqcy-.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import { p as _ } from "./random_width-DKGeiFuR.js";
-import { t as K } from "./register_all_kernels-Do9VvZmo.js";
+import { p as j } from "./random_width-CXVRloNK.js";
+import { t as G } from "./register_all_kernels-DIGpEwcf.js";
 import "./index-Tf7vU29b.js";
-import "./dataset-DPPl-iLT.js";
+import "./dataset-DlZtKmBq.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -24,29 +24,31 @@ import "./ops/grads/matMulGelu.js";
 import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
-import "./ops/cpu/gatherSub.js";
-import "./ops/webgl/gatherSub.js";
-import "./ops/cpu/scatterSub.js";
-import "./ops/webgl/scatterSub.js";
+import { sparseSoftmaxCrossEntropy as V } from "./training/sparseCrossEntropy.js";
 import "./jszip.min-CjP2V1VV.js";
-import M from "./tokeniser/CharTokeniser.js";
+import $ from "./tokeniser/CharTokeniser.js";
 import "./ops/cpu/adamAdjust.js";
 import "./ops/webgl/adamAdjust.js";
 import "./ops/cpu/adamMoments.js";
 import "./ops/webgl/adamMoments.js";
 import "./papaparse.min-C8l2Kvo1.js";
+import M from "./utilities/topP.js";
+import "./ops/cpu/scatterSub.js";
+import "./ops/webgl/scatterSub.js";
+import "./ops/cpu/gatherSub.js";
+import "./ops/webgl/gatherSub.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./gelu-BkcmEEyD.js";
+import "./gelu-Bp_-935b.js";
 import "./ops/webgl/log.js";
 import "./checks/normRMS.js";
 import "./checks/normRMSGrad.js";
-import $ from "./utilities/multinomialCPU.js";
-import { r as x } from "./reshape-WeJkT3ja.js";
-import { t as P } from "./tensor2d-CObBWBkW.js";
-import { s as v } from "./softmax-BfsyI4As.js";
-import { g as q } from "./gather-CPg6ZlQA.js";
-import { c as G } from "./concat-pHiVqR3L.js";
+import N from "./utilities/multinomialCPU.js";
+import { r as E } from "./reshape-CnIwVG1c.js";
+import { t as P } from "./tensor2d-D76QGjF3.js";
+import { s as S } from "./softmax-D7Jj3p_P.js";
+import { g as F } from "./gather-Dnpgw-YQ.js";
+import { c as H } from "./concat-B912vBbo.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -63,18 +65,18 @@ import { c as G } from "./concat-pHiVqR3L.js";
  * limitations under the License.
  * =============================================================================
  */
-function N(h, t, e, i = !1) {
-  const o = L(h, "logits", "multinomial"), s = o.size, n = o.rank;
-  if (s < 2)
-    throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
-  if (n > 2)
-    throw new Error(`Rank of probabilities must be 1 or 2, but is ${n}`);
-  e = e || Math.random();
-  const a = { logits: n === 1 ? x(o, [1, -1]) : o }, l = { numSamples: t, seed: e, normalized: i }, m = C.runKernel(I, a, l);
-  return n === 1 ? x(m, [m.size]) : m;
+function U(p, t, s, e = !1) {
+  const o = I(p, "logits", "multinomial"), i = o.size, c = o.rank;
+  if (i < 2)
+    throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${i}.`);
+  if (c > 2)
+    throw new Error(`Rank of probabilities must be 1 or 2, but is ${c}`);
+  s = s || Math.random();
+  const n = { logits: c === 1 ? E(o, [1, -1]) : o }, l = { numSamples: t, seed: s, normalized: e }, d = O.runKernel(R, n, l);
+  return c === 1 ? E(d, [d.size]) : d;
 }
-const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
-  ...Array.from({ length: 95 }, (h, t) => String.fromCharCode(t + 32)),
+const z = /* @__PURE__ */ _({ multinomial_: U }), W = [
+  ...Array.from({ length: 95 }, (p, t) => String.fromCharCode(t + 32)),
   // ASCII
   // Spanish accented letters and punctuation
   ..."áéíóúüñ¿¡",
@@ -85,12 +87,12 @@ const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
   // Cyrillic letters
   ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
 ];
-function U(h, t) {
-  return h.length === t ? h : h.length > t ? h.slice(0, t) : h.concat(Array(t - h.length).fill(""));
+function B(p, t) {
+  return p.length === t ? p : p.length > t ? p.slice(0, t) : p.concat(Array(t - p.length).fill(""));
 }
-class qt extends z {
-  constructor(t, e) {
-    super(), this.model = t, this.tokeniser = e, this.actualTokeniser = e;
+class Wt extends C {
+  constructor(t, s) {
+    super(), this.model = t, this.tokeniser = s, this.actualTokeniser = s;
   }
   active = !1;
   cache = null;
@@ -102,130 +104,145 @@ class qt extends z {
   probabilitiesData = [];
   embeddingsData = [];
   tokens = [];
-  async tokenisePrompt(t, e) {
-    const i = e ? await t.tokenise([e], !0) : [[t.eosToken]];
-    return P(i, [1, i[0].length], "int32");
+  lastLoss = null;
+  async tokenisePrompt(t, s) {
+    const e = s ? await t.tokenise([s], !0) : [[t.eosToken]];
+    return e[0].length > this.model.config.blockSize && (e[0] = e[0].slice(-this.model.config.blockSize)), P(e, [1, e[0].length], "int32");
   }
-  async processResponse(t, e, i, o) {
-    const s = (await e.array())[0][0];
-    if (this.lastToken = s, s === this.tokeniser.eosToken)
+  async processResponse(t, s, e, o) {
+    const i = (await s.array())[0][0];
+    if (this.lastToken = i, i === this.tokeniser.eosToken)
       return null;
-    const n = await t.decode([s]);
-    if (i) {
-      const d = await Promise.all(
-        i.map((a) => a.array().then((l) => l))
+    const c = await t.decode([i]);
+    if (e) {
+      const T = await Promise.all(
+        e.map((n) => n.array().then((l) => l))
       );
-      i.forEach((a) => a.dispose()), this.attentionData.push(d);
-    }
-    if (o) {
-      const d = await o.array();
-      o.dispose(), this.probabilitiesData.push(d);
+      e.forEach((n) => n.dispose()), this.attentionData.push(T);
     }
-    return this.tokens.push(s), this.emit("tokens", [s], n), n;
+    return o && this.probabilitiesData.push(o), this.tokens.push(i), this.emit("tokens", [i], c), c;
   }
   /** Generate logits and select a token. */
-  async _generateToken(t, e, i) {
-    const o = i?.temperature ?? 1, s = i?.topK, n = i?.topP, d = i?.usePadding ?? !1, a = {
+  async _generateToken(t, s, e) {
+    const o = e?.temperature ?? 1, i = e?.topK, c = e?.topP, T = e?.usePadding ?? !1, n = {
       training: !1,
-      attentionScores: i?.attentionScores ? {
+      attentionScores: e?.attentionScores ? {
         attentionOut: []
       } : void 0,
-      cache: e,
-      outputEmbeddings: i?.embeddings ?? !1
-    }, l = O(() => {
-      const p = t, u = p.shape[1], r = u <= this.model.config.blockSize ? p : p.slice(
-        [0, u - this.model.config.blockSize],
-        [p.shape[0], this.model.config.blockSize]
-      ), f = d ? this.model.config.blockSize - r.shape[1] : 0, b = f > 0 ? _(r, [
+      cache: s,
+      outputEmbeddings: !!e?.embeddings
+    }, [l, d] = q(() => {
+      const a = t, m = a.shape[1], h = m <= this.model.config.blockSize ? a : a.slice(
+        [0, m - this.model.config.blockSize],
+        [a.shape[0], this.model.config.blockSize]
+      ), r = T ? this.model.config.blockSize - h.shape[1] : 0, v = r > 0 ? j(h, [
         [0, 0],
-        [0, f]
-      ]) : r, [g] = this.model.forward(a, b), y = g.shape[1] - 1 - f, c = g.slice([0, y, 0], [g.shape[0], 1, g.shape[2]]);
-      return a.attentionScores?.attentionOut && a.attentionScores.attentionOut.forEach((T, E) => {
-        T.shape[1] !== 1 && (a.attentionScores.attentionOut[E] = R(
-          T.slice([0, y, 0], [T.shape[0], 1, T.shape[2]])
-        ), T.dispose());
-      }), g.dispose(), c.div(o).squeeze([1]);
+        [0, r]
+      ]) : h, [g] = this.model.forward(n, v), u = g.shape[1] - 1 - r, f = g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]);
+      let y;
+      if (e?.targets) {
+        const k = e.targets.shift();
+        if (k !== void 0) {
+          const w = P([[k]], [1, 1], "int32"), D = V(f, w);
+          y = D.mean(), w.dispose(), D.dispose();
+        }
+      }
+      return n.attentionScores?.attentionOut && n.attentionScores.attentionOut.forEach((k, w) => {
+        k.shape[1] !== 1 && (n.attentionScores.attentionOut[w] = K(
+          k.slice([0, u, 0], [k.shape[0], 1, k.shape[2]])
+        ), k.dispose());
+      }), g.dispose(), [f.div(o).squeeze([1]), y];
     });
-    let m;
-    if (n) {
-      const p = v(l), u = await p.array();
-      p.dispose();
-      const r = u[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
-      let f = 0;
-      const b = new Array(r.length).fill(0);
-      for (const c of r)
-        if (f += c.prob, b[c.index] = c.prob, f >= n)
-          break;
-      const g = b.reduce((c, k) => c + k, 0), y = b.map((c) => c / g);
-      m = $(y);
-    } else if (s) {
-      const { values: p, indices: u } = K(l, s), r = D(p, 1);
-      m = q(u, r, 1), p.dispose(), u.dispose(), r.dispose();
-    } else
-      m = D(l, 1);
-    let w;
-    if (i?.includeProbabilities && (w = v(l)), a.embeddings) {
-      const p = a.embeddings.map(async (r) => {
-        const f = await r.tensor.array();
-        return r.tensor.dispose(), { name: r.name, tensor: f };
-      }), u = await Promise.all(p);
-      this.embeddingsData.push(u);
+    let b, x;
+    if (c) {
+      const a = S(l), m = await a.array();
+      a.dispose();
+      const h = M(m, c);
+      e?.includeProbabilities && (x = m), b = N(h);
+    } else if (i) {
+      const { values: a, indices: m } = G(l, i), h = z(a, 1);
+      b = F(m, h, 1), a.dispose(), m.dispose(), h.dispose();
+    } else if (b = z(l, 1), e?.includeProbabilities) {
+      const a = S(l);
+      x = await a.array(), a.dispose();
+    }
+    if (n.embeddings) {
+      const m = (e?.embeddings === "all" ? n.embeddings : n.embeddings.filter((r) => r.name.startsWith("block_output_"))).map(async (r) => {
+        const v = r.tensor.shape[1], g = r.tensor.slice([0, v - 1, 0], [r.tensor.shape[0], 1, r.tensor.shape[2]]);
+        r.tensor.dispose();
+        const u = g.squeeze([1]);
+        if (g.dispose(), e?.embeddings === "softmax") {
+          const f = this.model.project(u);
+          u.dispose();
+          const y = S(f, -1);
+          return f.dispose(), { name: r.name, tensor: await y.array() };
+        } else if (e?.embeddings === "logits") {
+          const f = this.model.project(u);
+          return u.dispose(), { name: r.name, tensor: await f.array() };
+        } else {
+          const f = await u.array();
+          return u.dispose(), { name: r.name, tensor: f };
+        }
+      }), h = await Promise.all(m);
+      this.embeddingsData.push(h);
     }
-    const S = m.reshape([1, 1]);
-    return m.dispose(), m = S, l.dispose(), { output: m, probabilities: w, attention: a.attentionScores?.attentionOut };
+    const A = b.reshape([1, 1]);
+    b.dispose(), b = A, l.dispose();
+    let L;
+    return d && (L = await d.array(), d.dispose()), { output: b, probabilities: x, attention: n.attentionScores?.attentionOut, loss: L };
   }
   /** Generate multiple tokens in a loop and produce text */
   async _generate(t) {
-    let e = this.lastToken >= 0 && this.cache ? P([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
-    const i = t?.maxLength ?? 1e3;
-    for (let o = 0; o < i && this.active; o++) {
+    let s = this.lastToken >= 0 && this.cache ? P([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
+    const e = t?.maxLength ?? 1e3;
+    for (let o = 0; o < e && this.active; o++) {
       const {
-        output: s,
-        probabilities: n,
-        attention: d
-      } = await this._generateToken(e, this.cache ? this.cache : void 0, {
+        output: i,
+        probabilities: c,
+        attention: T,
+        loss: n
+      } = await this._generateToken(s, this.cache ? this.cache : void 0, {
         ...t,
         usePadding: !this.cache
       });
-      if (this.cache)
-        e.dispose(), e = s;
+      if (n !== void 0 && (this.lastLoss = n), this.cache)
+        s.dispose(), s = i;
       else {
-        const l = e;
-        e = G([e, s], 1), l.dispose();
+        const d = s;
+        s = H([s, i], 1), d.dispose();
       }
-      const a = await this.processResponse(this.actualTokeniser, s, d, n);
-      if (this.cache || s.dispose(), a === null)
+      const l = await this.processResponse(this.actualTokeniser, i, T, c);
+      if (this.cache || i.dispose(), l === null)
         break;
-      this.outputText += a;
+      this.outputText += l;
     }
-    return e.dispose(), this.outputText;
+    return s.dispose(), this.outputText;
   }
   reset() {
     this.cache && (this.cache.forEach((t) => {
       t && (t.k && t.k.dispose(), t.v && t.v.dispose());
-    }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [];
+    }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [], this.lastLoss = null;
   }
   dispose() {
     this.reset();
   }
-  initialise(t, e) {
-    const i = t && t.length > this.model.config.blockSize ? t.slice(-this.model.config.blockSize) : t ?? null;
-    if (this.cache && e?.noCache && this.reset(), this.initialPrompt = i || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !e?.noCache && this.model.config.useRope) {
-      const s = new Array(this.model.config.nLayer);
-      for (let n = 0; n < this.model.config.nLayer; n++)
-        s[n] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
-      this.cache = s, this.lastToken = -1;
+  initialise(t, s) {
+    if (this.cache && s?.noCache && this.reset(), this.initialPrompt = t || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !s?.noCache && this.model.config.useRope) {
+      const o = new Array(this.model.config.nLayer);
+      for (let i = 0; i < this.model.config.nLayer; i++)
+        o[i] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
+      this.cache = o, this.lastToken = -1;
     }
-    const o = this.tokeniser.trained ? this.tokeniser : new M(U(H, this.tokeniser.vocabSize));
-    this.actualTokeniser = o;
+    const e = this.tokeniser.trained ? this.tokeniser : new $(B(W, this.tokeniser.vocabSize));
+    this.actualTokeniser = e;
   }
-  async step(t, e) {
-    const i = { ...e, maxLength: 1 };
-    return this.generate(t, i);
+  async step(t, s) {
+    const e = { ...s, maxLength: 1 };
+    return this.generate(t, e);
   }
-  async generate(t, e) {
-    this.initialise(t, e), this.active = !0, this.emit("start");
-    const o = await this._generate(e);
+  async generate(t, s) {
+    this.initialise(t, s), this.active = !0, s?.maxLength !== 1 && this.emit("start");
+    const o = await this._generate(s);
     return this.active = !1, this.emit("stop"), o;
   }
   stop() {
@@ -246,7 +263,10 @@ class qt extends z {
   getTokens() {
     return this.tokens;
   }
+  getLastLoss() {
+    return this.lastLoss;
+  }
 }
 export {
-  qt as default
+  Wt as default
 };

package/dist/{RealDiv-D_q39E3A.js → RealDiv-D4EzDsC0.js} RENAMED Viewed

@@ -1,10 +1,10 @@
-import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-DdmHGZjq.js";
-import { r as $ } from "./Reshape-Bh_jzKzV.js";
-import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-Did9235A.js";
-import { t as U, m as W } from "./shared-CZiWmQCI.js";
-import { c as _ } from "./backend_util-yC3YH1jo.js";
-import { f as y } from "./gpgpu_math-D_ODOLix.js";
-import { g as G, b as L } from "./kernel_funcs_utils-CDfFpUab.js";
+import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-BzFyqcy-.js";
+import { r as $ } from "./Reshape-Bowtk9BP.js";
+import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-TbGYJ208.js";
+import { t as U, m as W } from "./shared-DuP7ue-R.js";
+import { c as _ } from "./backend_util-CJIiDoV1.js";
+import { f as y } from "./gpgpu_math-CDaYiyE_.js";
+import { g as G, b as L } from "./kernel_funcs_utils-DKLK0Mg3.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{Reshape-Bh_jzKzV.js → Reshape-Bowtk9BP.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { j as c, a5 as C, n as f, V as R } from "./index-DdmHGZjq.js";
-import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-D_ODOLix.js";
+import { j as c, a5 as C, n as f, V as R } from "./index-BzFyqcy-.js";
+import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-CDaYiyE_.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{Reshape-41YpQqEo.js → Reshape-DUqYftGC.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { j as h, a5 as d, n as c, V as m } from "./index-DdmHGZjq.js";
+import { j as h, a5 as d, n as c, V as m } from "./index-BzFyqcy-.js";
 /**
  * @license
  * Copyright 2021 Google LLC. All Rights Reserved.

package/dist/TeachableLLM.js CHANGED Viewed

@@ -5,17 +5,17 @@ import u from "./Generator.js";
 import f from "./Trainer.js";
 import { E as p } from "./index-Dwqa6Zy2.js";
 import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
-import "./index-DdmHGZjq.js";
+import "./index-BzFyqcy-.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "./random_width-DKGeiFuR.js";
-import "./register_all_kernels-Do9VvZmo.js";
+import "./random_width-CXVRloNK.js";
+import "./register_all_kernels-DIGpEwcf.js";
 import "./index-Tf7vU29b.js";
-import "./dataset-DPPl-iLT.js";
+import "./dataset-DlZtKmBq.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -40,7 +40,7 @@ import "./papaparse.min-C8l2Kvo1.js";
 import "./jszip.min-CjP2V1VV.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./gelu-BkcmEEyD.js";
+import "./gelu-Bp_-935b.js";
 import "./ops/webgl/log.js";
 import "./ops/cpu/adamMoments.js";
 import "./ops/webgl/adamMoments.js";

package/dist/Trainer.d.ts CHANGED Viewed

@@ -33,5 +33,6 @@ export default class Trainer extends EE<'start' | 'stop' | 'log'> {
     step(options?: ITrainerOptions): Promise<void>;
     getLog(): TrainingLogEntry[];
     getProgress(): ExtendedTrainingProgress | null;
+    isPrepared(): boolean;
 }
 export {};

package/dist/Trainer.js CHANGED Viewed

@@ -85,6 +85,9 @@ class m extends l {
   getProgress() {
     return this.progress;
   }
+  isPrepared() {
+    return this.trainDataset !== void 0 && this.validationDataset !== void 0;
+  }
 }
 export {
   m as default

package/dist/{axis_util-Did9235A.js → axis_util-TbGYJ208.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { n as c } from "./index-DdmHGZjq.js";
+import { n as c } from "./index-BzFyqcy-.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.

package/dist/backend.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { g as a, s as i, r as o } from "./index-DdmHGZjq.js";
+import { g as a, s as i, r as o } from "./index-BzFyqcy-.js";
 async function e(t) {
-  a() !== t && (t === "webgpu" && (await import("./index-evZ57wr4.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
+  a() !== t && (t === "webgpu" && (await import("./index-C1rx_Ajs.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
 }
 export {
   e as selectBackend

package/dist/{backend_util-yC3YH1jo.js → backend_util-CJIiDoV1.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-DdmHGZjq.js";
-import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-Did9235A.js";
-import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-BgUO9gHY.js";
-import { c as re, v as oe, a as ae } from "./scatter_nd_util-B7yDhiQr.js";
+import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-BzFyqcy-.js";
+import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-TbGYJ208.js";
+import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-OtRzVwW5.js";
+import { c as re, v as oe, a as ae } from "./scatter_nd_util-BQdz--Gn.js";
 function ie(e, n) {
   const r = e.shape.length, t = n.shape.length;
   if (r < 1)

package/dist/{broadcast_to-CUvOdOT5.js → broadcast_to-DzlNweb8.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { C as h, D as f, M as p, H as g, E as u, X as b } from "./index-DdmHGZjq.js";
-import { r as T } from "./reshape-WeJkT3ja.js";
+import { E as h, F as f, M as p, J as g, G as u, X as b } from "./index-BzFyqcy-.js";
+import { r as T } from "./reshape-CnIwVG1c.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/checks/appendCache.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s, e as a } from "../index-DdmHGZjq.js";
-import { t } from "../tensor4d-DLtk7Nxh.js";
+import { s, e as a } from "../index-BzFyqcy-.js";
+import { t } from "../tensor4d-Df1WlVDY.js";
 async function u(e) {
   await s(e);
   const n = t(

package/dist/checks/attentionMask.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { s as i, e } from "../index-DdmHGZjq.js";
-import { t } from "../tensor4d-DLtk7Nxh.js";
-import { t as a } from "../tensor2d-CObBWBkW.js";
+import { s as i, e } from "../index-BzFyqcy-.js";
+import { t } from "../tensor4d-Df1WlVDY.js";
+import { t as a } from "../tensor2d-D76QGjF3.js";
 async function k(n) {
   await i(n);
   const s = t(

package/dist/checks/gelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s as e, e as o } from "../index-DdmHGZjq.js";
-import { t as s } from "../tensor2d-CObBWBkW.js";
+import { s as e, e as o } from "../index-BzFyqcy-.js";
+import { t as s } from "../tensor2d-D76QGjF3.js";
 async function m(t) {
   await e(t);
   const r = s(

package/dist/checks/matMulGelu.js CHANGED Viewed

@@ -1,9 +1,9 @@
-import { s as n, e as s } from "../index-DdmHGZjq.js";
-import "../random_width-DKGeiFuR.js";
-import "../register_all_kernels-Do9VvZmo.js";
+import { s as n, e as s } from "../index-BzFyqcy-.js";
+import "../random_width-CXVRloNK.js";
+import "../register_all_kernels-DIGpEwcf.js";
 import "../index-Tf7vU29b.js";
-import "../dataset-DPPl-iLT.js";
-import { t as e } from "../tensor2d-CObBWBkW.js";
+import "../dataset-DlZtKmBq.js";
+import { t as e } from "../tensor2d-D76QGjF3.js";
 async function f(t) {
   await n(t);
   const r = e(

package/dist/checks/normRMS.js CHANGED Viewed

@@ -1,7 +1,7 @@
-import { s as u, y as A, e as y } from "../index-DdmHGZjq.js";
-import { a as h } from "../ops-542ai2vG.js";
-import { t as p } from "../tensor1d-CtJq5BOv.js";
-import { t as a } from "../tensor-DbqgIV9B.js";
+import { s as u, y as A, e as y } from "../index-BzFyqcy-.js";
+import { a as h } from "../ops-LuCMAnmM.js";
+import { t as p } from "../tensor1d-vML0r3q6.js";
+import { t as a } from "../tensor-CZr4dh61.js";
 const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function k(t) {
   await u(t);

package/dist/checks/normRMSGrad.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { s as c, e as d } from "../index-DdmHGZjq.js";
-import { t as f } from "../tensor1d-CtJq5BOv.js";
-import { t as r } from "../tensor-DbqgIV9B.js";
+import { s as c, e as d } from "../index-BzFyqcy-.js";
+import { t as f } from "../tensor1d-vML0r3q6.js";
+import { t as r } from "../tensor-CZr4dh61.js";
 const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function x(t) {
   await c(t);

package/dist/checks/qkv.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { z as i, A as u, B as c, s as l, e as h } from "../index-DdmHGZjq.js";
-import { t as f } from "../tensor2d-CObBWBkW.js";
+import { B as i, C as u, D as c, s as l, e as h } from "../index-BzFyqcy-.js";
+import { t as f } from "../tensor2d-D76QGjF3.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/checks/rope.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import t from "../layers/RoPECache.js";
-import { s as c, e as i } from "../index-DdmHGZjq.js";
-import { t as p } from "../tensor4d-DLtk7Nxh.js";
+import { s as c, e as i } from "../index-BzFyqcy-.js";
+import { t as p } from "../tensor4d-Df1WlVDY.js";
 async function y(a) {
   await c(a);
   const o = p(

package/dist/{concat-pHiVqR3L.js → concat-B912vBbo.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { C as s, n as a, F as p, H as i, E as l, I as f } from "./index-DdmHGZjq.js";
+import { E as s, n as a, I as p, J as i, G as l, K as f } from "./index-BzFyqcy-.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{dataset-DPPl-iLT.js → dataset-DlZtKmBq.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { ak as S, T as h, ag as k, d as v, al as o, am as p, an as g, n as N, t as y } from "./index-DdmHGZjq.js";
+import { ak as S, T as h, ag as k, d as v, al as o, am as p, an as g, n as N, t as y } from "./index-BzFyqcy-.js";
 import { s as R } from "./index-C4L8Cm77.js";
-import { s as $ } from "./stack-BNwLzE43.js";
-import { t as B } from "./tensor-DbqgIV9B.js";
+import { s as $ } from "./stack-DFatutCx.js";
+import { t as B } from "./tensor-CZr4dh61.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.