@genai-fi/nanogpt 0.7.3 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. package/dist/Generator.d.ts +25 -2
  2. package/dist/Generator.js +152 -49
  3. package/dist/{RealDiv-Dy0p8Bvo.js → RealDiv-D_q39E3A.js} +13 -13
  4. package/dist/{Reshape-DvudQDvJ.js → Reshape-41YpQqEo.js} +1 -1
  5. package/dist/{Reshape-DH5srBP0.js → Reshape-Bh_jzKzV.js} +5 -5
  6. package/dist/TeachableLLM.d.ts +6 -6
  7. package/dist/TeachableLLM.js +33 -31
  8. package/dist/Trainer.d.ts +13 -2
  9. package/dist/Trainer.js +21 -12
  10. package/dist/{axis_util-BzbKo31C.js → axis_util-Did9235A.js} +3 -3
  11. package/dist/backend.js +2 -2
  12. package/dist/{backend_util-TE7aTPhZ.js → backend_util-yC3YH1jo.js} +58 -58
  13. package/dist/{broadcast_to-CdbwV-Dj.js → broadcast_to-CUvOdOT5.js} +2 -2
  14. package/dist/checks/appendCache.d.ts +1 -0
  15. package/dist/checks/appendCache.js +22 -0
  16. package/dist/checks/attentionMask.d.ts +1 -0
  17. package/dist/checks/attentionMask.js +37 -0
  18. package/dist/checks/check.d.ts +9 -0
  19. package/dist/checks/check.js +20 -0
  20. package/dist/checks/gelu.d.ts +1 -0
  21. package/dist/checks/gelu.js +18 -0
  22. package/dist/checks/index.d.ts +19 -0
  23. package/dist/checks/index.js +21 -0
  24. package/dist/checks/normRMS.d.ts +1 -0
  25. package/dist/checks/normRMS.js +16 -0
  26. package/dist/checks/normRMSGrad.d.ts +1 -0
  27. package/dist/checks/normRMSGrad.js +12 -0
  28. package/dist/checks/qkv.d.ts +1 -0
  29. package/dist/checks/qkv.js +25 -0
  30. package/dist/checks/rope.d.ts +1 -0
  31. package/dist/checks/rope.js +21 -0
  32. package/dist/{concat-CsxrgovM.js → concat-pHiVqR3L.js} +1 -1
  33. package/dist/{dataset-CtdBYwjo.js → dataset-DPPl-iLT.js} +9 -9
  34. package/dist/{dropout-DYs5QFGQ.js → dropout-CcKSfOYE.js} +18 -18
  35. package/dist/exports_initializers-DKk7-bsx.js +16 -0
  36. package/dist/{gather-CMMy2KEG.js → gather-CPg6ZlQA.js} +1 -1
  37. package/dist/{gelu-C-dPj6Ku.js → gelu-BkcmEEyD.js} +1 -1
  38. package/dist/{gpgpu_math-DGNLNL4I.js → gpgpu_math-D_ODOLix.js} +26 -26
  39. package/dist/{index-BoWRt-10.js → index-DdmHGZjq.js} +659 -650
  40. package/dist/{index-CLthM0TO.js → index-evZ57wr4.js} +185 -185
  41. package/dist/{kernel_funcs_utils-BYKWV8Aa.js → kernel_funcs_utils-CDfFpUab.js} +21 -21
  42. package/dist/layers/BaseLayer.d.ts +8 -13
  43. package/dist/layers/BaseLayer.js +25 -13
  44. package/dist/layers/CausalSelfAttention.d.ts +3 -2
  45. package/dist/layers/CausalSelfAttention.js +28 -28
  46. package/dist/layers/MLP.d.ts +3 -2
  47. package/dist/layers/MLP.js +16 -20
  48. package/dist/layers/PositionEmbedding.d.ts +9 -0
  49. package/dist/layers/PositionEmbedding.js +45 -0
  50. package/dist/layers/RMSNorm.d.ts +3 -2
  51. package/dist/layers/RMSNorm.js +6 -6
  52. package/dist/layers/RoPECache.d.ts +1 -1
  53. package/dist/layers/RoPECache.js +4 -4
  54. package/dist/layers/TiedEmbedding.d.ts +3 -2
  55. package/dist/layers/TiedEmbedding.js +29 -7
  56. package/dist/layers/TransformerBlock.d.ts +3 -2
  57. package/dist/layers/TransformerBlock.js +1 -1
  58. package/dist/loader/load.d.ts +2 -2
  59. package/dist/loader/loadHF.d.ts +2 -2
  60. package/dist/loader/loadTransformers.d.ts +4 -2
  61. package/dist/loader/loadTransformers.js +10 -9
  62. package/dist/loader/newZipLoad.d.ts +2 -2
  63. package/dist/loader/oldZipLoad.d.ts +2 -2
  64. package/dist/loader/oldZipLoad.js +44 -51
  65. package/dist/loader/save.d.ts +8 -0
  66. package/dist/loader/save.js +62 -0
  67. package/dist/{log_sum_exp-DbjkV734.js → log_sum_exp-C8yFJfZz.js} +45 -24
  68. package/dist/main.d.ts +6 -4
  69. package/dist/main.js +24 -18
  70. package/dist/{mat_mul-8m8pfdcx.js → mat_mul-Dpy2mMRu.js} +1 -1
  71. package/dist/mod-CbibJi3D.js +27 -0
  72. package/dist/models/NanoGPTV1.d.ts +15 -0
  73. package/dist/models/NanoGPTV1.js +71 -0
  74. package/dist/{config.d.ts → models/config.d.ts} +1 -0
  75. package/dist/{config.js → models/config.js} +1 -0
  76. package/dist/models/factory.d.ts +3 -0
  77. package/dist/models/factory.js +14 -0
  78. package/dist/models/model.d.ts +26 -0
  79. package/dist/models/model.js +70 -0
  80. package/dist/{mulmat_packed_gpu-VSekgsNv.js → mulmat_packed_gpu-q_Gmwyld.js} +1 -1
  81. package/dist/{ones-Dj0SDhHf.js → ones-BAqVh-eA.js} +2 -2
  82. package/dist/ops/adamAdjust.js +1 -1
  83. package/dist/ops/adamMoments.js +1 -1
  84. package/dist/ops/appendCache.js +3 -3
  85. package/dist/ops/attentionMask.js +1 -1
  86. package/dist/ops/cpu/adamAdjust.js +9 -9
  87. package/dist/ops/cpu/adamMoments.js +2 -2
  88. package/dist/ops/cpu/appendCache.js +2 -2
  89. package/dist/ops/cpu/attentionMask.js +5 -5
  90. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  91. package/dist/ops/cpu/gatherSub.js +5 -5
  92. package/dist/ops/cpu/gelu.js +1 -1
  93. package/dist/ops/cpu/matMulGelu.js +2 -2
  94. package/dist/ops/cpu/matMulMul.js +1 -1
  95. package/dist/ops/cpu/mulDropout.js +1 -1
  96. package/dist/ops/cpu/normRMS.js +1 -1
  97. package/dist/ops/cpu/qkv.js +3 -3
  98. package/dist/ops/cpu/rope.js +5 -5
  99. package/dist/ops/cpu/scatterSub.js +7 -7
  100. package/dist/ops/fusedSoftmax.js +1 -1
  101. package/dist/ops/gatherSub.js +1 -1
  102. package/dist/ops/gelu.js +2 -2
  103. package/dist/ops/grads/attentionMask.js +1 -1
  104. package/dist/ops/grads/fusedSoftmax.js +2 -2
  105. package/dist/ops/grads/gelu.js +2 -2
  106. package/dist/ops/grads/matMulGelu.js +1 -1
  107. package/dist/ops/grads/normRMS.js +1 -1
  108. package/dist/ops/grads/qkv.js +1 -1
  109. package/dist/ops/grads/rope.js +1 -1
  110. package/dist/ops/matMulGelu.js +1 -1
  111. package/dist/ops/matMulMul.js +1 -1
  112. package/dist/ops/mulDrop.js +1 -1
  113. package/dist/ops/normRMS.js +1 -1
  114. package/dist/ops/qkv.js +1 -1
  115. package/dist/ops/rope.js +4 -4
  116. package/dist/ops/scatterSub.js +1 -1
  117. package/dist/ops/webgl/adamAdjust.js +2 -2
  118. package/dist/ops/webgl/adamMoments.js +1 -1
  119. package/dist/ops/webgl/appendCache.js +1 -1
  120. package/dist/ops/webgl/attentionMask.js +1 -1
  121. package/dist/ops/webgl/fusedSoftmax.js +4 -4
  122. package/dist/ops/webgl/gatherSub.js +1 -1
  123. package/dist/ops/webgl/gelu.js +2 -2
  124. package/dist/ops/webgl/log.js +3 -3
  125. package/dist/ops/webgl/matMulGelu.js +10 -10
  126. package/dist/ops/webgl/matMulMul.js +1 -1
  127. package/dist/ops/webgl/mulDropout.js +1 -1
  128. package/dist/ops/webgl/normRMS.js +2 -2
  129. package/dist/ops/webgl/qkv.js +1 -1
  130. package/dist/ops/webgl/rope.js +1 -1
  131. package/dist/ops/webgl/scatterSub.js +1 -1
  132. package/dist/ops/webgpu/adamAdjust.js +3 -3
  133. package/dist/ops/webgpu/adamMoments.js +3 -3
  134. package/dist/ops/webgpu/appendCache.js +3 -3
  135. package/dist/ops/webgpu/attentionMask.js +3 -3
  136. package/dist/ops/webgpu/gatherSub.js +3 -3
  137. package/dist/ops/webgpu/gelu.js +3 -3
  138. package/dist/ops/webgpu/normRMS.js +2 -2
  139. package/dist/ops/webgpu/normRMSGrad.js +5 -5
  140. package/dist/ops/webgpu/qkv.js +3 -3
  141. package/dist/ops/webgpu/rope.js +3 -3
  142. package/dist/ops/webgpu/scatterSub.js +3 -3
  143. package/dist/ops/webgpu/utils/reductions.js +4 -4
  144. package/dist/ops-542ai2vG.js +1525 -0
  145. package/dist/{random_width-sZORGo5k.js → random_width-DKGeiFuR.js} +1471 -1538
  146. package/dist/{range-CRuAh-gd.js → range-BcUvLuf5.js} +1 -1
  147. package/dist/{reciprocal-BvGAyKyu.js → reciprocal-DhDWSKiD.js} +1 -1
  148. package/dist/{register_all_kernels-BwDSRN-f.js → register_all_kernels-Do9VvZmo.js} +2488 -2534
  149. package/dist/{max-Ddnnb5xe.js → relu-B1AXs7p5.js} +6 -6
  150. package/dist/{reshape-CdBq1WJ6.js → reshape-WeJkT3ja.js} +1 -1
  151. package/dist/{scatter_nd_util-DUstGbU1.js → scatter_nd_util-B7yDhiQr.js} +1 -1
  152. package/dist/{selu_util-BJEXVvjX.js → selu_util-BgUO9gHY.js} +125 -146
  153. package/dist/{shared-wS99K7_n.js → shared-CZiWmQCI.js} +1 -1
  154. package/dist/{shared-B8ztnyEk.js → shared-V6D_md-c.js} +72 -72
  155. package/dist/{sin-BeA3tsEd.js → sin-CPxad7Am.js} +1 -1
  156. package/dist/{slice-BiOsknYS.js → slice-B7jXtPnp.js} +1 -1
  157. package/dist/{softmax-Bv_6lyMX.js → softmax-BfsyI4As.js} +1 -1
  158. package/dist/{split-B-dikLRw.js → split-BPxr8_8m.js} +1 -1
  159. package/dist/{stack-B17UN2nn.js → stack-BNwLzE43.js} +1 -1
  160. package/dist/{sum-66ew2byf.js → sum-ByFINZgi.js} +3 -3
  161. package/dist/{tensor-JwS7ZYY6.js → tensor-DbqgIV9B.js} +1 -1
  162. package/dist/tensor1d-CtJq5BOv.js +27 -0
  163. package/dist/{tensor2d-wxPAnDQy.js → tensor2d-CObBWBkW.js} +1 -1
  164. package/dist/tensor3d-BOukqWwr.js +30 -0
  165. package/dist/tensor4d-DLtk7Nxh.js +30 -0
  166. package/dist/training/Adam.js +2 -2
  167. package/dist/training/AdamExt.js +1 -1
  168. package/dist/training/DatasetBuilder.js +2 -2
  169. package/dist/training/Evaluator.d.ts +2 -2
  170. package/dist/training/FullTrainer.d.ts +3 -3
  171. package/dist/training/FullTrainer.js +61 -69
  172. package/dist/training/Trainer.d.ts +15 -3
  173. package/dist/training/Trainer.js +39 -47
  174. package/dist/training/sparseCrossEntropy.js +12 -13
  175. package/dist/utilities/arrayClose.d.ts +1 -1
  176. package/dist/utilities/arrayClose.js +16 -7
  177. package/dist/utilities/dummy.d.ts +4 -4
  178. package/dist/utilities/dummy.js +13 -13
  179. package/dist/utilities/multinomialCPU.js +2 -2
  180. package/dist/utilities/parameters.d.ts +1 -1
  181. package/dist/utilities/performance.js +1 -1
  182. package/dist/utilities/profile.js +1 -1
  183. package/dist/utilities/safetensors.js +2 -2
  184. package/dist/utilities/weights.js +2 -2
  185. package/dist/{variable-BuddVFLa.js → variable-DPFOJyRG.js} +1 -1
  186. package/dist/{webgpu_program-PFzf1hAQ.js → webgpu_program-Dhk9R5aG.js} +1 -1
  187. package/dist/{webgpu_util-D____QpY.js → webgpu_util-BqGnZg8t.js} +27 -27
  188. package/dist/{zeros--BdLQ3oG.js → zeros-Dnwix0p4.js} +1 -1
  189. package/package.json +2 -3
  190. package/dist/NanoGPTModel.d.ts +0 -52
  191. package/dist/NanoGPTModel.js +0 -203
  192. package/dist/TiedEmbedding-BxOerUmB.js +0 -43
  193. package/dist/ops-BFGCx8Ri.js +0 -1202
  194. package/dist/utilities/generate.d.ts +0 -3
  195. package/dist/utilities/generate.js +0 -22
  196. package/dist/utilities/save.d.ts +0 -9
  197. package/dist/utilities/save.js +0 -61
package/dist/Generator.d.ts CHANGED
@@ -1,10 +1,23 @@
- import { default as NanoGPT, GenerateOptions } from './NanoGPTModel';
  import { ITokeniser } from './tokeniser/type';
  import { default as EE } from 'eventemitter3';
+ import { default as Model, ModelForwardAttributes } from './models/model';
+ export interface GenerateOptions {
+ temperature?: number;
+ topK?: number;
+ topP?: number;
+ usePadding?: boolean;
+ attentionScores?: boolean;
+ includeProbabilities?: boolean;
+ embeddings?: boolean;
+ }
  export interface IGenerateOptions extends GenerateOptions {
  maxLength?: number;
  noCache?: boolean;
  }
+ /**
+ * Text generator using a NanoGPT model and a tokeniser.
+ * This uses the forward method of the model to generate text token by token, including options for temperature, top-k, and top-p sampling.
+ */
  export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
  private readonly model;
  private readonly tokeniser;
@@ -14,9 +27,16 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
  private outputText;
  private actualTokeniser;
  private lastToken;
- constructor(model: NanoGPT, tokeniser: ITokeniser);
+ private attentionData;
+ private probabilitiesData;
+ private embeddingsData;
+ private tokens;
+ constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser);
  private tokenisePrompt;
  private processResponse;
+ /** Generate logits and select a token. */
+ private _generateToken;
+ /** Generate multiple tokens in a loop and produce text */
  private _generate;
  reset(): void;
  dispose(): void;
@@ -25,4 +45,7 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
  generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
  stop(): void;
  getText(): string;
+ getAttentionData(): number[][][][];
+ getProbabilitiesData(): number[][][];
+ getTokens(): number[];
  }
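The Generator.d.ts changes above define the new 0.8.x surface: GenerateOptions now lives here instead of NanoGPTModel, the constructor accepts any Model<ModelForwardAttributes>, and attention maps, per-step probabilities and sampled token ids are retrievable after generation. A minimal usage sketch based only on these declarations; the entry import path and the re-export of Generator and TeachableLLM are assumptions not confirmed by this diff:

    // Sketch only: exercising the 0.8.x Generator API (import path assumed).
    import TeachableLLM, { Generator } from '@genai-fi/nanogpt';

    async function demo(llm: TeachableLLM): Promise<void> {
      // constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser)
      const generator = new Generator(llm.model, llm.tokeniser);
      generator.on('tokens', (ids, text) => console.log(ids, text));

      const output = await generator.generate('Once upon a time', {
        maxLength: 64,
        temperature: 0.8,
        topK: 40,                  // or topP: 0.9 for nucleus sampling
        attentionScores: true,     // collect per-step attention maps
        includeProbabilities: true,
      });

      console.log(output);
      console.log(generator.getTokens());            // number[]
      console.log(generator.getAttentionData());     // number[][][][]
      console.log(generator.getProbabilitiesData()); // number[][][]
      generator.dispose();
    }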
package/dist/Generator.js CHANGED
@@ -1,15 +1,15 @@
- import { E as l } from "./index-Dwqa6Zy2.js";
- import "./index-BoWRt-10.js";
+ import { E as z } from "./index-Dwqa6Zy2.js";
+ import { C as A, D as L, E as C, a6 as I, t as O, k as R } from "./index-DdmHGZjq.js";
  import "./ops/cpu/attentionMask.js";
  import "./ops/webgl/attentionMask.js";
  import "./ops/grads/attentionMask.js";
  import "./ops/cpu/qkv.js";
  import "./ops/webgl/qkv.js";
  import "./ops/grads/qkv.js";
- import "./random_width-sZORGo5k.js";
- import "./register_all_kernels-BwDSRN-f.js";
+ import { p as _ } from "./random_width-DKGeiFuR.js";
+ import { t as K } from "./register_all_kernels-Do9VvZmo.js";
  import "./index-Tf7vU29b.js";
- import "./dataset-CtdBYwjo.js";
+ import "./dataset-DPPl-iLT.js";
  import "./ops/cpu/rope.js";
  import "./ops/webgl/rope.js";
  import "./ops/grads/rope.js";
@@ -29,7 +29,7 @@ import "./ops/webgl/gatherSub.js";
  import "./ops/cpu/scatterSub.js";
  import "./ops/webgl/scatterSub.js";
  import "./jszip.min-CjP2V1VV.js";
- import u from "./tokeniser/CharTokeniser.js";
+ import M from "./tokeniser/CharTokeniser.js";
  import "./ops/cpu/adamAdjust.js";
  import "./ops/webgl/adamAdjust.js";
  import "./ops/cpu/adamMoments.js";
@@ -37,12 +37,44 @@ import "./ops/webgl/adamMoments.js";
  import "./papaparse.min-C8l2Kvo1.js";
  import "./ops/cpu/gelu.js";
  import "./ops/webgl/gelu.js";
- import "./gelu-C-dPj6Ku.js";
+ import "./gelu-BkcmEEyD.js";
  import "./ops/webgl/log.js";
- import { t as p } from "./tensor2d-wxPAnDQy.js";
- import { c as f } from "./concat-CsxrgovM.js";
- const k = [
- ...Array.from({ length: 95 }, (r, t) => String.fromCharCode(t + 32)),
+ import "./checks/normRMS.js";
+ import "./checks/normRMSGrad.js";
+ import $ from "./utilities/multinomialCPU.js";
+ import { r as x } from "./reshape-WeJkT3ja.js";
+ import { t as P } from "./tensor2d-CObBWBkW.js";
+ import { s as v } from "./softmax-BfsyI4As.js";
+ import { g as q } from "./gather-CPg6ZlQA.js";
+ import { c as G } from "./concat-pHiVqR3L.js";
+ /**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+ function N(m, t, e, i = !1) {
+ const o = L(m, "logits", "multinomial"), s = o.size, n = o.rank;
+ if (s < 2)
+ throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
+ if (n > 2)
+ throw new Error(`Rank of probabilities must be 1 or 2, but is ${n}`);
+ e = e || Math.random();
+ const a = { logits: n === 1 ? x(o, [1, -1]) : o }, p = { numSamples: t, seed: e, normalized: i }, l = C.runKernel(I, a, p);
+ return n === 1 ? x(l, [l.size]) : l;
+ }
+ const S = /* @__PURE__ */ A({ multinomial_: N }), H = [
+ ...Array.from({ length: 95 }, (m, t) => String.fromCharCode(t + 32)),
  // ASCII
  // Spanish accented letters and punctuation
  ..."áéíóúüñ¿¡",
@@ -53,12 +85,12 @@ const k = [
  // Cyrillic letters
  ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
  ];
- function d(r, t) {
- return r.length === t ? r : r.length > t ? r.slice(0, t) : r.concat(Array(t - r.length).fill(""));
+ function U(m, t) {
+ return m.length === t ? m : m.length > t ? m.slice(0, t) : m.concat(Array(t - m.length).fill(""));
  }
- class nt extends l {
- constructor(t, i) {
- super(), this.model = t, this.tokeniser = i, this.actualTokeniser = i;
+ class qt extends z {
+ constructor(t, e) {
+ super(), this.model = t, this.tokeniser = e, this.actualTokeniser = e;
  }
  active = !1;
  cache = null;
@@ -66,71 +98,133 @@ class nt extends l {
  outputText = "";
  actualTokeniser;
  lastToken = -1;
- async tokenisePrompt(t, i) {
- const e = i ? await t.tokenise([i], !0) : [[t.eosToken]];
- return p(e, [1, e[0].length], "int32");
+ attentionData = [];
+ probabilitiesData = [];
+ embeddingsData = [];
+ tokens = [];
+ async tokenisePrompt(t, e) {
+ const i = e ? await t.tokenise([e], !0) : [[t.eosToken]];
+ return P(i, [1, i[0].length], "int32");
  }
- async processResponse(t, i, e, o) {
- const s = (await i.array())[0][0];
+ async processResponse(t, e, i, o) {
+ const s = (await e.array())[0][0];
  if (this.lastToken = s, s === this.tokeniser.eosToken)
  return null;
  const n = await t.decode([s]);
- let c;
- e && (c = await Promise.all(e.map((h) => h.array().then((m) => m))), e.forEach((h) => h.dispose()));
- let a;
- return o && (a = await o.array(), o.dispose()), this.emit("tokens", [s], n, c, a), n;
+ if (i) {
+ const d = await Promise.all(i.map((a) => a.array().then((p) => p)));
+ i.forEach((a) => a.dispose()), this.attentionData.push(d);
+ }
+ if (o) {
+ const d = await o.array();
+ o.dispose(), this.probabilitiesData.push(d);
+ }
+ return this.tokens.push(s), this.emit("tokens", [s], n), n;
+ }
+ /** Generate logits and select a token. */
+ async _generateToken(t, e, i) {
+ const o = i?.temperature ?? 1, s = i?.topK, n = i?.topP, d = i?.usePadding ?? !1, a = {
+ training: !1,
+ attentionScores: i?.attentionScores ? {
+ attentionOut: []
+ } : void 0,
+ cache: e,
+ outputEmbeddings: i?.embeddings ?? !1
+ }, p = O(() => {
+ const r = t, h = r.shape[1], u = h <= this.model.config.blockSize ? r : r.slice(
+ [0, h - this.model.config.blockSize],
+ [r.shape[0], this.model.config.blockSize]
+ ), k = d ? this.model.config.blockSize - u.shape[1] : 0, b = k > 0 ? _(u, [
+ [0, 0],
+ [0, k]
+ ]) : u, [f] = this.model.forward(a, b), y = f.shape[1] - 1 - k, c = f.slice([0, y, 0], [f.shape[0], 1, f.shape[2]]);
+ return a.attentionScores?.attentionOut && a.attentionScores.attentionOut.forEach((T, E) => {
+ T.shape[1] !== 1 && (a.attentionScores.attentionOut[E] = R(
+ T.slice([0, y, 0], [T.shape[0], 1, T.shape[2]])
+ ), T.dispose());
+ }), f.dispose(), c.div(o).squeeze([1]);
+ });
+ let l;
+ if (n) {
+ const r = v(p), h = await r.array();
+ r.dispose();
+ const u = h[0].map((c, g) => ({ prob: c, index: g })).sort((c, g) => g.prob - c.prob);
+ let k = 0;
+ const b = new Array(u.length).fill(0);
+ for (const c of u)
+ if (k += c.prob, b[c.index] = c.prob, k >= n)
+ break;
+ const f = b.reduce((c, g) => c + g, 0), y = b.map((c) => c / f);
+ l = $(y);
+ } else if (s) {
+ const { values: r, indices: h } = K(p, s), u = S(r, 1);
+ l = q(h, u, 1), r.dispose(), h.dispose(), u.dispose();
+ } else
+ l = S(p, 1);
+ let w;
+ i?.includeProbabilities && (w = v(p)), a.embeddings && this.embeddingsData.push(
+ await Promise.all(
+ a.embeddings.map(async (r) => {
+ const h = await r.array();
+ return r.dispose(), h;
+ })
+ )
+ );
+ const D = l.reshape([1, 1]);
+ return l.dispose(), l = D, p.dispose(), { output: l, probabilities: w, attention: a.attentionScores?.attentionOut };
  }
+ /** Generate multiple tokens in a loop and produce text */
  async _generate(t) {
- let i = this.lastToken >= 0 && this.cache ? p([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
- const e = t?.maxLength ?? 1e3;
- for (let o = 0; o < e && this.active; o++) {
+ let e = this.lastToken >= 0 && this.cache ? P([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
+ const i = t?.maxLength ?? 1e3;
+ for (let o = 0; o < i && this.active; o++) {
  const {
  output: s,
  probabilities: n,
- attention: c
- } = await this.model.generate(i, this.cache ? this.cache : void 0, {
+ attention: d
+ } = await this._generateToken(e, this.cache ? this.cache : void 0, {
  ...t,
  usePadding: !this.cache
  });
  if (this.cache)
- i.dispose(), i = s;
+ e.dispose(), e = s;
  else {
- const h = i;
- i = f([i, s], 1), h.dispose();
+ const p = e;
+ e = G([e, s], 1), p.dispose();
  }
- const a = await this.processResponse(this.actualTokeniser, s, c, n);
+ const a = await this.processResponse(this.actualTokeniser, s, d, n);
  if (this.cache || s.dispose(), a === null)
  break;
  this.outputText += a;
  }
- return i.dispose(), this.outputText;
+ return e.dispose(), this.outputText;
  }
  reset() {
  this.cache && (this.cache.forEach((t) => {
  t && (t.k && t.k.dispose(), t.v && t.v.dispose());
- }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1;
+ }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [];
  }
  dispose() {
  this.reset();
  }
- initialise(t, i) {
- const e = t && t.length > this.model.config.gpt.blockSize ? t.slice(-this.model.config.gpt.blockSize) : t ?? null;
- if (this.cache && i?.noCache && this.reset(), this.initialPrompt = e || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !i?.noCache && this.model.config.gpt.useRope) {
- const s = new Array(this.model.config.gpt.nLayer);
- for (let n = 0; n < this.model.config.gpt.nLayer; n++)
+ initialise(t, e) {
+ const i = t && t.length > this.model.config.blockSize ? t.slice(-this.model.config.blockSize) : t ?? null;
+ if (this.cache && e?.noCache && this.reset(), this.initialPrompt = i || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !e?.noCache && this.model.config.useRope) {
+ const s = new Array(this.model.config.nLayer);
+ for (let n = 0; n < this.model.config.nLayer; n++)
  s[n] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
  this.cache = s, this.lastToken = -1;
  }
- const o = this.tokeniser.trained ? this.tokeniser : new u(d(k, this.tokeniser.vocabSize));
+ const o = this.tokeniser.trained ? this.tokeniser : new M(U(H, this.tokeniser.vocabSize));
  this.actualTokeniser = o;
  }
- async step(t, i) {
- const e = { ...i, maxLength: 1 };
- return this.generate(t, e);
+ async step(t, e) {
+ const i = { ...e, maxLength: 1 };
+ return this.generate(t, i);
  }
- async generate(t, i) {
- this.initialise(t, i), this.active = !0, this.emit("start");
- const o = await this._generate(i);
+ async generate(t, e) {
+ this.initialise(t, e), this.active = !0, this.emit("start");
+ const o = await this._generate(e);
  return this.active = !1, this.emit("stop"), o;
  }
  stop() {
@@ -139,7 +233,16 @@ class nt extends l {
  getText() {
  return this.outputText;
  }
+ getAttentionData() {
+ return this.attentionData;
+ }
+ getProbabilitiesData() {
+ return this.probabilitiesData;
+ }
+ getTokens() {
+ return this.tokens;
+ }
  }
  export {
- nt as default
+ qt as default
  };
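The new _generateToken above folds temperature scaling, top-k and top-p (nucleus) sampling into a single step. Reading the minified topP branch: softmax probabilities are sorted in descending order, accumulated until the cumulative mass reaches topP, renormalised over the kept tokens, and then sampled on the CPU. A de-minified sketch of that selection step follows; variable names are mine, and the final draw stands in for the library's multinomialCPU helper:

    // Nucleus (top-p) selection as reconstructed from the minified branch above.
    // probs: softmax output for one position; returns the sampled token index.
    function sampleTopP(probs: number[], topP: number): number {
      const ranked = probs
        .map((prob, index) => ({ prob, index }))
        .sort((a, b) => b.prob - a.prob);

      // Keep the smallest set of tokens whose cumulative probability reaches topP.
      const kept = new Array<number>(probs.length).fill(0);
      let cumulative = 0;
      for (const { prob, index } of ranked) {
        cumulative += prob;
        kept[index] = prob;
        if (cumulative >= topP) break;
      }

      // Renormalise the kept probabilities and draw one sample (inverse CDF).
      const total = kept.reduce((sum, p) => sum + p, 0);
      const normalised = kept.map((p) => p / total);
      let r = Math.random();
      for (let i = 0; i < normalised.length; i++) {
        r -= normalised[i];
        if (r <= 0) return i;
      }
      return normalised.length - 1;
    }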
package/dist/{RealDiv-Dy0p8Bvo.js → RealDiv-D_q39E3A.js}
@@ -1,10 +1,10 @@
- import { aq as T, ac as E, p as O, j as V, ay as B, Y as F, U, az as j } from "./index-BoWRt-10.js";
- import { r as $ } from "./Reshape-DH5srBP0.js";
- import { g as A, a as k, b as C, c as N, e as R } from "./axis_util-BzbKo31C.js";
- import { t as K, m as W } from "./shared-wS99K7_n.js";
- import { c as _ } from "./backend_util-TE7aTPhZ.js";
- import { f as y } from "./gpgpu_math-DGNLNL4I.js";
- import { g as G, b as L } from "./kernel_funcs_utils-BYKWV8Aa.js";
+ import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-DdmHGZjq.js";
+ import { r as $ } from "./Reshape-Bh_jzKzV.js";
+ import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-Did9235A.js";
+ import { t as U, m as W } from "./shared-CZiWmQCI.js";
+ import { c as _ } from "./backend_util-yC3YH1jo.js";
+ import { f as y } from "./gpgpu_math-D_ODOLix.js";
+ import { g as G, b as L } from "./kernel_funcs_utils-CDfFpUab.js";
  /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -381,7 +381,7 @@ function Q(a, s, e, t) {
  let i = r;
  const c = A(i, l), o = c != null;
  let u = a;
- o && (u = D(a, c, t), i = k(i.length, l)), C("sum", i, l);
+ o && (u = D(a, c, t), i = C(i.length, l)), k("sum", i, l);
  const [p, h] = N(u.shape, i);
  let d = p;
  e && (d = R(p, r));
@@ -459,15 +459,15 @@ function te(a) {
  const I = e.texData.get(d.dataId).values, m = new Array(i);
  for (let v = 0; v < m.length; v++)
  m[v] = n.shape[u[v]];
- const z = K(I, n.shape, n.dtype, u, m);
+ const z = U(I, n.shape, n.dtype, u, m);
  d = e.makeTensorInfo(m, n.dtype);
  const M = e.texData.get(d.dataId);
  M.values = z;
  } else
  d = D(n, u, e);
- o = k(o.length, i);
+ o = C(o.length, i);
  }
- C("max", o, i);
+ k("max", o, i);
  const [f, S] = N(d.shape, o);
  let g = f;
  r && (g = R(f, c));
@@ -482,7 +482,7 @@ function te(a) {
  return p && e.disposeIntermediateTensorInfo(d), x;
  }
  const he = {
- kernelName: U,
+ kernelName: j,
  backendName: "webgl",
  kernelFunc: te
  };
@@ -525,7 +525,7 @@ return a / b;`, se = `
  
  return result;
  `, ne = L({ opSnippet: ae, packedOpSnippet: se, checkOutOfBounds: !0 }), fe = {
- kernelName: j,
+ kernelName: K,
  backendName: "webgl",
  kernelFunc: ne
  };
package/dist/{Reshape-DvudQDvJ.js → Reshape-41YpQqEo.js}
@@ -1,4 +1,4 @@
- import { j as h, a3 as d, l as c, K as m } from "./index-BoWRt-10.js";
+ import { j as h, a5 as d, n as c, V as m } from "./index-DdmHGZjq.js";
  /**
  * @license
  * Copyright 2021 Google LLC. All Rights Reserved.
package/dist/{Reshape-DH5srBP0.js → Reshape-Bh_jzKzV.js}
@@ -1,5 +1,5 @@
- import { j as c, a3 as C, l as f, K as R } from "./index-BoWRt-10.js";
- import { u as g, g as I, a as x, b as F, c as $, d as u, e as l, i as m } from "./gpgpu_math-DGNLNL4I.js";
+ import { j as c, a5 as C, n as f, V as R } from "./index-DdmHGZjq.js";
+ import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-D_ODOLix.js";
  /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -82,14 +82,14 @@ function v(s, t) {
  function b(s, t, i) {
  const a = [
  u(s.shape),
- ...l(s.shape)
+ ...m(s.shape)
  ], e = {
  dtype: s.dtype,
  shape: a,
  dataId: s.dataId
  }, o = [
  u(t),
- ...l(t)
+ ...m(t)
  ], r = new S(o, a), p = !0, n = [a], h = i.runWebGLProgram(r, [e], s.dtype, n, p);
  return { dataId: h.dataId, shape: t, dtype: h.dtype };
  }
@@ -113,7 +113,7 @@ function y(s) {
  const { inputs: t, backend: i, attrs: a } = s, { x: e } = t, { shape: o } = a, r = i, p = c(e.shape), n = C(o, p), h = c(n);
  f(p === h, () => `The new shape (${n}) has ${h} elements and the old shape (${e.shape}) has ${p} elements. The new shape and old shape must have the same number of elements.`);
  const d = r.texData.get(e.dataId);
- return d.isPacked && !m(e.shape, n) && !(d.texture !== null && m(d.shape, n)) ? b(e, n, r) : (r.incRef(e.dataId), { dataId: e.dataId, shape: n, dtype: e.dtype });
+ return d.isPacked && !l(e.shape, n) && !(d.texture !== null && l(d.shape, n)) ? b(e, n, r) : (r.incRef(e.dataId), { dataId: e.dataId, shape: n, dtype: e.dtype });
  }
  const U = {
  kernelName: R,
package/dist/TeachableLLM.d.ts CHANGED
@@ -1,11 +1,11 @@
- import { GPTConfig } from './config';
+ import { GPTConfig } from './models/config';
  import { ITokeniser } from './tokeniser/type';
- import { default as NanoGPT, TrainingLogEntry } from './NanoGPTModel';
- import { SaveOptions } from './utilities/save';
+ import { SaveOptions } from './loader/save';
  import { default as Generator, IGenerateOptions } from './Generator';
  import { default as Trainer, ITrainerOptions } from './Trainer';
  import { default as MemoryProfiler } from './utilities/profile';
- import { TrainingProgress } from './training/Trainer';
+ import { TrainingLogEntry, TrainingProgress } from './training/Trainer';
+ import { default as Model, ModelForwardAttributes } from './models/model';
  type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
  interface TeachableLLMMeta {
  name?: string;
@@ -20,12 +20,12 @@ export default class TeachableLLM {
  private _status;
  private _memoryRequirements?;
  meta: TeachableLLMMeta;
- constructor(tokeniser?: ITokeniser, model?: NanoGPT);
+ constructor(tokeniser?: ITokeniser, model?: Model<ModelForwardAttributes>);
  get vocab(): string[];
  /** Model is fully loaded */
  get loaded(): boolean;
  get config(): GPTConfig;
- get model(): NanoGPT;
+ get model(): Model<ModelForwardAttributes>;
  get tokeniser(): ITokeniser;
  get status(): TeachableLLMStatus;
  /** Model is both ready and not busy */
package/dist/TeachableLLM.js CHANGED
@@ -1,30 +1,21 @@
- import { defaultConfig as _ } from "./config.js";
- import f from "./NanoGPTModel.js";
- import { saveModel as d } from "./utilities/save.js";
- import { loadModel as l } from "./loader/load.js";
+ import { defaultConfig as d } from "./models/config.js";
+ import { saveModel as l } from "./loader/save.js";
+ import { loadModel as _ } from "./loader/load.js";
  import u from "./Generator.js";
- import p from "./Trainer.js";
- import { E as c } from "./index-Dwqa6Zy2.js";
+ import f from "./Trainer.js";
+ import { E as p } from "./index-Dwqa6Zy2.js";
  import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
- import g from "./tokeniser/CharTokeniser.js";
- import k from "./tokeniser/bpe.js";
- import "./papaparse.min-C8l2Kvo1.js";
- import "./index-Tf7vU29b.js";
- import "./jszip.min-CjP2V1VV.js";
- import "./index-BoWRt-10.js";
- import "./ops/cpu/scatterSub.js";
- import "./ops/webgl/scatterSub.js";
- import "./ops/cpu/gatherSub.js";
- import "./ops/webgl/gatherSub.js";
+ import "./index-DdmHGZjq.js";
  import "./ops/cpu/attentionMask.js";
  import "./ops/webgl/attentionMask.js";
  import "./ops/grads/attentionMask.js";
  import "./ops/cpu/qkv.js";
  import "./ops/webgl/qkv.js";
  import "./ops/grads/qkv.js";
- import "./random_width-sZORGo5k.js";
- import "./register_all_kernels-BwDSRN-f.js";
- import "./dataset-CtdBYwjo.js";
+ import "./random_width-DKGeiFuR.js";
+ import "./register_all_kernels-Do9VvZmo.js";
+ import "./index-Tf7vU29b.js";
+ import "./dataset-DPPl-iLT.js";
  import "./ops/cpu/rope.js";
  import "./ops/webgl/rope.js";
  import "./ops/grads/rope.js";
@@ -36,20 +27,31 @@ import "./ops/grads/fusedSoftmax.js";
  import "./ops/cpu/matMulGelu.js";
  import "./ops/webgl/matMulGelu.js";
  import "./ops/grads/matMulGelu.js";
- import "./ops/cpu/gelu.js";
- import "./ops/webgl/gelu.js";
- import "./gelu-C-dPj6Ku.js";
  import "./ops/cpu/normRMS.js";
  import "./ops/webgl/normRMS.js";
  import "./ops/grads/normRMS.js";
+ import "./ops/cpu/gatherSub.js";
+ import "./ops/webgl/gatherSub.js";
+ import "./ops/cpu/scatterSub.js";
+ import "./ops/webgl/scatterSub.js";
+ import c from "./tokeniser/CharTokeniser.js";
+ import g from "./tokeniser/bpe.js";
+ import "./papaparse.min-C8l2Kvo1.js";
+ import "./jszip.min-CjP2V1VV.js";
+ import "./ops/cpu/gelu.js";
+ import "./ops/webgl/gelu.js";
+ import "./gelu-BkcmEEyD.js";
  import "./ops/webgl/log.js";
  import "./ops/cpu/adamMoments.js";
  import "./ops/webgl/adamMoments.js";
  import "./ops/cpu/adamAdjust.js";
  import "./ops/webgl/adamAdjust.js";
- import w from "./utilities/profile.js";
+ import "./checks/normRMS.js";
+ import "./checks/normRMSGrad.js";
+ import k from "./utilities/profile.js";
+ import w from "./models/factory.js";
  class a {
- ee = new c();
+ ee = new p();
  _config;
  _model;
  _tokeniser;
@@ -69,7 +71,7 @@ class a {
  get config() {
  if (!this._config)
  throw new Error("configuration_not_initialized.");
- return this._config.gpt;
+ return this._config;
  }
  get model() {
  if (!this._model)
@@ -101,14 +103,14 @@ class a {
  saveModel(t) {
  if (!this._model || !this._tokeniser)
  throw new Error("model_or_tokeniser_not_initialized.");
- return d(this._model, this._tokeniser, {
+ return l(this._model, this._tokeniser, {
  ...t,
  name: t?.name || this.meta.name
  });
  }
  static loadModel(t) {
  const e = new a();
- return l(t).then(({ model: r, tokeniser: o, name: s }) => {
+ return _(t).then(({ model: r, tokeniser: o, name: s }) => {
  e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
  e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
  }).catch((i) => {
@@ -119,7 +121,7 @@ class a {
  }), e;
  }
  static create(t, e = {}) {
- const r = { ..._, ...e }, o = t === "char" ? new g(r.vocabSize) : new k(r.vocabSize), s = new f(r), i = new a(o, s);
+ const r = { ...d, ...e }, o = t === "char" ? new c(r.vocabSize) : new g(r.vocabSize), s = w(r), i = new a(o, s);
  return i.setStatus("warmup"), m(s).then((n) => {
  i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
  h === "trained" && i.setStatus("ready");
@@ -138,9 +140,9 @@ class a {
  if (t) {
  if (!this._config)
  return;
- this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
+ this.model.getProfiler() || this.model.setProfiler(new k());
  } else
- this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
+ this.model.getProfiler() && this.model.setProfiler(null);
  }
  getNumParams() {
  return this._model ? this._model.getNumParams() : 0;
@@ -148,7 +150,7 @@ class a {
  trainer() {
  if (!this._model || !this._tokeniser)
  throw new Error("model_or_tokeniser_not_initialized.");
- const t = new p(this._model, this._tokeniser);
+ const t = new f(this._model, this._tokeniser);
  return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
  const o = this.ee.listeners("trainStep");
  for (const s of o)
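TeachableLLM.js now builds its model through models/factory instead of instantiating NanoGPTModel directly, and routes saving through loader/save. A sketch of the flow based on the calls visible above; the entry import path is an assumption, and option keys other than vocabSize are illustrative only:

    // Sketch of the 0.8.x TeachableLLM lifecycle (import path assumed).
    import TeachableLLM from '@genai-fi/nanogpt';

    async function buildAndTrain(corpus: string[]) {
      // 'char' selects the character tokeniser; any other value selects BPE.
      // The model instance now comes from models/factory, not NanoGPTModel.
      const llm = TeachableLLM.create('char', { vocabSize: 256 });
      // (In a real app you would wait for the status to reach 'ready' first.)

      const trainer = llm.trainer();   // wraps training/Trainer
      await trainer.prepare(corpus);
      await trainer.train();

      // Saving is routed through loader/save.saveModel in this release;
      // the return shape is not shown in this diff.
      return llm.saveModel({ name: 'my-model' });
    }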
package/dist/Trainer.d.ts CHANGED
@@ -1,6 +1,7 @@
- import { default as NanoGPT } from './NanoGPTModel';
  import { ITokeniser } from './tokeniser/type';
  import { default as EE } from 'eventemitter3';
+ import { TrainingLogEntry, TrainingProgress } from './training/Trainer';
+ import { default as Model, ModelForwardAttributes } from './models/model';
  export interface ITrainerOptions {
  batchSize?: number;
  learningRate?: number;
@@ -10,6 +11,11 @@ export interface ITrainerOptions {
  prompt?: string;
  validationSplit?: number;
  advancedMetrics?: boolean;
+ gradientCheckpointing?: boolean;
+ }
+ interface ExtendedTrainingProgress extends TrainingProgress {
+ progress: number;
+ remaining: number;
  }
  export default class Trainer extends EE<'start' | 'stop' | 'log'> {
  private trainer;
@@ -17,10 +23,15 @@ export default class Trainer extends EE<'start' | 'stop' | 'log'> {
  private trainDataset?;
  private validationDataset?;
  private totalSamples;
- constructor(model: NanoGPT, tokeniser: ITokeniser);
+ private log;
+ private progress;
+ constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser);
  stop(): void;
  reset(): void;
  prepare(text: string[], options?: ITrainerOptions): Promise<void>;
  train(options?: ITrainerOptions): Promise<void>;
  step(options?: ITrainerOptions): Promise<void>;
+ getLog(): TrainingLogEntry[];
+ getProgress(): ExtendedTrainingProgress | null;
  }
+ export {};
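The public Trainer now keeps its own log and progress state. A small sketch of polling the new accessors while training runs, assuming a Trainer instance obtained from TeachableLLM.trainer(); the type import path is an assumption:

    // Sketch: using the new Trainer getters introduced in 0.8.x.
    import type Trainer from '@genai-fi/nanogpt/dist/Trainer'; // assumed path

    async function trainWithProgress(trainer: Trainer, text: string[]) {
      await trainer.prepare(text, { batchSize: 8, gradientCheckpointing: true });

      trainer.on('log', () => {
        const p = trainer.getProgress();   // ExtendedTrainingProgress | null
        if (p) console.log(`progress ${p.progress}, remaining ${p.remaining}`);
      });

      await trainer.train();
      console.log(trainer.getLog());       // TrainingLogEntry[]
    }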