@genai-fi/nanogpt 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/dist/Generator.js +95 -46
  2. package/dist/NanoGPTModel.d.ts +3 -2
  3. package/dist/NanoGPTModel.js +91 -76
  4. package/dist/{Reshape-BE5rA4rT.js → Reshape-Bt_t7RNz.js} +4 -4
  5. package/dist/TeachableLLM.js +1 -1
  6. package/dist/TiedEmbedding-DORsPlNL.js +44 -0
  7. package/dist/{axis_util-97KkkyRQ.js → axis_util-CVbf1vmL.js} +3 -3
  8. package/dist/{broadcast_to-CMlkG8NS.js → broadcast_to-BBoMQXbL.js} +4 -4
  9. package/dist/{concat-Cxbo2sOz.js → concat-BRRtq4S2.js} +1 -1
  10. package/dist/dataset-ZHEPJmED.js +1226 -0
  11. package/dist/{dropout-kbDY39Ci.js → dropout-lQm_YyX3.js} +1 -1
  12. package/dist/{gather-Bxe1Qip8.js → gather-BWyutxwi.js} +3 -3
  13. package/dist/{gpgpu_math-C0zyxKFi.js → gpgpu_math-Df7gzJWH.js} +1 -1
  14. package/dist/{index-iNhkcAEQ.js → index-CnHyhpKc.js} +32 -32
  15. package/dist/{kernel_funcs_utils-C4eIk4fE.js → kernel_funcs_utils-Dqo82NH4.js} +25 -25
  16. package/dist/layers/BaseLayer.js +114 -3
  17. package/dist/layers/CausalSelfAttention.d.ts +2 -3
  18. package/dist/layers/CausalSelfAttention.js +31 -30
  19. package/dist/layers/MLP.js +10 -9
  20. package/dist/layers/RMSNorm.js +12 -11
  21. package/dist/layers/RoPECache.js +3 -3
  22. package/dist/layers/TiedEmbedding.js +8 -6
  23. package/dist/layers/TransformerBlock.js +2 -2
  24. package/dist/{log_sum_exp-CkumwesB.js → log_sum_exp-CRH7Np9v.js} +12 -12
  25. package/dist/main.js +1 -1
  26. package/dist/{mat_mul-D0SifYfJ.js → mat_mul-DeGU1U_C.js} +3 -3
  27. package/dist/{max-CYaAjEEp.js → max-CcnEArWK.js} +3 -3
  28. package/dist/{moments-B06NlR_V.js → moments-DLTE6-1p.js} +4 -4
  29. package/dist/{norm-D3676xIo.js → norm-BpWsOapl.js} +5 -5
  30. package/dist/{ones-BIeFnPHR.js → ones-CDWGzVnm.js} +6 -6
  31. package/dist/ops/appendCache.js +3 -3
  32. package/dist/ops/attentionMask.js +1 -1
  33. package/dist/ops/cpu/appendCache.js +2 -2
  34. package/dist/ops/cpu/attentionMask.js +5 -5
  35. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  36. package/dist/ops/cpu/gatherSub.js +5 -5
  37. package/dist/ops/cpu/gelu.js +1 -1
  38. package/dist/ops/cpu/matMulGelu.js +1 -1
  39. package/dist/ops/cpu/matMulMul.js +1 -1
  40. package/dist/ops/cpu/mulDropout.js +1 -1
  41. package/dist/ops/cpu/normRMS.js +1 -1
  42. package/dist/ops/cpu/qkv.js +3 -3
  43. package/dist/ops/cpu/rope.js +5 -5
  44. package/dist/ops/cpu/scatterSub.js +27 -27
  45. package/dist/ops/fusedSoftmax.js +1 -1
  46. package/dist/ops/gatherSub.js +1 -1
  47. package/dist/ops/gelu.js +1 -1
  48. package/dist/ops/grads/attentionMask.js +1 -1
  49. package/dist/ops/grads/fusedSoftmax.js +2 -2
  50. package/dist/ops/grads/gelu.js +1 -1
  51. package/dist/ops/grads/matMulGelu.js +1 -1
  52. package/dist/ops/grads/normRMS.js +1 -1
  53. package/dist/ops/grads/qkv.js +1 -1
  54. package/dist/ops/grads/rope.js +1 -1
  55. package/dist/ops/matMulGelu.js +1 -1
  56. package/dist/ops/matMulMul.js +1 -1
  57. package/dist/ops/mulDrop.js +1 -1
  58. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  59. package/dist/ops/normRMS.js +1 -1
  60. package/dist/ops/qkv.js +1 -1
  61. package/dist/ops/scatterSub.js +1 -1
  62. package/dist/ops/webgl/appendCache.js +1 -1
  63. package/dist/ops/webgl/attentionMask.js +1 -1
  64. package/dist/ops/webgl/fusedSoftmax.js +36 -36
  65. package/dist/ops/webgl/gatherSub.js +1 -1
  66. package/dist/ops/webgl/gelu.js +2 -2
  67. package/dist/ops/webgl/matMulGelu.js +22 -22
  68. package/dist/ops/webgl/matMulMul.js +1 -1
  69. package/dist/ops/webgl/mulDropout.js +1 -1
  70. package/dist/ops/webgl/normRMS.js +2 -2
  71. package/dist/ops/webgl/qkv.js +1 -1
  72. package/dist/ops/webgl/rope.js +1 -1
  73. package/dist/ops/webgl/scatterSub.js +1 -1
  74. package/dist/{ops-ObfXLHYQ.js → ops-DzQTmLIl.js} +60 -60
  75. package/dist/{TiedEmbedding-DsDRvLB0.js → random_width-DI2h9CMs.js} +1215 -1250
  76. package/dist/{range-BsFU-SNG.js → range-CkOJ7090.js} +1 -1
  77. package/dist/{reshape-DxTPgnwL.js → reshape-CTIbqjwm.js} +1 -1
  78. package/dist/{sin-BOX-JVAj.js → sin-HzioENy_.js} +5 -5
  79. package/dist/{slice_util-D-kaD4ZV.js → slice_util-n4wHKmex.js} +1 -1
  80. package/dist/{softmax-BjsptB07.js → softmax-DX6qXAbm.js} +2 -2
  81. package/dist/{split-BCbrzthj.js → split-CVwhL8Oe.js} +3 -3
  82. package/dist/{stack--cqr9Dgc.js → stack-S2-D2JAQ.js} +1 -1
  83. package/dist/{sum-B_92TaHD.js → sum-UdfvaNhB.js} +4 -4
  84. package/dist/{tensor-CfiPXsW4.js → tensor-IZex6Bwp.js} +1 -1
  85. package/dist/{tensor2d-tSxWdFMH.js → tensor2d-CqtBzOKq.js} +1 -1
  86. package/dist/{tfjs_backend-NucKez4s.js → tfjs_backend-DX9yVvwk.js} +41 -41
  87. package/dist/tokeniser/CharTokeniser.js +27 -27
  88. package/dist/tokeniser/bpe.d.ts +1 -0
  89. package/dist/tokeniser/bpe.js +38 -35
  90. package/dist/training/AdamExt.js +1 -1
  91. package/dist/training/DatasetBuilder.js +22 -1242
  92. package/dist/training/FullTrainer.js +1 -1
  93. package/dist/training/Trainer.js +5 -5
  94. package/dist/training/sparseCrossEntropy.js +4 -4
  95. package/dist/utilities/dummy.js +2 -2
  96. package/dist/utilities/generate.js +3 -3
  97. package/dist/utilities/load.js +1 -1
  98. package/dist/utilities/profile.js +1 -1
  99. package/dist/utilities/save.js +5 -5
  100. package/dist/utilities/weights.js +2 -2
  101. package/dist/variable-BGvK-VN3.js +23 -0
  102. package/dist/{zeros-NMYTayy7.js → zeros-CYMicyqz.js} +3 -3
  103. package/package.json +1 -1
  104. package/dist/BaseLayer-BhrMN8JO.js +0 -135
package/dist/Generator.js CHANGED
@@ -1,72 +1,121 @@
  import { E as u } from "./index-Dwqa6Zy2.js";
- import "./index-iNhkcAEQ.js";
- import { t as d } from "./tensor2d-tSxWdFMH.js";
- import { c as p } from "./concat-Cxbo2sOz.js";
- class w extends u {
- constructor(i, t) {
- super(), this.model = i, this.tokeniser = t;
+ import "./index-CnHyhpKc.js";
+ import "./ops/cpu/attentionMask.js";
+ import "./ops/webgl/attentionMask.js";
+ import "./ops/grads/attentionMask.js";
+ import "./ops/cpu/qkv.js";
+ import "./ops/webgl/qkv.js";
+ import "./ops/grads/qkv.js";
+ import "@tensorflow/tfjs";
+ import "./ops/cpu/rope.js";
+ import "./ops/webgl/rope.js";
+ import "./ops/grads/rope.js";
+ import "./ops/cpu/appendCache.js";
+ import "./ops/webgl/appendCache.js";
+ import "./ops/cpu/fusedSoftmax.js";
+ import "./ops/webgl/fusedSoftmax.js";
+ import "./ops/grads/fusedSoftmax.js";
+ import "./ops/cpu/matMulGelu.js";
+ import "./ops/webgl/matMulGelu.js";
+ import "./ops/grads/matMulGelu.js";
+ import "./ops/cpu/normRMS.js";
+ import "./ops/webgl/normRMS.js";
+ import "./ops/grads/normRMS.js";
+ import "./random_width-DI2h9CMs.js";
+ import "./ops/cpu/gatherSub.js";
+ import "./ops/webgl/gatherSub.js";
+ import "./ops/cpu/scatterSub.js";
+ import "./ops/webgl/scatterSub.js";
+ import "./jszip.min-CjP2V1VV.js";
+ import f from "./tokeniser/CharTokeniser.js";
+ import "./dataset-ZHEPJmED.js";
+ import "./index-Tf7vU29b.js";
+ import "./papaparse.min-C8l2Kvo1.js";
+ import "./ops/cpu/gelu.js";
+ import "./ops/webgl/gelu.js";
+ import "./ops/grads/gelu.js";
+ import { t as d } from "./tensor2d-CqtBzOKq.js";
+ import { c as g } from "./concat-BRRtq4S2.js";
+ const k = [
+ ...Array.from({ length: 95 }, (a, t) => String.fromCharCode(t + 32)),
+ // ASCII
+ // Spanish accented letters and punctuation
+ ..."áéíóúüñ¿¡",
+ // Finnish accented letters
+ ..."äöÄÖÅå",
+ // Greek letters
+ ..."αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
+ // Cyrillic letters
+ ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
+ ];
+ function T(a, t) {
+ return a.length === t ? a : a.length > t ? a.slice(0, t) : a.concat(Array(t - a.length).fill(""));
+ }
+ class ot extends u {
+ constructor(t, o) {
+ super(), this.model = t, this.tokeniser = o;
  }
  active = !1;
- async tokenisePrompt(i) {
- const t = i ? await this.tokeniser.tokenise([i], !0) : [[this.tokeniser.eosToken]];
- return d(t, [1, t[0].length], "int32");
+ async tokenisePrompt(t, o) {
+ const r = o ? await t.tokenise([o], !0) : [[t.eosToken]];
+ return d(r, [1, r[0].length], "int32");
  }
- async generateNoCache(i, t) {
- let s = await this.tokenisePrompt(i), o = i || "";
- const n = t?.maxLength ?? 1e3;
- for (let a = 0; a < n && this.active; a++) {
+ async generateNoCache(t, o, r) {
+ let i = await this.tokenisePrompt(t, o), s = o || "";
+ const n = r?.maxLength ?? 1e3;
+ for (let m = 0; m < n && this.active; m++) {
  const {
  output: e,
- attention: c,
- probabilities: l
- } = this.model.generate(s, void 0, t), h = s;
- s = p([s, e], 1), h.dispose();
- const r = await this.processResponse(e, c, l);
- if (e.dispose(), r === null)
+ attention: p,
+ probabilities: c
+ } = this.model.generate(i, void 0, r), h = i;
+ i = g([i, e], 1), h.dispose();
+ const l = await this.processResponse(t, e, p, c);
+ if (e.dispose(), l === null)
  break;
- o += r;
+ s += l;
  }
- return s.dispose(), o;
+ return i.dispose(), s;
  }
- async processResponse(i, t, s) {
- const o = (await i.array())[0][0];
- if (o === this.tokeniser.eosToken)
+ async processResponse(t, o, r, i) {
+ const s = (await o.array())[0][0];
+ if (s === this.tokeniser.eosToken)
  return null;
- const n = await this.tokeniser.decode([o]);
- let a;
- t && (a = await t.array(), t.dispose());
+ const n = await t.decode([s]);
+ let m;
+ r && (m = await Promise.all(r.map((p) => p.array().then((c) => c))), r.forEach((p) => p.dispose()));
  let e;
- return s && (e = await s.array(), s.dispose()), this.emit("tokens", [o], n, a, e), n;
+ return i && (e = await i.array(), i.dispose()), this.emit("tokens", [s], n, m, e), n;
  }
- async generateCache(i, t) {
- let s = await this.tokenisePrompt(i), o = i || "";
+ async generateCache(t, o, r) {
+ let i = await this.tokenisePrompt(t, o), s = o || "";
  const n = new Array(this.model.config.gpt.nLayer);
  for (let e = 0; e < this.model.config.gpt.nLayer; e++)
  n[e] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
- const a = t?.maxLength ?? 1e3;
- for (let e = 0; e < a && this.active; e++) {
+ const m = r?.maxLength ?? 1e3;
+ for (let e = 0; e < m && this.active; e++) {
  const {
- output: c,
- attention: l,
- probabilities: h
- } = this.model.generate(s, n, {
- ...t,
+ output: p,
+ probabilities: c,
+ attention: h
+ } = this.model.generate(i, n, {
+ ...r,
  usePadding: !1
  });
- s.dispose(), s = c;
- const r = await this.processResponse(c, l, h);
- if (r === null)
+ i.dispose(), i = p;
+ const l = await this.processResponse(t, p, h, c);
+ if (l === null)
  break;
- o += r;
+ s += l;
  }
  return n.forEach((e) => {
  e && (e.k && e.k.dispose(), e.v && e.v.dispose());
- }), s.dispose(), o;
+ }), i.dispose(), s;
  }
- async generate(i, t) {
- const s = i && i.length > this.model.config.gpt.blockSize ? i.slice(-this.model.config.gpt.blockSize) : i;
+ async generate(t, o) {
+ const r = t && t.length > this.model.config.gpt.blockSize ? t.slice(-this.model.config.gpt.blockSize) : t;
  this.active = !0, this.emit("start");
- const n = await (this.model.config.gpt.useRope && !t?.noCache ? this.generateCache(s, t) : this.generateNoCache(s, t));
+ const i = this.tokeniser.trained ? this.tokeniser : new f(T(k, this.tokeniser.vocabSize)), n = await (this.model.config.gpt.useRope && !o?.noCache ? this.generateCache(i, r, o) : this.generateNoCache(i, r, o));
  return this.active = !1, this.emit("stop"), n;
  }
  stop() {
@@ -74,5 +123,5 @@ class w extends u {
  }
  }
  export {
- w as default
+ ot as default
  };
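Beyond the re-hashed chunk imports, the behavioural change above is that generate() now picks a tokeniser before decoding: if the attached tokeniser is untrained it falls back to a CharTokeniser built from a default character set (printable ASCII plus Spanish, Finnish, Greek and Cyrillic letters) resized to the model's vocabulary size, and tokenisePrompt/processResponse now receive that tokeniser as an argument. A readable sketch of the minified helper T and the fallback selection, with illustrative names:

    // De-minified sketch of the helper T above: resize the default character
    // list to exactly the tokeniser's vocabulary size.
    function padOrTruncate(chars: string[], size: number): string[] {
      if (chars.length === size) return chars;
      if (chars.length > size) return chars.slice(0, size);
      return chars.concat(Array(size - chars.length).fill(""));
    }

    // Fallback selection as performed inside generate() (names illustrative):
    // const tok = this.tokeniser.trained
    //   ? this.tokeniser
    //   : new CharTokeniser(padOrTruncate(DEFAULT_CHARS, this.tokeniser.vocabSize));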
package/dist/NanoGPTModel.d.ts CHANGED
@@ -13,8 +13,9 @@ export interface TrainingLogEntry {
  export interface GenerateOptions {
  temperature?: number;
  topK?: number;
+ topP?: number;
  usePadding?: boolean;
- attentionScores?: AttentionScores;
+ attentionScores?: boolean;
  includeProbabilities?: boolean;
  }
  export interface ModelForwardAttributes extends ForwardAttributes {
@@ -41,8 +42,8 @@ export default class NanoGPT extends BaseLayer<ModelForwardAttributes> {
  forward(attrs: ModelForwardAttributes, idx: Tensor, targets?: Tensor): Tensor[];
  generate(idx: Tensor, cache?: KVCache[], options?: GenerateOptions): {
  output: Tensor;
- attention?: Tensor;
  probabilities?: Tensor;
+ attention?: Tensor[];
  };
  getNumParams(): number;
  dispose(): void;
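The declaration changes add a topP option and turn attentionScores into a plain boolean flag, with attention now coming back from generate() as an array of per-layer tensors. A hedged usage sketch; only the option names come from the .d.ts above, the values and the commented call are illustrative:

    const options = {
      temperature: 0.8,
      topK: 40,
      topP: 0.9,              // new in 0.5.2: nucleus-sampling threshold
      usePadding: false,
      attentionScores: true,  // now a boolean flag; attention is returned as Tensor[]
      includeProbabilities: true,
    };
    // const { output, probabilities, attention } = model.generate(idx, undefined, options);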
package/dist/NanoGPTModel.js CHANGED
@@ -1,16 +1,18 @@
- import { defaultConfig as L } from "./config.js";
- import q from "./layers/TransformerBlock.js";
- import { E as O, D as T, T as K, r as P, p as _ } from "./TiedEmbedding-DsDRvLB0.js";
- import F from "./layers/RoPECache.js";
- import D from "./layers/RMSNorm.js";
- import { estimateParameterCount as N } from "./utilities/parameters.js";
- import { createSoftmaxCrossEntropyWithGrad as R } from "./training/sparseCrossEntropy.js";
- import { B } from "./BaseLayer-BhrMN8JO.js";
- import { o as k, i as m, q as G, E as w, aa as A, ab as V, ac as j, t as b, a9 as W, f as y, F as H } from "./index-iNhkcAEQ.js";
- import { r as $ } from "./reshape-DxTPgnwL.js";
- import { r as J } from "./range-BsFU-SNG.js";
- import { g as Q } from "./gather-Bxe1Qip8.js";
- import { s as U } from "./softmax-BjsptB07.js";
+ import { defaultConfig as F } from "./config.js";
+ import O from "./layers/TransformerBlock.js";
+ import { T as N, r as R } from "./TiedEmbedding-DORsPlNL.js";
+ import A from "./layers/RoPECache.js";
+ import G from "./layers/RMSNorm.js";
+ import { estimateParameterCount as j } from "./utilities/parameters.js";
+ import { createSoftmaxCrossEntropyWithGrad as B } from "./training/sparseCrossEntropy.js";
+ import V from "./layers/BaseLayer.js";
+ import { E as H, D as W, p as J } from "./random_width-DI2h9CMs.js";
+ import { o as x, j as y, u as Q, E as I, aa as U, ab as X, ac as Y, t as z, a9 as Z, f as L, H as tt } from "./index-CnHyhpKc.js";
+ import { r as T } from "./reshape-CTIbqjwm.js";
+ import { r as et } from "./range-CkOJ7090.js";
+ import { s as q } from "./softmax-DX6qXAbm.js";
+ import { t as ot } from "./ops-DzQTmLIl.js";
+ import { g as st } from "./gather-BWyutxwi.js";
  /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -27,13 +29,13 @@ import { s as U } from "./softmax-BjsptB07.js";
  * limitations under the License.
  * =============================================================================
  */
- function X(h, t) {
- let e = m(h, "a", "mod"), o = m(t, "b", "mod");
- [e, o] = G(e, o);
+ function nt(l, t) {
+ let e = y(l, "a", "mod"), o = y(t, "b", "mod");
+ [e, o] = Q(e, o);
  const n = { a: e, b: o };
- return w.runKernel(A, n);
+ return I.runKernel(U, n);
  }
- const Y = /* @__PURE__ */ k({ mod_: X });
+ const it = /* @__PURE__ */ x({ mod_: nt });
  /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -50,17 +52,17 @@ const Y = /* @__PURE__ */ k({ mod_: X });
  * limitations under the License.
  * =============================================================================
  */
- function Z(h, t, e, o = !1) {
- const n = m(h, "logits", "multinomial"), s = n.size, i = n.rank;
+ function rt(l, t, e, o = !1) {
+ const n = y(l, "logits", "multinomial"), s = n.size, i = n.rank;
  if (s < 2)
  throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
  if (i > 2)
  throw new Error(`Rank of probabilities must be 1 or 2, but is ${i}`);
  e = e || Math.random();
- const a = { logits: i === 1 ? $(n, [1, -1]) : n }, c = { numSamples: t, seed: e, normalized: o }, l = w.runKernel(V, a, c);
- return i === 1 ? $(l, [l.size]) : l;
+ const r = { logits: i === 1 ? T(n, [1, -1]) : n }, u = { numSamples: t, seed: e, normalized: o }, c = I.runKernel(X, r, u);
+ return i === 1 ? T(c, [c.size]) : c;
  }
- const z = /* @__PURE__ */ k({ multinomial_: Z });
+ const C = /* @__PURE__ */ x({ multinomial_: rt });
  /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -77,8 +79,8 @@ const z = /* @__PURE__ */ k({ multinomial_: Z });
  * limitations under the License.
  * =============================================================================
  */
- function tt(h, t = 1, e = !0) {
- const o = m(h, "x", "topk");
+ function ct(l, t = 1, e = !0) {
+ const o = y(l, "x", "topk");
  if (o.rank === 0)
  throw new Error("topk() expects the input to be of rank 1 or higher");
  const n = o.shape[o.shape.length - 1];
@@ -86,10 +88,10 @@ function tt(h, t = 1, e = !0) {
  throw new Error(`'k' passed to topk() must be >= 0 but got ${t}`);
  if (t > n)
  throw new Error(`'k' passed to topk() must be <= the last dimension (${n}) but got ${t}`);
- const s = { x: o }, i = { k: t, sorted: e }, [r, a] = w.runKernel(j, s, i);
- return { values: r, indices: a };
+ const s = { x: o }, i = { k: t, sorted: e }, [p, r] = I.runKernel(Y, s, i);
+ return { values: p, indices: r };
  }
- const et = /* @__PURE__ */ k({ topk_: tt });
+ const at = /* @__PURE__ */ x({ topk_: ct });
  /**
  * @license
  * Copyright 2018 Google LLC
@@ -99,13 +101,13 @@ const et = /* @__PURE__ */ k({ topk_: tt });
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
- function ot(h) {
- return new T(h);
+ function lt(l) {
+ return new W(l);
  }
- function st(h) {
- return new O(h);
+ function pt(l) {
+ return new H(l);
  }
- class bt extends B {
+ class xt extends V {
  wte;
  // Token embeddings
  wpe;
@@ -119,15 +121,15 @@ class bt extends B {
  log = [];
  // Training log
  constructor(t = {}) {
- super({ gpt: { ...L, ...t }, layerConfig: {} }), this.wte = new K(this.config, "token_embedding", this), this.config.gpt.useRope === !1 ? this.wpe = st({
+ super({ gpt: { ...F, ...t }, layerConfig: {} }), this.wte = new N(this.config, "token_embedding", this), this.config.gpt.useRope === !1 ? this.wpe = pt({
  inputDim: this.config.gpt.blockSize,
  outputDim: this.config.gpt.nEmbed,
  name: "positional_embedding",
- embeddingsInitializer: P({ mean: 0, stddev: 0.02 })
- }) : (this.ropeCache = new F(this.config.gpt), this.config.layerConfig.ropeCache = this.ropeCache), this.drop = ot({ rate: this.config.gpt.dropout }), this.blocks = [];
+ embeddingsInitializer: R({ mean: 0, stddev: 0.02 })
+ }) : (this.ropeCache = new A(this.config.gpt), this.config.layerConfig.ropeCache = this.ropeCache), this.drop = lt({ rate: this.config.gpt.dropout }), this.blocks = [];
  for (let e = 0; e < this.config.gpt.nLayer; e++)
- this.blocks.push(new q(e, this.config, this));
- this.lnF = new D(this.config, "final_rms_norm", this);
+ this.blocks.push(new O(e, this.config, this));
+ this.lnF = new G(this.config, "final_rms_norm", this);
  }
  get checkpointing() {
  return this.config.layerConfig.checkpointing === !0;
@@ -136,11 +138,11 @@ class bt extends B {
  this.config.layerConfig.checkpointing = t;
  }
  inputPhase(t, e, o = !1) {
- return b(() => {
+ return z(() => {
  const n = this.wte.embed(t);
  if (this.config.gpt.useRope === !1) {
- const [, s] = t.shape, i = this.config.gpt.blockSize, r = J(0, s, 1, "int32"), a = Y(W(r, y(e, "int32")), y(i, "int32")), c = this.wpe.apply(a), l = n.add(c);
- return this.drop.apply(l, { training: o });
+ const [, s] = t.shape, i = this.config.gpt.blockSize, p = et(0, s, 1, "int32"), r = it(Z(p, L(e, "int32")), L(i, "int32")), u = this.wpe.apply(r), c = n.add(u);
+ return this.drop.apply(c, { training: o });
  } else
  return this.drop.apply(n, { training: o });
  });
@@ -167,7 +169,7 @@ class bt extends B {
  }
  calculateLoss(t, e) {
  try {
- return R()(t, e).mean();
+ return B()(t, e).mean();
  } catch (o) {
  throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
  }
@@ -205,7 +207,7 @@ class bt extends B {
  });
  }*/
  forward(t, e, o) {
- return this.validateInput(e), b(() => {
+ return this.validateInput(e), z(() => {
  this.startMemory();
  const n = t.cache?.[0]?.length ?? 0;
  let s = this.inputPhase(e, n, t.training);
@@ -213,59 +215,72 @@ class bt extends B {
  throw console.error("Cache", t.cache), new Error(
  `Cache length ${t.cache.length} does not match number of blocks ${this.blocks.length}`
  );
- let i;
- for (let c = 0; c < this.blocks.length; c++) {
- const l = this.blocks[c], f = Math.random() * 1e9, p = {
+ for (let r = 0; r < this.blocks.length; r++) {
+ const u = this.blocks[r], c = Math.random() * 1e9, g = {
  training: t.training,
- seed: f,
+ seed: c,
  attentionScores: t.attentionScores,
- pastKV: t.cache ? t.cache[c] : void 0
- }, u = this.config.layerConfig.checkpointing && t.training ? l.callCheckpoint(p, s) : l.call(p, s);
- s.dispose(), s = u, p.attentionScores?.attentionOut && (i = p.attentionScores.attentionOut);
+ pastKV: t.cache ? t.cache[r] : void 0
+ }, S = this.config.layerConfig.checkpointing && t.training ? u.callCheckpoint(g, s) : u.call(g, s);
+ s.dispose(), s = S;
  }
  s = this.lnF.call(t, s);
- const r = this.wte.project(s);
+ const i = this.wte.project(s);
  s.dispose();
- let a;
- return o && (a = this.calculateLoss(r, o)), this.endMemory("Forward"), t.attentionScores && (t.attentionScores.attentionOut = i ? H(i) : void 0), a ? [r, a] : [r];
+ let p;
+ return o && (p = this.calculateLoss(i, o)), this.endMemory("Forward"), p ? [i, p] : [i];
  });
  }
  generate(t, e, o) {
- const n = o?.temperature ?? 1, s = o?.topK, i = o?.usePadding ?? !1;
- return b(() => {
- const r = t, a = r.shape[1], c = a <= this.config.gpt.blockSize ? r : r.slice(
- [0, a - this.config.gpt.blockSize],
+ const n = o?.temperature ?? 1, s = o?.topK, i = o?.topP, p = o?.usePadding ?? !1;
+ return z(() => {
+ const r = t, u = r.shape[1], c = u <= this.config.gpt.blockSize ? r : r.slice(
+ [0, u - this.config.gpt.blockSize],
  [r.shape[0], this.config.gpt.blockSize]
- ), l = i ? this.config.gpt.blockSize - c.shape[1] : 0, f = l > 0 ? _(c, [
+ ), g = p ? this.config.gpt.blockSize - c.shape[1] : 0, S = g > 0 ? J(c, [
  [0, 0],
- [0, l]
- ]) : c, p = {
+ [0, g]
+ ]) : c, f = {
  training: !1,
- attentionScores: o?.attentionScores,
+ attentionScores: o?.attentionScores ? {
+ attentionOut: []
+ } : void 0,
  cache: e
- }, [u] = this.forward(p, f), E = u.shape[1] - 1 - l, C = u.slice([0, E, 0], [u.shape[0], 1, u.shape[2]]), I = p.attentionScores?.attentionOut ? p.attentionScores.attentionOut.slice(
- [0, E, 0],
- [p.attentionScores.attentionOut.shape[0], 1, p.attentionScores.attentionOut.shape[2]]
- ) : void 0;
- u.dispose();
- const d = C.div(n);
- let g;
- if (s) {
- const { values: v, indices: M } = et(d, s), x = z(v.squeeze([1]), 1);
- g = Q(M.squeeze([1]), x, 1);
+ }, [d] = this.forward(f, S), M = d.shape[1] - 1 - g, K = d.slice([0, M, 0], [d.shape[0], 1, d.shape[2]]);
+ f.attentionScores?.attentionOut && f.attentionScores.attentionOut.forEach((h, b) => {
+ h.shape[1] !== 1 && (f.attentionScores.attentionOut[b] = tt(
+ h.slice([0, M, 0], [h.shape[0], 1, h.shape[2]])
+ ), h.dispose());
+ }), d.dispose();
+ const w = K.div(n);
+ let m;
+ if (i) {
+ const h = q(w.squeeze([1])), b = h.arraySync()[0];
+ h.dispose();
+ const E = b.map((a, k) => ({ prob: a, index: k })).sort((a, k) => k.prob - a.prob);
+ let v = 0;
+ const $ = new Array(E.length).fill(0);
+ for (const a of E)
+ if (v += a.prob, $[a.index] = a.prob, v >= i)
+ break;
+ const _ = $.reduce((a, k) => a + k, 0), D = $.map((a) => a / _);
+ m = C(ot(D), 1, void 0, !0);
+ } else if (s) {
+ const { values: h, indices: b } = at(w, s), E = C(h.squeeze([1]), 1);
+ m = st(b.squeeze([1]), E, 1);
  } else
- g = z(d.squeeze([1]), 1);
- let S;
- return o?.includeProbabilities && (S = U(d.squeeze([1]))), g = g.reshape([1, 1]), { output: g, attention: I?.squeeze([1]), probabilities: S };
+ m = C(w.squeeze([1]), 1);
+ let P;
+ return o?.includeProbabilities && (P = q(w.squeeze([1]))), m = m.reshape([1, 1]), { output: m, probabilities: P, attention: f.attentionScores?.attentionOut };
  });
  }
  getNumParams() {
- return N(this.config.gpt);
+ return j(this.config.gpt);
  }
  dispose() {
  this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
  }
  }
  export {
- bt as default
+ xt as default
  };
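The main functional addition in generate() above is a top-p (nucleus) sampling branch: the temperature-scaled logits are softmaxed, sorted by descending probability, accumulated until the cumulative mass reaches topP, renormalised, and then sampled with a multinomial draw. A de-minified sketch of that filtering step on a plain probability array (names are illustrative; the real code feeds the result back into the multinomial kernel):

    function topPFilter(probs: number[], topP: number): number[] {
      // Rank tokens by descending probability.
      const ranked = probs
        .map((prob, index) => ({ prob, index }))
        .sort((a, b) => b.prob - a.prob);

      // Keep tokens until the cumulative probability reaches the nucleus threshold.
      const kept = new Array(probs.length).fill(0);
      let cumulative = 0;
      for (const { prob, index } of ranked) {
        cumulative += prob;
        kept[index] = prob;
        if (cumulative >= topP) break;
      }

      // Renormalise the surviving mass so it sums to 1 again.
      const total = kept.reduce((sum, p) => sum + p, 0);
      return kept.map((p) => p / total);
    }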
package/dist/{Reshape-BE5rA4rT.js → Reshape-Bt_t7RNz.js} RENAMED
@@ -1,5 +1,5 @@
- import { ad as $, ae as g, p, af as C, k as x } from "./index-iNhkcAEQ.js";
- import { u as I } from "./gpgpu_math-C0zyxKFi.js";
+ import { ad as $, ae as g, q as p, af as C, l as x } from "./index-CnHyhpKc.js";
+ import { u as I } from "./gpgpu_math-Df7gzJWH.js";
  /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -201,12 +201,12 @@ function D(t, e, o) {
  * limitations under the License.
  * =============================================================================
  */
- function U(t) {
+ function k(t) {
  const { inputs: e, backend: o, attrs: s } = t, { x: n } = e, { shape: r } = s, i = o, u = p(n.shape), a = C(r, u), c = p(a);
  x(u === c, () => `The new shape (${a}) has ${c} elements and the old shape (${n.shape}) has ${u} elements. The new shape and old shape must have the same number of elements.`);
  const l = i.texData.get(n.dataId);
  return l.isPacked && !f(n.shape, a) && !(l.texture !== null && f(l.shape, a)) ? D(n, a, i) : (i.incRef(n.dataId), { dataId: n.dataId, shape: a, dtype: n.dtype });
  }
  export {
- U as r
+ k as r
  };
package/dist/TeachableLLM.js CHANGED
@@ -11,7 +11,7 @@ import g from "./tokeniser/bpe.js";
  import "./papaparse.min-C8l2Kvo1.js";
  import "./index-Tf7vU29b.js";
  import "./jszip.min-CjP2V1VV.js";
- import "./index-iNhkcAEQ.js";
+ import "./index-CnHyhpKc.js";
  import "./ops/cpu/scatterSub.js";
  import "./ops/webgl/scatterSub.js";
  import "./ops/cpu/gatherSub.js";
package/dist/TiedEmbedding-DORsPlNL.js ADDED
@@ -0,0 +1,44 @@
+ import { R as a } from "./random_width-DI2h9CMs.js";
+ import "./index-CnHyhpKc.js";
+ import { d as s } from "./tfjs_backend-DX9yVvwk.js";
+ import o from "./layers/BaseLayer.js";
+ import { v as m } from "./variable-BGvK-VN3.js";
+ import { g as d } from "./gather-BWyutxwi.js";
+ /**
+ * @license
+ * Copyright 2018 Google LLC
+ *
+ * Use of this source code is governed by an MIT-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/MIT.
+ * =============================================================================
+ */
+ function n(e) {
+ return new a(e);
+ }
+ class c extends o {
+ vocabSize;
+ embedDim;
+ initializer;
+ WEIGHTS;
+ constructor(t, i, r) {
+ super(t, r), this.WEIGHTS = i, this.vocabSize = t.gpt.vocabSize, this.embedDim = t.gpt.nEmbed, this.initializer = n({
+ mean: 0,
+ stddev: 0.02
+ }), this.addVariable(this.WEIGHTS, m(this.initializer.apply([this.vocabSize, this.embedDim]), !0));
+ }
+ embed(t) {
+ return d(this.getVariable(this.WEIGHTS), t, 0);
+ }
+ project(t) {
+ return s(t, this.getVariable(this.WEIGHTS).transpose());
+ }
+ // Dummy, should not be used.
+ forward(t, i) {
+ return this.project(i);
+ }
+ }
+ export {
+ c as T,
+ n as r
+ };
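This new chunk is a weight-tied embedding: a single [vocabSize, nEmbed] matrix both embeds token ids (a gather along axis 0) and projects hidden states back to vocabulary logits (a matmul against its transpose), so the input and output embeddings share parameters. A minimal TensorFlow.js sketch of the same idea, with assumed illustrative sizes:

    import * as tf from "@tensorflow/tfjs";

    const vocabSize = 256;                         // illustrative
    const nEmbed = 64;                             // illustrative
    const weights = tf.variable(tf.randomNormal([vocabSize, nEmbed], 0, 0.02));

    // embed(): look up rows of the shared matrix for each token id.
    const ids = tf.tensor1d([1, 5, 42], "int32");
    const embedded = tf.gather(weights, ids, 0);   // shape [3, nEmbed]

    // project(): reuse the same matrix, transposed, to produce logits.
    const logits = tf.matMul(embedded, weights.transpose()); // shape [3, vocabSize]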
package/dist/{axis_util-97KkkyRQ.js → axis_util-CVbf1vmL.js} RENAMED
@@ -1,4 +1,4 @@
- import { k as c } from "./index-iNhkcAEQ.js";
+ import { l as c } from "./index-CnHyhpKc.js";
  /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -21,7 +21,7 @@ function i(e, n) {
  return !1;
  return !0;
  }
- function p(e, n, t) {
+ function l(e, n, t) {
  const r = e.length + n.length, s = [];
  let o = 0, f = 0;
  for (let u = 0; u < r; u++)
@@ -37,7 +37,7 @@ function a(e, n) {
  }
  function m(e, n) {
  const t = n.map((r) => 1);
- return p(e, t, n);
+ return l(e, t, n);
  }
  function d(e, n, t) {
  c(i(n, t), () => `${e} supports only inner-most axes for now. Got axes ${n} and rank-${t} input.`);
package/dist/{broadcast_to-CMlkG8NS.js → broadcast_to-BBoMQXbL.js} RENAMED
@@ -1,5 +1,5 @@
- import { o as h, i as f, l as p, x as g, E as u, T } from "./index-iNhkcAEQ.js";
- import { r as b } from "./reshape-DxTPgnwL.js";
+ import { o as h, j as f, n as p, y as g, E as u, L as b } from "./index-CnHyhpKc.js";
+ import { r as T } from "./reshape-CTIbqjwm.js";
  /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -25,7 +25,7 @@ function m(e, r) {
  const t = n.shape.slice();
  for (; t.length < r.length; )
  t.unshift(1);
- n = b(n, t);
+ n = T(n, t);
  }
  const s = n.shape, o = Array.from(r);
  for (let t = r.length - 1; t >= 0; t--)
@@ -36,7 +36,7 @@ function m(e, r) {
  if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
  return g(n);
  const i = { x: n }, c = { reps: o };
- return u.runKernel(T, i, c);
+ return u.runKernel(b, i, c);
  }
  const E = /* @__PURE__ */ h({ broadcastTo_: m });
  export {
package/dist/{concat-Cxbo2sOz.js → concat-BRRtq4S2.js} RENAMED
@@ -1,4 +1,4 @@
- import { o as s, k as a, j as p, x as i, E as l, C as f } from "./index-iNhkcAEQ.js";
+ import { o as s, l as a, k as p, y as i, E as l, C as f } from "./index-CnHyhpKc.js";
  /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.