npm - @genai-fi/nanogpt - Versions diffs - 0.5.0 → 0.5.2 - Mend

@genai-fi/nanogpt 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

package/dist/Generator.js +95 -46
package/dist/NanoGPTModel.d.ts +3 -2
package/dist/NanoGPTModel.js +91 -76
package/dist/{Reshape-BE5rA4rT.js → Reshape-Bt_t7RNz.js} +4 -4
package/dist/TeachableLLM.js +1 -1
package/dist/TiedEmbedding-DORsPlNL.js +44 -0
package/dist/{axis_util-97KkkyRQ.js → axis_util-CVbf1vmL.js} +3 -3
package/dist/{broadcast_to-CMlkG8NS.js → broadcast_to-BBoMQXbL.js} +4 -4
package/dist/{concat-Cxbo2sOz.js → concat-BRRtq4S2.js} +1 -1
package/dist/dataset-ZHEPJmED.js +1226 -0
package/dist/{dropout-kbDY39Ci.js → dropout-lQm_YyX3.js} +1 -1
package/dist/{gather-Bxe1Qip8.js → gather-BWyutxwi.js} +3 -3
package/dist/{gpgpu_math-C0zyxKFi.js → gpgpu_math-Df7gzJWH.js} +1 -1
package/dist/{index-iNhkcAEQ.js → index-CnHyhpKc.js} +32 -32
package/dist/{kernel_funcs_utils-C4eIk4fE.js → kernel_funcs_utils-Dqo82NH4.js} +25 -25
package/dist/layers/BaseLayer.js +114 -3
package/dist/layers/CausalSelfAttention.d.ts +2 -3
package/dist/layers/CausalSelfAttention.js +31 -30
package/dist/layers/MLP.js +10 -9
package/dist/layers/RMSNorm.js +12 -11
package/dist/layers/RoPECache.js +3 -3
package/dist/layers/TiedEmbedding.js +8 -6
package/dist/layers/TransformerBlock.js +2 -2
package/dist/{log_sum_exp-CkumwesB.js → log_sum_exp-CRH7Np9v.js} +12 -12
package/dist/main.js +1 -1
package/dist/{mat_mul-D0SifYfJ.js → mat_mul-DeGU1U_C.js} +3 -3
package/dist/{max-CYaAjEEp.js → max-CcnEArWK.js} +3 -3
package/dist/{moments-B06NlR_V.js → moments-DLTE6-1p.js} +4 -4
package/dist/{norm-D3676xIo.js → norm-BpWsOapl.js} +5 -5
package/dist/{ones-BIeFnPHR.js → ones-CDWGzVnm.js} +6 -6
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +5 -5
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +5 -5
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +1 -1
package/dist/ops/cpu/matMulMul.js +1 -1
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +27 -27
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +1 -1
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +1 -1
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.js +1 -1
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +36 -36
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/matMulGelu.js +22 -22
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/{ops-ObfXLHYQ.js → ops-DzQTmLIl.js} +60 -60
package/dist/{TiedEmbedding-DsDRvLB0.js → random_width-DI2h9CMs.js} +1215 -1250
package/dist/{range-BsFU-SNG.js → range-CkOJ7090.js} +1 -1
package/dist/{reshape-DxTPgnwL.js → reshape-CTIbqjwm.js} +1 -1
package/dist/{sin-BOX-JVAj.js → sin-HzioENy_.js} +5 -5
package/dist/{slice_util-D-kaD4ZV.js → slice_util-n4wHKmex.js} +1 -1
package/dist/{softmax-BjsptB07.js → softmax-DX6qXAbm.js} +2 -2
package/dist/{split-BCbrzthj.js → split-CVwhL8Oe.js} +3 -3
package/dist/{stack--cqr9Dgc.js → stack-S2-D2JAQ.js} +1 -1
package/dist/{sum-B_92TaHD.js → sum-UdfvaNhB.js} +4 -4
package/dist/{tensor-CfiPXsW4.js → tensor-IZex6Bwp.js} +1 -1
package/dist/{tensor2d-tSxWdFMH.js → tensor2d-CqtBzOKq.js} +1 -1
package/dist/{tfjs_backend-NucKez4s.js → tfjs_backend-DX9yVvwk.js} +41 -41
package/dist/tokeniser/CharTokeniser.js +27 -27
package/dist/tokeniser/bpe.d.ts +1 -0
package/dist/tokeniser/bpe.js +38 -35
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +22 -1242
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +5 -5
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +2 -2
package/dist/utilities/generate.js +3 -3
package/dist/utilities/load.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/save.js +5 -5
package/dist/utilities/weights.js +2 -2
package/dist/variable-BGvK-VN3.js +23 -0
package/dist/{zeros-NMYTayy7.js → zeros-CYMicyqz.js} +3 -3
package/package.json +1 -1
package/dist/BaseLayer-BhrMN8JO.js +0 -135

package/dist/{dropout-kbDY39Ci.js → dropout-lQm_YyX3.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as l, i as h, E as m, ag as p, l as c, ah as d, ae as g, k as u, V, ai as v, a9 as N, b as w } from "./index-iNhkcAEQ.js";
+import { o as l, j as h, E as m, ak as p, n as c, al as d, ae as g, l as u, T as V, am as v, a9 as N, b as w } from "./index-CnHyhpKc.js";
 import { s as f } from "./index-C4L8Cm77.js";
 /**
  * @license

package/dist/{gather-Bxe1Qip8.js → gather-BWyutxwi.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as g, i as t, E as h, G as p } from "./index-iNhkcAEQ.js";
+import { o as g, j as t, E as h, G as p } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -16,8 +16,8 @@ import { o as g, i as t, E as h, G as p } from "./index-iNhkcAEQ.js";
  * =============================================================================
  */
 function u(n, s, r = 0, e = 0) {
-  const o = t(n, "x", "gather"), a = t(s, "indices", "gather", "int32"), i = { x: o, indices: a }, c = { axis: r, batchDims: e };
-  return h.runKernel(p, i, c);
+  const o = t(n, "x", "gather"), a = t(s, "indices", "gather", "int32"), c = { x: o, indices: a }, i = { axis: r, batchDims: e };
+  return h.runKernel(p, c, i);
 }
 const d = /* @__PURE__ */ g({ gather_: u });
 export {

package/dist/{gpgpu_math-C0zyxKFi.js → gpgpu_math-Df7gzJWH.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { K as e } from "./index-iNhkcAEQ.js";
+import { N as e } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.

package/dist/{index-iNhkcAEQ.js → index-CnHyhpKc.js} RENAMED Viewed

@@ -4005,26 +4005,26 @@ export {
   Ss as A,
   Zs as B,
   or as C,
-  Wa as D,
+  Ft as D,
   g as E,
-  Bn as F,
+  Wa as F,
   Pr as G,
-  Fs as H,
-  kn as I,
-  En as J,
-  k as K,
-  Lr as L,
+  Bn as H,
+  Fs as I,
+  kn as J,
+  En as K,
+  Qa as L,
   ta as M,
-  rs as N,
-  de as O,
+  k as N,
+  Lr as O,
   ba as P,
-  Ea as Q,
+  rs as Q,
   Ia as R,
   qa as S,
-  Qa as T,
-  Zt as U,
-  D as V,
-  To as W,
+  D as T,
+  de as U,
+  Ea as V,
+  Zt as W,
   De as X,
   ar as Y,
   ne as Z,
@@ -4074,13 +4074,13 @@ export {
   $t as ad,
   Rt as ae,
   Rs as af,
-  xr as ag,
-  Wn as ah,
-  x as ai,
-  F as aj,
-  pe as ak,
-  fo as al,
-  dt as am,
+  F as ag,
+  pe as ah,
+  fo as ai,
+  dt as aj,
+  xr as ak,
+  Wn as al,
+  x as am,
   jt as an,
   ue as ao,
   za as ap,
@@ -4214,22 +4214,22 @@ export {
   K as f,
   ss as g,
   lo as h,
-  T as i,
-  In as j,
-  y as k,
-  xt as l,
+  To as i,
+  T as j,
+  In as k,
+  y as l,
   po as m,
-  Ge as n,
+  xt as n,
   N as o,
-  z as p,
-  q,
+  Ge as p,
+  z as q,
   co as r,
   tt as s,
   E as t,
-  Ba as u,
+  q as u,
   ls as v,
-  Ka as w,
-  qn as x,
-  Ft as y,
+  Ba as w,
+  Ka as x,
+  qn as y,
   C as z
 };

package/dist/{kernel_funcs_utils-C4eIk4fE.js → kernel_funcs_utils-Dqo82NH4.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { an as D, ao as N, N as w, p as R, O as v, K as P } from "./index-iNhkcAEQ.js";
-import { u as g } from "./gpgpu_math-C0zyxKFi.js";
+import { an as D, ao as N, Q as w, q as R, U as v, N as P } from "./index-CnHyhpKc.js";
+import { u as g } from "./gpgpu_math-Df7gzJWH.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -23,7 +23,7 @@ function B(t) {
     throw new Error(`Failed to decode encoded string bytes into utf-8, error: ${e}`);
   }
 }
-function K(t) {
+function H(t) {
   return t.map((e) => N(e));
 }
 /**
@@ -127,12 +127,12 @@ class C {
  * =============================================================================
  */
 class _ {
-  constructor(e, o, u, p = !1) {
+  constructor(e, o, u, d = !1) {
     this.variableNames = ["A", "B"], this.supportsBroadcasting = !0, this.packedInputs = !0, this.packedOutput = !0, this.outputShape = w(o, u);
     const a = this.outputShape.length;
     this.enableShapeUniforms = g(a);
     let n = "";
-    if (p)
+    if (d)
       if (a === 0 || R(this.outputShape) === 1)
         n = `
           result.y = 0.;
@@ -225,7 +225,7 @@ function A(t) {
  * =============================================================================
  */
 function G(t) {
-  const { inputs: e, backend: o } = t, { real: u, imag: p } = e, a = o.makeTensorInfo(u.shape, "complex64"), n = o.texData.get(a.dataId), l = A({ inputs: { x: u }, backend: o }), s = A({ inputs: { x: p }, backend: o });
+  const { inputs: e, backend: o } = t, { real: u, imag: d } = e, a = o.makeTensorInfo(u.shape, "complex64"), n = o.texData.get(a.dataId), l = A({ inputs: { x: u }, backend: o }), s = A({ inputs: { x: d }, backend: o });
   return n.complexTensorInfos = { real: l, imag: s }, a;
 }
 /**
@@ -260,7 +260,7 @@ class V {
     `;
   }
 }
-const H = "if (isnan(x)) return x;";
+const K = "if (isnan(x)) return x;";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -310,8 +310,8 @@ class L {
  * =============================================================================
  */
 function Y({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: o, dtype: u }) {
-  return ({ inputs: p, backend: a }) => {
-    const { x: n } = p, l = a, s = u || n.dtype;
+  return ({ inputs: d, backend: a }) => {
+    const { x: n } = d, l = a, s = u || n.dtype;
     if (l.shouldExecuteOnCPU([n]) && o != null) {
       const c = l.texData.get(n.dataId), x = o(c.values, s);
       return l.makeTensorInfo(n.shape, s, x);
@@ -321,37 +321,37 @@ function Y({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: o, dtype: u }) {
     return i ? r = new L(n.shape, e) : r = new V(n.shape, t), l.runWebGLProgram(r, [n], s);
   };
 }
-function j({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, supportsComplex: u = !1, cpuKernelImpl: p, dtype: a }) {
+function q({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, supportsComplex: u = !1, cpuKernelImpl: d, dtype: a }) {
   return ({ inputs: n, backend: l }) => {
     const { a: s, b: i } = n, r = l;
     if (u && s.dtype === "complex64") {
-      const h = r.texData.get(s.dataId), f = r.texData.get(i.dataId), [O, y] = [
+      const h = r.texData.get(s.dataId), f = r.texData.get(i.dataId), [y, O] = [
         [h.complexTensorInfos.real, f.complexTensorInfos.real],
         [h.complexTensorInfos.imag, f.complexTensorInfos.imag]
       ].map((S) => {
-        const [d, m] = S, $ = {
-          dataId: d.dataId,
-          dtype: d.dtype,
+        const [p, m] = S, $ = {
+          dataId: p.dataId,
+          dtype: p.dtype,
           shape: s.shape
         }, T = {
           dataId: m.dataId,
           dtype: m.dtype,
           shape: i.shape
         }, U = new C(t, s.shape, i.shape);
-        return r.runWebGLProgram(U, [$, T], v(d.dtype, m.dtype));
-      }), I = G({ inputs: { real: O, imag: y }, backend: r });
-      return r.disposeIntermediateTensorInfo(O), r.disposeIntermediateTensorInfo(y), I;
+        return r.runWebGLProgram(U, [$, T], v(p.dtype, m.dtype));
+      }), I = G({ inputs: { real: y, imag: O }, backend: r });
+      return r.disposeIntermediateTensorInfo(y), r.disposeIntermediateTensorInfo(O), I;
     }
     const c = a || v(s.dtype, i.dtype);
-    if ((s.dtype === "string" || i.dtype === "string" || r.shouldExecuteOnCPU([s, i])) && p != null) {
-      const h = r.texData.get(s.dataId).values, f = r.texData.get(i.dataId).values, O = s.dtype === "string" ? (
+    if ((s.dtype === "string" || i.dtype === "string" || r.shouldExecuteOnCPU([s, i])) && d != null) {
+      const h = r.texData.get(s.dataId).values, f = r.texData.get(i.dataId).values, y = s.dtype === "string" ? (
         // tslint:disable-next-line: no-any
         B(h)
-      ) : h, y = s.dtype === "string" ? (
+      ) : h, O = s.dtype === "string" ? (
         // tslint:disable-next-line: no-any
         B(f)
-      ) : f, [I, S] = p(s.shape, i.shape, O, y, c), d = r.makeTensorInfo(S, c), m = r.texData.get(d.dataId);
-      return m.values = I, d;
+      ) : f, [I, S] = d(s.shape, i.shape, y, O, c), p = r.makeTensorInfo(S, c), m = r.texData.get(p.dataId);
+      return m.values = I, p;
     }
     const x = P().getBool("WEBGL_PACK_BINARY_OPERATIONS") && e != null;
     let b;
@@ -359,10 +359,10 @@ function j({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, support
   };
 }
 export {
-  H as C,
-  K as a,
+  K as C,
+  H as a,
   E as b,
-  j as c,
+  q as c,
   B as f,
   k as g,
   Y as u

package/dist/layers/BaseLayer.js CHANGED Viewed

@@ -1,5 +1,116 @@
-import "../index-iNhkcAEQ.js";
-import { B as a } from "../BaseLayer-BhrMN8JO.js";
+import { T as g, c as p, e as o, i as v } from "../index-CnHyhpKc.js";
+import { v as _ } from "../variable-BGvK-VN3.js";
+class M {
+  parent;
+  config;
+  _variables = /* @__PURE__ */ new Map();
+  _trainable = !0;
+  children = [];
+  constructor(t, r) {
+    this.config = t, this.parent = r, this.parent && this.parent.children.push(this);
+  }
+  getProfiler() {
+    return this.config.layerConfig.profiler;
+  }
+  startMemory() {
+    this.config.layerConfig.profiler?.startMemory();
+  }
+  endMemory(t) {
+    this.config.layerConfig.profiler?.endMemory(t);
+  }
+  addVariable(t, r) {
+    this._variables.set(t, r || null);
+  }
+  get variables() {
+    const t = Array.from(this._variables.values()).filter((e) => e !== null), r = this.children.flatMap((e) => e.variables);
+    return [...t, ...r];
+  }
+  get trainableVariables() {
+    const t = Array.from(this._variables.values()).filter(
+      (e) => e !== null && e.trainable
+    ), r = this.children.flatMap((e) => e.trainableVariables);
+    return [...t, ...r];
+  }
+  get trainable() {
+    return this._trainable;
+  }
+  set trainable(t) {
+    this._trainable = t, this._variables.forEach((r) => {
+      r && (r.trainable = t);
+    }), this.children.forEach((r) => {
+      r.trainable = t;
+    });
+  }
+  getVariable(t) {
+    const r = this._variables.get(t);
+    if (!r)
+      throw new Error(`Variable ${t} not found`);
+    return r;
+  }
+  hasVariable(t) {
+    return this._variables.get(t) !== null;
+  }
+  setVariable(t, r) {
+    if (!this._variables.has(t))
+      throw new Error(`Variable ${t} not found`);
+    this._variables.set(t, r);
+  }
+  saveWeights(t) {
+    this._variables.forEach((r, e) => {
+      r && t.set(e, [r.clone()]);
+    }), this.children.forEach((r) => {
+      r.saveWeights(t);
+    });
+  }
+  loadWeights(t) {
+    this._variables.forEach((r, e) => {
+      const i = t.get(e)?.[0];
+      if (!i)
+        throw new Error(`Weights for ${e} not found`);
+      r ? r.assign(i) : this._variables.set(e, _(i, this._trainable));
+    }), this.children.forEach((r) => {
+      r.loadWeights(t);
+    });
+  }
+  dispose() {
+    this._variables.forEach((t) => {
+      t?.dispose();
+    }), this._variables.clear();
+  }
+  build() {
+  }
+  dropout(t) {
+    return t;
+  }
+  call(t, ...r) {
+    this.build();
+    const e = this.forward(t, ...r);
+    if (t.training && e instanceof g) {
+      const i = this.dropout(e);
+      return i !== e && e.dispose(), i;
+    } else
+      return e;
+  }
+  callCheckpoint(t, ...r) {
+    return this.build(), this.checkpointingFn(t, ...r);
+  }
+  checkpointingFn(t, ...r) {
+    const e = this.trainableVariables, s = p((...a) => {
+      const l = a[a.length - 1], n = a.slice(0, r.length), h = this.forward(t, ...n);
+      return l(n), { value: h, gradFunc: (c, f) => {
+        const u = o().state.activeTape;
+        o().state.activeTape = [];
+        const b = v((...d) => this.forward(t, ...d.slice(0, n.length)))([...f, ...e], c);
+        return o().state.activeTape = u, b;
+      } };
+    })(...r, ...e);
+    if (t.training) {
+      const a = this.dropout(s);
+      return a !== s && s.dispose(), a;
+    } else
+      return s;
+  }
+}
 export {
-  a as default
+  M as default
 };

package/dist/layers/CausalSelfAttention.d.ts CHANGED Viewed

@@ -7,9 +7,8 @@ export type KVCache = {
     cumulativeLength: number;
 };
 export interface AttentionScores {
-    head: number;
-    block: number;
-    attentionOut?: Tensor;
+    meanOfHeads?: boolean;
+    attentionOut?: Tensor[];
 }
 interface AttentionForwardAttributes extends ForwardAttributes {
     attentionScores?: AttentionScores;

package/dist/layers/CausalSelfAttention.js CHANGED Viewed

@@ -1,15 +1,16 @@
-import { attentionMask as f } from "../ops/attentionMask.js";
-import { B as O, v as V } from "../BaseLayer-BhrMN8JO.js";
+import { attentionMask as g } from "../ops/attentionMask.js";
+import O from "./BaseLayer.js";
 import { qkv as P } from "../ops/qkv.js";
-import { rope as b } from "../ops/rope.js";
-import { appendCache as v } from "../ops/appendCache.js";
-import { F as c, t as C } from "../index-iNhkcAEQ.js";
+import { rope as v } from "../ops/rope.js";
+import { appendCache as V } from "../ops/appendCache.js";
+import { H as c, t as C } from "../index-CnHyhpKc.js";
 import { fusedSoftmax as T } from "../ops/fusedSoftmax.js";
-import { d as y } from "../tfjs_backend-NucKez4s.js";
-import { r as k, d as L } from "../dropout-kbDY39Ci.js";
-import { r as N } from "../reshape-DxTPgnwL.js";
-import { m as R } from "../mat_mul-D0SifYfJ.js";
-class W extends O {
+import { d as y } from "../tfjs_backend-DX9yVvwk.js";
+import { v as b } from "../variable-BGvK-VN3.js";
+import { r as k, d as L } from "../dropout-lQm_YyX3.js";
+import { r as N } from "../reshape-CTIbqjwm.js";
+import { m as R } from "../mat_mul-DeGU1U_C.js";
+class $ extends O {
   divisor;
   index;
   units;
@@ -22,14 +23,14 @@ class W extends O {
   build() {
     this.hasVariable(this.ATTN) === !1 && this.setVariable(
       this.ATTN,
-      V(
+      b(
         k([this.config.gpt.nEmbed, this.units], 0, 0.02),
         !0
         //`block_${this.index}_attn_cAttn_kernel`
       )
     ), this.hasVariable(this.PROJ) === !1 && this.setVariable(
       this.PROJ,
-      V(
+      b(
         k([this.projUnits, this.config.gpt.nEmbed], 0, 0.02),
         !0
         //`block_${this.index}_attn_cProj_kernel`
@@ -37,12 +38,12 @@ class W extends O {
     );
   }
   getAttentionScores(t, i, s, o) {
-    const e = f(t, i, this.divisor), n = T(e, s ? this.config.gpt.dropout : 0, o);
+    const e = g(t, i, this.divisor), n = T(e, s ? this.config.gpt.dropout : 0, o);
     return e.dispose(), n;
   }
   // Attention with optional past. If pastLen > 0 and T_cur == 1, no mask needed.
   getAttentionScoresWithPast(t, i, s) {
-    const o = f(t, i, this.divisor, s), e = T(o, 0, 0);
+    const o = g(t, i, this.divisor, s), e = T(o, 0, 0);
     return o.dispose(), e;
   }
   getQKV(t) {
@@ -53,33 +54,33 @@ class W extends O {
     return n.dispose(), e.dispose(), p;
   }
   updateCache(t, i, s) {
-    const o = this.config.gpt.blockSize, e = t.shape[2], n = s.length || 0, p = v(t, o, n, s.k);
+    const o = this.config.gpt.blockSize, e = t.shape[2], n = s.length || 0, p = V(t, o, n, s.k);
     t.dispose(), s.k && s.k.dispose();
-    const r = v(i, o, n, s.v);
+    const a = V(i, o, n, s.v);
     i.dispose(), s.v && s.v.dispose();
     const d = Math.min(n + e, o), h = s.cumulativeLength + e;
-    s.length = d, s.cumulativeLength = h, s.k = c(p), s.v = c(r);
+    s.length = d, s.cumulativeLength = h, s.k = c(p), s.v = c(a);
   }
   forward(t, i) {
     return C(() => {
       this.startMemory();
-      const [s, o, e] = this.getQKV(i), n = t.pastKV ? t.pastKV.cumulativeLength : 0, p = this.config.layerConfig.ropeCache, r = p ? b(s, p, n) : s, d = p ? b(o, p, n) : o;
+      const [s, o, e] = this.getQKV(i), n = t.pastKV ? t.pastKV.cumulativeLength : 0, p = this.config.layerConfig.ropeCache, a = p ? v(s, p, n) : s, d = p ? v(o, p, n) : o;
       p && (s.dispose(), o.dispose());
       const h = t.pastKV ? t.pastKV.length : 0;
       t.pastKV && !t.training && this.updateCache(d, e, t.pastKV);
-      const u = t.pastKV?.k ? t.pastKV.k : d, l = t.pastKV?.v ? t.pastKV.v : e;
-      let a;
-      h > 0 ? a = this.getAttentionScoresWithPast(r, u, h) : a = this.getAttentionScores(r, u, t.training, t.seed || 0), r.dispose(), t.pastKV || u.dispose();
-      const m = R(a, l), g = t.attentionScores !== void 0 && t.attentionScores.block === this.index;
-      g || a.dispose(), t.pastKV || l.dispose();
-      const S = this.getOutputProjection(m);
-      if (m.dispose(), g && t.attentionScores && t.attentionScores.head >= 0 && t.attentionScores.head < this.config.gpt.nHead) {
-        const A = a.shape[0], K = a.shape[2];
-        t.attentionScores.attentionOut = c(
-          a.slice([0, t.attentionScores.head, 0, 0], [-1, 1, -1, -1]).reshape([A, K, -1])
+      const u = t.pastKV?.k ? t.pastKV.k : d, m = t.pastKV?.v ? t.pastKV.v : e;
+      let r;
+      h > 0 ? r = this.getAttentionScoresWithPast(a, u, h) : r = this.getAttentionScores(a, u, t.training, t.seed || 0), a.dispose(), t.pastKV || u.dispose();
+      const l = R(r, m), f = t.attentionScores !== void 0 && t.attentionScores.attentionOut !== void 0;
+      f || r.dispose(), t.pastKV || m.dispose();
+      const A = this.getOutputProjection(l);
+      if (l.dispose(), f && t.attentionScores && t.attentionScores.attentionOut !== void 0) {
+        const K = r.shape[1], S = r.shape[2];
+        t.attentionScores.attentionOut?.push(
+          c(r.slice([0, 0, 0, 0], [1, -1, -1, -1]).reshape([K, S, -1]))
         );
       }
-      return this.endMemory("CausalSelfAttention"), S;
+      return this.endMemory("CausalSelfAttention"), A;
     });
   }
   dropout(t) {
@@ -91,5 +92,5 @@ class W extends O {
   }
 }
 export {
-  W as default
+  $ as default
 };

package/dist/layers/MLP.js CHANGED Viewed

@@ -1,10 +1,11 @@
-import { t as l } from "../index-iNhkcAEQ.js";
-import { B as u, v as o } from "../BaseLayer-BhrMN8JO.js";
+import { t as l } from "../index-CnHyhpKc.js";
+import u from "./BaseLayer.js";
 import { matMulGelu as M } from "../ops/matMulGelu.js";
-import { r as h, d as c } from "../dropout-kbDY39Ci.js";
-import { r as d } from "../reshape-DxTPgnwL.js";
-import { m as f } from "../mat_mul-D0SifYfJ.js";
-class O extends u {
+import { v as o } from "../variable-BGvK-VN3.js";
+import { r as h, d as f } from "../dropout-lQm_YyX3.js";
+import { r as d } from "../reshape-CTIbqjwm.js";
+import { m as c } from "../mat_mul-DeGU1U_C.js";
+class V extends u {
   index;
   hiddenUnits;
   MLPHIDDEN;
@@ -36,7 +37,7 @@ class O extends u {
   forward(i, t) {
     return l(() => {
       this.startMemory();
-      const [s, r, e] = t.shape, n = d(t, [s * r, e]), a = M(n, this.getVariable(this.MLPHIDDEN)), p = f(a, this.getVariable(this.MLPOUT));
+      const [s, r, e] = t.shape, n = d(t, [s * r, e]), a = M(n, this.getVariable(this.MLPHIDDEN)), p = c(a, this.getVariable(this.MLPOUT));
       a.dispose();
       const m = d(p, [s, r, e]);
       return this.endMemory("MLP"), m;
@@ -44,12 +45,12 @@ class O extends u {
   }
   dropout(i) {
     if (this.config.gpt.dropout > 0) {
-      const t = c(i, this.config.gpt.dropout);
+      const t = f(i, this.config.gpt.dropout);
       return i.dispose(), t;
     }
     return i;
   }
 }
 export {
-  O as default
+  V as default
 };

package/dist/layers/RMSNorm.js CHANGED Viewed

@@ -1,20 +1,21 @@
-import { t as e } from "../index-iNhkcAEQ.js";
-import { B as o, v as a } from "../BaseLayer-BhrMN8JO.js";
-import { normRMS as i } from "../ops/normRMS.js";
-import { o as M } from "../ones-BIeFnPHR.js";
-class l extends o {
+import { t as s } from "../index-CnHyhpKc.js";
+import e from "./BaseLayer.js";
+import { normRMS as a } from "../ops/normRMS.js";
+import { v as i } from "../variable-BGvK-VN3.js";
+import { o as m } from "../ones-CDWGzVnm.js";
+class f extends e {
   GAMMA;
-  constructor(r, t = "", s) {
-    super(r, s), this.GAMMA = t, this.addVariable(this.GAMMA, a(M([r.gpt.nEmbed]), !0, this.GAMMA, "float32"));
+  constructor(r, t = "", o) {
+    super(r, o), this.GAMMA = t, this.addVariable(this.GAMMA, i(m([r.gpt.nEmbed]), !0, this.GAMMA, "float32"));
   }
   forward(r, t) {
-    return e(() => {
+    return s(() => {
       this.startMemory();
-      const s = i(t, this.getVariable(this.GAMMA));
-      return this.endMemory("RMSNorm"), s;
+      const o = a(t, this.getVariable(this.GAMMA));
+      return this.endMemory("RMSNorm"), o;
     });
   }
 }
 export {
-  l as default
+  f as default
 };

package/dist/layers/RoPECache.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { o as c, i as f, E as l, Q as m, f as n, U as u, t as p, F as a } from "../index-iNhkcAEQ.js";
-import { c as d, s as C } from "../sin-BOX-JVAj.js";
-import { r as h } from "../range-BsFU-SNG.js";
+import { o as c, j as f, E as l, V as m, f as n, W as u, t as p, H as a } from "../index-CnHyhpKc.js";
+import { c as d, s as C } from "../sin-HzioENy_.js";
+import { r as h } from "../range-CkOJ7090.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/layers/TiedEmbedding.js CHANGED Viewed

@@ -1,8 +1,10 @@
-import { T as a } from "../TiedEmbedding-DsDRvLB0.js";
-import "../index-iNhkcAEQ.js";
-import "../tfjs_backend-NucKez4s.js";
-import "../BaseLayer-BhrMN8JO.js";
-import "../gather-Bxe1Qip8.js";
+import "../random_width-DI2h9CMs.js";
+import "../index-CnHyhpKc.js";
+import { T as f } from "../TiedEmbedding-DORsPlNL.js";
+import "../tfjs_backend-DX9yVvwk.js";
+import "./BaseLayer.js";
+import "../variable-BGvK-VN3.js";
+import "../gather-BWyutxwi.js";
 export {
-  a as default
+  f as default
 };

package/dist/layers/TransformerBlock.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import l from "./CausalSelfAttention.js";
 import r from "./MLP.js";
 import o from "./RMSNorm.js";
-import { B as d } from "../BaseLayer-BhrMN8JO.js";
-import { t as p } from "../index-iNhkcAEQ.js";
+import d from "./BaseLayer.js";
+import { t as p } from "../index-CnHyhpKc.js";
 class k extends d {
   ln1;
   attn;