@genai-fi/nanogpt 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/NanoGPTModel.js +72 -75
- package/dist/layers/CausalSelfAttention.js +37 -37
- package/dist/ops/appendCache.d.ts +1 -1
- package/dist/ops/appendCache.js +10 -4
- package/dist/ops/cpu/appendCache.d.ts +1 -2
- package/dist/ops/cpu/appendCache.js +15 -20
- package/dist/ops/cpu/attentionMask.js +10 -10
- package/dist/ops/webgl/appendCache.js +14 -13
- package/package.json +1 -1
package/dist/NanoGPTModel.js
CHANGED
@@ -1,17 +1,17 @@
 import { defaultConfig as x } from "./config.js";
 import W from "./layers/TransformerBlock.js";
-import { E as F, D as P, T as q, r as …
-import …
+import { E as F, D as P, T as q, r as T, p as D } from "./TiedEmbedding-CnJ1bx4q.js";
+import K from "./layers/RoPECache.js";
 import N from "./layers/RMSNorm.js";
 import { estimateParameterCount as R } from "./utilities/parameters.js";
 import { createSoftmaxCrossEntropyWithGrad as A } from "./training/sparseCrossEntropy.js";
 import B from "./layers/BaseLayer.js";
-import { o as …
+import { o as $, h as E, p as G, E as v, a6 as O, a7 as j, a8 as Q, t as w, a5 as V, f as C } from "./index-C4JCoBvj.js";
 import { r as _ } from "./reshape-Boe4DuIO.js";
-import { r as …
-import { e as …
-import { g as …
-import { s as …
+import { r as X } from "./range-9AzeApCc.js";
+import { e as H } from "./tfjs_backend-Cug-PH75.js";
+import { g as J } from "./gather-ZYRWhmXR.js";
+import { s as U } from "./softmax-Cujsg4ay.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -28,13 +28,13 @@ import { s as J } from "./softmax-Cujsg4ay.js";
  * limitations under the License.
  * =============================================================================
  */
-function Y(…
-let e = E(…
+function Y(c, t) {
+  let e = E(c, "a", "mod"), o = E(t, "b", "mod");
   [e, o] = G(e, o);
-const …
-return v.runKernel(O, …
+  const i = { a: e, b: o };
+  return v.runKernel(O, i);
 }
-const Z = /* @__PURE__ */ …
+const Z = /* @__PURE__ */ $({ mod_: Y });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -51,17 +51,17 @@ const Z = /* @__PURE__ */ y({ mod_: Y });
  * limitations under the License.
  * =============================================================================
  */
-function tt(…
-const …
+function tt(c, t, e, o = !1) {
+  const i = E(c, "logits", "multinomial"), s = i.size, l = i.rank;
   if (s < 2)
     throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
-if (…
-throw new Error(`Rank of probabilities must be 1 or 2, but is ${…
+  if (l > 2)
+    throw new Error(`Rank of probabilities must be 1 or 2, but is ${l}`);
   e = e || Math.random();
-const …
-return …
+  const n = { logits: l === 1 ? _(i, [1, -1]) : i }, h = { numSamples: t, seed: e, normalized: o }, a = v.runKernel(j, n, h);
+  return l === 1 ? _(a, [a.size]) : a;
 }
-const M = /* @__PURE__ */ …
+const M = /* @__PURE__ */ $({ multinomial_: tt });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -78,19 +78,19 @@ const M = /* @__PURE__ */ y({ multinomial_: tt });
  * limitations under the License.
  * =============================================================================
  */
-function et(…
-const o = E(…
+function et(c, t = 1, e = !0) {
+  const o = E(c, "x", "topk");
   if (o.rank === 0)
     throw new Error("topk() expects the input to be of rank 1 or higher");
-const …
+  const i = o.shape[o.shape.length - 1];
   if (t < 0)
     throw new Error(`'k' passed to topk() must be >= 0 but got ${t}`);
-if (t > …
-throw new Error(`'k' passed to topk() must be <= the last dimension (${…
-const s = { x: o }, …
-return { values: …
+  if (t > i)
+    throw new Error(`'k' passed to topk() must be <= the last dimension (${i}) but got ${t}`);
+  const s = { x: o }, l = { k: t, sorted: e }, [r, n] = v.runKernel(Q, s, l);
+  return { values: r, indices: n };
 }
-const ot = /* @__PURE__ */ …
+const ot = /* @__PURE__ */ $({ topk_: et });
 /**
  * @license
  * Copyright 2018 Google LLC
@@ -100,11 +100,11 @@ const ot = /* @__PURE__ */ y({ topk_: et });
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function st(…
-return new P(…
+function st(c) {
+  return new P(c);
 }
-function …
-return new F(…
+function it(c) {
+  return new F(c);
 }
 class wt extends B {
   wte;
@@ -124,12 +124,12 @@ class wt extends B {
       vocabSize: this.config.gpt.vocabSize,
       embedDim: this.config.gpt.nEmbed,
       name: "token_embedding"
-}), this.config.gpt.useRope === !1 ? this.wpe = …
+    }), this.config.gpt.useRope === !1 ? this.wpe = it({
       inputDim: this.config.gpt.blockSize,
       outputDim: this.config.gpt.nEmbed,
       name: "positional_embedding",
-embeddingsInitializer: …
-}) : (this.ropeCache = new …
+      embeddingsInitializer: T({ mean: 0, stddev: 0.02 })
+    }) : (this.ropeCache = new K(this.config.gpt), this.config.layerConfig.ropeCache = this.ropeCache), this.drop = st({ rate: this.config.gpt.dropout }), this.blocks = [];
     for (let e = 0; e < this.config.gpt.nLayer; e++)
       this.blocks.push(new W(e, this.config));
     this.lnF = new N(this.config, 1e-8, "final_rms_norm");
@@ -163,12 +163,12 @@ class wt extends B {
   }
   inputPhase(t, e, o = !1) {
     return w(() => {
-const …
+      const i = this.wte.embed(t);
       if (this.config.gpt.useRope === !1) {
-const [, s] = t.shape, …
-return this.drop.apply(…
+        const [, s] = t.shape, l = this.config.gpt.blockSize, r = X(0, s, 1, "int32"), n = Z(V(r, C(e, "int32")), C(l, "int32")), h = this.wpe.apply(n), a = i.add(h);
+        return this.drop.apply(a, { training: o });
       } else
-return this.drop.apply(…
+        return this.drop.apply(i, { training: o });
     });
   }
   setSkipMask(t) {
@@ -209,67 +209,64 @@ class wt extends B {
     return w(() => {
       if (t.length === 0)
        throw new Error("No attentions for rollout");
-const [e, o, …
-for (const …
-const […
-if (…
+      const [e, o, i] = t[0].shape;
+      for (const n of t) {
+        const [h, a, p] = n.shape;
+        if (h !== e || a !== o || p !== i)
           throw new Error(
-`Inconsistent attention shapes in rollout: expected [${e},${o},${…
+            `Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${h},${a},${p}]`
           );
       }
-
-
-
-
-
-r = i.div(i.sum(-1, !0)).matMul(r);
-}
-return r;
+      const s = t.map((n) => n.slice([0, 0, 0], [e, o, o])), l = H(o, o).expandDims(0);
+      let r = l.tile([e, 1, 1]);
+      for (const n of s) {
+        const h = n.add(l);
+        r = h.div(h.sum(-1, !0)).matMul(r);
       }
-
+      return r;
     });
   }
-forward(t, e, o = !1, …
+  forward(t, e, o = !1, i = !1, s) {
     return this.validateInput(t), w(() => {
       this.startMemory();
-const …
-let …
-const …
+      const l = s?.[0]?.length ?? 0;
+      let r = this.inputPhase(t, l, o);
+      const n = [];
       if (s && s.length !== this.blocks.length)
         throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
-for (let …
-const u = …
+      for (let g = 0; g < this.blocks.length; g++) {
+        const u = r, m = this.blocks[g], {
           output: b,
           attention: k,
           cache: f
-} = m.call(…
-
+        } = m.call(r, o, i, s ? s[g] : void 0);
+        r = b, u.dispose(), i && k && n.push(k), s && f ? (s[g]?.k.dispose(), s[g]?.v.dispose(), s[g] = f) : f && (f.k.dispose(), f.v.dispose());
       }
+      let h;
+      i && n.length > 0 && (h = this.computeAttentionRollout(n)), r = this.lnF.apply(r);
+      const a = this.wte.project(r);
       let p;
-
-const l = this.wte.project(a);
-let g;
-return e && (g = this.calculateLoss(l, e)), this.endMemory("Forward"), { logits: l, loss: g, attention: n ? p : void 0 };
+      return e && (p = this.calculateLoss(a, e)), this.endMemory("Forward"), { logits: a, loss: p, attention: i ? h : void 0 };
     });
   }
   generate(t, e, o) {
-const …
+    const i = o?.temperature ?? 1, s = o?.topK, l = o?.usePadding ?? !1, r = o?.includeAttention ?? !1;
     return w(() => {
-const …
-[0, …
-[…
-), …
+      const n = t, h = n.shape[1], a = h <= this.config.gpt.blockSize ? n : n.slice(
+        [0, h - this.config.gpt.blockSize],
+        [n.shape[0], this.config.gpt.blockSize]
+      ), p = l ? this.config.gpt.blockSize - a.shape[1] : 0, g = p > 0 ? D(a, [
        [0, 0],
-[0, …
-]) : …
+        [0, p]
+      ]) : a, { logits: u, attention: m } = this.forward(g, void 0, !1, r, e), b = u.shape[1] - 1 - p, k = u.slice([0, b, 0], [u.shape[0], 1, u.shape[2]]), f = m ? m.slice([0, b, 0], [m.shape[0], 1, m.shape[2]]) : void 0, y = k.div(i);
       let d;
       if (s) {
-const { values: S, indices: I } = ot(…
-d = …
+        const { values: S, indices: I } = ot(y, s), L = M(S.squeeze([1]), 1);
+        d = J(I.squeeze([1]), L, 1);
       } else
-d = M(…
+        d = M(y.squeeze([1]), 1);
       let z;
-return o?.includeProbabilities && (z = …
+      return o?.includeProbabilities && (z = U(y.squeeze([1]))), d = d.reshape([1, 1]), { output: d, attention: f?.squeeze([1]), probabilities: z };
     });
   }
   getNumParams() {
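The restructured rollout above replaces in-place accumulation: the new code slices each per-layer attention map to [B, T, T], adds the identity (the residual path), row-normalizes, and multiplies the maps together, seeding the product with an identity matrix. This matches the attention-rollout method of Abnar & Zuidema (2020). A de-minified TypeScript sketch of what the new code appears to compute (identifier names are mine, not the package's):

import * as tf from "@tensorflow/tfjs-core";

// Attention rollout: multiply residual-augmented, row-normalized attention
// maps across layers. `attentions` holds one [B, T, T] map per layer.
function attentionRollout(attentions: tf.Tensor3D[]): tf.Tensor {
  const [batch, rows] = attentions[0].shape;
  const eye = tf.eye(rows).expandDims(0); // [1, T, T] identity = residual path
  let rollout = eye.tile([batch, 1, 1]);  // seed the product with I
  for (const layerAttn of attentions) {
    const withResidual = layerAttn.add(eye);                         // A + I
    const normalized = withResidual.div(withResidual.sum(-1, true)); // row-normalize
    rollout = normalized.matMul(rollout);                            // accumulate across layers
  }
  return rollout;
}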
package/dist/layers/CausalSelfAttention.js
CHANGED
@@ -7,8 +7,8 @@ import { D as z, F as S, t as $, c as L, e as j, H as O } from "../index-C4JCoBv…
 import { fusedSoftmax as _ } from "../ops/fusedSoftmax.js";
 import { l as W, w as M, d as x } from "../tfjs_backend-Cug-PH75.js";
 import { o as N } from "../ones-Bf3YR48P.js";
+import { v as A } from "../variable-LJT9Ld63.js";
 import { z as q } from "../zeros-dnQxFgAD.js";
-import { v as k } from "../variable-LJT9Ld63.js";
 import { r as C, d as I } from "../dropout-DfDdklfL.js";
 import { r as B } from "../reshape-Boe4DuIO.js";
 import { m as F } from "../mat_mul-415y5Qn2.js";
@@ -24,15 +24,15 @@ class nt extends T {
   projUnits;
   constructor(t, s) {
     super(s), this.index = t, this.units = s.gpt.nEmbed * 3, this.projUnits = s.gpt.nEmbed, this.bias = W.bandPart(N([s.gpt.blockSize, s.gpt.blockSize]), -1, 0).cast("bool"), this.divisor = 1 / Math.sqrt(s.gpt.nEmbed / s.gpt.nHead);
-const …
-this.maskInf = M(this.bias, …
+    const o = q([s.gpt.blockSize, s.gpt.blockSize]), e = z([s.gpt.blockSize, s.gpt.blockSize], Number.NEGATIVE_INFINITY);
+    this.maskInf = M(this.bias, o, e);
   }
   build() {
-this.cAttn === null && (this.cAttn = …
+    this.cAttn === null && (this.cAttn = A(
       C([this.config.gpt.nEmbed, this.units], 0, 0.02),
       !0
       //`block_${this.index}_attn_cAttn_kernel`
-)), this.cProj === null && (this.cProj = …
+    )), this.cProj === null && (this.cProj = A(
       C([this.projUnits, this.config.gpt.nEmbed], 0, 0.02),
       !0
       //`block_${this.index}_attn_cProj_kernel`
@@ -53,74 +53,74 @@ class nt extends T {
     t.set(`block_${this.index}_cAttn`, this.cAttn ? [this.cAttn.clone()] : []), t.set(`block_${this.index}_cProj`, this.cProj ? [this.cProj.clone()] : []);
   }
   loadWeights(t) {
-const s = t.get(`block_${this.index}_cAttn`)?.[0], …
+    const s = t.get(`block_${this.index}_cAttn`)?.[0], o = t.get(`block_${this.index}_cProj`)?.[0];
     if (!s) throw new Error(`Weights for block_${this.index}_cAttn not found`);
-if (!…
-this.cAttn ? this.cAttn.assign(s) : this.cAttn = …
+    if (!o) throw new Error(`Weights for block_${this.index}_cProj not found`);
+    this.cAttn ? this.cAttn.assign(s) : this.cAttn = A(s, !0), this.cProj ? this.cProj.assign(o) : this.cProj = A(o, !0);
   }
-getAttentionScores(t, s, …
-const …
-return _(…
+  getAttentionScores(t, s, o, e) {
+    const i = P(t, s, this.divisor, this.maskInf);
+    return _(i, o ? this.config.gpt.dropout : 0, e);
   }
   // Attention with optional past. If pastLen > 0 and T_cur == 1, no mask needed.
-getAttentionScoresWithPast(t, s, …
-const …
-return _(…
+  getAttentionScoresWithPast(t, s, o) {
+    const e = P(t, s, this.divisor, void 0, o);
+    return _(e, 0, 0);
   }
   getQKV(t) {
     return y(t, this.cAttn, this.config.gpt.nHead);
   }
   getOutputProjection(t) {
-const s = t.shape[0], …
+    const s = t.shape[0], o = t.shape[2], e = this.config.gpt.nEmbed, i = t.transpose([0, 2, 1, 3]), n = B(i, [s, o, e]);
     return x(n, this.cProj);
   }
-updateCache(t, s, e) {
-const i = this.config.gpt.blockSize, …
+  updateCache(t, s, o, e) {
+    const i = this.config.gpt.blockSize, n = t.shape[2], r = e?.length || 0, a = o ? t : E(t, i, r, e?.k), p = o ? s : E(s, i, r, e?.v);
     return {
-k: S(…
-v: S(…
-length: …
-cumulativeLength: e ? e.cumulativeLength + …
+      k: S(a),
+      v: S(p),
+      length: Math.min(r + n, i),
+      cumulativeLength: e ? e.cumulativeLength + n : n
     };
   }
-forward(t, s = !1, …
+  forward(t, s = !1, o, e = !1, i) {
     return $(() => {
       this.startMemory();
-const [n, r, a] = this.getQKV(t), p = …
+      const [n, r, a] = this.getQKV(t), p = i ? i.cumulativeLength : 0, c = this.config.layerConfig.ropeCache, u = c ? w(n, c, p) : n, f = c ? w(r, c, p) : r;
       c && (n.dispose(), r.dispose());
-const g = …
-
+      const g = i ? i.length : 0, d = this.updateCache(f, a, s, i), l = d.k, m = d.v;
+      i && (f.dispose(), a.dispose());
       let h;
-g > 0 ? h = this.getAttentionScoresWithPast(u, l, g) : h = this.getAttentionScores(u, l, s, …
+      g > 0 ? h = this.getAttentionScoresWithPast(u, l, g) : h = this.getAttentionScores(u, l, s, o), u.dispose(), s && l.dispose();
       const b = F(h, m);
-
-const …
+      e || h.dispose(), s && m.dispose();
+      const k = this.getOutputProjection(b);
       b.dispose();
-const v = …
-return this.endMemory("CausalSelfAttention"), { output: …
+      const v = e ? h.mean(1) : void 0;
+      return this.endMemory("CausalSelfAttention"), { output: k, attention: v, presentKV: s ? void 0 : d };
     });
   }
-call(t, s = !1, …
-if (…
+  call(t, s = !1, o = !1, e) {
+    if (e && !this.config.gpt.useRope)
       throw new Error("Cannot use pastKV without RoPE enabled");
-if (s && …
+    if (s && e)
       throw new Error("Cannot use pastKV during training");
     if (t.shape.length !== 3)
       throw new Error(`Input tensor must be rank 3 [B, T, C], got shape ${t.shape}`);
     if (t.shape[2] !== this.config.gpt.nEmbed)
       throw new Error(`Input tensor last dimension must be ${this.config.gpt.nEmbed}, got ${t.shape[2]}`);
     this.build();
-const …
+    const i = Math.random() * 1e9;
     if (s && this.config.layerConfig.checkpointAttention) {
       const r = L(
         // @ts-expect-error Invalid params
         (a, p, c, u) => {
-const f = this.forward(a, !0, …
+          const f = this.forward(a, !0, i);
           u([a]);
           const g = (d, l) => {
             const [m] = l, h = j().state.activeTape;
             j().state.activeTape = [];
-const b = O((…
+            const b = O((k, v, R) => this.forward(k, !0, i).output)([m, p, c], d);
             return j().state.activeTape = h, b;
           };
           return { value: f.output, gradFunc: g };
@@ -132,7 +132,7 @@ class nt extends T {
     } else
       return { output: r };
   } else {
-const n = this.forward(t, s, o, e…
+    const n = this.forward(t, s, i, o, e);
     if (this.config.gpt.dropout > 0) {
       const r = I(n.output, this.config.gpt.dropout);
       return n.output.dispose(), { output: r, attention: n.attention, presentKV: n.presentKV };
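Two threads run through the CausalSelfAttention changes: updateCache gained an explicit training flag and now derives the insert position from the cache's own bookkeeping, and the cache record carries both a window-clamped length and an ever-growing cumulativeLength that feeds the RoPE rotation offset. A sketch of that record and the bookkeeping rule, with assumed names (the package ships this minified):

import { Tensor } from "@tensorflow/tfjs-core";

// Inferred shape of the per-block KV cache (my naming). `length` is clamped
// to the attention window, while `cumulativeLength` keeps growing and is
// used as the RoPE rotation offset.
interface KVCache {
  k: Tensor;                // keys   [B, nHead, T_window, headDim]
  v: Tensor;                // values [B, nHead, T_window, headDim]
  length: number;           // valid timesteps currently held in the window
  cumulativeLength: number; // total timesteps ever appended
}

// Bookkeeping rule matching `length: Math.min(r + n, i)` and
// `cumulativeLength: e ? e.cumulativeLength + n : n` in the diff above.
function nextCounters(prev: KVCache | undefined, newSteps: number, blockSize: number) {
  return {
    length: Math.min((prev?.length ?? 0) + newSteps, blockSize),
    cumulativeLength: (prev?.cumulativeLength ?? 0) + newSteps,
  };
}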
package/dist/ops/appendCache.d.ts
CHANGED
@@ -1,2 +1,2 @@
 import { Tensor } from '@tensorflow/tfjs-core';
-export declare function appendCache(…
+export declare function appendCache(item: Tensor, maxSize: number, pastLen: number, cache?: Tensor): Tensor;
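The public op signature changed shape: the incoming item now comes first, the cache is optional, and the caller passes pastLen explicitly. A hypothetical call sequence against the new declaration (the deep-import path and the shape comments are assumptions, not documented API):

import * as tf from "@tensorflow/tfjs-core";
// Hypothetical deep import; the package may re-export this elsewhere.
import { appendCache } from "@genai-fi/nanogpt/dist/ops/appendCache.js";

const blockSize = 128;              // attention window, i.e. maxSize
const k0 = tf.zeros([1, 4, 1, 64]); // first step's keys: [B, H, T=1, headDim]

// First step: no cache yet, so the op zero-pads k0 along time to maxSize.
let kCache = appendCache(k0, blockSize, 0);

// Later steps: pass the running cache and the count of already-valid timesteps.
const k1 = tf.zeros([1, 4, 1, 64]);
kCache = appendCache(k1, blockSize, 1, kCache);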
package/dist/ops/appendCache.js
CHANGED
@@ -1,9 +1,15 @@
-import { e as …
+import { e as a } from "../index-C4JCoBvj.js";
 import "./cpu/appendCache.js";
 import "./webgl/appendCache.js";
-
-
+import { z as s } from "../zeros-dnQxFgAD.js";
+import { c } from "../concat-CuRsVY-K.js";
+function i(r, p, n, o) {
+  if (!o) {
+    const e = r.shape[2];
+    return c([r, s([r.shape[0], r.shape[1], p - e, r.shape[3]])], 2);
+  }
+  return a().runKernel("AppendCache", { cache: o, item: r }, { maxSize: p, pastLen: n });
 }
 export {
-
+  i as appendCache
 };
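Worth noting in the façade above: when no cache tensor is supplied yet, the op never reaches the registered kernel; it simply right-pads the item with zeros along the time axis to the full window. A standalone re-implementation of that branch under those assumed semantics:

import * as tf from "@tensorflow/tfjs-core";

// Re-implementation of the no-cache branch, under the assumed [B, H, T, D]
// layout: right-pad the first item with zeros out to the window size.
function seedCache(item: tf.Tensor, maxSize: number): tf.Tensor {
  const [b, h, t, d] = item.shape;
  const pad = tf.zeros([b, h, maxSize - t, d]); // empty future slots
  return tf.concat([item, pad], 2);             // concatenate on the time axis
}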
package/dist/ops/cpu/appendCache.d.ts
CHANGED
@@ -1,2 +1 @@
-
-export declare function appendCache(cache: Tensor, item: Tensor, maxSize: number): Tensor;
+export {};
package/dist/ops/cpu/appendCache.js
CHANGED
@@ -1,28 +1,23 @@
-import { r as …
-import { c as …
-function …
-const { cache: …
-if (…
-const …
-return …
+import { r as d } from "../../index-C4JCoBvj.js";
+import { c as h } from "../../concat-CuRsVY-K.js";
+function u(p) {
+  const { cache: n, item: s } = p.inputs, { maxSize: r, pastLen: c } = p.attrs, t = n.shape[0], o = n.shape[1], a = n.shape[3], e = s.shape[2];
+  if (c + e <= r) {
+    const f = n.slice([0, 0, 0, 0], [t, o, c, a]), m = n.slice([0, 0, c + e, 0], [t, o, r - c - e, a]), i = e < e ? s.slice([0, 0, 0, 0], [t, o, e, a]) : s, k = h([f, i, m], 2);
+    return f.dispose(), m.dispose(), i !== s && i.dispose(), k;
   }
-
+  const l = n.slice([0, 0, e, 0], [t, o, r - e, a]), C = h([l, s], 2);
+  return l.dispose(), C;
 }
-const …
+const w = {
   kernelName: "AppendCache",
   backendName: "cpu",
-kernelFunc: …
+  kernelFunc: u
 };
-
-const …
+d(w);
+const N = {
   kernelName: "AppendCache",
   backendName: "tensorflow",
-kernelFunc: …
-};
-a(C);
-function N(n, c, t) {
-return m().runKernel("AppendCache", { cache: n, item: c }, { maxSize: t });
-}
-export {
-N as appendCache
+  kernelFunc: u
 };
+d(N);
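The CPU/Node kernel now distinguishes two regimes: while pastLen + itemLen still fits in the window it splices the item in at position pastLen and keeps the tail; once the window is full it drops the oldest itemLen timesteps and appends at the end. (As published, the `e < e` guard in the in-window branch is always false, so the item is never re-sliced there.) A de-minified sketch with my own names:

import * as tf from "@tensorflow/tfjs-core";

// Assumed semantics of the CPU AppendCache kernel.
// cache: [B, H, maxSize, D]; item: [B, H, itemLen, D].
function appendCacheCpu(cache: tf.Tensor, item: tf.Tensor, maxSize: number, pastLen: number): tf.Tensor {
  const [b, h, , d] = cache.shape;
  const itemLen = item.shape[2];
  if (pastLen + itemLen <= maxSize) {
    // Still room: overwrite the slots starting at pastLen, keep the tail.
    const head = cache.slice([0, 0, 0, 0], [b, h, pastLen, d]);
    const tail = cache.slice([0, 0, pastLen + itemLen, 0], [b, h, maxSize - pastLen - itemLen, d]);
    return tf.concat([head, item, tail], 2);
  }
  // Window full: shift left by itemLen and append the new timesteps.
  const kept = cache.slice([0, 0, itemLen, 0], [b, h, maxSize - itemLen, d]);
  return tf.concat([kept, item], 2);
}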
package/dist/ops/cpu/attentionMask.js
CHANGED
@@ -1,22 +1,22 @@
-import { r as o, f as …
-import { m as …
+import { r as o, f as k } from "../../index-C4JCoBvj.js";
+import { m as d } from "../../mat_mul-415y5Qn2.js";
 function r(t) {
-const { q: e, k: …
-if (…
-const …
-return a.add(…
+  const { q: e, k: n, mask: s } = t.inputs, { divisor: c } = t.attrs, m = e.shape[2], i = n.shape[2], a = d(e, n, !1, !0).mul(k(c));
+  if (s) {
+    const l = s.slice([0, 0], [m, i]).expandDims(0).expandDims(0);
+    return a.add(l);
   }
   return a;
 }
-const …
+const u = {
   kernelName: "AttentionMask",
   backendName: "cpu",
   kernelFunc: r
 };
-o(…
-const …
+o(u);
+const f = {
   kernelName: "AttentionMask",
   backendName: "tensorflow",
   kernelFunc: r
 };
-o(…
+o(f);
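The AttentionMask kernel's behavior is unchanged; this release only renames its minified bindings. For reference, the op computes scaled q·kᵀ scores and, when a mask is present, adds a [T_q, T_kv] slice of it broadcast over batch and heads. A sketch with assumed names:

import * as tf from "@tensorflow/tfjs-core";

// Assumed semantics of the AttentionMask op: scores = (q @ kᵀ) * divisor,
// plus an optional additive causal mask sliced to the current sequence sizes.
function attentionMask(q: tf.Tensor, k: tf.Tensor, divisor: number, mask?: tf.Tensor): tf.Tensor {
  const scores = tf.matMul(q, k, false, true).mul(tf.scalar(divisor));
  if (!mask) return scores;
  const sliced = mask
    .slice([0, 0], [q.shape[2], k.shape[2]]) // [T_q, T_kv]
    .expandDims(0)
    .expandDims(0);                          // broadcast over [B, H]
  return scores.add(sliced);
}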
package/dist/ops/webgl/appendCache.js
CHANGED
@@ -1,12 +1,12 @@
-import { r as …
+import { r as p } from "../../index-C4JCoBvj.js";
 class m {
   variableNames = ["cache", "item"];
   outputShape;
   userCode;
   customUniforms = [{ name: "cacheT", type: "int" }];
-constructor(t, a, …
-const …
-this.outputShape = [t, a, …
+  constructor(t, a, n, o, c) {
+    const s = Math.min(n + 1, c);
+    this.outputShape = [t, a, s, o], this.userCode = `
       void main() {
         ivec4 coords = getOutputCoords(); // [b, h, t, d]
         int b = coords.x;
@@ -15,7 +15,7 @@ class m {
         int d = coords.w;

         int itemT = 1;
-int maxSize = ${…
+        int maxSize = ${c};
         int totalT = cacheT + itemT;
         int start = totalT >= maxSize ? 1 : 0;

@@ -23,21 +23,22 @@ class m {
         float val = 0.0;
         if (srcT < cacheT) {
           val = getCache(b, h, srcT, d);
-} else {
+        } else if (srcT == cacheT) {
           val = getItem(b, h, 0, d);
-}
+        } else {
+          val = 0.0;}
         setOutput(val);
       }
     `;
   }
 }
-function …
-const { cache: t, item: a } = e.inputs, { maxSize: o } = e.attrs, …
-return …
+function d(e) {
+  const { cache: t, item: a } = e.inputs, { maxSize: n, pastLen: o } = e.attrs, c = e.backend, s = t.shape[0], r = t.shape[2], i = t.shape[1], h = new m(s, i, r, a.shape[3], n);
+  return c.runWebGLProgram(h, [t, a], "float32", [[o]]);
 }
-const …
+const l = {
   kernelName: "AppendCache",
   backendName: "webgl",
-kernelFunc: …
+  kernelFunc: d
 };
-
+p(l);
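The WebGL change mirrors the CPU kernel: the output time length is now min(T_cache + 1, maxSize), the newly appended timestep reads from the item at slot cacheT, and slots past it are written as explicit zeros rather than falling through, with pastLen fed in through the cacheT uniform. A host-side model of the index mapping the shader appears to implement (the srcT computation sits in an unchanged region not shown in this diff, so the `t + start` mapping below is inferred):

// Host-side model of the shader's per-slot source selection (assumed).
// When the window is full, every output slot t reads source slot t + 1,
// dropping the oldest timestep; the new item always lands at cacheT.
function sourceIndex(t: number, cacheT: number, maxSize: number): number | "item" | "zero" {
  const itemT = 1;
  const start = cacheT + itemT >= maxSize ? 1 : 0;
  const srcT = t + start;
  if (srcT < cacheT) return srcT;      // copy from the existing cache
  if (srcT === cacheT) return "item";  // the newly appended timestep
  return "zero";                       // beyond the valid region
}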