@genai-fi/nanogpt 0.2.10 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TeachableLLM.js +8 -6
- package/dist/{complex-x7w5HPOS.js → complex-CJ-qCcLB.js} +1 -1
- package/dist/{index-CWQLouWz.js → index-YPKosni4.js} +52 -48
- package/dist/layers/CausalSelfAttention.d.ts +2 -0
- package/dist/layers/CausalSelfAttention.js +46 -56
- package/dist/layers/RoPECache.d.ts +4 -3
- package/dist/layers/RoPECache.js +17 -22
- package/dist/layers/TiedEmbedding.js +33 -32
- package/dist/main.js +18 -16
- package/dist/{mat_mul-4v7St11W.js → mat_mul-Bu7bhLms.js} +1 -1
- package/dist/ops/attentionMask.js +2 -2
- package/dist/ops/gatherSub.js +2 -2
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.d.ts +7 -0
- package/dist/ops/qkv.js +127 -0
- package/dist/ops/rope.d.ts +8 -0
- package/dist/ops/rope.js +154 -0
- package/dist/ops/scatterSub.js +10 -10
- package/dist/reshape-DmnmKT6r.js +25 -0
- package/dist/{stack-CTdK-itU.js → stack-BtKpB0Ry.js} +7 -7
- package/dist/sum-D7fu15XL.js +27 -0
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/sparseCrossEntropy.js +31 -30
- package/dist/utilities/profile.js +1 -1
- package/package.json +1 -1
- package/dist/sum-CnIf1YOh.js +0 -49
package/dist/TeachableLLM.js
CHANGED
@@ -1,6 +1,6 @@
 import { defaultConfig as h } from "./config.js";
-import
-import { saveModel as
+import m from "./NanoGPTModel.js";
+import { saveModel as d } from "./utilities/save.js";
 import { loadModel as f } from "./utilities/load.js";
 import u from "./Generator.js";
 import _ from "./Trainer.js";
@@ -13,7 +13,9 @@ import "./jszip.min-CjP2V1VV.js";
 import "./ops/scatterSub.js";
 import "./ops/gatherSub.js";
 import "./ops/attentionMask.js";
-import
+import "./ops/qkv.js";
+import "./ops/rope.js";
+import p from "./utilities/profile.js";
 class a extends c {
   _config;
   _model;
@@ -50,7 +52,7 @@ class a extends c {
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    return
+    return d(this._model, this._tokeniser, t);
   }
   static loadModel(t, r) {
     const e = new a(t);
@@ -65,7 +67,7 @@ class a extends c {
     }), e;
   }
   static create(t, r = {}) {
-    const e = { ...h, ...r }, o = new g(e.vocabSize), s = new
+    const e = { ...h, ...r }, o = new g(e.vocabSize), s = new m(t, e), i = new a(t, o, s);
     return i.setStatus("warmup"), l(s).then(() => {
       i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
         n === "trained" && i.setStatus("ready");
@@ -84,7 +86,7 @@ class a extends c {
     if (t) {
       if (!this._model)
         throw new Error("Model is not initialized.");
-      this._model.getProfiler() || this._model.setProfiler(new
+      this._model.getProfiler() || this._model.setProfiler(new p());
     } else
       this._model && this._model.setProfiler(void 0);
   }
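The substantive change in TeachableLLM.js is the pair of new side-effect imports, "./ops/qkv.js" and "./ops/rope.js", matching the existing scatterSub/gatherSub/attentionMask imports: loading the module once is what wires the custom op into TensorFlow.js before any model code runs. The op bodies are not shown at this point in the diff; the sketch below only illustrates the general pattern with TensorFlow.js's real `tf.customGrad` API, and `scaleByTwo` is a hypothetical stand-in, not the package's op.

```ts
import * as tf from "@tensorflow/tfjs";

// Hypothetical op module in the style of dist/ops/qkv.js. Defining the op at
// module scope means a bare side-effect import is enough to make it usable
// everywhere; `scaleByTwo` is illustrative only.
export const scaleByTwo = tf.customGrad((x, save) => {
  const input = x as tf.Tensor;
  (save as (tensors: tf.Tensor[]) => void)([input]);
  return {
    value: tf.mul(input, 2),
    // d(2x)/dx = 2: the upstream gradient is simply doubled.
    gradFunc: (dy: tf.Tensor) => [tf.mul(dy, 2)],
  };
});
```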
package/dist/{index-CWQLouWz.js → index-YPKosni4.js}
CHANGED

@@ -383,7 +383,7 @@ function _t(n, t) {
     return e.set(n, s), e.get(n);
   }
 }
-const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", We = "RealDiv",
+const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", Bs = "Concat", We = "RealDiv", vs = "Elu", Ms = "Exp", je = "Fill", Ke = "FloorDiv", Fs = "GatherV2", $s = "GatherNd", re = "Identity", Rs = "Imag", xs = "LeakyRelu", Ns = "Log", Ds = "Max", Ve = "Maximum", qe = "Multiply", Cs = "Neg", _s = "Pack", He = "Pow", Ps = "Prelu", Os = "Range", Ls = "Real", Us = "Relu", Gs = "Reshape", zs = "Relu6", Ws = "ScatterNd", js = "Sigmoid", Je = "Sqrt", Ks = "Sum", Vs = "SplitV", qs = "Softmax", Xe = "Sub", Hs = "Transpose", Ye = "ZerosLike", Js = "Step", Xs = "_FusedMatMul";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -438,11 +438,11 @@ function Wt(n) {
   }
   return e;
 }
-function
+function Ys(n) {
   const { kernelName: t, backendName: e } = n, s = ie(t, e);
   ht.has(s) && O(`The kernel '${t}' for backend '${e}' is already registered`), ht.set(s, n);
 }
-function
+function Qs(n) {
   const { kernelName: t } = n;
   It.has(t) && S().getBool("DEBUG") && O(`Overriding the gradient for '${t}'`), It.set(t, n);
 }
@@ -1902,7 +1902,7 @@ function I(n, t, e, s = "numeric") {
   const a = r !== "string" ? ae(n, r) : at(n, [], !0);
   return g.makeTensor(a, i, r);
 }
-function
+function Zs(n, t, e, s = "numeric") {
   if (!Array.isArray(n))
     throw new Error(`Argument ${t} passed to ${e} must be a \`Tensor[]\` or \`TensorLike[]\``);
   return n.map((i, o) => I(i, `${t}[${o}]`, e, s));
@@ -2065,10 +2065,10 @@ function Sn(n, t) {
  * limitations under the License.
  * =============================================================================
  */
-function
+function tr() {
   return g;
 }
-function
+function er() {
   return g.memory();
 }
 function E(n, t) {
@@ -2893,7 +2893,7 @@ function Yn(n, t, e) {
  * limitations under the License.
  * =============================================================================
  */
-function
+function nr(n, t) {
   const e = [];
   for (let s = 0; s < t.length; s++) {
     const r = n[n.length - s - 1], i = t.length - s - 1, o = t[i];
@@ -3061,7 +3061,7 @@ function ss(n, t) {
     a[u] != null && (c[l.name] = a[u]);
   }), s?.forEach((l) => c[l.name] = null), { value: o, grads: c };
 }
-function
+function sr(n) {
   return g.customGrad(n);
 }
 /**
@@ -3841,55 +3841,59 @@ function bs() {
  */
 bs();
 export {
+  Qn as $,
   ds as A,
   Es as B,
   As as C,
-
+  w as D,
   g as E,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  qs as F,
+  $s as G,
+  sr as H,
+  E as I,
+  C as J,
+  js as K,
+  Ns as L,
+  Ds as M,
+  vs as N,
+  Rs as O,
+  _s as P,
+  xs as Q,
+  Gs as R,
+  Ks as S,
+  Cs as T,
+  Ps as U,
+  Ls as V,
+  Us as W,
+  zs as X,
+  Js as Y,
+  Hs as Z,
+  nr as _,
   p as a,
+  Xs as a0,
   Z as b,
-
+  Qs as c,
   I as d,
-
+  tr as e,
   V as f,
   Is as g,
-
-
-
-
-
-
-
+  $t as h,
+  Vs as i,
+  Os as j,
+  Zs as k,
+  y as l,
+  er as m,
+  Gn as n,
   F as o,
-
-
-
+  Bs as p,
+  Fs as q,
+  Ys as r,
   K as s,
-
-
-
- w,
-
-
-
+  Dt as t,
+  Zt as u,
+  G as v,
+  De as w,
+  Ws as x,
+  Ms as y,
+  Ts as z
 };
package/dist/layers/CausalSelfAttention.d.ts
CHANGED

@@ -21,7 +21,9 @@ export default class CausalSelfAttention extends BaseLayer {
     private divisor;
     private index;
     private _trainable;
+    private units;
     constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache | undefined);
+    private build;
     get variables(): TF.Variable[];
     get trainable(): boolean;
     set trainable(value: boolean);
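The two new members here, `units` and the private `build`, mirror a change in the implementation below: the fused QKV kernel is no longer a `tf.layers.dense` created eagerly in the constructor, but a raw `tf.variable` allocated on first use. A minimal self-contained sketch of that lazy-build pattern, with illustrative names (`LazyDense` is not a class in this package):

```ts
import * as tf from "@tensorflow/tfjs";

// Sketch of lazy variable creation: nothing is allocated until the first
// forward pass, when build() runs, as in the new CausalSelfAttention.
class LazyDense {
  private kernel: tf.Variable | null = null;

  constructor(private nEmbed: number, private units: number) {}

  build(): void {
    if (this.kernel === null) {
      // Same initialisation as the diff: normal(0, 0.02), trainable = true.
      this.kernel = tf.variable(
        tf.randomNormal([this.nEmbed, this.units], 0, 0.02),
        true
      );
    }
  }

  apply(x: tf.Tensor2D): tf.Tensor {
    this.build(); // idempotent, safe on every call
    return tf.matMul(x, this.kernel!);
  }
}
```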
package/dist/layers/CausalSelfAttention.js
CHANGED

@@ -1,17 +1,10 @@
-import { attentionMask as
-import
-class C extends S {
+import { attentionMask as x } from "../ops/attentionMask.js";
+import j from "./BaseLayer.js";
+import { qkv as w } from "../ops/qkv.js";
+import { rope as y } from "../ops/rope.js";
+class N extends j {
   constructor(t, i, s, e) {
-    super(), this.ropeCache = e, this.config = s, this.tf = t, this.index = i, this.
-      units: 3 * s.nEmbed,
-      useBias: s.biasInLinear,
-      name: `block_${i}_attn_cAttn`,
-      kernelInitializer: this.tf.initializers.randomNormal({
-        mean: 0,
-        stddev: 0.02
-      }),
-      biasInitializer: "zeros"
-    }), this.cProj = this.tf.layers.dense({
+    super(), this.ropeCache = e, this.config = s, this.tf = t, this.index = i, this.units = s.nEmbed * 3, this.cProj = this.tf.layers.dense({
       units: s.nEmbed,
       useBias: s.biasInLinear,
       name: `block_${i}_attn_cProj`,
@@ -21,11 +14,11 @@ class C extends S {
       }),
       biasInitializer: "zeros"
     }), this.attnDropout = this.tf.layers.dropout({ rate: s.dropout }), this.residDropout = this.tf.layers.dropout({ rate: s.dropout }), this.bias = this.tf.linalg.bandPart(this.tf.ones([s.blockSize, s.blockSize]), -1, 0).cast("bool"), this.divisor = 1 / Math.sqrt(s.nEmbed / s.nHead);
-    const o = this.tf.zeros([s.blockSize, s.blockSize]),
-    this.maskInf = this.tf.where(this.bias, o,
+    const o = this.tf.zeros([s.blockSize, s.blockSize]), a = this.tf.fill([s.blockSize, s.blockSize], Number.NEGATIVE_INFINITY);
+    this.maskInf = this.tf.where(this.bias, o, a);
   }
   config;
-  cAttn;
+  cAttn = null;
   cProj;
   attnDropout;
   residDropout;
@@ -35,26 +28,35 @@ class C extends S {
   divisor;
   index;
   _trainable = !0;
+  units;
+  build() {
+    this.cAttn === null && (this.cAttn = this.tf.variable(
+      this.tf.randomNormal([this.config.nEmbed, this.units], 0, 0.02),
+      !0
+      //`block_${this.index}_attn_cAttn_kernel`
+    ));
+  }
   get variables() {
-
-
-
-    ];
+    if (this.cAttn === null)
+      throw new Error("Layer not built yet");
+    return [this.cAttn, ...this.cProj.trainableWeights.map((t) => t.read())];
   }
   get trainable() {
     return this._trainable;
   }
   set trainable(t) {
-    this._trainable = t, this.cAttn.trainable = t, this.cProj.trainable = t;
+    this._trainable = t, this.cAttn && (this.cAttn.trainable = t), this.cProj.trainable = t;
   }
   saveWeights(t) {
-    t.set(`block_${this.index}_cAttn`, this.cAttn.
+    t.set(`block_${this.index}_cAttn`, this.cAttn ? [this.cAttn.clone()] : []), t.set(`block_${this.index}_cProj`, this.cProj.getWeights());
   }
   loadWeights(t) {
-
+    const i = t.get(`block_${this.index}_cAttn`)?.[0];
+    if (!i) throw new Error(`Weights for block_${this.index}_cAttn not found`);
+    this.cAttn ? this.cAttn.assign(i) : this.cAttn = this.tf.variable(i, !0), this.cProj.setWeights(t.get(`block_${this.index}_cProj`) || []);
   }
   getAttentionScores(t, i, s) {
-    const e =
+    const e = x(t, i, this.maskInf, this.divisor), o = this.tf.softmax(e, -1);
     return this.attnDropout.apply(o, { training: s });
   }
   // Attention with optional past. If pastLen > 0 and T_cur == 1, no mask needed.
@@ -64,61 +66,49 @@ class C extends S {
     if (o > 1 && e > 0)
       throw new Error("Cannot use past with T_cur > 1");
     if (o > 1) {
-      const
-      r = r.add(
+      const c = this.maskInf.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
+      r = r.add(c);
     }
     const h = this.tf.softmax(r, -1);
     return this.attnDropout.apply(h, { training: s });
   }
   getQKV(t) {
-
-    o.dispose();
-    const a = e / this.config.nHead, u = this.tf.reshape(c, [i, s, this.config.nHead, a]);
-    c.dispose();
-    const f = u.transpose([0, 2, 1, 3]);
-    u.dispose();
-    const d = this.tf.reshape(r, [i, s, this.config.nHead, a]);
-    r.dispose();
-    const n = d.transpose([0, 2, 1, 3]);
-    d.dispose();
-    const l = this.tf.reshape(h, [i, s, this.config.nHead, a]);
-    h.dispose();
-    const p = l.transpose([0, 2, 1, 3]);
-    return l.dispose(), [f, n, p];
+    return w(t, this.cAttn, this.config.nHead);
   }
   getOutputProjection(t, i) {
-    const s = t.shape[0], e = t.shape[2], o = this.config.nEmbed,
+    const s = t.shape[0], e = t.shape[2], o = this.config.nEmbed, a = t.transpose([0, 2, 1, 3]), r = this.tf.reshape(a, [s, e, o]), h = this.cProj.apply(r);
     return this.residDropout.apply(h, { training: i });
   }
   // Added optional KV cache support (pastKV). Returns presentKV for chaining.
   call(t, i = !1, s = !1, e) {
     if (e && !this.config.useRope)
       throw new Error("Cannot use pastKV without RoPE enabled");
-    return this.tf.tidy(() => {
+    return this.build(), this.tf.tidy(() => {
       this.startMemory();
-      const [o,
-
-
+      const [o, a, r] = this.getQKV(t), h = o.shape[2], c = this.config.blockSize, d = e ? e.cumulativeLength : 0, f = this.ropeCache ? y(o, this.ropeCache, d) : o, m = this.ropeCache ? y(a, this.ropeCache, d) : a;
+      this.ropeCache && (o.dispose(), a.dispose());
+      let n = m, l = r, u = 0;
+      e && (u = e.length, n = this.tf.concat([e.k, m], 2), l = this.tf.concat([e.v, r], 2));
       const b = n.shape[2];
-      if (b >
-        const k = b -
-        n = n.slice([0, 0, k, 0], [
+      if (b > c) {
+        const k = b - c, A = n.shape[0], g = n.shape[1], _ = n.shape[3];
+        n = n.slice([0, 0, k, 0], [A, g, c, _]), l = l.slice([0, 0, k, 0], [A, g, c, _]), u = c - h;
       }
-      let
-
-      const
+      let p;
+      u > 0 ? p = this.getAttentionScoresWithPast(f, n, i, u) : p = this.getAttentionScores(f, n, i);
+      const P = this.tf.matMul(p, l), S = this.getOutputProjection(P, i), v = {
        k: this.tf.keep(n),
        v: this.tf.keep(l),
-        length:
+        length: u + h,
        cumulativeLength: e ? e.cumulativeLength + h : h
-      },
-      return this.endMemory("CausalSelfAttention"), { output:
+      }, I = s ? p.mean(1) : void 0;
+      return this.endMemory("CausalSelfAttention"), { output: S, attention: I, presentKV: v };
     });
   }
   dispose() {
-    this.cAttn
+    this.cAttn?.dispose(), this.cProj.dispose(), this.attnDropout.dispose(), this.residDropout.dispose(), this.bias.dispose(), this.maskInf.dispose();
   }
 }
 export {
-
+  N as default
 };
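The rewritten `call()` now builds the layer lazily, pulls Q/K/V out of the single fused `qkv` op, applies RoPE with a `cumulativeLength` position offset, concatenates cached keys/values along the sequence axis (axis 2), and, once the cache outgrows `blockSize`, keeps only the most recent positions. A standalone sketch of that trimming step, assuming the `[batch, nHead, seqLen, headDim]` layout used above (`trimKV` is an illustrative helper, not part of the package):

```ts
import * as tf from "@tensorflow/tfjs";

// Keep only the most recent `blockSize` positions of a KV cache, matching
// the `if (b > c)` branch in call().
function trimKV(
  k: tf.Tensor4D,
  v: tf.Tensor4D,
  blockSize: number
): [tf.Tensor4D, tf.Tensor4D] {
  const [batch, heads, seqLen, headDim] = k.shape;
  if (seqLen <= blockSize) return [k, v];
  const start = seqLen - blockSize; // drop the oldest positions
  return [
    k.slice([0, 0, start, 0], [batch, heads, blockSize, headDim]),
    v.slice([0, 0, start, 0], [batch, heads, blockSize, headDim]),
  ];
}
```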
package/dist/layers/RoPECache.d.ts
CHANGED

@@ -3,14 +3,15 @@ import { GPTConfig } from '../config';
 export default class RoPECache {
     private readonly tf;
     private readonly config;
-
+    readonly rotaryDim: number;
     private ropeBase;
     private ropeInvFreq;
     private ropeCos;
     private ropeSin;
     private ropeCacheLen;
     constructor(tf: typeof TF, config: GPTConfig);
-
-
+    ensureRopeCache(needed: number): void;
+    getCos(): TF.Tensor | null;
+    getSin(): TF.Tensor | null;
     dispose(): void;
 }
package/dist/layers/RoPECache.js
CHANGED
@@ -1,12 +1,12 @@
-class
-  constructor(
-    this.tf =
-    const
-    if (this.rotaryDim =
+class n {
+  constructor(i, e) {
+    this.tf = i, this.config = e;
+    const t = this.config.nEmbed / this.config.nHead;
+    if (this.rotaryDim = t, this.rotaryDim % 2 !== 0)
       throw new Error("rotaryDim must be even");
     this.ropeBase = 1e4;
-    const
-    this.ropeInvFreq = this.tf.reciprocal(
+    const s = this.tf.range(0, this.rotaryDim, 2, "float32"), o = s.div(this.tf.scalar(this.rotaryDim, "float32")), r = this.tf.pow(this.tf.scalar(this.ropeBase, "float32"), o);
+    this.ropeInvFreq = this.tf.reciprocal(r), o.dispose(), r.dispose(), s.dispose(), this.config.useRope === !1 ? (this.ropeCos = null, this.ropeSin = null, this.ropeCacheLen = 0) : this.tf.tidy(() => {
       this.ensureRopeCache(this.config.blockSize * 4);
     });
   }
@@ -18,27 +18,22 @@ class b {
   ropeSin = null;
   // [cacheLen, rotaryDim/2]
   ropeCacheLen = 0;
-  ensureRopeCache(
-    if (
+  ensureRopeCache(i) {
+    if (i <= this.ropeCacheLen) return;
     this.ropeCos && this.ropeCos.dispose(), this.ropeSin && this.ropeSin.dispose();
-    const
-    this.ropeCos = this.tf.keep(this.tf.cos(
+    const e = Math.max(i, this.ropeCacheLen + this.config.blockSize * 4), s = this.tf.range(0, e, 1, "float32").expandDims(1).mul(this.ropeInvFreq.expandDims(0));
+    this.ropeCos = this.tf.keep(this.tf.cos(s).expandDims(-1)), this.ropeSin = this.tf.keep(this.tf.sin(s).expandDims(-1)), this.ropeCacheLen = e;
   }
-
-
-
-
-    this.
-    const n = t / 2, p = this.ropeCos.slice([o, 0, 0], [e, n, 1]).reshape([1, 1, e, n]), a = this.ropeSin.slice([o, 0, 0], [e, n, 1]).reshape([1, 1, e, n]), h = s.shape[0], c = s.shape[1], f = this.tf.range(0, t, 2, "int32"), l = this.tf.range(1, t, 2, "int32"), d = (u) => {
-      const m = u.slice([0, 0, 0, 0], [h, c, e, t]), C = t < i ? u.slice([0, 0, 0, t], [h, c, e, i - t]) : null, D = this.tf.gather(m, f, 3), g = this.tf.gather(m, l, 3), x = D.mul(p).sub(g.mul(a)), k = g.mul(p).add(D.mul(a)), R = this.tf.stack([x, k], -1).reshape([h, c, e, t]);
-      return C ? this.tf.concat([R, C], 3) : R;
-    }, y = d(s), S = d(r);
-    return f.dispose(), l.dispose(), [y, S];
+  getCos() {
+    return this.ropeCos;
+  }
+  getSin() {
+    return this.ropeSin;
   }
   dispose() {
     this.ropeCos && this.ropeCos.dispose(), this.ropeSin && this.ropeSin.dispose(), this.ropeInvFreq.dispose();
   }
 }
 export {
-
+  n as default
 };
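With the rotation itself moved out to the new ops/rope.js (the removed method above), RoPECache is reduced to building and serving cos/sin tables. The quantity being cached is the standard RoPE angle table: for each even channel index 2i, invFreq_i = 1 / base^(2i/d), and position p gets angle p · invFreq_i. A standalone sketch of that computation, assuming plain tfjs (`ropeAngles` is an illustrative name):

```ts
import * as tf from "@tensorflow/tfjs";

// Angle table that ropeCos/ropeSin are built from: shape [positions, d/2].
function ropeAngles(positions: number, rotaryDim: number, base = 1e4): tf.Tensor {
  const even = tf.range(0, rotaryDim, 2, "float32");              // 0, 2, 4, ...
  const invFreq = tf.pow(base, even.div(rotaryDim)).reciprocal(); // base^(-2i/d)
  const pos = tf.range(0, positions, 1, "float32");
  // Outer product: angle[p][i] = p * invFreq[i]; cos()/sin() of this is cached.
  return pos.expandDims(1).mul(invFreq.expandDims(0));
}
```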
package/dist/layers/TiedEmbedding.js
CHANGED

@@ -1,7 +1,8 @@
-import { o as h, d as i, E as o,
-import {
-import {
-import {
+import { o as h, d as i, E as o, K as X, N as Y, O as Z, Q as J, T as ee, U as te, V as se, W as ne, X as re, Y as ue, l as L, I as ae, Z as A, a as ie, _ as oe, D as le, f as q, v as C, $ as P, H as U, a0 as H } from "../index-YPKosni4.js";
+import { r as f } from "../reshape-DmnmKT6r.js";
+import { s as ce } from "../sum-D7fu15XL.js";
+import { m } from "../mat_mul-Bu7bhLms.js";
+import { c as pe } from "../complex-CJ-qCcLB.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -20,7 +21,7 @@ import { c as pe } from "../complex-x7w5HPOS.js";
  */
 function he(t) {
   const s = { x: i(t, "x", "sigmoid", "float32") };
-  return o.runKernel(
+  return o.runKernel(X, s);
 }
 const fe = /* @__PURE__ */ h({ sigmoid_: he });
 /**
@@ -41,7 +42,7 @@ const fe = /* @__PURE__ */ h({ sigmoid_: he });
  */
 function de(t) {
   const s = { x: i(t, "x", "elu", "float32") };
-  return o.runKernel(
+  return o.runKernel(Y, s);
 }
 const me = /* @__PURE__ */ h({ elu_: de });
 /**
@@ -62,7 +63,7 @@ const me = /* @__PURE__ */ h({ elu_: de });
  */
 function ge(t) {
   const s = { input: i(t, "input", "imag") };
-  return o.runKernel(
+  return o.runKernel(Z, s);
 }
 const $e = /* @__PURE__ */ h({ imag_: ge });
 /**
@@ -83,7 +84,7 @@ const $e = /* @__PURE__ */ h({ imag_: ge });
  */
 function xe(t, e = 0.2) {
   const n = { x: i(t, "x", "leakyRelu") }, r = { alpha: e };
-  return o.runKernel(
+  return o.runKernel(J, n, r);
 }
 const ke = /* @__PURE__ */ h({ leakyRelu_: xe });
 /**
@@ -169,7 +170,7 @@ function Me(t) {
   const s = { x: i(t, "x", "relu") };
   return o.runKernel(ne, s);
 }
-const
+const We = /* @__PURE__ */ h({ relu_: Me });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -186,11 +187,11 @@ const we = /* @__PURE__ */ h({ relu_: Me });
  * limitations under the License.
  * =============================================================================
  */
-function
+function we(t) {
   const s = { x: i(t, "x", "relu6") };
   return o.runKernel(re, s);
 }
-const ze = /* @__PURE__ */ h({ relu6_:
+const ze = /* @__PURE__ */ h({ relu6_: we });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -273,7 +274,7 @@ function Te(t, e, s, n) {
   if (e === "linear")
     return t;
   if (e === "relu")
-    return
+    return We(t);
   if (e === "elu")
     return me(t);
   if (e === "relu6")
@@ -310,42 +311,42 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
   }
   let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
   [u, a] = q(u, a);
-  const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2],
+  const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
   L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
-  const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([
+  const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
   let S;
   r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
-  let
-  l != null && (
-  const
+  let v;
+  l != null && (v = i(l, "prelu weights", "fused matMul"));
+  const G = (x, M) => {
     const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
     let K, _;
     if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
-      const
-      return [K, _,
+      const V = Le(z, d);
+      return [K, _, V];
     } else
       return [K, _];
-  },
+  }, I = {
     a: F,
     b: R,
     bias: S,
-    preluActivationWeights:
+    preluActivationWeights: v
   }, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
   return r == null ? U((M, g, $) => {
     const k = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      o.runKernel(H,
+      o.runKernel(H, I, j)
     );
-    return $([M, g, k]), { value: f(k, O), gradFunc:
+    return $([M, g, k]), { value: f(k, O), gradFunc: G };
   })(F, R) : U((M, g, $, k) => {
     const z = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      o.runKernel(H,
+      o.runKernel(H, I, j)
     );
-    return k([M, g, z, $]), { value: f(z, O), gradFunc:
+    return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
   })(F, R, S);
 }
-const
+const Q = /* @__PURE__ */ h({ fusedMatMul_: Ne });
 /**
  * @license
  * Copyright 2018 Google LLC
@@ -369,7 +370,7 @@ class E extends Error {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function
+function ve(t, e, s, n) {
   if (t.rank < 2 || e.rank < 2)
     throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
   if (e.rank >= 3) {
@@ -378,7 +379,7 @@ function Ge(t, e, s, n) {
     throw new E(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
   }
   if (t.rank === 2 && e.rank === 2)
-    return
+    return Q({
       a: t,
       b: e,
       transposeA: !1,
@@ -392,7 +393,7 @@ function Ge(t, e, s, n) {
   const l = e.shape.slice(), p = l.pop(), u = l.pop(), a = [...l, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
   e = f(Re(e, D), [u, -1]);
   const b = [...r, ...a];
-  return f(
+  return f(Q({
     a: t,
     b: e,
     transposeA: !1,
@@ -402,7 +403,7 @@ function Ge(t, e, s, n) {
   }), b);
 }
 }
-class
+class Ue {
   vocabSize;
   embedDim;
   tf;
@@ -425,7 +426,7 @@ class Pe {
     return this.tf.gather(this.tiedWeights, e, 0);
   }
   project(e) {
-    return
+    return ve(e, this.tiedWeights.transpose());
   }
   getWeights() {
     return [this.tiedWeights];
@@ -444,5 +445,5 @@ class Pe {
   }
 }
 export {
-
+  Ue as default
 };