npm - @genai-fi/nanogpt - Versions diffs - 0.4.4 → 0.5.0 - Mend

@genai-fi/nanogpt 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

package/dist/BaseLayer-BhrMN8JO.js +135 -0
package/dist/Generator.js +44 -41
package/dist/NanoGPTModel.d.ts +12 -16
package/dist/NanoGPTModel.js +128 -138
package/dist/{Reshape-CiAY8ltP.js → Reshape-BE5rA4rT.js} +8 -8
package/dist/TeachableLLM.js +8 -5
package/dist/{TiedEmbedding-DznFwzcB.js → TiedEmbedding-DsDRvLB0.js} +751 -768
package/dist/{axis_util-QP0LdI1v.js → axis_util-97KkkyRQ.js} +1 -1
package/dist/broadcast_to-CMlkG8NS.js +44 -0
package/dist/{concat-DvWM7HGZ.js → concat-Cxbo2sOz.js} +3 -3
package/dist/{dropout-DFEXTPV0.js → dropout-kbDY39Ci.js} +1 -1
package/dist/{gather-C5D8PxwA.js → gather-Bxe1Qip8.js} +4 -4
package/dist/{gpgpu_math-CUzjlO9A.js → gpgpu_math-C0zyxKFi.js} +1 -1
package/dist/{index--6vO-cOz.js → index-iNhkcAEQ.js} +82 -82
package/dist/{kernel_funcs_utils-C6YBCuOt.js → kernel_funcs_utils-C4eIk4fE.js} +20 -20
package/dist/layers/BaseLayer.d.ts +28 -4
package/dist/layers/BaseLayer.js +3 -16
package/dist/layers/CausalSelfAttention.d.ts +22 -24
package/dist/layers/CausalSelfAttention.js +73 -127
package/dist/layers/MLP.d.ts +8 -15
package/dist/layers/MLP.js +43 -81
package/dist/layers/RMSNorm.d.ts +5 -11
package/dist/layers/RMSNorm.js +13 -29
package/dist/layers/RoPECache.js +14 -12
package/dist/layers/TiedEmbedding.d.ts +6 -16
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.d.ts +12 -16
package/dist/layers/TransformerBlock.js +20 -41
package/dist/{log_sum_exp-CiEy1aUe.js → log_sum_exp-CkumwesB.js} +11 -11
package/dist/main.js +22 -19
package/dist/{mat_mul-BEHRPMh0.js → mat_mul-D0SifYfJ.js} +3 -3
package/dist/{max-BUShNgfh.js → max-CYaAjEEp.js} +3 -3
package/dist/{moments-DYOHXoRV.js → moments-B06NlR_V.js} +6 -6
package/dist/{norm-DSva3hI3.js → norm-D3676xIo.js} +7 -7
package/dist/{ones-D6kB8bdY.js → ones-BIeFnPHR.js} +2 -2
package/dist/ops/appendCache.js +4 -4
package/dist/ops/attentionMask.d.ts +1 -1
package/dist/ops/attentionMask.js +4 -4
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +14 -15
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +5 -5
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +1 -1
package/dist/ops/cpu/matMulMul.d.ts +1 -0
package/dist/ops/cpu/matMulMul.js +17 -0
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.d.ts +1 -0
package/dist/ops/cpu/normRMS.js +39 -0
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +8 -8
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +1 -1
package/dist/ops/grads/attentionMask.js +13 -9
package/dist/ops/grads/fusedSoftmax.js +12 -9
package/dist/ops/grads/gelu.js +1 -1
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.d.ts +2 -0
package/dist/ops/grads/normRMS.js +20 -0
package/dist/ops/grads/qkv.js +19 -9
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.d.ts +2 -0
package/dist/ops/matMulMul.js +9 -0
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/normRMS.d.ts +2 -0
package/dist/ops/normRMS.js +10 -0
package/dist/ops/qkv.js +1 -1
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +13 -12
package/dist/ops/webgl/fusedSoftmax.js +43 -40
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/matMulGelu.d.ts +3 -2
package/dist/ops/webgl/matMulGelu.js +77 -75
package/dist/ops/webgl/matMulMul.d.ts +14 -0
package/dist/ops/webgl/matMulMul.js +28 -0
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.d.ts +1 -0
package/dist/ops/webgl/normRMS.js +86 -0
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops-ObfXLHYQ.js +1269 -0
package/dist/{range-C_vpUjBu.js → range-BsFU-SNG.js} +1 -1
package/dist/{reshape-z51Eu-re.js → reshape-DxTPgnwL.js} +3 -3
package/dist/{sin-H567uayl.js → sin-BOX-JVAj.js} +5 -5
package/dist/slice_util-D-kaD4ZV.js +49 -0
package/dist/{softmax-Dsxflvdl.js → softmax-BjsptB07.js} +2 -2
package/dist/{split-B_k_jwud.js → split-BCbrzthj.js} +4 -4
package/dist/{stack-CmqSdsfs.js → stack--cqr9Dgc.js} +2 -2
package/dist/{sum-DdkDf2MG.js → sum-B_92TaHD.js} +5 -5
package/dist/{tensor-BGYi41cj.js → tensor-CfiPXsW4.js} +1 -1
package/dist/{tensor2d-DUr_htjt.js → tensor2d-tSxWdFMH.js} +1 -1
package/dist/tfjs_backend-NucKez4s.js +1010 -0
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +44 -44
package/dist/training/Evaluator.js +6 -6
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +7 -7
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +10 -10
package/dist/utilities/generate.js +3 -3
package/dist/utilities/load.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/save.js +10 -8
package/dist/utilities/weights.js +2 -2
package/dist/{zeros-8xl-W2DC.js → zeros-NMYTayy7.js} +3 -3
package/package.json +1 -1
package/dist/slice_util-BdhYwFY_.js +0 -90
package/dist/tfjs_backend-DuKis_xG.js +0 -2271
package/dist/variable-BJTZ3jOy.js +0 -23

package/dist/{axis_util-QP0LdI1v.js → axis_util-97KkkyRQ.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { j as c } from "./index--6vO-cOz.js";
+import { k as c } from "./index-iNhkcAEQ.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.

package/dist/broadcast_to-CMlkG8NS.js ADDED Viewed

@@ -0,0 +1,44 @@
+import { o as h, i as f, l as p, x as g, E as u, T } from "./index-iNhkcAEQ.js";
+import { r as b } from "./reshape-DxTPgnwL.js";
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+function m(e, r) {
+  let n = f(e, "broadcastTo", "x");
+  const a = n.shape;
+  if (p(r), r.length < n.rank)
+    throw new Error(`broadcastTo(): shape.length=${r.length} < input.rank=${n.rank}.`);
+  if (r.length > n.rank) {
+    const t = n.shape.slice();
+    for (; t.length < r.length; )
+      t.unshift(1);
+    n = b(n, t);
+  }
+  const s = n.shape, o = Array.from(r);
+  for (let t = r.length - 1; t >= 0; t--)
+    if (s[t] === r[t])
+      o[t] = 1;
+    else if (n.shape[t] !== 1)
+      throw new Error(`broadcastTo(): [${a}] cannot be broadcast to [${r}].`);
+  if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
+    return g(n);
+  const i = { x: n }, c = { reps: o };
+  return u.runKernel(T, i, c);
+}
+const E = /* @__PURE__ */ h({ broadcastTo_: m });
+export {
+  E as b
+};

package/dist/{concat-DvWM7HGZ.js → concat-Cxbo2sOz.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as s, j as a, i, w as p, E as l, C as f } from "./index--6vO-cOz.js";
+import { o as s, k as a, j as p, x as i, E as l, C as f } from "./index-iNhkcAEQ.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -17,13 +17,13 @@ import { o as s, j as a, i, w as p, E as l, C as f } from "./index--6vO-cOz.js";
  */
 function h(o, e = 0) {
   a(o.length >= 1, () => "Pass at least one tensor to concat");
-  const t = i(o, "tensors", "concat", "string_or_numeric");
+  const t = p(o, "tensors", "concat", "string_or_numeric");
   if (t[0].dtype === "complex64" && t.forEach((n) => {
     if (n.dtype !== "complex64")
       throw new Error(`Cannot concatenate complex64 tensors with a tensor
           with dtype ${n.dtype}. `);
   }), t.length === 1)
-    return p(t[0]);
+    return i(t[0]);
   const r = t, c = { axis: e };
   return l.runKernel(f, r, c);
 }

package/dist/{dropout-DFEXTPV0.js → dropout-kbDY39Ci.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as l, h, E as m, af as p, k as c, ag as d, ad as g, j as u, ah as V, ai as v, a8 as N, b as w } from "./index--6vO-cOz.js";
+import { o as l, i as h, E as m, ag as p, l as c, ah as d, ae as g, k as u, V, ai as v, a9 as N, b as w } from "./index-iNhkcAEQ.js";
 import { s as f } from "./index-C4L8Cm77.js";
 /**
  * @license

package/dist/{gather-C5D8PxwA.js → gather-Bxe1Qip8.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as h, h as t, E as g, G as p } from "./index--6vO-cOz.js";
+import { o as g, i as t, E as h, G as p } from "./index-iNhkcAEQ.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -16,10 +16,10 @@ import { o as h, h as t, E as g, G as p } from "./index--6vO-cOz.js";
  * =============================================================================
  */
 function u(n, s, r = 0, e = 0) {
-  const o = t(n, "x", "gather"), a = t(s, "indices", "gather", "int32"), c = { x: o, indices: a }, i = { axis: r, batchDims: e };
-  return g.runKernel(p, c, i);
+  const o = t(n, "x", "gather"), a = t(s, "indices", "gather", "int32"), i = { x: o, indices: a }, c = { axis: r, batchDims: e };
+  return h.runKernel(p, i, c);
 }
-const d = /* @__PURE__ */ h({ gather_: u });
+const d = /* @__PURE__ */ g({ gather_: u });
 export {
   d as g
 };

package/dist/{gpgpu_math-CUzjlO9A.js → gpgpu_math-C0zyxKFi.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { L as e } from "./index--6vO-cOz.js";
+import { K as e } from "./index-iNhkcAEQ.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.

package/dist/{index--6vO-cOz.js → index-iNhkcAEQ.js} RENAMED Viewed

@@ -4001,81 +4001,81 @@ function As() {
  */
 As();
 export {
-  xe as $,
+  Oa as $,
   Ss as A,
   Zs as B,
   or as C,
-  ss as D,
+  Wa as D,
   g as E,
   Bn as F,
   Pr as G,
-  To as H,
-  Fs as I,
-  kn as J,
-  En as K,
-  k as L,
+  Fs as H,
+  kn as I,
+  En as J,
+  k as K,
+  Lr as L,
   ta as M,
-  Lr as N,
-  rs as O,
+  rs as N,
+  de as O,
   ba as P,
-  de as Q,
+  Ea as Q,
   Ia as R,
   qa as S,
-  Ea as T,
+  Qa as T,
   Zt as U,
-  dr as V,
-  Oa as W,
+  D as V,
+  To as W,
   De as X,
   ar as Y,
   ne as Z,
-  aa as _,
+  dr as _,
   M as a,
   xs as a$,
-  V as a0,
-  oa as a1,
-  ns as a2,
-  nt as a3,
-  Qa as a4,
-  Ca as a5,
-  Fr as a6,
-  qr as a7,
-  S as a8,
-  la as a9,
-  Wr as aA,
-  jr as aB,
-  Kr as aC,
-  ha as aD,
-  Jr as aE,
-  ia as aF,
-  Sa as aG,
-  Ta as aH,
-  Aa as aI,
-  Ra as aJ,
-  $a as aK,
-  Ds as aL,
-  ro as aM,
-  no as aN,
-  eo as aO,
-  Io as aP,
-  oo as aQ,
-  yr as aR,
-  $r as aS,
-  ao as aT,
-  da as aU,
-  ma as aV,
-  ga as aW,
-  Na as aX,
-  va as aY,
-  to as aZ,
-  yo as a_,
-  ua as aa,
-  Za as ab,
-  $t as ac,
-  Rt as ad,
-  Rs as ae,
-  xr as af,
-  Wn as ag,
-  D as ah,
+  aa as a0,
+  xe as a1,
+  V as a2,
+  oa as a3,
+  ns as a4,
+  nt as a5,
+  Ca as a6,
+  Fr as a7,
+  qr as a8,
+  S as a9,
+  _a as aA,
+  er as aB,
+  Pa as aC,
+  Ar as aD,
+  Rr as aE,
+  _r as aF,
+  Or as aG,
+  Gr as aH,
+  jr as aI,
+  Kr as aJ,
+  ha as aK,
+  Jr as aL,
+  ia as aM,
+  Ta as aN,
+  $a as aO,
+  Ds as aP,
+  no as aQ,
+  eo as aR,
+  yr as aS,
+  $r as aT,
+  ao as aU,
+  da as aV,
+  ma as aW,
+  ga as aX,
+  Na as aY,
+  va as aZ,
+  to as a_,
+  la as aa,
+  ua as ab,
+  Za as ac,
+  $t as ad,
+  Rt as ae,
+  Rs as af,
+  xr as ag,
+  Wn as ah,
   x as ai,
   F as aj,
   pe as ak,
@@ -4084,16 +4084,16 @@ export {
   jt as an,
   ue as ao,
   za as ap,
-  _a as aq,
-  er as ar,
-  rr as as,
-  Pa as at,
-  Ar as au,
-  Br as av,
-  Rr as aw,
-  _r as ax,
-  Or as ay,
-  Gr as az,
+  rr as aq,
+  Br as ar,
+  Wr as as,
+  Sa as at,
+  Aa as au,
+  Ra as av,
+  ro as aw,
+  Io as ax,
+  oo as ay,
+  yo as az,
   b,
   Vs as b$,
   $s as b0,
@@ -4212,24 +4212,24 @@ export {
   go as d,
   mo as e,
   K as f,
-  lo as g,
-  T as h,
-  In as i,
-  y as j,
-  xt as k,
-  Ge as l,
+  ss as g,
+  lo as h,
+  T as i,
+  In as j,
+  y as k,
+  xt as l,
   po as m,
-  z as n,
+  Ge as n,
   N as o,
-  q as p,
-  Ba as q,
+  z as p,
+  q,
   co as r,
   tt as s,
   E as t,
-  Ka as u,
+  Ba as u,
   ls as v,
-  qn as w,
-  Ft as x,
-  Wa as y,
+  Ka as w,
+  qn as x,
+  Ft as y,
   C as z
 };

package/dist/{kernel_funcs_utils-C6YBCuOt.js → kernel_funcs_utils-C4eIk4fE.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { an as D, ao as N, O as w, n as R, Q as v, L as P } from "./index--6vO-cOz.js";
-import { u as g } from "./gpgpu_math-CUzjlO9A.js";
+import { an as D, ao as N, N as w, p as R, O as v, K as P } from "./index-iNhkcAEQ.js";
+import { u as g } from "./gpgpu_math-C0zyxKFi.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -23,7 +23,7 @@ function B(t) {
     throw new Error(`Failed to decode encoded string bytes into utf-8, error: ${e}`);
   }
 }
-function H(t) {
+function K(t) {
   return t.map((e) => N(e));
 }
 /**
@@ -127,12 +127,12 @@ class C {
  * =============================================================================
  */
 class _ {
-  constructor(e, o, u, d = !1) {
+  constructor(e, o, u, p = !1) {
     this.variableNames = ["A", "B"], this.supportsBroadcasting = !0, this.packedInputs = !0, this.packedOutput = !0, this.outputShape = w(o, u);
     const a = this.outputShape.length;
     this.enableShapeUniforms = g(a);
     let n = "";
-    if (d)
+    if (p)
       if (a === 0 || R(this.outputShape) === 1)
         n = `
           result.y = 0.;
@@ -225,7 +225,7 @@ function A(t) {
  * =============================================================================
  */
 function G(t) {
-  const { inputs: e, backend: o } = t, { real: u, imag: d } = e, a = o.makeTensorInfo(u.shape, "complex64"), n = o.texData.get(a.dataId), l = A({ inputs: { x: u }, backend: o }), s = A({ inputs: { x: d }, backend: o });
+  const { inputs: e, backend: o } = t, { real: u, imag: p } = e, a = o.makeTensorInfo(u.shape, "complex64"), n = o.texData.get(a.dataId), l = A({ inputs: { x: u }, backend: o }), s = A({ inputs: { x: p }, backend: o });
   return n.complexTensorInfos = { real: l, imag: s }, a;
 }
 /**
@@ -260,7 +260,7 @@ class V {
     `;
   }
 }
-const K = "if (isnan(x)) return x;";
+const H = "if (isnan(x)) return x;";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -310,8 +310,8 @@ class L {
  * =============================================================================
  */
 function Y({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: o, dtype: u }) {
-  return ({ inputs: d, backend: a }) => {
-    const { x: n } = d, l = a, s = u || n.dtype;
+  return ({ inputs: p, backend: a }) => {
+    const { x: n } = p, l = a, s = u || n.dtype;
     if (l.shouldExecuteOnCPU([n]) && o != null) {
       const c = l.texData.get(n.dataId), x = o(c.values, s);
       return l.makeTensorInfo(n.shape, s, x);
@@ -321,7 +321,7 @@ function Y({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: o, dtype: u }) {
     return i ? r = new L(n.shape, e) : r = new V(n.shape, t), l.runWebGLProgram(r, [n], s);
   };
 }
-function Q({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, supportsComplex: u = !1, cpuKernelImpl: d, dtype: a }) {
+function j({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, supportsComplex: u = !1, cpuKernelImpl: p, dtype: a }) {
   return ({ inputs: n, backend: l }) => {
     const { a: s, b: i } = n, r = l;
     if (u && s.dtype === "complex64") {
@@ -329,29 +329,29 @@ function Q({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, support
         [h.complexTensorInfos.real, f.complexTensorInfos.real],
         [h.complexTensorInfos.imag, f.complexTensorInfos.imag]
       ].map((S) => {
-        const [p, m] = S, $ = {
-          dataId: p.dataId,
-          dtype: p.dtype,
+        const [d, m] = S, $ = {
+          dataId: d.dataId,
+          dtype: d.dtype,
           shape: s.shape
         }, T = {
           dataId: m.dataId,
           dtype: m.dtype,
           shape: i.shape
         }, U = new C(t, s.shape, i.shape);
-        return r.runWebGLProgram(U, [$, T], v(p.dtype, m.dtype));
+        return r.runWebGLProgram(U, [$, T], v(d.dtype, m.dtype));
       }), I = G({ inputs: { real: O, imag: y }, backend: r });
       return r.disposeIntermediateTensorInfo(O), r.disposeIntermediateTensorInfo(y), I;
     }
     const c = a || v(s.dtype, i.dtype);
-    if ((s.dtype === "string" || i.dtype === "string" || r.shouldExecuteOnCPU([s, i])) && d != null) {
+    if ((s.dtype === "string" || i.dtype === "string" || r.shouldExecuteOnCPU([s, i])) && p != null) {
       const h = r.texData.get(s.dataId).values, f = r.texData.get(i.dataId).values, O = s.dtype === "string" ? (
         // tslint:disable-next-line: no-any
         B(h)
       ) : h, y = s.dtype === "string" ? (
         // tslint:disable-next-line: no-any
         B(f)
-      ) : f, [I, S] = d(s.shape, i.shape, O, y, c), p = r.makeTensorInfo(S, c), m = r.texData.get(p.dataId);
-      return m.values = I, p;
+      ) : f, [I, S] = p(s.shape, i.shape, O, y, c), d = r.makeTensorInfo(S, c), m = r.texData.get(d.dataId);
+      return m.values = I, d;
     }
     const x = P().getBool("WEBGL_PACK_BINARY_OPERATIONS") && e != null;
     let b;
@@ -359,10 +359,10 @@ function Q({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: o = !1, support
   };
 }
 export {
-  K as C,
-  H as a,
+  H as C,
+  K as a,
   E as b,
-  Q as c,
+  j as c,
   B as f,
   k as g,
   Y as u

package/dist/layers/BaseLayer.d.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 import { GPTConfig } from '../config';
 import { default as MemoryProfiler } from '../utilities/profile';
 import { default as RoPECache } from './RoPECache';
+import { Tensor, Variable } from '@tensorflow/tfjs-core';
 export interface LayerConfig {
-    checkpointAttention?: boolean;
-    checkpointMLP?: boolean;
+    checkpointing?: boolean;
     profiler?: MemoryProfiler;
     ropeCache?: RoPECache;
 }
@@ -11,10 +11,34 @@ export interface GPTLayerConfig {
     gpt: GPTConfig;
     layerConfig: LayerConfig;
 }
-export default abstract class BaseLayer {
+export interface ForwardAttributes {
+    training: boolean;
+}
+export default abstract class BaseLayer<ATTR extends ForwardAttributes = ForwardAttributes> {
+    readonly parent?: BaseLayer;
     readonly config: GPTLayerConfig;
-    constructor(config: GPTLayerConfig);
+    private _variables;
+    private _trainable;
+    readonly children: BaseLayer[];
+    constructor(config: GPTLayerConfig, parent?: BaseLayer);
     getProfiler(): MemoryProfiler | undefined;
     startMemory(): void;
     endMemory(label: string): void;
+    addVariable(name: string, variable?: Variable): void;
+    get variables(): Variable[];
+    get trainableVariables(): Variable[];
+    get trainable(): boolean;
+    set trainable(value: boolean);
+    getVariable(name: string): Variable;
+    hasVariable(name: string): boolean;
+    setVariable(name: string, variable: Variable): void;
+    saveWeights(map: Map<string, Tensor[]>): void;
+    loadWeights(weights: Map<string, Tensor[]>): void;
+    dispose(): void;
+    protected build(): void;
+    protected dropout(x: Tensor): Tensor;
+    abstract forward(attrs: ATTR, ...x: Tensor[]): Tensor | Tensor[];
+    call(attrs: ATTR, ...x: Tensor[]): Tensor | Tensor[];
+    callCheckpoint(attrs: ATTR, ...x: Tensor[]): Tensor;
+    private checkpointingFn;
 }

package/dist/layers/BaseLayer.js CHANGED Viewed

@@ -1,18 +1,5 @@
-class o {
-  config;
-  constructor(r) {
-    this.config = r;
-  }
-  getProfiler() {
-    return this.config.layerConfig.profiler;
-  }
-  startMemory() {
-    this.config.layerConfig.profiler?.startMemory();
-  }
-  endMemory(r) {
-    this.config.layerConfig.profiler?.endMemory(r);
-  }
-}
+import "../index-iNhkcAEQ.js";
+import { B as a } from "../BaseLayer-BhrMN8JO.js";
 export {
-  o as default
+  a as default
 };

package/dist/layers/CausalSelfAttention.d.ts CHANGED Viewed

@@ -1,38 +1,36 @@
-import { default as BaseLayer, GPTLayerConfig } from './BaseLayer';
-import { Tensor, Variable } from '@tensorflow/tfjs-core';
+import { default as BaseLayer, ForwardAttributes, GPTLayerConfig } from './BaseLayer';
+import { Tensor } from '@tensorflow/tfjs-core';
 export type KVCache = {
-    k: Tensor;
-    v: Tensor;
+    k?: Tensor;
+    v?: Tensor;
     length: number;
     cumulativeLength: number;
 };
-export default class CausalSelfAttention extends BaseLayer {
-    private cAttn;
-    private cProj;
-    private bias;
-    private maskInf;
+export interface AttentionScores {
+    head: number;
+    block: number;
+    attentionOut?: Tensor;
+}
+interface AttentionForwardAttributes extends ForwardAttributes {
+    attentionScores?: AttentionScores;
+    pastKV?: KVCache;
+    seed?: number;
+}
+export default class CausalSelfAttention extends BaseLayer<AttentionForwardAttributes> {
     private divisor;
     private index;
-    private _trainable;
     private units;
     private projUnits;
-    constructor(index: number, config: GPTLayerConfig);
-    private build;
-    get variables(): Variable[];
-    get trainable(): boolean;
-    set trainable(value: boolean);
-    saveWeights(map: Map<string, Tensor[]>): void;
-    loadWeights(weights: Map<string, Tensor[]>): void;
+    private ATTN;
+    private PROJ;
+    constructor(index: number, config: GPTLayerConfig, parent?: BaseLayer);
+    protected build(): void;
     private getAttentionScores;
     private getAttentionScoresWithPast;
     private getQKV;
     private getOutputProjection;
     private updateCache;
-    private forward;
-    call(x: Tensor, training?: boolean, includeAttention?: boolean, pastKV?: KVCache): {
-        output: Tensor;
-        attention?: Tensor;
-        presentKV?: KVCache;
-    };
-    dispose(): void;
+    forward(attr: AttentionForwardAttributes, x: Tensor): Tensor;
+    protected dropout(x: Tensor): Tensor;
 }
+export {};