npm - @genai-fi/nanogpt - Versions diffs - 0.2.9 → 0.2.11 - Mend

@genai-fi/nanogpt 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/dist/Generator.d.ts +2 -0
package/dist/Generator.js +37 -32
package/dist/NanoGPTModel.d.ts +4 -1
package/dist/NanoGPTModel.js +33 -25
package/dist/TeachableLLM.d.ts +4 -0
package/dist/TeachableLLM.js +32 -15
package/dist/{complex-Cd8sqiBC.js → complex-CJ-qCcLB.js} +6 -6
package/dist/{index-Dsg28SG6.js → index-YPKosni4.js} +59 -51
package/dist/layers/BaseLayer.d.ts +8 -0
package/dist/layers/BaseLayer.js +18 -0
package/dist/layers/CausalSelfAttention.d.ts +4 -1
package/dist/layers/CausalSelfAttention.js +47 -55
package/dist/layers/MLP.d.ts +2 -1
package/dist/layers/MLP.js +16 -14
package/dist/layers/RMSNorm.d.ts +2 -1
package/dist/layers/RMSNorm.js +13 -11
package/dist/layers/RoPECache.d.ts +4 -2
package/dist/layers/RoPECache.js +13 -7
package/dist/layers/TiedEmbedding.js +16 -15
package/dist/layers/TransformerBlock.d.ts +4 -1
package/dist/layers/TransformerBlock.js +9 -5
package/dist/main.js +18 -16
package/dist/{mat_mul-BAYDrXvE.js → mat_mul-Bu7bhLms.js} +5 -5
package/dist/ops/attentionMask.js +31 -25
package/dist/ops/gatherSub.js +2 -2
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/qkv.d.ts +7 -0
package/dist/ops/qkv.js +127 -0
package/dist/ops/rope.d.ts +8 -0
package/dist/ops/rope.js +153 -0
package/dist/ops/scatterSub.js +14 -14
package/dist/reshape-DmnmKT6r.js +25 -0
package/dist/{stack-1o648CP_.js → stack-BtKpB0Ry.js} +5 -5
package/dist/sum-D7fu15XL.js +27 -0
package/dist/training/AdamExt.js +1 -1
package/dist/training/Trainer.js +30 -29
package/dist/training/sparseCrossEntropy.js +34 -33
package/dist/utilities/profile.d.ts +10 -0
package/dist/utilities/profile.js +29 -0
package/package.json +1 -1
package/dist/sum-NWazHI7f.js +0 -49

package/dist/Generator.d.ts CHANGED Viewed

@@ -8,10 +8,12 @@ export interface IGenerateOptions extends GenerateOptions {
 export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
     private readonly model;
     private readonly tokeniser;
+    private active;
     constructor(model: NanoGPT, tokeniser: ITokeniser);
     private tokenisePrompt;
     private generateNoCache;
     private processResponse;
     private generateCache;
     generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
+    stop(): void;
 }

package/dist/Generator.js CHANGED Viewed

@@ -1,65 +1,70 @@
 import { E as u } from "./index-Dwqa6Zy2.js";
-class p extends u {
+class f extends u {
   constructor(s, e) {
     super(), this.model = s, this.tokeniser = e;
   }
+  active = !1;
   async tokenisePrompt(s) {
     const e = s ? await this.tokeniser.tokenise([s], !0) : [[this.tokeniser.eosToken]];
     return this.model.tf.tensor2d(e, [1, e[0].length], "int32");
   }
   async generateNoCache(s, e) {
-    let t = await this.tokenisePrompt(s), n = s || "";
-    const a = e?.maxLength ?? 1e3;
-    for (let i = 0; i < a; i++) {
+    let t = await this.tokenisePrompt(s), i = s || "";
+    const o = e?.maxLength ?? 1e3;
+    for (let a = 0; a < o && this.active; a++) {
       const {
-        output: o,
+        output: n,
         attention: c,
-        probabilities: h
-      } = this.model.generate(t, void 0, e), l = t;
-      t = this.model.tf.concat([t, o], 1), l.dispose();
-      const r = await this.processResponse(o, c, h);
-      if (o.dispose(), r === null)
+        probabilities: l
+      } = this.model.generate(t, void 0, e), h = t;
+      t = this.model.tf.concat([t, n], 1), h.dispose();
+      const r = await this.processResponse(n, c, l);
+      if (n.dispose(), r === null)
         break;
-      n += r;
+      i += r;
     }
-    return t.dispose(), n;
+    return t.dispose(), i;
   }
   async processResponse(s, e, t) {
-    const n = (await s.array())[0][0];
-    if (n === this.tokeniser.eosToken)
+    const i = (await s.array())[0][0];
+    if (i === this.tokeniser.eosToken)
       return null;
-    const a = await this.tokeniser.decode([n]);
-    let i;
-    e && (i = await e.array(), e.dispose());
-    let o;
-    return t && (o = await t.array(), t.dispose()), this.emit("tokens", [n], a, i, o), a;
+    const o = await this.tokeniser.decode([i]);
+    let a;
+    e && (a = await e.array(), e.dispose());
+    let n;
+    return t && (n = await t.array(), t.dispose()), this.emit("tokens", [i], o, a, n), o;
   }
   async generateCache(s, e) {
-    let t = await this.tokenisePrompt(s), n = s || "";
-    const a = new Array(this.model.config.nLayer).fill(void 0), i = e?.maxLength ?? 1e3;
-    for (let o = 0; o < i; o++) {
+    let t = await this.tokenisePrompt(s), i = s || "";
+    const o = new Array(this.model.config.nLayer).fill(void 0), a = e?.maxLength ?? 1e3;
+    for (let n = 0; n < a && this.active; n++) {
       const {
         output: c,
-        attention: h,
-        probabilities: l
-      } = this.model.generate(t, a, {
+        attention: l,
+        probabilities: h
+      } = this.model.generate(t, o, {
         ...e,
         usePadding: !1
       });
       t.dispose(), t = c;
-      const r = await this.processResponse(c, h, l);
+      const r = await this.processResponse(c, l, h);
       if (r === null)
         break;
-      n += r;
+      i += r;
     }
-    return t.dispose(), n;
+    return t.dispose(), i;
   }
   async generate(s, e) {
-    this.emit("start");
-    const t = this.model.config.useRope && !e?.noCache ? this.generateCache(s, e) : this.generateNoCache(s, e);
-    return this.emit("stop"), t;
+    const t = s && s.length > this.model.config.blockSize ? s.slice(-this.model.config.blockSize) : s;
+    this.active = !0, this.emit("start");
+    const o = await (this.model.config.useRope && !e?.noCache ? this.generateCache(t, e) : this.generateNoCache(t, e));
+    return this.active = !1, this.emit("stop"), o;
+  }
+  stop() {
+    this.active = !1;
   }
 }
 export {
-  p as default
+  f as default
 };

package/dist/NanoGPTModel.d.ts CHANGED Viewed

@@ -1,6 +1,8 @@
 import { default as TF } from '@tensorflow/tfjs';
 import { GPTConfig } from './config';
 import { KVCache } from './layers/CausalSelfAttention';
+import { default as MemoryProfiler } from './utilities/profile';
+import { default as BaseLayer } from './layers/BaseLayer';
 export interface TrainingLogEntry {
     loss: number;
     valLoss?: number;
@@ -16,7 +18,7 @@ export interface GenerateOptions {
     includeAttention?: boolean;
     includeProbabilities?: boolean;
 }
-export default class NanoGPT {
+export default class NanoGPT extends BaseLayer {
     readonly config: GPTConfig;
     private wte;
     private wpe?;
@@ -34,6 +36,7 @@ export default class NanoGPT {
     setSkipMask(mask: boolean[]): void;
     setTrainableMask(mask: boolean[]): void;
     set trainable(value: boolean);
+    setProfiler(value: MemoryProfiler | undefined): void;
     private validateInput;
     private calculateLoss;
     private computeAttentionRollout;

package/dist/NanoGPTModel.js CHANGED Viewed

@@ -1,11 +1,12 @@
-import { defaultConfig as $ } from "./config.js";
+import { defaultConfig as v } from "./config.js";
 import z from "./layers/TransformerBlock.js";
 import S from "./layers/TiedEmbedding.js";
-import I from "./layers/RoPECache.js";
-import _ from "./layers/RMSNorm.js";
-import { estimateParameterCount as W } from "./utilities/parameters.js";
-import { createSoftmaxCrossEntropyWithGrad as C } from "./training/sparseCrossEntropy.js";
-class K {
+import _ from "./layers/RoPECache.js";
+import I from "./layers/RMSNorm.js";
+import { estimateParameterCount as F } from "./utilities/parameters.js";
+import { createSoftmaxCrossEntropyWithGrad as L } from "./training/sparseCrossEntropy.js";
+import P from "./layers/BaseLayer.js";
+class A extends P {
   config;
   wte;
   // Token embeddings
@@ -21,7 +22,7 @@ class K {
   log = [];
   // Training log
   constructor(t, e = {}) {
-    this.tf = t, this.config = { ...$, ...e }, this.wte = new S(t, {
+    super(), this.tf = t, this.config = { ...v, ...e }, this.wte = new S(t, {
       vocabSize: this.config.vocabSize,
       embedDim: this.config.nEmbed,
       name: "token_embedding"
@@ -30,10 +31,10 @@ class K {
       outputDim: this.config.nEmbed,
       name: "positional_embedding",
       embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
-    }) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
+    }) : this.ropeCache = new _(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
     for (let o = 0; o < this.config.nLayer; o++)
       this.blocks.push(new z(this.tf, o, this.config, this.ropeCache));
-    this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
+    this.lnF = new I(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
   }
   get variables() {
     return [
@@ -86,6 +87,12 @@ class K {
       e.trainable = t;
     this.lnF.trainable = t;
   }
+  setProfiler(t) {
+    this._profiler = t;
+    for (const e of this.blocks)
+      e.setProfiler(t);
+    this.lnF.setProfiler(t);
+  }
   validateInput(t) {
     if (t.shape.length !== 2)
       throw new Error(`Invalid input shape: expected [batch_size, sequence_length], got ${t.shape}`);
@@ -96,7 +103,7 @@ class K {
   }
   calculateLoss(t, e) {
     try {
-      return C()(t, e).mean();
+      return L()(t, e).mean();
     } catch (o) {
       throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
     }
@@ -139,24 +146,25 @@ class K {
   }
   forward(t, e, o = !1, i = !1, s) {
     return this.validateInput(t), this.tf.tidy(() => {
+      this.startMemory();
       const l = s?.[0]?.length ?? 0;
       let r = this.inputPhase(t, l, o);
       const n = [];
       if (s && s.length !== this.blocks.length)
         throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
       for (let a = 0; a < this.blocks.length; a++) {
-        const d = this.blocks[a], {
-          output: g,
-          attention: u,
+        const d = r, g = this.blocks[a], {
+          output: m,
+          attention: b,
           cache: f
-        } = d.call(r, o, i, s ? s[a] : void 0);
-        r = g, i && u && n.push(u), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
+        } = g.call(r, o, i, s ? s[a] : void 0);
+        r = m, d.dispose(), i && b && n.push(b), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
       }
       let h;
       i && n.length > 0 && (h = this.computeAttentionRollout(n)), r = this.lnF.apply(r);
       const c = this.wte.project(r);
       let p;
-      return e && (p = this.calculateLoss(c, e)), { logits: c, loss: p, attention: i ? h : void 0 };
+      return e && (p = this.calculateLoss(c, e)), this.endMemory("Forward"), { logits: c, loss: p, attention: i ? h : void 0 };
     });
   }
   generate(t, e, o) {
@@ -168,24 +176,24 @@ class K {
       ), p = l ? this.config.blockSize - c.shape[1] : 0, a = p > 0 ? this.tf.pad(c, [
         [0, 0],
         [0, p]
-      ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), u = d.shape[1] - 1 - p, f = d.slice([0, u, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = f.div(i);
-      let m;
+      ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), m = d.shape[1] - 1 - p, b = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), f = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, k = b.div(i);
+      let u;
       if (s) {
-        const { values: v, indices: y } = this.tf.topk(b, s), E = this.tf.multinomial(v.squeeze([1]), 1);
-        m = this.tf.gather(y.squeeze([1]), E, 1);
+        const { values: y, indices: E } = this.tf.topk(k, s), $ = this.tf.multinomial(y.squeeze([1]), 1);
+        u = this.tf.gather(E.squeeze([1]), $, 1);
       } else
-        m = this.tf.multinomial(b.squeeze([1]), 1);
-      let k;
-      return o?.includeProbabilities && (k = this.tf.softmax(b.squeeze([1]))), m = m.reshape([1, 1]), { output: m, attention: w?.squeeze([1]), probabilities: k };
+        u = this.tf.multinomial(k.squeeze([1]), 1);
+      let w;
+      return o?.includeProbabilities && (w = this.tf.softmax(k.squeeze([1]))), u = u.reshape([1, 1]), { output: u, attention: f?.squeeze([1]), probabilities: w };
     });
   }
   getNumParams() {
-    return W(this.config);
+    return F(this.config);
   }
   dispose() {
     this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
   }
 }
 export {
-  K as default
+  A as default
 };

package/dist/TeachableLLM.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import { SaveOptions } from './utilities/save';
 import { default as Generator, IGenerateOptions } from './Generator';
 import { default as Trainer, ITrainerOptions } from './Trainer';
 import { default as EE } from 'eventemitter3';
+import { default as MemoryProfiler } from './utilities/profile';
 type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
 export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
     private _config?;
@@ -23,6 +24,9 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
     saveModel(options?: SaveOptions): Promise<Blob>;
     static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
     static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
+    getProfiler(): MemoryProfiler | undefined;
+    get enableProfiler(): boolean;
+    set enableProfiler(value: boolean);
     getNumParams(): number;
     trainer(): Trainer;
     train(text: string[], options?: ITrainerOptions): Promise<void>;

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,11 +1,11 @@
-import { defaultConfig as d } from "./config.js";
+import { defaultConfig as h } from "./config.js";
 import m from "./NanoGPTModel.js";
-import { saveModel as u } from "./utilities/save.js";
-import { loadModel as l } from "./utilities/load.js";
-import f from "./Generator.js";
+import { saveModel as d } from "./utilities/save.js";
+import { loadModel as f } from "./utilities/load.js";
+import u from "./Generator.js";
 import _ from "./Trainer.js";
 import { E as c } from "./index-Dwqa6Zy2.js";
-import { dummyPassAsync as h } from "./utilities/dummy.js";
+import { dummyPassAsync as l } from "./utilities/dummy.js";
 import g from "./tokeniser/CharTokeniser.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
@@ -13,6 +13,9 @@ import "./jszip.min-CjP2V1VV.js";
 import "./ops/scatterSub.js";
 import "./ops/gatherSub.js";
 import "./ops/attentionMask.js";
+import "./ops/qkv.js";
+import "./ops/rope.js";
+import p from "./utilities/profile.js";
 class a extends c {
   _config;
   _model;
@@ -49,23 +52,23 @@ class a extends c {
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    return u(this._model, this._tokeniser, t);
+    return d(this._model, this._tokeniser, t);
   }
   static loadModel(t, r) {
     const e = new a(t);
-    return l(t, r).then(({ model: s, tokeniser: o }) => {
-      e._model = s, e._tokeniser = o, e._config = s.config, e.setStatus("warmup"), h(s).then(() => {
+    return f(t, r).then(({ model: o, tokeniser: s }) => {
+      e._model = o, e._tokeniser = s, e._config = o.config, e.setStatus("warmup"), l(o).then(() => {
         e.setStatus("ready");
       }).catch((i) => {
         e.setStatus("error"), e.emit("error", i);
       });
-    }).catch((s) => {
-      e.setStatus("error"), e.emit("error", s);
+    }).catch((o) => {
+      e.setStatus("error"), e.emit("error", o);
     }), e;
   }
   static create(t, r = {}) {
-    const e = { ...d, ...r }, s = new g(e.vocabSize), o = new m(t, e), i = new a(t, s, o);
-    return i.setStatus("warmup"), h(o).then(() => {
+    const e = { ...h, ...r }, o = new g(e.vocabSize), s = new m(t, e), i = new a(t, o, s);
+    return i.setStatus("warmup"), l(s).then(() => {
       i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
         n === "trained" && i.setStatus("ready");
       }));
@@ -73,6 +76,20 @@ class a extends c {
       i.setStatus("error"), i.emit("error", n);
     }), i;
   }
+  getProfiler() {
+    return this._model?.getProfiler();
+  }
+  get enableProfiler() {
+    return !!this._model?.getProfiler();
+  }
+  set enableProfiler(t) {
+    if (t) {
+      if (!this._model)
+        throw new Error("Model is not initialized.");
+      this._model.getProfiler() || this._model.setProfiler(new p());
+    } else
+      this._model && this._model.setProfiler(void 0);
+  }
   getNumParams() {
     if (!this._model)
       throw new Error("Model is not initialized.");
@@ -84,8 +101,8 @@ class a extends c {
     const t = new _(this._model, this._tokeniser);
     return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
       const e = this.listeners("trainStep");
-      for (const s of e)
-        await s(r);
+      for (const o of e)
+        await o(r);
     }), t;
   }
   train(t, r) {
@@ -94,7 +111,7 @@ class a extends c {
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    const t = new f(this._model, this._tokeniser);
+    const t = new u(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {

package/dist/{complex-Cd8sqiBC.js → complex-CJ-qCcLB.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as t, c as s, f as n, E as m, C as r } from "./index-Dsg28SG6.js";
+import { o as c, d as s, g as n, E as m, C as r } from "./index-YPKosni4.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as t, c as s, f as n, E as m, C as r } from "./index-Dsg28SG6.js";
  * limitations under the License.
  * =============================================================================
  */
-function l(o, c) {
-  const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
+function l(o, p) {
+  const a = s(o, "real", "complex"), e = s(p, "imag", "complex");
   n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
-  const p = { real: a, imag: e };
-  return m.runKernel(r, p);
+  const t = { real: a, imag: e };
+  return m.runKernel(r, t);
 }
-const i = /* @__PURE__ */ t({ complex_: l });
+const i = /* @__PURE__ */ c({ complex_: l });
 export {
   i as c
 };

package/dist/{index-Dsg28SG6.js → index-YPKosni4.js} RENAMED Viewed

@@ -383,7 +383,7 @@ function _t(n, t) {
     return e.set(n, s), e.get(n);
   }
 }
-const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", We = "RealDiv", Bs = "Elu", vs = "Exp", je = "Fill", Ke = "FloorDiv", Ms = "GatherNd", re = "Identity", Fs = "Imag", $s = "LeakyRelu", Rs = "Log", xs = "Max", Ve = "Maximum", qe = "Multiply", Ns = "Neg", Ds = "Pack", He = "Pow", Cs = "Prelu", _s = "Range", Ps = "Real", Os = "Relu", Ls = "Reshape", Us = "Relu6", Gs = "ScatterNd", zs = "Sigmoid", Je = "Sqrt", Ws = "Sum", js = "Softmax", Xe = "Sub", Ks = "Transpose", Ye = "ZerosLike", Vs = "Step", qs = "_FusedMatMul";
+const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", Bs = "Concat", We = "RealDiv", vs = "Elu", Ms = "Exp", je = "Fill", Ke = "FloorDiv", Fs = "GatherV2", $s = "GatherNd", re = "Identity", Rs = "Imag", xs = "LeakyRelu", Ns = "Log", Ds = "Max", Ve = "Maximum", qe = "Multiply", Cs = "Neg", _s = "Pack", He = "Pow", Ps = "Prelu", Os = "Range", Ls = "Real", Us = "Relu", Gs = "Reshape", zs = "Relu6", Ws = "ScatterNd", js = "Sigmoid", Je = "Sqrt", Ks = "Sum", Vs = "SplitV", qs = "Softmax", Xe = "Sub", Hs = "Transpose", Ye = "ZerosLike", Js = "Step", Xs = "_FusedMatMul";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -438,11 +438,11 @@ function Wt(n) {
   }
   return e;
 }
-function Hs(n) {
+function Ys(n) {
   const { kernelName: t, backendName: e } = n, s = ie(t, e);
   ht.has(s) && O(`The kernel '${t}' for backend '${e}' is already registered`), ht.set(s, n);
 }
-function Js(n) {
+function Qs(n) {
   const { kernelName: t } = n;
   It.has(t) && S().getBool("DEBUG") && O(`Overriding the gradient for '${t}'`), It.set(t, n);
 }
@@ -1902,7 +1902,7 @@ function I(n, t, e, s = "numeric") {
   const a = r !== "string" ? ae(n, r) : at(n, [], !0);
   return g.makeTensor(a, i, r);
 }
-function Xs(n, t, e, s = "numeric") {
+function Zs(n, t, e, s = "numeric") {
   if (!Array.isArray(n))
     throw new Error(`Argument ${t} passed to ${e} must be a \`Tensor[]\` or \`TensorLike[]\``);
   return n.map((i, o) => I(i, `${t}[${o}]`, e, s));
@@ -2065,9 +2065,12 @@ function Sn(n, t) {
  * limitations under the License.
  * =============================================================================
  */
-function Ys() {
+function tr() {
   return g;
 }
+function er() {
+  return g.memory();
+}
 function E(n, t) {
   return g.tidy(n, t);
 }
@@ -2890,7 +2893,7 @@ function Yn(n, t, e) {
  * limitations under the License.
  * =============================================================================
  */
-function Qs(n, t) {
+function nr(n, t) {
   const e = [];
   for (let s = 0; s < t.length; s++) {
     const r = n[n.length - s - 1], i = t.length - s - 1, o = t[i];
@@ -3058,7 +3061,7 @@ function ss(n, t) {
     a[u] != null && (c[l.name] = a[u]);
   }), s?.forEach((l) => c[l.name] = null), { value: o, grads: c };
 }
-function Zs(n) {
+function sr(n) {
   return g.customGrad(n);
 }
 /**
@@ -3838,54 +3841,59 @@ function bs() {
  */
 bs();
 export {
+  Qn as $,
   ds as A,
   Es as B,
   As as C,
-  zs as D,
+  w as D,
   g as E,
-  Bs as F,
-  Ms as G,
-  $s as H,
-  Fs as I,
-  Cs as J,
-  Ps as K,
-  Rs as L,
-  xs as M,
-  Ns as N,
-  Os as O,
-  Ds as P,
-  Us as Q,
-  _s as R,
-  Ws as S,
-  Vs as T,
-  Ks as U,
-  Qs as V,
-  Qn as W,
-  qs as _,
-  Z as a,
-  Js as b,
-  I as c,
-  V as d,
-  Ys as e,
-  Is as f,
-  Xs as g,
-  y as h,
-  Ls as i,
-  $t as j,
-  Dt as k,
-  Zt as l,
-  p as m,
-  G as n,
+  qs as F,
+  $s as G,
+  sr as H,
+  E as I,
+  C as J,
+  js as K,
+  Ns as L,
+  Ds as M,
+  vs as N,
+  Rs as O,
+  _s as P,
+  xs as Q,
+  Gs as R,
+  Ks as S,
+  Cs as T,
+  Ps as U,
+  Ls as V,
+  Us as W,
+  zs as X,
+  Js as Y,
+  Hs as Z,
+  nr as _,
+  p as a,
+  Xs as a0,
+  Z as b,
+  Qs as c,
+  I as d,
+  tr as e,
+  V as f,
+  Is as g,
+  $t as h,
+  Vs as i,
+  Os as j,
+  Zs as k,
+  y as l,
+  er as m,
+  Gn as n,
   F as o,
-  De as p,
-  Gs as q,
-  Hs as r,
+  Bs as p,
+  Fs as q,
+  Ys as r,
   K as s,
-  vs as t,
-  Ts as u,
-  w as v,
-  js as w,
-  Zs as x,
-  E as y,
-  C as z
+  Dt as t,
+  Zt as u,
+  G as v,
+  De as w,
+  Ws as x,
+  Ms as y,
+  Ts as z
 };

package/dist/layers/BaseLayer.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import { default as MemoryProfiler } from '../utilities/profile';
+export default abstract class BaseLayer {
+    protected _profiler?: MemoryProfiler;
+    getProfiler(): MemoryProfiler | undefined;
+    setProfiler(value: MemoryProfiler | undefined): void;
+    startMemory(): void;
+    endMemory(label: string): void;
+}

package/dist/layers/BaseLayer.js ADDED Viewed

@@ -0,0 +1,18 @@
+class t {
+  _profiler;
+  getProfiler() {
+    return this._profiler;
+  }
+  setProfiler(r) {
+    this._profiler = r;
+  }
+  startMemory() {
+    this._profiler?.startMemory();
+  }
+  endMemory(r) {
+    this._profiler?.endMemory(r);
+  }
+}
+export {
+  t as default
+};

package/dist/layers/CausalSelfAttention.d.ts CHANGED Viewed

@@ -1,13 +1,14 @@
 import { default as TF } from '@tensorflow/tfjs';
 import { GPTConfig } from '../config';
 import { default as RoPECache } from './RoPECache';
+import { default as BaseLayer } from './BaseLayer';
 export type KVCache = {
     k: TF.Tensor;
     v: TF.Tensor;
     length: number;
     cumulativeLength: number;
 };
-export default class CausalSelfAttention {
+export default class CausalSelfAttention extends BaseLayer {
     private readonly ropeCache?;
     private config;
     private cAttn;
@@ -20,7 +21,9 @@ export default class CausalSelfAttention {
     private divisor;
     private index;
     private _trainable;
+    private units;
     constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache | undefined);
+    private build;
     get variables(): TF.Variable[];
     get trainable(): boolean;
     set trainable(value: boolean);