@genai-fi/nanogpt 0.1.9 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +7 -8
- package/dist/Generator.js +55 -52
- package/dist/NanoGPTModel.d.ts +5 -3
- package/dist/NanoGPTModel.js +92 -55
- package/dist/TeachableLLM.js +17 -17
- package/dist/config.d.ts +1 -0
- package/dist/config.js +5 -3
- package/dist/layers/CausalSelfAttention.d.ts +12 -2
- package/dist/layers/CausalSelfAttention.js +73 -40
- package/dist/layers/RMSNorm.d.ts +13 -0
- package/dist/layers/RMSNorm.js +32 -0
- package/dist/layers/RoPECache.d.ts +16 -0
- package/dist/layers/RoPECache.js +44 -0
- package/dist/layers/TransformerBlock.d.ts +5 -2
- package/dist/layers/TransformerBlock.js +14 -10
- package/dist/main.d.ts +1 -0
- package/dist/main.js +4 -2
- package/dist/utilities/generate.js +14 -14
- package/package.json +1 -1
package/dist/Generator.d.ts
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
|
-
import { default as NanoGPT } from './NanoGPTModel';
|
|
1
|
+
import { default as NanoGPT, GenerateOptions } from './NanoGPTModel';
|
|
2
2
|
import { ITokeniser } from './tokeniser/type';
|
|
3
3
|
import { default as EE } from 'eventemitter3';
|
|
4
|
-
export interface IGenerateOptions {
|
|
4
|
+
export interface IGenerateOptions extends GenerateOptions {
|
|
5
5
|
maxLength?: number;
|
|
6
|
-
|
|
7
|
-
topK?: number;
|
|
8
|
-
usePadding?: boolean;
|
|
9
|
-
includeAttention?: boolean;
|
|
10
|
-
includeProbabilities?: boolean;
|
|
6
|
+
noCache?: boolean;
|
|
11
7
|
}
|
|
12
8
|
export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
|
|
13
9
|
private readonly model;
|
|
14
10
|
private readonly tokeniser;
|
|
15
11
|
constructor(model: NanoGPT, tokeniser: ITokeniser);
|
|
16
|
-
private
|
|
12
|
+
private tokenisePrompt;
|
|
13
|
+
private generateNoCache;
|
|
14
|
+
private processResponse;
|
|
15
|
+
private generateCache;
|
|
17
16
|
generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
|
|
18
17
|
}
|
package/dist/Generator.js
CHANGED
|
@@ -1,62 +1,65 @@
|
|
|
1
|
-
import { E as
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
super(), this.model = a, this.tokeniser = t;
|
|
1
|
+
import { E as u } from "./index-SOhdqzHq.js";
|
|
2
|
+
class p extends u {
|
|
3
|
+
constructor(s, e) {
|
|
4
|
+
super(), this.model = s, this.tokeniser = e;
|
|
6
5
|
}
|
|
7
|
-
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
6
|
+
async tokenisePrompt(s) {
|
|
7
|
+
const e = s ? await this.tokeniser.tokenise([s], !0) : [[this.tokeniser.eosToken]];
|
|
8
|
+
return this.model.tf.tensor2d(e, [1, e[0].length], "int32");
|
|
9
|
+
}
|
|
10
|
+
async generateNoCache(s, e) {
|
|
11
|
+
let t = await this.tokenisePrompt(s), n = s || "";
|
|
12
|
+
const a = e?.maxLength ?? 1e3;
|
|
13
|
+
for (let i = 0; i < a; i++) {
|
|
11
14
|
const {
|
|
12
|
-
output:
|
|
13
|
-
attention:
|
|
14
|
-
probabilities:
|
|
15
|
-
} = this.model.generate(
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}), p = i;
|
|
22
|
-
if (i = this.model.tf.concat([i, u], 1), n && l) {
|
|
23
|
-
const o = n;
|
|
24
|
-
n = this.model.tf.concat([n, l], 0), o.dispose();
|
|
25
|
-
} else l && (n = l);
|
|
26
|
-
if (s && r) {
|
|
27
|
-
const o = s;
|
|
28
|
-
s = this.model.tf.concat([s, r], 0), o.dispose();
|
|
29
|
-
} else r && (s = r);
|
|
30
|
-
p.dispose(), u.dispose();
|
|
15
|
+
output: o,
|
|
16
|
+
attention: c,
|
|
17
|
+
probabilities: h
|
|
18
|
+
} = this.model.generate(t, void 0, e), l = t;
|
|
19
|
+
t = this.model.tf.concat([t, o], 1), l.dispose();
|
|
20
|
+
const r = await this.processResponse(o, c, h);
|
|
21
|
+
if (o.dispose(), r === null)
|
|
22
|
+
break;
|
|
23
|
+
n += r;
|
|
31
24
|
}
|
|
32
|
-
return
|
|
25
|
+
return t.dispose(), n;
|
|
33
26
|
}
|
|
34
|
-
async
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
27
|
+
async processResponse(s, e, t) {
|
|
28
|
+
const n = (await s.array())[0][0];
|
|
29
|
+
if (n === this.tokeniser.eosToken)
|
|
30
|
+
return null;
|
|
31
|
+
const a = await this.tokeniser.decode([n]);
|
|
32
|
+
let i;
|
|
33
|
+
e && (i = await e.array(), e.dispose());
|
|
34
|
+
let o;
|
|
35
|
+
return t && (o = await t.array(), t.dispose()), this.emit("tokens", [n], a, i, o), a;
|
|
36
|
+
}
|
|
37
|
+
async generateCache(s, e) {
|
|
38
|
+
let t = await this.tokenisePrompt(s), n = s || "";
|
|
39
|
+
const a = new Array(this.model.config.nLayer).fill(void 0), i = e?.maxLength ?? 1e3;
|
|
40
|
+
for (let o = 0; o < i; o++) {
|
|
41
|
+
const {
|
|
42
|
+
output: c,
|
|
43
|
+
attention: h,
|
|
44
|
+
probabilities: l
|
|
45
|
+
} = this.model.generate(t, a, {
|
|
46
|
+
...e,
|
|
47
|
+
usePadding: !1
|
|
48
|
+
});
|
|
49
|
+
t.dispose(), t = c;
|
|
50
|
+
const r = await this.processResponse(c, h, l);
|
|
51
|
+
if (r === null)
|
|
55
52
|
break;
|
|
53
|
+
n += r;
|
|
56
54
|
}
|
|
57
|
-
return
|
|
55
|
+
return t.dispose(), n;
|
|
56
|
+
}
|
|
57
|
+
async generate(s, e) {
|
|
58
|
+
this.emit("start");
|
|
59
|
+
const t = this.model.config.useRope && !e?.noCache ? this.generateCache(s, e) : this.generateNoCache(s, e);
|
|
60
|
+
return this.emit("stop"), t;
|
|
58
61
|
}
|
|
59
62
|
}
|
|
60
63
|
export {
|
|
61
|
-
|
|
64
|
+
p as default
|
|
62
65
|
};
|
package/dist/NanoGPTModel.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { default as TF } from '@tensorflow/tfjs';
|
|
2
2
|
import { GPTConfig } from './config';
|
|
3
|
+
import { KVCache } from './layers/CausalSelfAttention';
|
|
3
4
|
export interface TrainingLogEntry {
|
|
4
5
|
loss: number;
|
|
5
6
|
valLoss?: number;
|
|
@@ -18,10 +19,11 @@ export interface GenerateOptions {
|
|
|
18
19
|
export default class NanoGPT {
|
|
19
20
|
readonly config: GPTConfig;
|
|
20
21
|
private wte;
|
|
21
|
-
private wpe
|
|
22
|
+
private wpe?;
|
|
22
23
|
private drop;
|
|
23
24
|
private blocks;
|
|
24
25
|
private lnF;
|
|
26
|
+
private ropeCache?;
|
|
25
27
|
readonly tf: typeof TF;
|
|
26
28
|
log: TrainingLogEntry[];
|
|
27
29
|
constructor(tf: typeof TF, config?: Partial<GPTConfig>);
|
|
@@ -35,12 +37,12 @@ export default class NanoGPT {
|
|
|
35
37
|
private validateInput;
|
|
36
38
|
private calculateLoss;
|
|
37
39
|
private computeAttentionRollout;
|
|
38
|
-
forward(idx: TF.Tensor, targets?: TF.Tensor, training?: boolean, includeAttention?: boolean): {
|
|
40
|
+
forward(idx: TF.Tensor, targets?: TF.Tensor, training?: boolean, includeAttention?: boolean, cache?: (KVCache | undefined)[]): {
|
|
39
41
|
logits: TF.Tensor;
|
|
40
42
|
loss?: TF.Tensor;
|
|
41
43
|
attention?: TF.Tensor;
|
|
42
44
|
};
|
|
43
|
-
generate(idx: TF.Tensor, options?: GenerateOptions): {
|
|
45
|
+
generate(idx: TF.Tensor, cache?: (KVCache | undefined)[], options?: GenerateOptions): {
|
|
44
46
|
output: TF.Tensor;
|
|
45
47
|
attention?: TF.Tensor;
|
|
46
48
|
probabilities?: TF.Tensor;
|
package/dist/NanoGPTModel.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { defaultConfig as z } from "./config.js";
|
|
2
|
-
import
|
|
2
|
+
import $ from "./layers/TransformerBlock.js";
|
|
3
3
|
import S from "./layers/TiedEmbedding.js";
|
|
4
|
-
import
|
|
5
|
-
|
|
4
|
+
import I from "./layers/RoPECache.js";
|
|
5
|
+
import _ from "./layers/RMSNorm.js";
|
|
6
|
+
class M {
|
|
6
7
|
config;
|
|
7
8
|
wte;
|
|
8
9
|
// Token embeddings
|
|
@@ -13,6 +14,7 @@ class $ {
|
|
|
13
14
|
blocks;
|
|
14
15
|
lnF;
|
|
15
16
|
// Final layer norm
|
|
17
|
+
ropeCache;
|
|
16
18
|
tf;
|
|
17
19
|
log = [];
|
|
18
20
|
// Training log
|
|
@@ -21,19 +23,19 @@ class $ {
|
|
|
21
23
|
vocabSize: this.config.vocabSize,
|
|
22
24
|
embedDim: this.config.nEmbed,
|
|
23
25
|
name: "token_embedding"
|
|
24
|
-
}), this.wpe = this.tf.layers.embedding({
|
|
26
|
+
}), this.config.useRope === !1 ? this.wpe = this.tf.layers.embedding({
|
|
25
27
|
inputDim: this.config.blockSize,
|
|
26
28
|
outputDim: this.config.nEmbed,
|
|
27
29
|
name: "positional_embedding",
|
|
28
30
|
embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
|
|
29
|
-
}), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
|
|
30
|
-
for (let
|
|
31
|
-
this.blocks.push(new
|
|
32
|
-
this.lnF = new _(t, [this.config.nEmbed], 1e-
|
|
31
|
+
}) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
|
|
32
|
+
for (let o = 0; o < this.config.nLayer; o++)
|
|
33
|
+
this.blocks.push(new $(this.tf, o, this.config, this.ropeCache));
|
|
34
|
+
this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
|
|
33
35
|
}
|
|
34
36
|
get variables() {
|
|
35
37
|
return [
|
|
36
|
-
|
|
38
|
+
//...this.wpe.trainableWeights.map((v) => v.read() as TF.Variable),
|
|
37
39
|
...this.blocks.flatMap((t) => t.variables),
|
|
38
40
|
...this.lnF.trainableWeights.map((t) => t),
|
|
39
41
|
...this.wte.variables
|
|
@@ -41,21 +43,28 @@ class $ {
|
|
|
41
43
|
}
|
|
42
44
|
saveWeights() {
|
|
43
45
|
const t = /* @__PURE__ */ new Map();
|
|
44
|
-
t.set("token_embedding", this.wte.getWeights()), t.set("positional_embedding", this.wpe.getWeights());
|
|
46
|
+
t.set("token_embedding", this.wte.getWeights()), this.wpe && t.set("positional_embedding", this.wpe.getWeights());
|
|
45
47
|
for (let e = 0; e < this.blocks.length; e++)
|
|
46
48
|
this.blocks[e].saveWeights(t);
|
|
47
|
-
return t.set("
|
|
49
|
+
return t.set("final_rms_norm", this.lnF.getWeights()), t;
|
|
48
50
|
}
|
|
49
51
|
loadWeights(t) {
|
|
50
|
-
this.wte.setWeights(t.get("token_embedding") || []), this.wpe.setWeights(t.get("positional_embedding") || []);
|
|
52
|
+
this.wte.setWeights(t.get("token_embedding") || []), this.wpe && this.wpe.setWeights(t.get("positional_embedding") || []);
|
|
51
53
|
for (let e = 0; e < this.blocks.length; e++)
|
|
52
54
|
this.blocks[e].loadWeights(t);
|
|
53
|
-
this.lnF.setWeights(t.get("
|
|
55
|
+
this.lnF.setWeights(t.get("final_rms_norm") || []);
|
|
54
56
|
}
|
|
55
|
-
inputPhase(t, e = !1) {
|
|
57
|
+
inputPhase(t, e, o = !1) {
|
|
56
58
|
return this.tf.tidy(() => {
|
|
57
|
-
const
|
|
58
|
-
|
|
59
|
+
const i = this.wte.embed(t);
|
|
60
|
+
if (this.config.useRope === !1) {
|
|
61
|
+
const [, s] = t.shape, r = this.config.blockSize, l = this.tf.range(0, s, 1, "int32"), n = this.tf.mod(
|
|
62
|
+
this.tf.add(l, this.tf.scalar(e, "int32")),
|
|
63
|
+
this.tf.scalar(r, "int32")
|
|
64
|
+
), h = this.wpe.apply(n), c = i.add(h);
|
|
65
|
+
return this.drop.apply(c, { training: o });
|
|
66
|
+
} else
|
|
67
|
+
return this.drop.apply(i, { training: o });
|
|
59
68
|
});
|
|
60
69
|
}
|
|
61
70
|
setSkipMask(t) {
|
|
@@ -73,7 +82,7 @@ class $ {
|
|
|
73
82
|
set trainable(t) {
|
|
74
83
|
for (const e of this.blocks)
|
|
75
84
|
e.trainable = t;
|
|
76
|
-
this.
|
|
85
|
+
this.lnF.trainable = t;
|
|
77
86
|
}
|
|
78
87
|
validateInput(t) {
|
|
79
88
|
if (t.shape.length !== 2)
|
|
@@ -86,8 +95,8 @@ class $ {
|
|
|
86
95
|
calculateLoss(t, e) {
|
|
87
96
|
try {
|
|
88
97
|
return this.tf.losses.softmaxCrossEntropy(e, t, this.tf.Reduction.MEAN);
|
|
89
|
-
} catch (
|
|
90
|
-
throw console.error("Error computing loss:",
|
|
98
|
+
} catch (o) {
|
|
99
|
+
throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
|
|
91
100
|
}
|
|
92
101
|
}
|
|
93
102
|
// Attention rollout per Abnar & Zuidema (2020)
|
|
@@ -96,60 +105,88 @@ class $ {
|
|
|
96
105
|
return this.tf.tidy(() => {
|
|
97
106
|
if (t.length === 0)
|
|
98
107
|
throw new Error("No attentions for rollout");
|
|
99
|
-
const e
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
108
|
+
const [e, o, i] = t[0].shape;
|
|
109
|
+
for (const s of t) {
|
|
110
|
+
const [r, l, n] = s.shape;
|
|
111
|
+
if (r !== e || l !== o || n !== i)
|
|
112
|
+
throw new Error(
|
|
113
|
+
`Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${r},${l},${n}]`
|
|
114
|
+
);
|
|
115
|
+
}
|
|
116
|
+
if (o === i) {
|
|
117
|
+
const s = this.tf.eye(i, i).expandDims(0);
|
|
118
|
+
let r = s.tile([e, 1, 1]);
|
|
119
|
+
for (const l of t) {
|
|
120
|
+
const n = l.add(s);
|
|
121
|
+
r = n.div(n.sum(-1, !0)).matMul(r);
|
|
122
|
+
}
|
|
123
|
+
return r;
|
|
124
|
+
}
|
|
125
|
+
if (o === 1) {
|
|
126
|
+
let s = null;
|
|
127
|
+
const r = this.tf.tensor1d([i - 1], "int32"), l = this.tf.oneHot(r, i).reshape([1, 1, i]).tile([e, 1, 1]);
|
|
128
|
+
r.dispose();
|
|
129
|
+
for (const n of t) {
|
|
130
|
+
let h = n.add(l);
|
|
131
|
+
h = h.div(h.sum(-1, !0)), s == null ? s = h : (s = s.mul(h), s = s.div(s.sum(-1, !0)));
|
|
132
|
+
}
|
|
133
|
+
return s;
|
|
104
134
|
}
|
|
105
|
-
|
|
135
|
+
throw new Error(`Unsupported attention shapes for rollout: [B=${e}, Q=${o}, K=${i}]`);
|
|
106
136
|
});
|
|
107
137
|
}
|
|
108
|
-
forward(t, e,
|
|
138
|
+
forward(t, e, o = !1, i = !1, s) {
|
|
109
139
|
return this.validateInput(t), this.tf.tidy(() => {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
140
|
+
const r = s?.[0]?.length ?? 0;
|
|
141
|
+
let l = this.inputPhase(t, r, o);
|
|
142
|
+
const n = [];
|
|
143
|
+
if (s && s.length !== this.blocks.length)
|
|
144
|
+
throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
|
|
145
|
+
for (let a = 0; a < this.blocks.length; a++) {
|
|
146
|
+
const d = this.blocks[a], {
|
|
147
|
+
output: g,
|
|
148
|
+
attention: m,
|
|
149
|
+
cache: p
|
|
150
|
+
} = d.call(l, o, i, s ? s[a] : void 0);
|
|
151
|
+
l = g, i && m && n.push(m), s && p ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = p) : p && (p.k.dispose(), p.v.dispose());
|
|
115
152
|
}
|
|
116
|
-
let
|
|
117
|
-
i &&
|
|
118
|
-
const
|
|
119
|
-
let
|
|
120
|
-
return e && (
|
|
153
|
+
let h;
|
|
154
|
+
i && n.length > 0 && (h = this.computeAttentionRollout(n)), l = this.lnF.apply(l);
|
|
155
|
+
const c = this.wte.project(l);
|
|
156
|
+
let f;
|
|
157
|
+
return e && (f = this.calculateLoss(c, e)), { logits: c, loss: f, attention: i ? h : void 0 };
|
|
121
158
|
});
|
|
122
159
|
}
|
|
123
|
-
generate(t, e) {
|
|
124
|
-
const
|
|
160
|
+
generate(t, e, o) {
|
|
161
|
+
const i = o?.temperature ?? 1, s = o?.topK, r = o?.usePadding ?? !1, l = o?.includeAttention ?? !1;
|
|
125
162
|
return this.tf.tidy(() => {
|
|
126
|
-
const
|
|
127
|
-
[0,
|
|
128
|
-
[
|
|
129
|
-
),
|
|
163
|
+
const n = t, h = n.shape[1], c = h <= this.config.blockSize ? n : n.slice(
|
|
164
|
+
[0, h - this.config.blockSize],
|
|
165
|
+
[n.shape[0], this.config.blockSize]
|
|
166
|
+
), f = r ? this.config.blockSize - c.shape[1] : 0, a = f > 0 ? this.tf.pad(c, [
|
|
130
167
|
[0, 0],
|
|
131
|
-
[0,
|
|
132
|
-
]) :
|
|
133
|
-
let
|
|
134
|
-
if (
|
|
135
|
-
const { values:
|
|
136
|
-
|
|
168
|
+
[0, f]
|
|
169
|
+
]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, l, e), m = d.shape[1] - 1 - f, p = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, u = p.div(i);
|
|
170
|
+
let b;
|
|
171
|
+
if (s) {
|
|
172
|
+
const { values: E, indices: v } = this.tf.topk(u, s), y = this.tf.multinomial(E.squeeze([1]), 1);
|
|
173
|
+
b = this.tf.gather(v.squeeze([1]), y, 1);
|
|
137
174
|
} else
|
|
138
|
-
|
|
139
|
-
let
|
|
140
|
-
return
|
|
175
|
+
b = this.tf.multinomial(u.squeeze([1]), 1);
|
|
176
|
+
let k;
|
|
177
|
+
return o?.includeProbabilities && (k = this.tf.softmax(u.squeeze([1]))), b = b.reshape([1, 1]), { output: b, attention: w?.squeeze([1]), probabilities: k };
|
|
141
178
|
});
|
|
142
179
|
}
|
|
143
180
|
getNumParams() {
|
|
144
181
|
const t = this.config.vocabSize * this.config.nEmbed + this.config.blockSize * this.config.nEmbed, e = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // qkv + proj
|
|
145
|
-
2 * this.config.nEmbed),
|
|
182
|
+
2 * this.config.nEmbed), o = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // fc
|
|
146
183
|
this.config.nEmbed * 4 * this.config.nEmbed), i = this.config.nEmbed + this.config.vocabSize * this.config.nEmbed;
|
|
147
|
-
return t + e +
|
|
184
|
+
return t + e + o + i;
|
|
148
185
|
}
|
|
149
186
|
dispose() {
|
|
150
|
-
this.wte.dispose(), this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
|
|
187
|
+
this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
|
|
151
188
|
}
|
|
152
189
|
}
|
|
153
190
|
export {
|
|
154
|
-
|
|
191
|
+
M as default
|
|
155
192
|
};
|
package/dist/TeachableLLM.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import d from "./
|
|
2
|
-
import
|
|
1
|
+
import { defaultConfig as d } from "./config.js";
|
|
2
|
+
import u from "./NanoGPTModel.js";
|
|
3
3
|
import { saveModel as m } from "./utilities/save.js";
|
|
4
4
|
import { loadModel as l } from "./utilities/load.js";
|
|
5
5
|
import f from "./Generator.js";
|
|
@@ -47,25 +47,25 @@ class a extends c {
|
|
|
47
47
|
}
|
|
48
48
|
static loadModel(t, r) {
|
|
49
49
|
const e = new a(t);
|
|
50
|
-
return l(t, r).then(({ model:
|
|
51
|
-
e._model =
|
|
50
|
+
return l(t, r).then(({ model: s, tokeniser: o }) => {
|
|
51
|
+
e._model = s, e._tokeniser = o, e._config = s.config, e.setStatus("warmup"), h(s).then(() => {
|
|
52
52
|
e.setStatus("ready");
|
|
53
|
-
}).catch((
|
|
54
|
-
e.setStatus("error"), e.emit("error",
|
|
53
|
+
}).catch((i) => {
|
|
54
|
+
e.setStatus("error"), e.emit("error", i);
|
|
55
55
|
});
|
|
56
|
-
}).catch((
|
|
57
|
-
e.setStatus("error"), e.emit("error",
|
|
56
|
+
}).catch((s) => {
|
|
57
|
+
e.setStatus("error"), e.emit("error", s);
|
|
58
58
|
}), e;
|
|
59
59
|
}
|
|
60
60
|
static create(t, r = {}) {
|
|
61
|
-
const e = { ...
|
|
62
|
-
return
|
|
63
|
-
|
|
64
|
-
n === "trained" &&
|
|
65
|
-
});
|
|
61
|
+
const e = { ...d, ...r }, s = new g(e.vocabSize), o = new u(t, e), i = new a(t, s, o);
|
|
62
|
+
return i.setStatus("warmup"), h(o).then(() => {
|
|
63
|
+
i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
|
|
64
|
+
n === "trained" && i.setStatus("ready");
|
|
65
|
+
}));
|
|
66
66
|
}).catch((n) => {
|
|
67
|
-
|
|
68
|
-
}),
|
|
67
|
+
i.setStatus("error"), i.emit("error", n);
|
|
68
|
+
}), i;
|
|
69
69
|
}
|
|
70
70
|
getNumParams() {
|
|
71
71
|
if (!this._model)
|
|
@@ -78,8 +78,8 @@ class a extends c {
|
|
|
78
78
|
const t = new _(this._model, this._tokeniser);
|
|
79
79
|
return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
|
|
80
80
|
const e = this.listeners("trainStep");
|
|
81
|
-
for (const
|
|
82
|
-
await
|
|
81
|
+
for (const s of e)
|
|
82
|
+
await s(r);
|
|
83
83
|
}), t;
|
|
84
84
|
}
|
|
85
85
|
train(t, r) {
|
package/dist/config.d.ts
CHANGED
package/dist/config.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const
|
|
1
|
+
const e = {
|
|
2
2
|
vocabSize: 50304,
|
|
3
3
|
// GPT-2 vocab size
|
|
4
4
|
blockSize: 1024,
|
|
@@ -13,8 +13,10 @@ const a = {
|
|
|
13
13
|
// Dropout probability
|
|
14
14
|
biasInLinear: !1,
|
|
15
15
|
biasInLayerNorm: !1,
|
|
16
|
-
mlpFactor: 4
|
|
16
|
+
mlpFactor: 4,
|
|
17
|
+
useRope: !1
|
|
18
|
+
// Use Rotary Position Embeddings
|
|
17
19
|
};
|
|
18
20
|
export {
|
|
19
|
-
|
|
21
|
+
e as defaultConfig
|
|
20
22
|
};
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import { default as TF } from '@tensorflow/tfjs';
|
|
2
2
|
import { GPTConfig } from '../config';
|
|
3
|
+
import { default as RoPECache } from './RoPECache';
|
|
4
|
+
export type KVCache = {
|
|
5
|
+
k: TF.Tensor;
|
|
6
|
+
v: TF.Tensor;
|
|
7
|
+
length: number;
|
|
8
|
+
cumulativeLength: number;
|
|
9
|
+
};
|
|
3
10
|
export default class CausalSelfAttention {
|
|
11
|
+
private readonly ropeCache?;
|
|
4
12
|
private config;
|
|
5
13
|
private cAttn;
|
|
6
14
|
private cProj;
|
|
@@ -12,18 +20,20 @@ export default class CausalSelfAttention {
|
|
|
12
20
|
private divisor;
|
|
13
21
|
private index;
|
|
14
22
|
private _trainable;
|
|
15
|
-
constructor(tf: typeof TF, index: number, config: GPTConfig);
|
|
23
|
+
constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache | undefined);
|
|
16
24
|
get variables(): TF.Variable[];
|
|
17
25
|
get trainable(): boolean;
|
|
18
26
|
set trainable(value: boolean);
|
|
19
27
|
saveWeights(map: Map<string, TF.Tensor[]>): void;
|
|
20
28
|
loadWeights(weights: Map<string, TF.Tensor[]>): void;
|
|
21
29
|
private getAttentionScores;
|
|
30
|
+
private getAttentionScoresWithPast;
|
|
22
31
|
private getQKV;
|
|
23
32
|
private getOutputProjection;
|
|
24
|
-
call(x: TF.Tensor, training?: boolean, includeAttention?: boolean): {
|
|
33
|
+
call(x: TF.Tensor, training?: boolean, includeAttention?: boolean, pastKV?: KVCache): {
|
|
25
34
|
output: TF.Tensor;
|
|
26
35
|
attention?: TF.Tensor;
|
|
36
|
+
presentKV?: KVCache;
|
|
27
37
|
};
|
|
28
38
|
dispose(): void;
|
|
29
39
|
}
|
|
@@ -1,20 +1,9 @@
|
|
|
1
|
-
class
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
cProj;
|
|
5
|
-
attnDropout;
|
|
6
|
-
residDropout;
|
|
7
|
-
bias;
|
|
8
|
-
maskInf;
|
|
9
|
-
tf;
|
|
10
|
-
divisor;
|
|
11
|
-
index;
|
|
12
|
-
_trainable = !0;
|
|
13
|
-
constructor(t, e, s) {
|
|
14
|
-
this.config = s, this.tf = t, this.index = e, this.cAttn = this.tf.layers.dense({
|
|
1
|
+
class S {
|
|
2
|
+
constructor(t, i, s, e) {
|
|
3
|
+
this.ropeCache = e, this.config = s, this.tf = t, this.index = i, this.cAttn = this.tf.layers.dense({
|
|
15
4
|
units: 3 * s.nEmbed,
|
|
16
5
|
useBias: s.biasInLinear,
|
|
17
|
-
name: `block_${
|
|
6
|
+
name: `block_${i}_attn_cAttn`,
|
|
18
7
|
kernelInitializer: this.tf.initializers.randomNormal({
|
|
19
8
|
mean: 0,
|
|
20
9
|
stddev: 0.02
|
|
@@ -23,14 +12,27 @@ class m {
|
|
|
23
12
|
}), this.cProj = this.tf.layers.dense({
|
|
24
13
|
units: s.nEmbed,
|
|
25
14
|
useBias: s.biasInLinear,
|
|
26
|
-
name: `block_${
|
|
15
|
+
name: `block_${i}_attn_cProj`,
|
|
27
16
|
kernelInitializer: this.tf.initializers.randomNormal({
|
|
28
17
|
mean: 0,
|
|
29
18
|
stddev: 0.02 / Math.sqrt(2 * s.nLayer)
|
|
30
19
|
}),
|
|
31
20
|
biasInitializer: "zeros"
|
|
32
|
-
}), this.attnDropout = this.tf.layers.dropout({ rate: s.dropout }), this.residDropout = this.tf.layers.dropout({ rate: s.dropout }), this.bias = this.tf.linalg.bandPart(this.tf.ones([s.blockSize, s.blockSize]), -1, 0).cast("bool"), this.divisor = this.tf.scalar(1 / Math.sqrt(s.nEmbed / s.nHead))
|
|
21
|
+
}), this.attnDropout = this.tf.layers.dropout({ rate: s.dropout }), this.residDropout = this.tf.layers.dropout({ rate: s.dropout }), this.bias = this.tf.linalg.bandPart(this.tf.ones([s.blockSize, s.blockSize]), -1, 0).cast("bool"), this.divisor = this.tf.scalar(1 / Math.sqrt(s.nEmbed / s.nHead));
|
|
22
|
+
const a = this.tf.zeros([s.blockSize, s.blockSize]), h = this.tf.fill([s.blockSize, s.blockSize], Number.NEGATIVE_INFINITY);
|
|
23
|
+
this.maskInf = this.tf.where(this.bias, a, h);
|
|
33
24
|
}
|
|
25
|
+
config;
|
|
26
|
+
cAttn;
|
|
27
|
+
cProj;
|
|
28
|
+
attnDropout;
|
|
29
|
+
residDropout;
|
|
30
|
+
bias;
|
|
31
|
+
maskInf;
|
|
32
|
+
tf;
|
|
33
|
+
divisor;
|
|
34
|
+
index;
|
|
35
|
+
_trainable = !0;
|
|
34
36
|
get variables() {
|
|
35
37
|
return [
|
|
36
38
|
...this.cAttn.trainableWeights.map((t) => t.read()),
|
|
@@ -49,34 +51,65 @@ class m {
|
|
|
49
51
|
loadWeights(t) {
|
|
50
52
|
this.cAttn.setWeights(t.get(`block_${this.index}_cAttn`) || []), this.cProj.setWeights(t.get(`block_${this.index}_cProj`) || []);
|
|
51
53
|
}
|
|
52
|
-
getAttentionScores(t,
|
|
53
|
-
const
|
|
54
|
-
return this.attnDropout.apply(
|
|
54
|
+
getAttentionScores(t, i, s) {
|
|
55
|
+
const e = t.shape[2], h = this.tf.matMul(t, i, !1, !0).mul(this.divisor), n = this.maskInf.slice([0, 0], [e, e]).expandDims(0).expandDims(0), r = h.add(n), o = this.tf.softmax(r, -1);
|
|
56
|
+
return this.attnDropout.apply(o, { training: s });
|
|
57
|
+
}
|
|
58
|
+
// Attention with optional past. If pastLen > 0 and T_cur == 1, no mask needed.
|
|
59
|
+
getAttentionScoresWithPast(t, i, s, e) {
|
|
60
|
+
const a = t.shape[2];
|
|
61
|
+
let n = this.tf.matMul(t, i, !1, !0).mul(this.divisor);
|
|
62
|
+
if (a > 1 && e > 0)
|
|
63
|
+
throw new Error("Cannot use past with T_cur > 1");
|
|
64
|
+
if (a > 1) {
|
|
65
|
+
const o = this.maskInf.slice([0, 0], [a, a]).expandDims(0).expandDims(0);
|
|
66
|
+
n = n.add(o);
|
|
67
|
+
}
|
|
68
|
+
const r = this.tf.softmax(n, -1);
|
|
69
|
+
return this.attnDropout.apply(r, { training: s });
|
|
55
70
|
}
|
|
56
71
|
getQKV(t) {
|
|
57
|
-
const [
|
|
58
|
-
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
const d = this.tf.reshape(
|
|
64
|
-
i.dispose();
|
|
65
|
-
const u = d.transpose([0, 2, 1, 3]);
|
|
66
|
-
d.dispose();
|
|
67
|
-
const p = this.tf.reshape(n, [e, s, this.config.nHead, h]);
|
|
72
|
+
const [i, s, e] = t.shape, a = this.cAttn.apply(t), [h, n, r] = this.tf.split(a, 3, -1);
|
|
73
|
+
a.dispose();
|
|
74
|
+
const o = e / this.config.nHead, u = this.tf.reshape(h, [i, s, this.config.nHead, o]);
|
|
75
|
+
h.dispose();
|
|
76
|
+
const f = u.transpose([0, 2, 1, 3]);
|
|
77
|
+
u.dispose();
|
|
78
|
+
const d = this.tf.reshape(n, [i, s, this.config.nHead, o]);
|
|
68
79
|
n.dispose();
|
|
69
|
-
const
|
|
70
|
-
|
|
80
|
+
const c = d.transpose([0, 2, 1, 3]);
|
|
81
|
+
d.dispose();
|
|
82
|
+
const l = this.tf.reshape(r, [i, s, this.config.nHead, o]);
|
|
83
|
+
r.dispose();
|
|
84
|
+
const p = l.transpose([0, 2, 1, 3]);
|
|
85
|
+
return l.dispose(), [f, c, p];
|
|
71
86
|
}
|
|
72
|
-
getOutputProjection(t,
|
|
73
|
-
const s = t.shape[0],
|
|
74
|
-
return this.residDropout.apply(
|
|
87
|
+
getOutputProjection(t, i) {
|
|
88
|
+
const s = t.shape[0], e = t.shape[2], a = this.config.nEmbed, h = t.transpose([0, 2, 1, 3]), n = this.tf.reshape(h, [s, e, a]), r = this.cProj.apply(n);
|
|
89
|
+
return this.residDropout.apply(r, { training: i });
|
|
75
90
|
}
|
|
76
|
-
|
|
91
|
+
// Added optional KV cache support (pastKV). Returns presentKV for chaining.
|
|
92
|
+
call(t, i = !1, s = !1, e) {
|
|
93
|
+
if (e && !this.config.useRope)
|
|
94
|
+
throw new Error("Cannot use pastKV without RoPE enabled");
|
|
77
95
|
return this.tf.tidy(() => {
|
|
78
|
-
const [a,
|
|
79
|
-
|
|
96
|
+
const [a, h, n] = this.getQKV(t), r = a.shape[2], o = this.config.blockSize, u = e ? e.cumulativeLength : 0, [f, d] = this.ropeCache ? this.ropeCache.applyRoPE(a, h, u) : [a, h];
|
|
97
|
+
let c = d, l = n, p = 0;
|
|
98
|
+
e && (p = e.length, c = this.tf.concat([e.k, d], 2), l = this.tf.concat([e.v, n], 2));
|
|
99
|
+
const b = c.shape[2];
|
|
100
|
+
if (b > o) {
|
|
101
|
+
const k = b - o, g = c.shape[0], v = c.shape[1], I = c.shape[3];
|
|
102
|
+
c = c.slice([0, 0, k, 0], [g, v, o, I]), l = l.slice([0, 0, k, 0], [g, v, o, I]), p = o - r;
|
|
103
|
+
}
|
|
104
|
+
let m;
|
|
105
|
+
p > 0 ? m = this.getAttentionScoresWithPast(f, c, i, p) : m = this.getAttentionScores(f, c, i);
|
|
106
|
+
const _ = this.tf.matMul(m, l), A = this.getOutputProjection(_, i), P = {
|
|
107
|
+
k: this.tf.keep(c),
|
|
108
|
+
v: this.tf.keep(l),
|
|
109
|
+
length: p + r,
|
|
110
|
+
cumulativeLength: e ? e.cumulativeLength + r : r
|
|
111
|
+
};
|
|
112
|
+
return { output: A, attention: s ? m.mean(1) : void 0, presentKV: P };
|
|
80
113
|
});
|
|
81
114
|
}
|
|
82
115
|
dispose() {
|
|
@@ -84,5 +117,5 @@ class m {
|
|
|
84
117
|
}
|
|
85
118
|
}
|
|
86
119
|
export {
|
|
87
|
-
|
|
120
|
+
S as default
|
|
88
121
|
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { default as TF } from '@tensorflow/tfjs';
|
|
2
|
+
export default class RMSNorm {
|
|
3
|
+
private gamma;
|
|
4
|
+
private epsilon;
|
|
5
|
+
private tf;
|
|
6
|
+
constructor(tf: typeof TF, shape: number[], epsilon?: number, name?: string);
|
|
7
|
+
get trainableWeights(): TF.Variable[];
|
|
8
|
+
set trainable(value: boolean);
|
|
9
|
+
getWeights(): TF.Tensor[];
|
|
10
|
+
setWeights(weights: TF.Tensor[]): void;
|
|
11
|
+
apply(x: TF.Tensor): TF.Tensor;
|
|
12
|
+
dispose(): void;
|
|
13
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
class m {
|
|
2
|
+
gamma;
|
|
3
|
+
epsilon;
|
|
4
|
+
tf;
|
|
5
|
+
constructor(a, s, t = 1e-8, e = "") {
|
|
6
|
+
this.tf = a, this.epsilon = t, this.gamma = a.variable(a.ones(s), !0, `${e}_gamma`, "float32");
|
|
7
|
+
}
|
|
8
|
+
get trainableWeights() {
|
|
9
|
+
return [this.gamma];
|
|
10
|
+
}
|
|
11
|
+
set trainable(a) {
|
|
12
|
+
this.gamma.trainable = a;
|
|
13
|
+
}
|
|
14
|
+
getWeights() {
|
|
15
|
+
return [this.gamma];
|
|
16
|
+
}
|
|
17
|
+
setWeights(a) {
|
|
18
|
+
this.gamma.assign(a[0]);
|
|
19
|
+
}
|
|
20
|
+
apply(a) {
|
|
21
|
+
return this.tf.tidy(() => {
|
|
22
|
+
const t = a.square().mean(-1, !0).add(this.epsilon).rsqrt();
|
|
23
|
+
return a.mul(t).mul(this.gamma);
|
|
24
|
+
});
|
|
25
|
+
}
|
|
26
|
+
dispose() {
|
|
27
|
+
this.gamma.dispose();
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
export {
|
|
31
|
+
m as default
|
|
32
|
+
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { default as TF } from '@tensorflow/tfjs';
|
|
2
|
+
import { GPTConfig } from '../config';
|
|
3
|
+
export default class RoPECache {
|
|
4
|
+
private readonly tf;
|
|
5
|
+
private readonly config;
|
|
6
|
+
private rotaryDim;
|
|
7
|
+
private ropeBase;
|
|
8
|
+
private ropeInvFreq;
|
|
9
|
+
private ropeCos;
|
|
10
|
+
private ropeSin;
|
|
11
|
+
private ropeCacheLen;
|
|
12
|
+
constructor(tf: typeof TF, config: GPTConfig);
|
|
13
|
+
private ensureRopeCache;
|
|
14
|
+
applyRoPE(q: TF.Tensor, k: TF.Tensor, pastLen: number): [TF.Tensor, TF.Tensor];
|
|
15
|
+
dispose(): void;
|
|
16
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Rotary positional embedding (RoPE) cache.
 *
 * Precomputes and lazily grows cos/sin lookup tables so the per-step
 * `applyRoPE` call only slices from kept tensors instead of recomputing
 * trig tables. Tables live outside any `tf.tidy` scope via `tf.keep` and
 * must be released with `dispose()`.
 */
class RoPECache {
    rotaryDim;
    ropeBase;
    ropeInvFreq; // [rotaryDim/2] inverse frequencies, kept for the cache lifetime
    ropeCos = null; // [cacheLen, rotaryDim/2, 1]
    ropeSin = null; // [cacheLen, rotaryDim/2, 1]
    ropeCacheLen = 0;
    /**
     * @param s tfjs namespace
     * @param r GPT config (reads nEmbed, nHead, blockSize, useRope)
     * @throws Error when the per-head dimension (nEmbed / nHead) is odd,
     *         since RoPE rotates pairs of channels.
     */
    constructor(s, r) {
        this.tf = s;
        this.config = r;
        this.rotaryDim = this.config.nEmbed / this.config.nHead;
        if (this.rotaryDim % 2 !== 0)
            throw new Error("rotaryDim must be even");
        this.ropeBase = 1e4;
        // invFreq[j] = 1 / base^(2j / rotaryDim). Computed inside a tidy so the
        // range/scalar/pow intermediates are reclaimed (the original leaked the
        // two scalars); tidy keeps its return value alive.
        this.ropeInvFreq = this.tf.tidy(() => {
            const evenIdx = this.tf.range(0, this.rotaryDim, 2, "float32");
            const frac = evenIdx.div(this.tf.scalar(this.rotaryDim, "float32"));
            return this.tf.reciprocal(this.tf.pow(this.tf.scalar(this.ropeBase, "float32"), frac));
        });
        if (this.config.useRope === false) {
            this.ropeCos = null;
            this.ropeSin = null;
            this.ropeCacheLen = 0;
        } else {
            // Pre-warm well past blockSize so generation rarely regrows the cache.
            this.ensureRopeCache(this.config.blockSize * 4);
        }
    }
    /**
     * Grows the cos/sin tables to cover at least `s` positions. No-op when the
     * current cache already covers the request.
     */
    ensureRopeCache(s) {
        if (s <= this.ropeCacheLen) return;
        if (this.ropeCos) this.ropeCos.dispose();
        if (this.ropeSin) this.ropeSin.dispose();
        // Tidy reclaims the angle matrix and its intermediates (the original
        // leaked them when called outside a surrounding tidy); keep() protects
        // the tables themselves.
        this.tf.tidy(() => {
            const angles = this.tf.range(0, s, 1, "float32").expandDims(1).mul(this.ropeInvFreq.expandDims(0));
            this.ropeCos = this.tf.keep(this.tf.cos(angles).expandDims(-1));
            this.ropeSin = this.tf.keep(this.tf.sin(angles).expandDims(-1));
        });
        this.ropeCacheLen = s;
    }
    /**
     * Applies the rotary embedding to query and key tensors.
     *
     * @param s query — assumed [batch, nHead, seq, headSize]; TODO confirm against caller
     * @param r key — same assumed layout as the query
     * @param o number of already-cached past positions (offset into the tables)
     * @returns [rotatedQ, rotatedK]; intermediates are NOT tidied here, so the
     *          caller is expected to invoke this inside a tf.tidy.
     */
    applyRoPE(s, r, o) {
        const headSize = s.shape[3];
        const rot = this.rotaryDim;
        if (rot > headSize) return [s, r]; // nothing rotatable; pass through
        const seqLen = s.shape[2];
        const needed = o + seqLen;
        this.ensureRopeCache(needed);
        const half = rot / 2;
        // Slice the table rows for positions [o, o + seqLen) and shape them for broadcasting.
        const cos = this.ropeCos.slice([o, 0, 0], [seqLen, half, 1]).reshape([1, 1, seqLen, half]);
        const sin = this.ropeSin.slice([o, 0, 0], [seqLen, half, 1]).reshape([1, 1, seqLen, half]);
        const batch = s.shape[0];
        const heads = s.shape[1];
        const evenIdx = this.tf.range(0, rot, 2, "int32");
        const oddIdx = this.tf.range(1, rot, 2, "int32");
        // Rotate interleaved (even, odd) channel pairs; any channels beyond
        // rotaryDim pass through unchanged.
        const rotate = (x) => {
            const rotPart = x.slice([0, 0, 0, 0], [batch, heads, seqLen, rot]);
            const passPart = rot < headSize ? x.slice([0, 0, 0, rot], [batch, heads, seqLen, headSize - rot]) : null;
            const xe = this.tf.gather(rotPart, evenIdx, 3);
            const xo = this.tf.gather(rotPart, oddIdx, 3);
            const re = xe.mul(cos).sub(xo.mul(sin));
            const ro = xo.mul(cos).add(xe.mul(sin));
            // Re-interleave the rotated pairs back into channel order.
            const merged = this.tf.stack([re, ro], -1).reshape([batch, heads, seqLen, rot]);
            return passPart ? this.tf.concat([merged, passPart], 3) : merged;
        };
        const qRot = rotate(s);
        const kRot = rotate(r);
        evenIdx.dispose();
        oddIdx.dispose();
        return [qRot, kRot];
    }
    /** Releases all kept tensors; the cache is unusable afterwards. */
    dispose() {
        if (this.ropeCos) this.ropeCos.dispose();
        if (this.ropeSin) this.ropeSin.dispose();
        this.ropeInvFreq.dispose();
    }
}
export {
    RoPECache as default
};
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { default as TF } from '@tensorflow/tfjs';
|
|
2
2
|
import { GPTConfig } from '../config';
|
|
3
|
+
import { KVCache } from './CausalSelfAttention';
|
|
4
|
+
import { default as RoPECache } from './RoPECache';
|
|
3
5
|
export default class Block {
|
|
4
6
|
private ln1;
|
|
5
7
|
private attn;
|
|
@@ -9,16 +11,17 @@ export default class Block {
|
|
|
9
11
|
private index;
|
|
10
12
|
private _trainable;
|
|
11
13
|
skipped: boolean;
|
|
12
|
-
constructor(tf: typeof TF, index: number, config: GPTConfig);
|
|
14
|
+
constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache);
|
|
13
15
|
get variables(): TF.Variable[];
|
|
14
16
|
get trainable(): boolean;
|
|
15
17
|
set trainable(value: boolean);
|
|
16
18
|
saveWeights(map: Map<string, TF.Tensor[]>): void;
|
|
17
19
|
loadWeights(weights: Map<string, TF.Tensor[]>): void;
|
|
18
20
|
private getMLPOutput;
|
|
19
|
-
call(x: TF.Tensor, training?: boolean, includeAttention?: boolean): {
|
|
21
|
+
call(x: TF.Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
|
|
20
22
|
output: TF.Tensor;
|
|
21
23
|
attention?: TF.Tensor;
|
|
24
|
+
cache?: KVCache;
|
|
22
25
|
};
|
|
23
26
|
dispose(): void;
|
|
24
27
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import
|
|
1
|
+
import r from "./CausalSelfAttention.js";
|
|
2
|
+
import o from "./MLP.js";
|
|
3
|
+
import a from "./RMSNorm.js";
|
|
4
4
|
class u {
|
|
5
5
|
ln1;
|
|
6
6
|
attn;
|
|
@@ -10,8 +10,8 @@ class u {
|
|
|
10
10
|
index;
|
|
11
11
|
_trainable = !0;
|
|
12
12
|
skipped = !1;
|
|
13
|
-
constructor(t, i, s) {
|
|
14
|
-
this.tf = t, this.index = i, this.ln1 = new
|
|
13
|
+
constructor(t, i, s, e) {
|
|
14
|
+
this.tf = t, this.index = i, this.ln1 = new a(t, [s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new r(this.tf, this.index, s, e), this.ln2 = new a(t, [s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.tf, this.index, s);
|
|
15
15
|
}
|
|
16
16
|
get variables() {
|
|
17
17
|
return [
|
|
@@ -28,21 +28,25 @@ class u {
|
|
|
28
28
|
this._trainable = t, this.ln1.trainable = t, this.ln2.trainable = t, this.attn.trainable = t, this.mlp.trainable = t;
|
|
29
29
|
}
|
|
30
30
|
saveWeights(t) {
|
|
31
|
-
this.attn.saveWeights(t), this.mlp.saveWeights(t), t.set(`block_${this.index}
|
|
31
|
+
this.attn.saveWeights(t), this.mlp.saveWeights(t), t.set(`block_${this.index}_rms1`, this.ln1.getWeights()), t.set(`block_${this.index}_rms2`, this.ln2.getWeights());
|
|
32
32
|
}
|
|
33
33
|
loadWeights(t) {
|
|
34
|
-
this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}
|
|
34
|
+
this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_rms1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_rms2`) || []);
|
|
35
35
|
}
|
|
36
36
|
getMLPOutput(t, i) {
|
|
37
37
|
const s = this.ln2.apply(t), e = this.mlp.call(s, i);
|
|
38
38
|
return t.add(e);
|
|
39
39
|
}
|
|
40
|
-
call(t, i = !1, s = !1) {
|
|
40
|
+
call(t, i = !1, s = !1, e) {
|
|
41
41
|
return this.tf.tidy(() => {
|
|
42
42
|
if (this.skipped)
|
|
43
43
|
return { output: t };
|
|
44
|
-
const
|
|
45
|
-
return {
|
|
44
|
+
const l = this.ln1.apply(t), n = this.attn.call(l, i, s, e), h = t.add(n.output);
|
|
45
|
+
return {
|
|
46
|
+
output: this.getMLPOutput(h, i),
|
|
47
|
+
attention: n.attention,
|
|
48
|
+
cache: n.presentKV
|
|
49
|
+
};
|
|
46
50
|
});
|
|
47
51
|
}
|
|
48
52
|
dispose() {
|
package/dist/main.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
export { default as NanoGPT } from './NanoGPTModel';
|
|
2
2
|
export { default as TeachableLLM } from './TeachableLLM';
|
|
3
3
|
export { default as CharTokeniser } from './tokeniser/CharTokeniser';
|
|
4
|
+
export { default as waitForModel } from './utilities/waitForModel';
|
|
4
5
|
export type { ITrainerOptions } from './Trainer';
|
|
5
6
|
export type { IGenerateOptions } from './Generator';
|
|
6
7
|
export type { TrainingLogEntry } from './NanoGPTModel';
|
package/dist/main.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import { default as o } from "./NanoGPTModel.js";
|
|
2
|
-
import { default as
|
|
2
|
+
import { default as t } from "./TeachableLLM.js";
|
|
3
3
|
import { default as l } from "./tokeniser/CharTokeniser.js";
|
|
4
|
+
import { default as s } from "./utilities/waitForModel.js";
|
|
4
5
|
export {
|
|
5
6
|
l as CharTokeniser,
|
|
6
7
|
o as NanoGPT,
|
|
7
|
-
|
|
8
|
+
t as TeachableLLM,
|
|
9
|
+
s as waitForModel
|
|
8
10
|
};
|
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
async function
|
|
2
|
-
if (
|
|
1
|
+
/**
 * Autoregressively generates text from a prompt.
 *
 * @param r tokeniser (tokenise / decode / eosToken)
 * @param t model (tf namespace, config, generate)
 * @param a prompt string
 * @param c number of tokens to generate (must be > 0)
 * @param g per-step generate options forwarded to the model
 * @returns decoded text, truncated at the first EOS token if one appears
 * @throws Error on non-positive length or empty prompt
 */
async function generateText(r, t, a, c, g) {
    if (c <= 0)
        throw new Error("Length must be a positive integer");
    if (a.length === 0)
        throw new Error("Prompt cannot be an empty string");
    const tokens = await r.tokenise([a], true);
    // With RoPE the model supports KV caching: one cache slot per layer, and
    // only the newly sampled token is fed back each step.
    const kvCache = t.config.useRope ? new Array(t.config.nLayer).fill(void 0) : void 0;
    const resultTensor = t.tf.tidy(() => {
        let stepInput = t.tf.tensor2d(tokens, [1, tokens[0].length], "int32");
        let full = stepInput; // full sequence accumulated so far (aliases stepInput initially)
        for (let step = 0; step < c; step++) {
            const { output: next } = t.generate(stepInput, kvCache, g);
            const prevInput = stepInput;
            const prevFull = full;
            full = t.tf.concat([full, next], 1);
            stepInput = kvCache ? next : t.tf.concat([stepInput, next], 1);
            // Eagerly free the superseded tensors so long generations do not
            // accumulate inside this tidy. On the first iteration prevInput and
            // prevFull alias the same tensor — guard against double-dispose
            // (the original disposed it twice).
            prevInput.dispose();
            if (prevFull !== prevInput) prevFull.dispose();
            if (!kvCache) next.dispose();
        }
        return full;
    });
    const arr = await resultTensor.array();
    resultTensor.dispose();
    const sequence = arr[0];
    const eosAt = sequence.indexOf(r.eosToken);
    // Drop the EOS token and everything after it.
    if (eosAt !== -1) sequence.splice(eosAt);
    return await r.decode(sequence);
}
export {
    generateText
};
};
|