npm - @genai-fi/nanogpt - Versions diffs - 0.1.7 → 0.1.8 - Mend

@genai-fi/nanogpt 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/NanoGPTModel.d.ts +1 -0
package/dist/NanoGPTModel.js +20 -17
package/dist/TeachableLLM.d.ts +4 -2
package/dist/TeachableLLM.js +20 -15
package/dist/layers/CausalSelfAttention.d.ts +1 -0
package/dist/layers/CausalSelfAttention.js +19 -16
package/dist/layers/LayerNorm.d.ts +1 -0
package/dist/layers/LayerNorm.js +7 -4
package/dist/layers/MLP.d.ts +1 -0
package/dist/layers/MLP.js +16 -13
package/dist/layers/TiedEmbedding.d.ts +1 -0
package/dist/layers/TiedEmbedding.js +18 -15
package/dist/layers/TransformerBlock.d.ts +1 -0
package/dist/layers/TransformerBlock.js +10 -7
package/dist/tokeniser/CharTokeniser.js +23 -23
package/dist/utilities/save.d.ts +7 -1
package/dist/utilities/save.js +28 -13
package/package.json +1 -1

package/dist/NanoGPTModel.d.ts CHANGED Viewed

@@ -46,4 +46,5 @@ export default class NanoGPT {
         probabilities?: TF.Tensor;
     };
     getNumParams(): number;
+    dispose(): void;
 }

package/dist/NanoGPTModel.js CHANGED Viewed

@@ -54,7 +54,7 @@ class $ {
   }
   inputPhase(t, e = !1) {
     return this.tf.tidy(() => {
-      const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), a = this.wpe.apply(n), o = i.add(a);
+      const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), h = this.wpe.apply(n), o = i.add(h);
       return this.drop.apply(o, { training: e });
     });
   }
@@ -98,8 +98,8 @@ class $ {
         throw new Error("No attentions for rollout");
       const e = t[0].shape[0], s = t[0].shape[1], i = this.tf.eye(s, s).expandDims(0);
       let n = i.tile([e, 1, 1]);
-      for (const a of t) {
-        let o = a.add(i);
+      for (const h of t) {
+        let o = h.add(i);
         o = o.div(o.sum(-1, !0)), n = o.matMul(n);
       }
       return n;
@@ -108,36 +108,36 @@ class $ {
   forward(t, e, s = !1, i = !1) {
     return this.validateInput(t), this.tf.tidy(() => {
       let n = this.inputPhase(t, s);
-      const a = [];
+      const h = [];
       for (const c of this.blocks) {
-        const { output: p, attention: l } = c.call(n, s, i);
-        n = p, i && l && a.push(l);
+        const { output: d, attention: l } = c.call(n, s, i);
+        n = d, i && l && h.push(l);
       }
       let o;
-      i && a.length > 0 && (o = this.computeAttentionRollout(a)), n = this.lnF.apply(n);
-      const h = this.wte.project(n);
+      i && h.length > 0 && (o = this.computeAttentionRollout(h)), n = this.lnF.apply(n);
+      const a = this.wte.project(n);
       let r;
-      return e && (r = this.calculateLoss(h, e)), { logits: h, loss: r, attention: i ? o : void 0 };
+      return e && (r = this.calculateLoss(a, e)), { logits: a, loss: r, attention: i ? o : void 0 };
     });
   }
   generate(t, e) {
-    const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, a = e?.includeAttention ?? !1;
+    const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, h = e?.includeAttention ?? !1;
     return this.tf.tidy(() => {
-      const o = t, h = o.shape[1], r = h <= this.config.blockSize ? o : o.slice(
-        [0, h - this.config.blockSize],
+      const o = t, a = o.shape[1], r = a <= this.config.blockSize ? o : o.slice(
+        [0, a - this.config.blockSize],
         [o.shape[0], this.config.blockSize]
-      ), c = n ? this.config.blockSize - r.shape[1] : 0, p = c > 0 ? this.tf.pad(r, [
+      ), c = n ? this.config.blockSize - r.shape[1] : 0, d = c > 0 ? this.tf.pad(r, [
         [0, 0],
         [0, c]
-      ]) : r, { logits: l, attention: g } = this.forward(p, void 0, !1, a), b = l.shape[1] - 1 - c, u = l.slice([0, b, 0], [l.shape[0], 1, l.shape[2]]), k = g ? g.slice([0, b, 0], [g.shape[0], 1, g.shape[2]]) : void 0, d = u.div(s);
+      ]) : r, { logits: l, attention: p } = this.forward(d, void 0, !1, h), b = l.shape[1] - 1 - c, u = l.slice([0, b, 0], [l.shape[0], 1, l.shape[2]]), k = p ? p.slice([0, b, 0], [p.shape[0], 1, p.shape[2]]) : void 0, g = u.div(s);
       let f;
       if (i) {
-        const { values: w, indices: E } = this.tf.topk(d, i), y = this.tf.multinomial(w.squeeze([1]), 1);
+        const { values: w, indices: E } = this.tf.topk(g, i), y = this.tf.multinomial(w.squeeze([1]), 1);
         f = this.tf.gather(E.squeeze([1]), y, 1);
       } else
-        f = this.tf.multinomial(d.squeeze([1]), 1);
+        f = this.tf.multinomial(g.squeeze([1]), 1);
       let m;
-      return e?.includeProbabilities && (m = this.tf.softmax(d.squeeze([1]))), f = f.reshape([1, 1]), { output: f, attention: k?.squeeze([1]), probabilities: m };
+      return e?.includeProbabilities && (m = this.tf.softmax(g.squeeze([1]))), f = f.reshape([1, 1]), { output: f, attention: k?.squeeze([1]), probabilities: m };
     });
   }
   getNumParams() {
@@ -146,6 +146,9 @@ class $ {
     this.config.nEmbed * 4 * this.config.nEmbed), i = this.config.nEmbed + this.config.vocabSize * this.config.nEmbed;
     return t + e + s + i;
   }
+  dispose() {
+    this.wte.dispose(), this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
+  }
 }
 export {
   $ as default

package/dist/TeachableLLM.d.ts CHANGED Viewed

@@ -2,10 +2,11 @@ import { default as TF } from '@tensorflow/tfjs';
 import { GPTConfig } from './config';
 import { ITokeniser } from './tokeniser/type';
 import { default as NanoGPT } from './NanoGPTModel';
+import { SaveOptions } from './utilities/save';
 import { default as Generator, IGenerateOptions } from './Generator';
 import { default as Trainer, ITrainerOptions } from './Trainer';
 import { default as EE } from 'eventemitter3';
-type TeachableLLMStatus = 'warmup' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
+type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
 export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
     private _config?;
     private _model?;
@@ -19,7 +20,7 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
     get status(): TeachableLLMStatus;
     get ready(): boolean;
     private setStatus;
-    saveModel(): Promise<Blob>;
+    saveModel(options?: SaveOptions): Promise<Blob>;
     static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
     static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
     getNumParams(): number;
@@ -27,5 +28,6 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
     train(text: string[], options?: ITrainerOptions): Promise<void>;
     generator(): Generator;
     generateText(prompt?: string, options?: IGenerateOptions): Promise<string>;
+    dispose(): void;
 }
 export {};

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,13 +1,13 @@
 import d from "./NanoGPTModel.js";
-import { defaultConfig as m } from "./config.js";
-import { saveModel as u } from "./utilities/save.js";
+import { defaultConfig as u } from "./config.js";
+import { saveModel as m } from "./utilities/save.js";
 import { loadModel as l } from "./utilities/load.js";
 import f from "./Generator.js";
 import _ from "./Trainer.js";
 import { E as c } from "./index-SOhdqzHq.js";
-import { dummyPassAsync as a } from "./utilities/dummy.js";
+import { dummyPassAsync as h } from "./utilities/dummy.js";
 import g from "./tokeniser/CharTokeniser.js";
-class n extends c {
+class a extends c {
   _config;
   _model;
   tf;
@@ -35,20 +35,20 @@ class n extends c {
     return this._status;
   }
   get ready() {
-    return this._status === "ready" && !!this._model && !!this._tokeniser;
+    return this._status === "ready" && !!this._model && !!this._tokeniser && this.tokeniser.trained;
   }
   setStatus(t) {
     this._status !== t && (this._status = t, this.emit("status", t));
   }
-  saveModel() {
+  saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    return u(this._model, this._tokeniser);
+    return m(this._model, this._tokeniser, t);
   }
   static loadModel(t, r) {
-    const e = new n(t);
+    const e = new a(t);
     return l(t, r).then(({ model: i, tokeniser: o }) => {
-      e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), a(i).then(() => {
+      e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), h(i).then(() => {
         e.setStatus("ready");
       }).catch((s) => {
         e.setStatus("error"), e.emit("error", s);
@@ -58,11 +58,13 @@ class n extends c {
     }), e;
   }
   static create(t, r = {}) {
-    const e = { ...m, ...r }, i = new g(e.vocabSize), o = new d(t, e), s = new n(t, i, o);
-    return s.setStatus("warmup"), a(o).then(() => {
-      s.setStatus("ready");
-    }).catch((h) => {
-      s.setStatus("error"), s.emit("error", h);
+    const e = { ...u, ...r }, i = new g(e.vocabSize), o = new d(t, e), s = new a(t, i, o);
+    return s.setStatus("warmup"), h(o).then(() => {
+      s.setStatus("awaitingTokens"), s.tokeniser.once("trainStatus", (n) => {
+        n === "trained" && s.setStatus("ready");
+      });
+    }).catch((n) => {
+      s.setStatus("error"), s.emit("error", n);
     }), s;
   }
   getNumParams() {
@@ -96,7 +98,10 @@ class n extends c {
   generateText(t, r) {
     return this.generator().generate(t, r);
   }
+  dispose() {
+    this._model?.dispose();
+  }
 }
 export {
-  n as default
+  a as default
 };

package/dist/layers/CausalSelfAttention.d.ts CHANGED Viewed

@@ -25,4 +25,5 @@ export default class CausalSelfAttention {
         output: TF.Tensor;
         attention?: TF.Tensor;
     };
+    dispose(): void;
 }

package/dist/layers/CausalSelfAttention.js CHANGED Viewed

@@ -50,35 +50,38 @@ class m {
     this.cAttn.setWeights(t.get(`block_${this.index}_cAttn`) || []), this.cProj.setWeights(t.get(`block_${this.index}_cProj`) || []);
   }
   getAttentionScores(t, e, s) {
-    const a = t.shape[2], n = this.tf.matMul(t, e, !1, !0).mul(this.divisor), i = this.maskInf.slice([0, 0], [a, a]), o = n.add(i), h = this.tf.softmax(o, -1);
+    const a = t.shape[2], o = this.tf.matMul(t, e, !1, !0).mul(this.divisor), i = this.maskInf.slice([0, 0], [a, a]), n = o.add(i), h = this.tf.softmax(n, -1);
     return this.attnDropout.apply(h, { training: s });
   }
   getQKV(t) {
-    const [e, s, a] = t.shape, r = this.cAttn.apply(t), [n, i, o] = this.tf.split(r, 3, -1);
+    const [e, s, a] = t.shape, r = this.cAttn.apply(t), [o, i, n] = this.tf.split(r, 3, -1);
     r.dispose();
-    const h = a / this.config.nHead, c = this.tf.reshape(n, [e, s, this.config.nHead, h]);
-    n.dispose();
-    const p = c.transpose([0, 2, 1, 3]);
+    const h = a / this.config.nHead, c = this.tf.reshape(o, [e, s, this.config.nHead, h]);
+    o.dispose();
+    const l = c.transpose([0, 2, 1, 3]);
     c.dispose();
-    const l = this.tf.reshape(i, [e, s, this.config.nHead, h]);
+    const d = this.tf.reshape(i, [e, s, this.config.nHead, h]);
     i.dispose();
-    const u = l.transpose([0, 2, 1, 3]);
-    l.dispose();
-    const d = this.tf.reshape(o, [e, s, this.config.nHead, h]);
-    o.dispose();
-    const b = d.transpose([0, 2, 1, 3]);
-    return d.dispose(), [p, u, b];
+    const u = d.transpose([0, 2, 1, 3]);
+    d.dispose();
+    const p = this.tf.reshape(n, [e, s, this.config.nHead, h]);
+    n.dispose();
+    const b = p.transpose([0, 2, 1, 3]);
+    return p.dispose(), [l, u, b];
   }
   getOutputProjection(t, e) {
-    const s = t.shape[0], a = t.shape[2], r = this.config.nEmbed, n = t.transpose([0, 2, 1, 3]), i = this.tf.reshape(n, [s, a, r]), o = this.cProj.apply(i);
-    return this.residDropout.apply(o, { training: e });
+    const s = t.shape[0], a = t.shape[2], r = this.config.nEmbed, o = t.transpose([0, 2, 1, 3]), i = this.tf.reshape(o, [s, a, r]), n = this.cProj.apply(i);
+    return this.residDropout.apply(n, { training: e });
   }
   call(t, e = !1, s = !1) {
     return this.tf.tidy(() => {
-      const [a, r, n] = this.getQKV(t), i = this.getAttentionScores(a, r, e), o = this.tf.matMul(i, n);
-      return { output: this.getOutputProjection(o, e), attention: s ? i.mean(1) : void 0 };
+      const [a, r, o] = this.getQKV(t), i = this.getAttentionScores(a, r, e), n = this.tf.matMul(i, o);
+      return { output: this.getOutputProjection(n, e), attention: s ? i.mean(1) : void 0 };
     });
   }
+  dispose() {
+    this.cAttn.dispose(), this.cProj.dispose(), this.attnDropout.dispose(), this.residDropout.dispose(), this.bias.dispose(), this.maskInf.dispose(), this.divisor.dispose();
+  }
 }
 export {
   m as default

package/dist/layers/LayerNorm.d.ts CHANGED Viewed

@@ -9,4 +9,5 @@ export default class LayerNorm {
     getWeights(): TF.Tensor[];
     setWeights(weights: TF.Tensor[]): void;
     apply(x: TF.Tensor): TF.Tensor;
+    dispose(): void;
 }

package/dist/layers/LayerNorm.js CHANGED Viewed

@@ -1,4 +1,4 @@
-class u {
+class h {
   gamma;
   //private beta: TF.Variable;
   epsilon;
@@ -20,11 +20,14 @@ class u {
   }
   apply(a) {
     return this.tf.tidy(() => {
-      const s = a.mean(-1, !0), t = a.sub(s), n = t.square().mean(-1, !0).add(this.epsilon).rsqrt();
-      return t.mul(n).mul(this.gamma);
+      const s = a.mean(-1, !0), t = a.sub(s), i = t.square().mean(-1, !0).add(this.epsilon).rsqrt();
+      return t.mul(i).mul(this.gamma);
     });
   }
+  dispose() {
+    this.gamma.dispose();
+  }
 }
 export {
-  u as default
+  h as default
 };

package/dist/layers/MLP.d.ts CHANGED Viewed

@@ -14,4 +14,5 @@ export default class MLP {
     saveWeights(map: Map<string, TF.Tensor[]>): void;
     loadWeights(weights: Map<string, TF.Tensor[]>): void;
     call(x: TF.Tensor, training?: boolean): TF.Tensor;
+    dispose(): void;
 }

package/dist/layers/MLP.js CHANGED Viewed

@@ -5,27 +5,27 @@ class l {
   tf;
   index;
   _trainable = !0;
-  constructor(t, i, e) {
-    this.tf = t, this.index = i, this.cFc = this.tf.layers.dense({
-      units: e.mlpFactor * e.nEmbed,
+  constructor(t, e, i) {
+    this.tf = t, this.index = e, this.cFc = this.tf.layers.dense({
+      units: i.mlpFactor * i.nEmbed,
       activation: "gelu",
-      useBias: e.biasInLinear,
+      useBias: i.biasInLinear,
       kernelInitializer: this.tf.initializers.randomNormal({
         mean: 0,
         stddev: 0.02
       }),
       biasInitializer: "zeros",
-      name: `block_${i}_mlp_cFc`
+      name: `block_${e}_mlp_cFc`
     }), this.cProj = this.tf.layers.dense({
-      units: e.nEmbed,
-      useBias: e.biasInLinear,
+      units: i.nEmbed,
+      useBias: i.biasInLinear,
       kernelInitializer: this.tf.initializers.randomNormal({
         mean: 0,
-        stddev: 0.02 / Math.sqrt(2 * e.nLayer)
+        stddev: 0.02 / Math.sqrt(2 * i.nLayer)
       }),
       biasInitializer: "zeros",
-      name: `block_${i}_mlp_cProj`
-    }), this.dropout = this.tf.layers.dropout({ rate: e.dropout });
+      name: `block_${e}_mlp_cProj`
+    }), this.dropout = this.tf.layers.dropout({ rate: i.dropout });
   }
   get variables() {
     return [
@@ -45,12 +45,15 @@ class l {
   loadWeights(t) {
     this.cFc.setWeights(t.get(`block_${this.index}_mlpHidden`) || []), this.cProj.setWeights(t.get(`block_${this.index}_mlpOut`) || []);
   }
-  call(t, i = !1) {
+  call(t, e = !1) {
     return this.tf.tidy(() => {
-      const e = this.cFc.apply(t), s = this.cProj.apply(e);
-      return this.dropout.apply(s, { training: i });
+      const i = this.cFc.apply(t), s = this.cProj.apply(i);
+      return this.dropout.apply(s, { training: e });
     });
   }
+  dispose() {
+    this.cFc.dispose(), this.cProj.dispose(), this.dropout.dispose();
+  }
 }
 export {
   l as default

package/dist/layers/TiedEmbedding.d.ts CHANGED Viewed

@@ -19,4 +19,5 @@ export default class TiedEmbeddingOutputLayer {
         vocabSize: number;
         embedDim: number;
     };
+    dispose(): void;
 }

package/dist/layers/TiedEmbedding.js CHANGED Viewed

@@ -168,11 +168,11 @@ const we = /* @__PURE__ */ p({ imag_: Ke });
  * limitations under the License.
  * =============================================================================
  */
-function ze(t, e = 0.2) {
+function We(t, e = 0.2) {
   const n = { x: a(t, "x", "leakyRelu") }, r = { alpha: e };
   return u.runKernel(ae, n, r);
 }
-const Ee = /* @__PURE__ */ p({ leakyRelu_: ze });
+const ze = /* @__PURE__ */ p({ leakyRelu_: We });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -189,11 +189,11 @@ const Ee = /* @__PURE__ */ p({ leakyRelu_: ze });
  * limitations under the License.
  * =============================================================================
  */
-function We(t) {
+function Ee(t) {
   const s = { x: a(t, "x", "neg") };
   return u.runKernel(ue, s);
 }
-const Oe = /* @__PURE__ */ p({ neg_: We });
+const Oe = /* @__PURE__ */ p({ neg_: Ee });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -368,7 +368,7 @@ function Ue(t, e, s, n) {
   if (e === "prelu")
     return Fe(t, s);
   if (e === "leakyrelu")
-    return Ee(t, n);
+    return ze(t, n);
   if (e === "sigmoid")
     return De(t);
   throw new Error(`Unknown fused activation ${e}.`);
@@ -397,18 +397,18 @@ function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
   }
   let o = a(t, "a", "fused matMul"), c = a(e, "b", "fused matMul");
   [o, c] = A(o, c);
-  const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], z = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
+  const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], W = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
   B(b === D, () => `Error in fused matMul: inner shapes (${b}) and (${D}) of Tensors with shapes ${o.shape} and ${c.shape} and transposeA=${s} and transposeB=${n} must match.`);
-  const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, z]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, z, D]) : f(c, [v, D, z]);
+  const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, W]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, W, D]) : f(c, [v, D, W]);
   let y;
   r != null && (y = a(r, "bias", "fused matMul"), [y] = A(y, o), P(O, y.shape));
   let C;
   l != null && (C = a(l, "prelu weights", "fused matMul"));
   const G = (x, K) => {
-    const [g, $, k, E] = K, m = qe(f(x, k.shape), k, i);
+    const [g, $, k, z] = K, m = qe(f(x, k.shape), k, i);
     let _, M;
     if (!s && !n ? (_ = d(m, $, !1, !0), M = d(g, m, !0, !1)) : !s && n ? (_ = d(m, $, !1, !1), M = d(m, g, !0, !1)) : s && !n ? (_ = d($, m, !1, !0), M = d(g, m, !1, !1)) : (_ = d($, m, !0, !0), M = d(m, g, !0, !0)), r != null) {
-      const Q = Pe(E, m);
+      const Q = Pe(z, m);
       return [_, M, Q];
     } else
       return [_, M];
@@ -425,11 +425,11 @@ function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
     );
     return $([K, g, k]), { value: f(k, O), gradFunc: G };
   })(R, F) : U((K, g, $, k) => {
-    const E = (
+    const z = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
       u.runKernel(H, I, j)
     );
-    return k([K, g, E, $]), { value: f(E, O), gradFunc: G };
+    return k([K, g, z, $]), { value: f(z, O), gradFunc: G };
   })(R, F, y);
 }
 const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
@@ -442,9 +442,9 @@ const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-class W extends Error {
+class E extends Error {
   constructor(e) {
-    super(e), Object.setPrototypeOf(this, W.prototype);
+    super(e), Object.setPrototypeOf(this, E.prototype);
   }
 }
 /**
@@ -458,11 +458,11 @@ class W extends Error {
  */
 function Qe(t, e, s, n) {
   if (t.rank < 2 || e.rank < 2)
-    throw new W(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
+    throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
   if (e.rank >= 3) {
     const r = t.shape.slice(-1)[0], i = e.shape.slice(-2)[0];
     if (r !== i)
-      throw new W(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and  y shape = ${e.shape}`);
+      throw new E(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and  y shape = ${e.shape}`);
   }
   if (t.rank === 2 && e.rank === 2)
     return J({
@@ -526,6 +526,9 @@ class Ye {
       embedDim: this.embedDim
     };
   }
+  dispose() {
+    this.tiedWeights.dispose();
+  }
 }
 export {
   Ye as default

package/dist/layers/TransformerBlock.d.ts CHANGED Viewed

@@ -20,4 +20,5 @@ export default class Block {
         output: TF.Tensor;
         attention?: TF.Tensor;
     };
+    dispose(): void;
 }

package/dist/layers/TransformerBlock.js CHANGED Viewed

@@ -10,8 +10,8 @@ class u {
   index;
   _trainable = !0;
   skipped = !1;
-  constructor(t, s, i) {
-    this.tf = t, this.index = s, this.ln1 = new l(t, [i.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new h(this.tf, this.index, i), this.ln2 = new l(t, [i.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new r(this.tf, this.index, i);
+  constructor(t, i, s) {
+    this.tf = t, this.index = i, this.ln1 = new l(t, [s.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new h(this.tf, this.index, s), this.ln2 = new l(t, [s.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new r(this.tf, this.index, s);
   }
   get variables() {
     return [
@@ -33,18 +33,21 @@ class u {
   loadWeights(t) {
     this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_ln1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_ln2`) || []);
   }
-  getMLPOutput(t, s) {
-    const i = this.ln2.apply(t), e = this.mlp.call(i, s);
+  getMLPOutput(t, i) {
+    const s = this.ln2.apply(t), e = this.mlp.call(s, i);
     return t.add(e);
   }
-  call(t, s = !1, i = !1) {
+  call(t, i = !1, s = !1) {
     return this.tf.tidy(() => {
       if (this.skipped)
         return { output: t };
-      const e = this.ln1.apply(t), n = this.attn.call(e, s, i), a = t.add(n.output);
-      return { output: this.getMLPOutput(a, s), attention: n.attention };
+      const e = this.ln1.apply(t), n = this.attn.call(e, i, s), a = t.add(n.output);
+      return { output: this.getMLPOutput(a, i), attention: n.attention };
     });
   }
+  dispose() {
+    this.ln1.dispose(), this.attn.dispose(), this.ln2.dispose(), this.mlp.dispose();
+  }
 }
 export {
   u as default

package/dist/tokeniser/CharTokeniser.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { E as h } from "../index-SOhdqzHq.js";
-const c = ["<eos>", "<unk>"];
-class l extends h {
+import { E as r } from "../index-SOhdqzHq.js";
+const h = ["<eos>", "<unk>"];
+class l extends r {
   vocabSize = 0;
   eosToken = 0;
   unkToken = 0;
@@ -9,7 +9,7 @@ class l extends h {
   constructor(s) {
     if (super(), Array.isArray(s))
       if (this.vocab = s, this.vocab.length > 0)
-        this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((i, o) => {
+        this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<pad>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("_")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf(" ")), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((i, o) => {
           this.cache.set(i, o);
         });
       else
@@ -23,29 +23,29 @@ class l extends h {
   destroy() {
   }
   async train(s) {
-    const i = s.map((e) => e.split("")).flat(), o = new Set(i), t = Array.from(o), n = this.vocabSize - c.length;
-    if (t.length > n) {
-      const e = /* @__PURE__ */ new Map();
+    const i = s.map((t) => t.split("")).flat(), o = new Set(i), e = Array.from(o), n = this.vocabSize - h.length;
+    if (e.length > n) {
+      const t = /* @__PURE__ */ new Map();
       i.forEach((a) => {
-        e.set(a, (e.get(a) || 0) + 1);
-      }), t.sort((a, r) => (e.get(a) || 0) - (e.get(r) || 0)), t.splice(0, t.length - n);
-    } else if (t.length < n)
-      for (; t.length < n; )
-        t.push("<pad>");
-    return t.sort((e, a) => e.charCodeAt(0) - a.charCodeAt(0)), this.vocab = [...t, ...c], this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.vocabSize = this.vocab.length, this.cache.clear(), this.vocab.forEach((e, a) => {
-      this.cache.set(e, a);
-    }), this.vocabSize;
+        t.set(a, (t.get(a) || 0) + 1);
+      }), e.sort((a, c) => (t.get(a) || 0) - (t.get(c) || 0)), e.splice(0, e.length - n);
+    } else if (e.length < n)
+      for (; e.length < n; )
+        e.push("<pad>");
+    return e.sort((t, a) => t.charCodeAt(0) - a.charCodeAt(0)), this.vocab = [...e, ...h], this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.vocabSize = this.vocab.length, this.cache.clear(), this.vocab.forEach((t, a) => {
+      this.cache.set(t, a);
+    }), this.emit("trainStatus", "trained"), this.vocabSize;
   }
   async tokenise(s, i) {
     if (!this.trained)
       throw new Error("Tokeniser not trained");
-    return s.map((t) => i ? t.split("").map((n) => this.cache.get(n) ?? this.unkToken) : t.split("").map((n) => {
-      const e = this.cache.get(n);
-      return e !== void 0 ? this.vocab[e] : "<unk>";
+    return s.map((e) => i ? e.split("").map((n) => this.cache.get(n) ?? this.unkToken) : e.split("").map((n) => {
+      const t = this.cache.get(n);
+      return t !== void 0 ? this.vocab[t] : "<unk>";
     }));
   }
   async detokenise(s) {
-    return s.map((o) => o.map((t) => this.vocab[t]).join(""));
+    return s.map((o) => o.map((e) => this.vocab[e]).join(""));
   }
   async encode(s) {
     return (await this.tokenise([s], !0))[0];
@@ -60,10 +60,10 @@ class l extends h {
     return [];
   }
   async createTrainingData(s, i = 5) {
-    const o = await this.tokenise(s, !0), t = [], n = [];
-    for (let e = 0; e < o.length - i; e++)
-      t.push(...o[e].slice(0, i)), n.push(o[e + 1][0]);
-    return [t, n];
+    const o = await this.tokenise(s, !0), e = [], n = [];
+    for (let t = 0; t < o.length - i; t++)
+      e.push(...o[t].slice(0, i)), n.push(o[t + 1][0]);
+    return [e, n];
   }
 }
 export {

package/dist/utilities/save.d.ts CHANGED Viewed

@@ -1,3 +1,9 @@
 import { default as NanoGPT } from '../NanoGPTModel';
 import { ITokeniser } from '../tokeniser/type';
-export declare function saveModel(model: NanoGPT, tokeniser: ITokeniser): Promise<Blob>;
+export interface SaveOptions {
+    includeLog?: boolean;
+    name?: string;
+    metadata?: Record<string, unknown>;
+    files?: Record<string, unknown>;
+}
+export declare function saveModel(model: NanoGPT, tokeniser: ITokeniser, options?: SaveOptions): Promise<Blob>;

package/dist/utilities/save.js CHANGED Viewed

@@ -1,21 +1,36 @@
-import { z as f } from "../jszip.min-BLbRbbKt.js";
-import { exportWeights as g } from "./weights.js";
-async function l(i, t) {
-  const o = i.saveWeights(), e = new f(), s = {};
-  for (const [n, r] of o) {
-    const a = await g(r);
-    s[n] = a.spec, e.file(`${n}.bin`, a.data.buffer, { binary: !0 });
+import { z as g } from "../jszip.min-BLbRbbKt.js";
+import { exportWeights as l } from "./weights.js";
+const b = "1.0.0";
+async function p(t, s, i) {
+  const o = i?.includeLog ?? !0, c = t.saveWeights(), e = new g(), f = {};
+  for (const [n, a] of c) {
+    const r = await l(a);
+    f[n] = r.spec, e.file(`${n}.bin`, r.data.buffer, { binary: !0 });
   }
-  return e.file("manifest.json", JSON.stringify({ weightSpec: s, config: i.config }), {
-    binary: !1
-  }), e.file(
+  if (e.file(
+    "manifest.json",
+    JSON.stringify({
+      weightSpec: f,
+      config: t.config,
+      version: b,
+      application: "@genai-fi/nanogpt",
+      meta: i?.metadata,
+      name: i?.name
+    }),
+    {
+      binary: !1
+    }
+  ), e.file(
     "tokeniser.json",
-    JSON.stringify({ vocab: t.getVocab(), merges: await t.getMerges() }),
+    JSON.stringify({ vocab: s.getVocab(), merges: await s.getMerges() }),
     {
       binary: !1
     }
-  ), e.file("log.json", JSON.stringify(i.log), { binary: !1 }), e.generateAsync({ type: "blob" });
+  ), o && e.file("log.json", JSON.stringify(t.log), { binary: !1 }), i?.files)
+    for (const [n, a] of Object.entries(i.files))
+      e.file(n, JSON.stringify(a), { binary: !1 });
+  return e.generateAsync({ type: "blob" });
 }
 export {
-  l as saveModel
+  p as saveModel
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@genai-fi/nanogpt",
-    "version": "0.1.7",
+    "version": "0.1.8",
     "type": "module",
     "main": "dist/main.js",
     "types": "dist/main.d.ts",