@genai-fi/nanogpt 0.5.6 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +10 -9
- package/dist/NanoGPTModel.js +70 -121
- package/dist/RealDiv-7xu-pkZN.js +540 -0
- package/dist/Reshape-BYC1oUku.js +127 -0
- package/dist/TeachableLLM.d.ts +2 -0
- package/dist/TeachableLLM.js +42 -34
- package/dist/{TiedEmbedding-8S8xn8e6.js → TiedEmbedding-C1HBot-5.js} +12 -13
- package/dist/{axis_util-BczFISHz.js → axis_util-CCNL7jea.js} +14 -12
- package/dist/{broadcast_to-B7NGsBSh.js → broadcast_to-CddAF879.js} +2 -2
- package/dist/{concat-DdKPyAtw.js → concat-XOK9ANZu.js} +7 -7
- package/dist/{dataset-iqT4Otvb.js → dataset-BFFipD1c.js} +5 -5
- package/dist/{dropout-B09InSJS.js → dropout-xlKRoJyU.js} +9 -9
- package/dist/{gather-D6MsdXqc.js → gather-DKtUaTtA.js} +1 -1
- package/dist/gpgpu_math-B_ycgZ4W.js +3115 -0
- package/dist/{index-Du-bmOP8.js → index-CamYe_M8.js} +844 -647
- package/dist/{kernel_funcs_utils-DShm7-0k.js → kernel_funcs_utils-D5MS0JFg.js} +232 -136
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +13 -33
- package/dist/layers/TiedEmbedding.js +6 -7
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/{log_sum_exp-CxfBtUaG.js → log_sum_exp-CV_5-TTu.js} +15 -15
- package/dist/main.js +24 -20
- package/dist/{mat_mul-CbiqIe2d.js → mat_mul-CAbRFWUj.js} +4 -4
- package/dist/{max-0Xnlpv8k.js → max-JBBv7aUf.js} +3 -3
- package/dist/mulmat_packed_gpu-DW4doKL_.js +71 -0
- package/dist/{norm-01kY9I2B.js → norm-B9dQTFYn.js} +12 -12
- package/dist/{ones-CrutWGas.js → ones-CMHNqMr6.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +18 -49
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +15 -11
- package/dist/ops/grads/fusedSoftmax.js +12 -10
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/log.d.ts +0 -0
- package/dist/ops/log.js +1 -0
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +8 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +31 -3379
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/{gpgpu_math-BFbOyvk4.js → ops/webgl/log.d.ts} +2 -8
- package/dist/ops/webgl/log.js +39 -0
- package/dist/ops/webgl/matMulGelu.js +48 -115
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-CJNniCAV.js → ops-DqtYemmV.js} +143 -135
- package/dist/{random_width-C-v-35bY.js → random_width-CLMQG5Jn.js} +6925 -6291
- package/dist/{range-Bvs1hidm.js → range-DqYjKnuG.js} +1 -1
- package/dist/reciprocal-z49filta.js +25 -0
- package/dist/register_all_kernels-COt6wLD0.js +21397 -0
- package/dist/{reshape-BH7eBpwq.js → reshape-C45vIIRU.js} +1 -1
- package/dist/scatter_nd_util-qgtnviTE.js +46 -0
- package/dist/selu_util-4QV_GXTB.js +740 -0
- package/dist/shared-ByfrGA97.js +3199 -0
- package/dist/{sin-CPAZXNjH.js → sin-9JBrfVaB.js} +1 -1
- package/dist/{softmax-DhWoBa7r.js → softmax-DvMvui-_.js} +1 -1
- package/dist/{split-BCUhuU7B.js → split-DxrHrPFK.js} +4 -4
- package/dist/{stack-BV1v7l3S.js → stack-DgaoDmnF.js} +1 -1
- package/dist/{sum-Cvq06317.js → sum-BpcpxNEh.js} +3 -3
- package/dist/{tensor-DgTOPY6h.js → tensor-CDz5x1mP.js} +1 -1
- package/dist/{tensor2d-CRWjDyUe.js → tensor2d-jO8JY5Jd.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.d.ts +6 -0
- package/dist/utilities/dummy.js +31 -10
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.d.ts +25 -0
- package/dist/utilities/load.js +89 -37
- package/dist/utilities/profile.d.ts +5 -0
- package/dist/utilities/profile.js +12 -9
- package/dist/utilities/safetensors.d.ts +3 -0
- package/dist/utilities/safetensors.js +83 -0
- package/dist/utilities/save.js +47 -29
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DZ3fF0R2.js → variable-CLVXjN7F.js} +1 -1
- package/dist/{zeros-BaHhQTWf.js → zeros-DUkkVccu.js} +8 -8
- package/package.json +3 -9
- package/dist/Reshape-Biok_3X1.js +0 -212
- package/dist/slice_util-DskXqRZa.js +0 -49
- package/dist/tfjs_backend-D9Ytje0G.js +0 -1010
package/dist/{split-BCUhuU7B.js → split-DxrHrPFK.js}
@@ -1,4 +1,4 @@
-import { o as p, j as i, E as a, x as c } from "./index-Du-bmOP8.js";
+import { q as p, w as i, E as a, H as c } from "./index-CamYe_M8.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,9 +15,9 @@ import { o as p, j as i, E as a, x as c } from "./index-Du-bmOP8.js";
  * limitations under the License.
  * =============================================================================
  */
-function e(t, s,
-  const
-  return a.runKernel(c,
+function e(t, s, n = 0) {
+  const o = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: n };
+  return a.runKernel(c, o, r);
 }
 const u = /* @__PURE__ */ p({ split_: e });
 export {
package/dist/{sum-Cvq06317.js → sum-BpcpxNEh.js}
@@ -1,4 +1,4 @@
-import { o as e, j as u, D as c, E as l, F as m } from "./index-Du-bmOP8.js";
+import { q as e, w as u, N as c, E as l, O as m } from "./index-CamYe_M8.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,10 +15,10 @@ import { o as e, j as u, D as c, E as l, F as m } from "./index-Du-bmOP8.js";
  * limitations under the License.
  * =============================================================================
  */
-function i(t,
+function i(t, n = null, o = !1) {
   let s = u(t, "x", "sum");
   s.dtype === "bool" && (s = c(s, "int32"));
-  const r = { x: s }, a = { axis:
+  const r = { x: s }, a = { axis: n, keepDims: o };
   return l.runKernel(m, r, a);
 }
 const f = /* @__PURE__ */ e({ sum_: i });
package/dist/training/AdamExt.js CHANGED
@@ -1,4 +1,4 @@
-import { A as r, b as c, f as h, s as g, e as o } from "../index-
+import { A as r, b as c, f as h, s as g, e as o } from "../index-CamYe_M8.js";
 class u extends r {
   constructor(t, e, s, a, i) {
     super(t, e, s, a), this.config = i, this.startLearningRate = t;
package/dist/training/FullTrainer.js
@@ -1,7 +1,7 @@
 import { generateText as T } from "../utilities/generate.js";
 import L from "./Trainer.js";
 import x from "./Evaluator.js";
-import { a as h } from "../index-
+import { a as h } from "../index-CamYe_M8.js";
 import y from "../utilities/profile.js";
 const D = {
   desiredLoss: 0.01,
package/dist/training/Trainer.js CHANGED
@@ -1,8 +1,8 @@
 import { DatasetBuilder as g, flattenTokens as m, PAGE_FACTOR as u } from "./DatasetBuilder.js";
 import f from "./AdamExt.js";
-import { t as y, v as z, a as c } from "../index-
-import { n as S } from "../norm-
-import { z as p } from "../zeros-
+import { t as y, v as z, a as c } from "../index-CamYe_M8.js";
+import { n as S } from "../norm-B9dQTFYn.js";
+import { z as p } from "../zeros-DUkkVccu.js";
 class R {
   constructor(t, e, s = 1e-3) {
     this.tokenizer = e, this.model = t, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new g(e, t.config.gpt.blockSize);
package/dist/training/sparseCrossEntropy.js
@@ -1,9 +1,9 @@
 import { gatherSub as L } from "../ops/gatherSub.js";
 import { scatterSub as y } from "../ops/scatterSub.js";
-import { e as u, c as i, z as S, t as f, s as G } from "../index-
-import { s as v } from "../softmax-
-import { m as z } from "../max-
-import { l as k } from "../log_sum_exp-
+import { e as u, c as i, z as S, t as f, s as G } from "../index-CamYe_M8.js";
+import { s as v } from "../softmax-DvMvui-_.js";
+import { m as z } from "../max-JBBv7aUf.js";
+import { l as k } from "../log_sum_exp-CV_5-TTu.js";
 function F(a, s) {
   return f(() => {
     const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
package/dist/utilities/dummy.d.ts
@@ -1,3 +1,9 @@
 import { default as NanoGPT } from '../NanoGPTModel';
 export declare function dummyPassAsync(model: NanoGPT): Promise<void>;
+export interface MemoryRequirements {
+    perBatch: number;
+    tapeSize: number;
+    gradients: number;
+}
+export declare function dummyPassTrainAsync(model: NanoGPT): Promise<MemoryRequirements>;
 export declare function dummyPass(model: NanoGPT): void;
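The added `dummyPassTrainAsync` declaration runs a single throwaway training pass and resolves with a `MemoryRequirements` estimate (`gradients` is four bytes per parameter, `tapeSize` sums the activations saved on the gradient tape, and `perBatch` is the remaining GPU allocation delta). A minimal usage sketch, assuming the function is reachable from the package's public exports and that per-batch cost scales roughly linearly with batch size (an illustrative assumption, not something the package asserts):

```ts
// Import paths and the linear batch scaling below are assumptions for illustration only.
import { dummyPassTrainAsync, MemoryRequirements } from "@genai-fi/nanogpt";
import type NanoGPT from "@genai-fi/nanogpt/dist/NanoGPTModel";

async function estimateTrainingBytes(model: NanoGPT, batchSize: number): Promise<number> {
  const req: MemoryRequirements = await dummyPassTrainAsync(model);
  // Fixed cost for gradients and tape, plus the measured per-batch cost scaled by batch size.
  return req.gradients + req.tapeSize + req.perBatch * batchSize;
}
```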
package/dist/utilities/dummy.js CHANGED
@@ -1,14 +1,35 @@
-import "../index-
-import { z as
-async function
-  const
-  await
+import { m as f, v as S, e as w } from "../index-CamYe_M8.js";
+import { z as i } from "../zeros-DUkkVccu.js";
+async function P(s) {
+  const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
+  await e.data(), e.dispose(), n && n.dispose(), t.dispose();
 }
-function
-  const
-
+async function v(s) {
+  const t = f(), e = t.numBytesInGPUAllocated ?? t.numBytes;
+  await P(s);
+  const n = i([1, s.config.gpt.blockSize], "int32"), r = i([1, s.config.gpt.blockSize], "int32"), o = {
+    perBatch: 0,
+    tapeSize: 0,
+    gradients: s.getNumParams() * 4
+  }, g = () => {
+    const [a, l] = s.forward({ training: !0 }, n, r), d = w().state.activeTape;
+    let u = 0;
+    if (d)
+      for (const z of d)
+        u += z.saved?.reduce((B, I) => B + I.size * 4, 0) || 0;
+    return o.tapeSize = u, a.dispose(), l;
+  }, { value: c, grads: m } = S(g), p = f(), y = p.numBytesInGPUAllocated ?? p.numBytes;
+  o.perBatch = y - e - o.gradients, console.log("Dummy training memory requirements:", o), await c.data(), c.dispose();
+  for (const a in m)
+    m[a].dispose();
+  return n.dispose(), r.dispose(), o;
+}
+function A(s) {
+  const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
+  e.dispose(), n && n.dispose(), t.dispose();
 }
 export {
-
-
+  A as dummyPass,
+  P as dummyPassAsync,
+  v as dummyPassTrainAsync
 };
package/dist/utilities/generate.js
@@ -1,6 +1,6 @@
-import { t as y } from "../index-
-import { t as x } from "../tensor2d-
-import { c as f } from "../concat-
+import { t as y } from "../index-CamYe_M8.js";
+import { t as x } from "../tensor2d-jO8JY5Jd.js";
+import { c as f } from "../concat-XOK9ANZu.js";
 async function A(o, r, a, c, T) {
   if (c <= 0)
     throw new Error("Length must be a positive integer");
package/dist/utilities/load.d.ts CHANGED
@@ -1,6 +1,31 @@
+import { default as zip } from 'jszip';
 import { default as NanoGPT } from '../NanoGPTModel';
 import { ITokeniser } from '../tokeniser/type';
+export declare const VERSION = 2;
+export interface TransformersConfig {
+    model_type: string;
+    vocab_size: number;
+    hidden_size: number;
+    num_hidden_layers: number;
+    num_attention_heads: number;
+    block_size: number;
+    dropout: number;
+    biasInLinear: boolean;
+    biasInLayerNorm: boolean;
+    mlpFactor: number;
+    useRope: boolean;
+}
+export interface Metadata {
+    version: string;
+    application: string;
+    name?: string;
+}
+export declare function loadOldModel(zipFile: zip): Promise<{
+    model: NanoGPT;
+    tokeniser: ITokeniser;
+}>;
 export declare function loadModel(data: Blob | Buffer | string): Promise<{
     model: NanoGPT;
     tokeniser: ITokeniser;
+    name?: string;
 }>;
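`loadModel` still accepts a `Blob`, `Buffer`, or URL string, and its result now carries an optional `name` taken from the archive's `meta.json`. A brief usage sketch; the import path is an assumption based on the dist layout above:

```ts
import { loadModel } from "@genai-fi/nanogpt"; // assumed public re-export of utilities/load.js

async function restore(url: string) {
  const { model, tokeniser, name } = await loadModel(url);
  console.log(`Loaded ${name ?? "unnamed model"}, vocab size ${tokeniser.getVocab().length}`);
  return { model, tokeniser };
}
```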
package/dist/utilities/load.js CHANGED
@@ -1,47 +1,99 @@
-import { j as
-import { importWeights as
-import
-import
-import { dummyPassAsync as
-import { d as
-import
-
-
-
-
-
+import { j as v } from "../jszip.min-CjP2V1VV.js";
+import { importWeights as F } from "./weights.js";
+import h from "../tokeniser/CharTokeniser.js";
+import b from "../NanoGPTModel.js";
+import { dummyPassAsync as u } from "./dummy.js";
+import { d as k } from "../index-CamYe_M8.js";
+import j from "../tokeniser/bpe.js";
+import { load_safetensors as N } from "./safetensors.js";
+const I = 2;
+async function O(t) {
+  const s = await fetch(t);
+  if (!s.ok)
+    throw new Error(`Failed to fetch ${t}: ${s.statusText}`);
+  return s.arrayBuffer();
 }
-async function
-  const
-  if (!
+async function S(t) {
+  const s = /* @__PURE__ */ new Map(), r = await t.file("manifest.json")?.async("string");
+  if (!r)
     throw new Error("Manifest file not found in the zip archive");
-  const
-  for (const [
-
-  const
-  if (!
+  const p = JSON.parse(r);
+  for (const [o, a] of Object.entries(p.weightSpec))
+    s.set(o, { spec: a, data: new Float32Array() });
+  const e = await t.file("tokeniser.json")?.async("string");
+  if (!e)
    throw new Error("Tokeniser file not found in the zip archive");
-  const
-  for (const
-  if (
-  const
-
-  const
-
+  const i = JSON.parse(e), c = (i.type ?? "char") === "char" ? new h(i.vocab) : new j(i.vocab, i.merges), d = /* @__PURE__ */ new Map();
+  for (const o of Object.keys(t.files))
+    if (o.endsWith(".bin")) {
+      const a = o.replace(".bin", ""), w = await t.file(o).async("arraybuffer"), g = new Float32Array(w), l = s.get(a) || { spec: [], data: new Float32Array() };
+      l.data = g, s.set(a, l);
+      const n = await F(l);
+      d.set(a, n);
    }
-
-  const
-  await
-  const m = await
+  k();
+  const f = new b(p.config);
+  await u(f), f.loadWeights(d);
+  const m = await t.file("log.json")?.async("string");
   if (m)
     try {
-      const
-
-    } catch (
-      throw console.error("Error parsing training log:",
+      const o = JSON.parse(m);
+      f.log = o;
+    } catch (o) {
+      throw console.error("Error parsing training log:", o), new Error(`Failed to parse training log: ${o}`);
    }
-  return { model:
+  return { model: f, tokeniser: c };
+}
+async function R(t) {
+  const s = typeof t == "string" ? await O(t) : t, r = await v.loadAsync(s);
+  if (r.file("manifest.json"))
+    return S(r);
+  {
+    const p = await r.file("config.json")?.async("string");
+    if (!p)
+      throw new Error("Config file not found in the zip archive");
+    const e = JSON.parse(p), i = {
+      vocabSize: e.vocab_size,
+      blockSize: e.block_size,
+      nLayer: e.num_hidden_layers,
+      nHead: e.num_attention_heads,
+      nEmbed: e.hidden_size,
+      dropout: e.dropout,
+      biasInLinear: e.biasInLinear,
+      biasInLayerNorm: e.biasInLayerNorm,
+      mlpFactor: e.mlpFactor,
+      useRope: e.useRope
+    }, y = await r.file("tokeniser.json")?.async("string");
+    if (!y)
+      throw new Error("Tokeniser file not found in the zip archive");
+    const c = JSON.parse(y), f = (c.type ?? "char") === "char" ? new h(c.vocab) : new j(c.vocab, c.merges), m = await N(await r.file("model.safetensors").async("arraybuffer")), o = /* @__PURE__ */ new Map();
+    for (const [n, E] of Object.entries(m))
+      o.set(n, [E]);
+    k();
+    const a = new b(i);
+    await u(a), a.loadWeights(o);
+    const w = await r.file("meta.json")?.async("string");
+    let g;
+    if (w)
+      try {
+        const n = JSON.parse(w);
+        n.name && (g = n.name);
+      } catch (n) {
+        console.error("Error parsing meta file:", n);
+      }
+    const l = await r.file("log.json")?.async("string");
+    if (l)
+      try {
+        const n = JSON.parse(l);
+        a.log = n;
+      } catch (n) {
+        throw console.error("Error parsing training log:", n), new Error(`Failed to parse training log: ${n}`);
+      }
+    return { model: a, tokeniser: f, name: g };
+  }
 }
 export {
-
+  I as VERSION,
+  R as loadModel,
+  S as loadOldModel
 };
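For version-2 archives, `loadModel` reads a transformers-style `config.json` and maps its snake_case fields onto the internal GPT configuration before constructing the model. The sketch below restates that mapping with descriptive names; `TransformersConfig` comes from `load.d.ts` above, while the import path and the return type name are placeholders:

```ts
import type { TransformersConfig } from "@genai-fi/nanogpt/dist/utilities/load"; // path assumed

// Placeholder shape mirroring the object literal built inside loadModel.
interface GPTConfigFields {
  vocabSize: number; blockSize: number; nLayer: number; nHead: number; nEmbed: number;
  dropout: number; biasInLinear: boolean; biasInLayerNorm: boolean; mlpFactor: number; useRope: boolean;
}

function fromTransformersConfig(c: TransformersConfig): GPTConfigFields {
  return {
    vocabSize: c.vocab_size,
    blockSize: c.block_size,
    nLayer: c.num_hidden_layers,
    nHead: c.num_attention_heads,
    nEmbed: c.hidden_size,
    dropout: c.dropout,
    biasInLinear: c.biasInLinear,
    biasInLayerNorm: c.biasInLayerNorm,
    mlpFactor: c.mlpFactor,
    useRope: c.useRope,
  };
}
```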
package/dist/utilities/profile.d.ts
@@ -1,3 +1,7 @@
+import { MemoryInfo } from '@tensorflow/tfjs-core';
+export interface ExtendedMemoryInfo extends MemoryInfo {
+    numBytesInGPUAllocated?: number;
+}
 export default class MemoryProfiler {
     private log;
     private maxMemory;
@@ -6,6 +10,7 @@ export default class MemoryProfiler {
     private peakMemory;
     startMemory(): void;
     getPeakMemory(): number;
+    getMaxMemory(): number;
     endMemory(label: string): void;
     printSummary(): void;
 }
package/dist/utilities/profile.js
@@ -1,32 +1,35 @@
-import { m as
+import { m as a } from "../index-CamYe_M8.js";
 const m = 1024 * 1024;
-class
+class l {
   log = /* @__PURE__ */ new Map();
   maxMemory = 0;
   maxLabel;
   lastMemInfo = [];
   peakMemory = 0;
   startMemory() {
-    this.lastMemInfo.push(
+    this.lastMemInfo.push(a());
   }
   getPeakMemory() {
     return this.peakMemory;
   }
-
+  getMaxMemory() {
+    return this.maxMemory;
+  }
+  endMemory(o) {
    if (this.lastMemInfo.length === 0) {
      console.warn("MemoryProfiler: endMemory called without matching startMemory");
      return;
    }
-    const
-    this.log.set(
+    const e = a(), s = this.lastMemInfo.pop(), t = (e.numBytesInGPUAllocated ?? e.numBytes) - (s?.numBytes ?? s?.numBytesInGPUAllocated ?? 0);
+    this.log.set(o, Math.max(this.log.get(o) || 0, t)), t > this.maxMemory && (this.maxMemory = t, this.maxLabel = o), this.peakMemory = Math.max(this.peakMemory, e.numBytesInGPUAllocated ?? e.numBytes);
   }
   printSummary() {
     console.log("Memory Usage Summary:");
-    for (const [
-      console.log(`- ${
+    for (const [o, e] of this.log.entries())
+      console.log(`- ${o}: ${(e / m).toFixed(2)} MB`);
    this.maxLabel && console.log(`Peak Memory Usage: ${(this.maxMemory / m).toFixed(2)} MB at "${this.maxLabel}"`), console.log(`Overall Peak Memory Usage: ${(this.peakMemory / m).toFixed(2)} MB`);
   }
 }
 export {
-
+  l as default
 };
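`MemoryProfiler` now exposes `getMaxMemory()` alongside the existing `getPeakMemory()`: the former returns the largest allocation delta recorded between a `startMemory()`/`endMemory(label)` pair, the latter the highest absolute GPU allocation observed. A short usage sketch; the import path is an assumption based on the dist layout:

```ts
import MemoryProfiler from "@genai-fi/nanogpt/dist/utilities/profile.js"; // path assumed

const profiler = new MemoryProfiler();
profiler.startMemory();
// ... run a forward pass or training step here ...
profiler.endMemory("train step");                 // records the allocation delta for this label
console.log(profiler.getMaxMemory(), "bytes");    // largest labelled delta (new in 0.6.x)
profiler.printSummary();                          // per-label MB figures plus peak usage
```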
package/dist/utilities/safetensors.js
@@ -0,0 +1,83 @@
+import "../index-CamYe_M8.js";
+import { t as y } from "../tensor-CDz5x1mP.js";
+function l(t) {
+  if (t === "float32") return "F32";
+  if (t === "int32") return "I32";
+  throw new Error(`Unsupported dtype: ${t}`);
+}
+function h(t) {
+  if (t === "F32") return "float32";
+  if (t === "I32") return "int32";
+  throw new Error(`Unsupported dtype: ${t}`);
+}
+async function _(t) {
+  const c = {};
+  let a = 0;
+  for (const [n, s] of Object.entries(t))
+    c[n] = {
+      dtype: l(s.dtype),
+      shape: s.shape,
+      data_offsets: [a, a + s.size * 4]
+    }, a += s.size * 4;
+  const p = JSON.stringify(c);
+  let r = new TextEncoder().encode(p);
+  if (r.length % 4 !== 0) {
+    const n = 4 - r.length % 4, s = new Uint8Array(r.length + n);
+    s.set(r);
+    for (let w = r.length; w < s.length; w++)
+      s[w] = 32;
+    r = s;
+  }
+  const o = r.length, f = 8 + o + a, e = new ArrayBuffer(f);
+  new DataView(e).setUint32(0, o, !0), new Uint8Array(e, 8, o).set(r);
+  let d = 8 + o;
+  for (const n of Object.values(t)) {
+    if (n.size === 0) continue;
+    const s = await n.data();
+    if (n.dtype === "float32")
+      new Float32Array(e, d, n.size).set(s), d += n.size * 4;
+    else if (n.dtype === "int32")
+      new Int32Array(e, d, n.size).set(s), d += n.size * 4;
+    else
+      throw new Error(`Unsupported dtype: ${n.dtype}`);
+  }
+  return e;
+}
+async function U(t) {
+  const a = new DataView(t).getUint32(0, !0), p = new Uint8Array(t, 8, a), r = JSON.parse(new TextDecoder().decode(p)), o = {};
+  for (const [f, e] of Object.entries(r)) {
+    if (e.data_offsets[0] === e.data_offsets[1]) {
+      o[f] = y([], e.shape, h(e.dtype));
+      continue;
+    }
+    if (e.dtype === "F32") {
+      const i = y(
+        new Float32Array(
+          t,
+          e.data_offsets[0] + 8 + a,
+          (e.data_offsets[1] - e.data_offsets[0]) / 4
+        ),
+        e.shape,
+        h(e.dtype)
+      );
+      o[f] = i;
+    } else if (e.dtype === "I32") {
+      const i = y(
+        new Int32Array(
+          t,
+          e.data_offsets[0] + 8 + a,
+          (e.data_offsets[1] - e.data_offsets[0]) / 4
+        ),
+        e.shape,
+        h(e.dtype)
+      );
+      o[f] = i;
+    } else
+      throw new Error(`Unsupported dtype: ${e.dtype}`);
+  }
+  return o;
+}
+export {
+  U as load_safetensors,
+  _ as save_safetensors
+};
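The new safetensors helpers write a buffer laid out as: a little-endian `u32` header length at byte 0, the JSON header starting at byte 8 (padded with spaces to a 4-byte multiple), then raw tensor data, with each entry's `data_offsets` relative to the start of the data section. Because bytes 4 to 7 stay zero, the prefix also reads correctly as a little-endian `u64` length for headers under 4 GB. A small sketch that reads back just the header of a buffer produced by `save_safetensors`:

```ts
interface SafetensorsEntry {
  dtype: "F32" | "I32";
  shape: number[];
  data_offsets: [number, number]; // relative to the start of the data section (byte 8 + header length)
}

// Reads only the JSON header; tensor payloads are left untouched.
function readSafetensorsHeader(buf: ArrayBuffer): Record<string, SafetensorsEntry> {
  const headerLength = new DataView(buf).getUint32(0, true); // u32, little-endian
  const headerBytes = new Uint8Array(buf, 8, headerLength);  // JSON starts at byte 8
  return JSON.parse(new TextDecoder().decode(headerBytes));
}
```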
package/dist/utilities/save.js CHANGED
@@ -1,43 +1,61 @@
-import { j as
-import
-import
-
-async function
-  const
-
-  const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+import { j as p } from "../jszip.min-CjP2V1VV.js";
+import b from "../tokeniser/CharTokeniser.js";
+import { save_safetensors as l } from "./safetensors.js";
+import { VERSION as y } from "./load.js";
+async function N(e, a, n) {
+  const f = n?.includeLog ?? !0, s = /* @__PURE__ */ new Map();
+  e.saveWeights(s);
+  const i = new p(), r = {};
+  s.forEach((t, o) => {
+    t.length === 1 && (r[o] = t[0]);
+  });
+  const g = await l(r);
+  i.file("model.safetensors", g, { binary: !0 });
+  const c = {
+    model_type: "GenAI_NanoGPT_1",
+    vocab_size: a.getVocab().length,
+    hidden_size: e.config.gpt.nEmbed,
+    num_hidden_layers: e.config.gpt.nLayer,
+    num_attention_heads: e.config.gpt.nHead,
+    block_size: e.config.gpt.blockSize,
+    dropout: e.config.gpt.dropout,
+    biasInLinear: e.config.gpt.biasInLinear,
+    biasInLayerNorm: e.config.gpt.biasInLayerNorm,
+    mlpFactor: e.config.gpt.mlpFactor,
+    useRope: e.config.gpt.useRope
+  };
+  if (i.file("config.json", JSON.stringify(c, void 0, 4), {
+    binary: !1
+  }), i.file(
+    "meta.json",
+    JSON.stringify(
+      {
+        version: y,
+        application: "@genai-fi/nanogpt",
+        meta: n?.metadata,
+        name: n?.name
+      },
+      void 0,
+      4
+    ),
    {
      binary: !1
    }
-  ),
+  ), i.file(
    "tokeniser.json",
    JSON.stringify({
-      type: a instanceof
+      type: a instanceof b ? "char" : "bpe",
      vocab: a.getVocab(),
      merges: await a.getMerges()
    }),
    {
      binary: !1
    }
-  ),
-  for (const [
-
-  return
+  ), f && i.file("log.json", JSON.stringify(e.log), { binary: !1 }), n?.files)
+    for (const [t, o] of Object.entries(n.files))
+      i.file(t, JSON.stringify(o), { binary: !1 });
+  return i.generateAsync({ type: "blob" });
 }
 export {
-
+  N as saveModel
 };
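`saveModel` now emits the version-2 archive directly: weights go into `model.safetensors`, the transformers-style config into `config.json`, and the options object controls `meta.json` (`name`, `metadata`), the optional `log.json` (`includeLog`, default `true`), and any extra JSON entries (`files`). A usage sketch, assuming `saveModel` is reachable from the package's public exports:

```ts
import { saveModel } from "@genai-fi/nanogpt"; // assumed re-export of utilities/save.js

// `model` is a NanoGPT instance and `tokeniser` an ITokeniser, as elsewhere in this diff.
async function persist(model: any, tokeniser: any): Promise<Blob> {
  return saveModel(model, tokeniser, {
    name: "my-model",                    // written to meta.json
    includeLog: true,                    // include log.json (default)
    metadata: { note: "demo export" },   // stored under meta.json "meta"
    files: { "extra.json": { any: "JSON-serialisable payload" } }, // extra archive entries
  });
}
```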
package/dist/utilities/weights.js
@@ -1,5 +1,5 @@
-import "../index-
-import { t as p } from "../tensor-
+import "../index-CamYe_M8.js";
+import { t as p } from "../tensor-CDz5x1mP.js";
 function h(n) {
   const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
   let t = 0;
|