@genai-fi/nanogpt 0.5.6 → 0.6.0
This diff compares publicly available package versions as released to their public registry, and is provided for informational purposes only.
- package/dist/Generator.js +8 -7
- package/dist/NanoGPTModel.js +8 -8
- package/dist/{Reshape-Biok_3X1.js → Reshape-CLOrdpve.js} +2 -2
- package/dist/TeachableLLM.js +16 -15
- package/dist/{TiedEmbedding-8S8xn8e6.js → TiedEmbedding-BhxWO8QR.js} +5 -5
- package/dist/{axis_util-BczFISHz.js → axis_util-D17qZRQm.js} +1 -1
- package/dist/{broadcast_to-B7NGsBSh.js → broadcast_to-BMQLjvt_.js} +2 -2
- package/dist/{concat-DdKPyAtw.js → concat-DhZfF1GY.js} +1 -1
- package/dist/{dataset-iqT4Otvb.js → dataset-oilnemHf.js} +3 -3
- package/dist/{dropout-B09InSJS.js → dropout-CrMQPCeG.js} +1 -1
- package/dist/{gather-D6MsdXqc.js → gather-DZCMHZuN.js} +1 -1
- package/dist/{gpgpu_math-BFbOyvk4.js → gpgpu_math-Ctc31slO.js} +1 -1
- package/dist/{index-Du-bmOP8.js → index-bMBtI-WR.js} +50 -50
- package/dist/{kernel_funcs_utils-DShm7-0k.js → kernel_funcs_utils-CNmjLWnB.js} +26 -24
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/{log_sum_exp-CxfBtUaG.js → log_sum_exp-BHdkCb4s.js} +5 -5
- package/dist/main.js +20 -19
- package/dist/{mat_mul-CbiqIe2d.js → mat_mul-BsrLfy81.js} +1 -1
- package/dist/{max-0Xnlpv8k.js → max-DechV4Bc.js} +1 -1
- package/dist/{norm-01kY9I2B.js → norm-B9hWHZH1.js} +5 -5
- package/dist/{ones-CrutWGas.js → ones-g0K8jVwm.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +4 -4
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +15 -11
- package/dist/ops/grads/fusedSoftmax.js +12 -10
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/log.d.ts +0 -0
- package/dist/ops/log.js +1 -0
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +205 -3022
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.d.ts +17 -0
- package/dist/ops/webgl/log.js +39 -0
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-CJNniCAV.js → ops-Mv7Ta72x.js} +13 -13
- package/dist/{random_width-C-v-35bY.js → random_width-BBAWzDym.js} +23 -23
- package/dist/{range-Bvs1hidm.js → range-DMaG9A3G.js} +1 -1
- package/dist/{reshape-BH7eBpwq.js → reshape-T4yDEqoF.js} +1 -1
- package/dist/shared-XNAoXhOa.js +2826 -0
- package/dist/{sin-CPAZXNjH.js → sin-EEhbrRO_.js} +1 -1
- package/dist/{slice_util-DskXqRZa.js → slice_util-Ddk0uxGJ.js} +1 -1
- package/dist/{softmax-DhWoBa7r.js → softmax-B2_IKPDR.js} +1 -1
- package/dist/{split-BCUhuU7B.js → split-dcks18H1.js} +1 -1
- package/dist/{stack-BV1v7l3S.js → stack-lpJ5kYvE.js} +1 -1
- package/dist/{sum-Cvq06317.js → sum-CutF5lj2.js} +1 -1
- package/dist/{tensor-DgTOPY6h.js → tensor-C15NA2LA.js} +1 -1
- package/dist/{tensor2d-CRWjDyUe.js → tensor2d-DZ_e5eKM.js} +1 -1
- package/dist/{tfjs_backend-D9Ytje0G.js → tfjs_backend-BDb8r9qx.js} +28 -28
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.d.ts +25 -0
- package/dist/utilities/load.js +89 -37
- package/dist/utilities/profile.js +4 -4
- package/dist/utilities/safetensors.d.ts +3 -0
- package/dist/utilities/safetensors.js +83 -0
- package/dist/utilities/save.js +47 -29
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DZ3fF0R2.js → variable-CdRKKp8x.js} +1 -1
- package/dist/{zeros-BaHhQTWf.js → zeros-CAbHfODe.js} +1 -1
- package/package.json +1 -1
package/dist/{tfjs_backend-D9Ytje0G.js → tfjs_backend-BDb8r9qx.js} CHANGED

@@ -1,11 +1,11 @@
-import { o as h, j as f, E as $,
-import { s as C, t as Ke, a as Ue, b as ve } from "./ops-CJNniCAV.js";
-import { r as Re, d as Ve } from "./dropout-B09InSJS.js";
-import { r as u } from "./reshape-BH7eBpwq.js";
-import { g as
-import { s as
-import { m as A } from "./mat_mul-CbiqIe2d.js";
-import { c as
+import { o as h, j as f, E as $, ar as Te, l as _, g as Ee, as as xe, at as Ie, au as Le, av as be, aw as Ne, ax as Ce, ay as Pe, b as H, az as Fe, a8 as U, u as ae, q as ie, Q as le, c as fe, aA as he, ai as pe, aB as je, t as S, D as $e, al as Be, a2 as Me } from "./index-bMBtI-WR.js";
+import { s as C, t as Ke, a as Ue, b as ve } from "./ops-Mv7Ta72x.js";
+import { r as Re, d as Ve } from "./dropout-CrMQPCeG.js";
+import { r as u } from "./reshape-T4yDEqoF.js";
+import { g as Ge } from "./gather-DZCMHZuN.js";
+import { s as qe } from "./sum-CutF5lj2.js";
+import { m as A } from "./mat_mul-BsrLfy81.js";
+import { c as B } from "./concat-DhZfF1GY.js";
 /**
 * @license
 * Copyright 2018 Google LLC. All Rights Reserved.
@@ -52,7 +52,7 @@ function We(e, n, t) {
 }
 const Ye = /* @__PURE__ */ h({ clipByValue_: We });
 function Qe(e) {
-return
+return B(
 e,
 0
 /* axis */
@@ -60,15 +60,15 @@ function Qe(e) {
 }
 const He = /* @__PURE__ */ h({ concat1d_: Qe });
 function Xe(e, n) {
-return
+return B(e, n);
 }
 const ze = /* @__PURE__ */ h({ concat2d_: Xe });
 function en(e, n) {
-return
+return B(e, n);
 }
 const nn = /* @__PURE__ */ h({ concat3d_: en });
 function tn(e, n) {
-return
+return B(e, n);
 }
 const rn = /* @__PURE__ */ h({ concat4d_: tn });
 /**
@@ -307,7 +307,7 @@ function An(e, n, t) {
 function Sn(e, n) {
 let t = n;
 const r = Fe(e.shape, n.shape);
-return r.length > 0 && (t =
+return r.length > 0 && (t = qe(t, r)), u(t, e.shape);
 }
 function yn(e, n, t, r) {
 if (n === "linear")
@@ -352,22 +352,22 @@ function _n({ a: e, b: n, transposeA: t = !1, transposeB: r = !1, bias: s, activ
 [c, a] = ae(c, a);
 const k = t ? c.shape[c.rank - 2] : c.shape[c.rank - 1], m = r ? a.shape[a.rank - 1] : a.shape[a.rank - 2], E = t ? c.shape[c.rank - 1] : c.shape[c.rank - 2], d = r ? a.shape[a.rank - 2] : a.shape[a.rank - 1], ne = c.shape.slice(0, -2), x = a.shape.slice(0, -2), te = ie(ne), re = ie(x);
 _(k === m, () => `Error in fused matMul: inner shapes (${k}) and (${m}) of Tensors with shapes ${c.shape} and ${a.shape} and transposeA=${t} and transposeB=${r} must match.`);
-const R = le(c.shape.slice(0, -2), a.shape.slice(0, -2)).concat([E, d]), V = t ? u(c, [te, k, E]) : u(c, [te, E, k]),
+const R = le(c.shape.slice(0, -2), a.shape.slice(0, -2)).concat([E, d]), V = t ? u(c, [te, k, E]) : u(c, [te, E, k]), G = r ? u(a, [re, d, m]) : u(a, [re, m, d]);
 let I;
 s != null && (I = f(s, "bias", "fused matMul"), [I] = ae(I, c), le(R, I.shape));
 let se;
 i != null && (se = f(i, "prelu weights", "fused matMul"));
 const oe = (D, P) => {
-const [y, O, T,
+const [y, O, T, M] = P, w = An(u(D, T.shape), T, o);
 let L, b;
 if (!t && !r ? (L = A(w, O, !1, !0), b = A(y, w, !0, !1)) : !t && r ? (L = A(w, O, !1, !1), b = A(w, y, !0, !1)) : t && !r ? (L = A(O, w, !1, !0), b = A(y, w, !1, !1)) : (L = A(O, w, !0, !0), b = A(w, y, !0, !0)), s != null) {
-const De = Sn(
+const De = Sn(M, w);
 return [L, b, De];
 } else
 return [L, b];
 }, ue = {
 a: V,
-b:
+b: G,
 bias: I,
 preluActivationWeights: se
 }, ce = { transposeA: t, transposeB: r, activation: o, leakyreluAlpha: p };
@@ -377,13 +377,13 @@ function _n({ a: e, b: n, transposeA: t = !1, transposeB: r = !1, bias: s, activ
 $.runKernel(he, ue, ce)
 );
 return O([P, y, T]), { value: u(T, R), gradFunc: oe };
-})(V,
-const
+})(V, G) : fe((P, y, O, T) => {
+const M = (
 // tslint:disable-next-line: no-unnecessary-type-assertion
 $.runKernel(he, ue, ce)
 );
-return T([P, y,
-})(V,
+return T([P, y, M, O]), { value: u(M, R), gradFunc: oe };
+})(V, G, I);
 }
 const de = /* @__PURE__ */ h({ fusedMatMul_: _n });
 /**
@@ -395,7 +395,7 @@ const de = /* @__PURE__ */ h({ fusedMatMul_: _n });
 * https://opensource.org/licenses/MIT.
 * =============================================================================
 */
-const Dn = ["channelsFirst", "channelsLast"], Tn = ["nearest", "bilinear"], En = ["valid", "same", "causal"], xn = ["max", "avg"],
+const Dn = ["channelsFirst", "channelsLast"], Tn = ["nearest", "bilinear"], En = ["valid", "same", "causal"], xn = ["max", "avg"], qn = ["sum", "mul", "concat", "ave"];
 /**
 * @license
 * Copyright 2018 Google LLC
@@ -695,9 +695,9 @@ function gt(e, n) {
 * https://opensource.org/licenses/MIT.
 * =============================================================================
 */
-let
+let q;
 function mt() {
-return
+return q == null && (q = je().epsilon()), q;
 }
 function Y() {
 return "channelsLast";
@@ -830,7 +830,7 @@ function St(e, n, t, r) {
 }
 function yt(e, n = -1) {
 let t;
-return n < 0 && (t = e[0].rank, t !== 0 ? n = t : n = 0), n === e[0].rank && (n = -1),
+return n < 0 && (t = e[0].rank, t !== 0 ? n = t : n = 0), n === e[0].rank && (n = -1), B(e, n);
 }
 function Ot(e, n) {
 switch (e.rank) {
@@ -888,7 +888,7 @@ function Dt(e, n, t, r) {
 }
 }
 function Tt(e, n, t) {
-return S(() => (Array.isArray(n) ? n = Ke(n, "int32") : n = $e(n, "int32"),
+return S(() => (Array.isArray(n) ? n = Ke(n, "int32") : n = $e(n, "int32"), Ge(e, n, t)));
 }
 function Et(e) {
 return H(e, e);
@@ -925,7 +925,7 @@ function It(e, n = 1) {
 return ke(e);
 }
 function Lt(e) {
-return S(() =>
+return S(() => Be(e, U(Me(e), 1)));
 }
 function bt(e, n, t, r) {
 return S(() => Ve(e, n, t, r));
@@ -981,7 +981,7 @@ export {
 At as a9,
 kt as aa,
 at as ab,
-
+qn as ac,
 Sn as b,
 v as c,
 Dt as d,
package/dist/training/AdamExt.js CHANGED

@@ -1,4 +1,4 @@
-import { A as r, b as c, f as h, s as g, e as o } from "../index-Du-bmOP8.js";
+import { A as r, b as c, f as h, s as g, e as o } from "../index-bMBtI-WR.js";
 class u extends r {
 constructor(t, e, s, a, i) {
 super(t, e, s, a), this.config = i, this.startLearningRate = t;
package/dist/training/FullTrainer.js CHANGED

@@ -1,7 +1,7 @@
 import { generateText as T } from "../utilities/generate.js";
 import L from "./Trainer.js";
 import x from "./Evaluator.js";
-import { a as h } from "../index-Du-bmOP8.js";
+import { a as h } from "../index-bMBtI-WR.js";
 import y from "../utilities/profile.js";
 const D = {
 desiredLoss: 0.01,
package/dist/training/Trainer.js CHANGED

@@ -1,8 +1,8 @@
 import { DatasetBuilder as g, flattenTokens as m, PAGE_FACTOR as u } from "./DatasetBuilder.js";
 import f from "./AdamExt.js";
-import { t as y, v as z, a as c } from "../index-Du-bmOP8.js";
-import { n as S } from "../norm-01kY9I2B.js";
-import { z as p } from "../zeros-BaHhQTWf.js";
+import { t as y, v as z, a as c } from "../index-bMBtI-WR.js";
+import { n as S } from "../norm-B9hWHZH1.js";
+import { z as p } from "../zeros-CAbHfODe.js";
 class R {
 constructor(t, e, s = 1e-3) {
 this.tokenizer = e, this.model = t, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new g(e, t.config.gpt.blockSize);
package/dist/training/sparseCrossEntropy.js CHANGED

@@ -1,9 +1,9 @@
 import { gatherSub as L } from "../ops/gatherSub.js";
 import { scatterSub as y } from "../ops/scatterSub.js";
-import { e as u, c as i, z as S, t as f, s as G } from "../index-Du-bmOP8.js";
-import { s as v } from "../softmax-DhWoBa7r.js";
-import { m as z } from "../max-0Xnlpv8k.js";
-import { l as k } from "../log_sum_exp-CxfBtUaG.js";
+import { e as u, c as i, z as S, t as f, s as G } from "../index-bMBtI-WR.js";
+import { s as v } from "../softmax-B2_IKPDR.js";
+import { m as z } from "../max-DechV4Bc.js";
+import { l as k } from "../log_sum_exp-BHdkCb4s.js";
 function F(a, s) {
 return f(() => {
 const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
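Only the chunk hashes changed here, but the context line shows the numerically stable pattern this loss relies on: the row max (`z(p, -1, !0)`) is subtracted before the log-sum-exp (`k(r, -1)`), so no exponential can overflow. A minimal sketch of the same identity on plain arrays (hypothetical helper, not part of the package):

function sparseCrossEntropyRow(logits: number[], target: number): number {
  // Subtract the row max first so Math.exp never overflows (the z/G/k steps above).
  const m = Math.max(...logits);
  const shifted = logits.map((x) => x - m);
  const logSumExp = Math.log(shifted.reduce((acc, x) => acc + Math.exp(x), 0));
  // -log softmax(logits)[target] = logSumExp(shifted) - shifted[target]
  return logSumExp - shifted[target];
}

// Example: sparseCrossEntropyRow([2, 1, 0], 0) ≈ 0.4076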
package/dist/utilities/dummy.js CHANGED

@@ -1,5 +1,5 @@
-import "../index-Du-bmOP8.js";
-import { z as n } from "../zeros-BaHhQTWf.js";
+import "../index-bMBtI-WR.js";
+import { z as n } from "../zeros-CAbHfODe.js";
 async function c(s) {
 const i = n([1, s.config.gpt.blockSize], "int32"), [t, o] = s.forward({ training: !1 }, i);
 await t.data(), t.dispose(), o && o.dispose(), i.dispose();
package/dist/utilities/generate.js CHANGED

@@ -1,6 +1,6 @@
-import { t as y } from "../index-Du-bmOP8.js";
-import { t as x } from "../tensor2d-CRWjDyUe.js";
-import { c as f } from "../concat-DdKPyAtw.js";
+import { t as y } from "../index-bMBtI-WR.js";
+import { t as x } from "../tensor2d-DZ_e5eKM.js";
+import { c as f } from "../concat-DhZfF1GY.js";
 async function A(o, r, a, c, T) {
 if (c <= 0)
 throw new Error("Length must be a positive integer");
package/dist/utilities/load.d.ts CHANGED

@@ -1,6 +1,31 @@
+import { default as zip } from 'jszip';
 import { default as NanoGPT } from '../NanoGPTModel';
 import { ITokeniser } from '../tokeniser/type';
+export declare const VERSION = 2;
+export interface TransformersConfig {
+    model_type: string;
+    vocab_size: number;
+    hidden_size: number;
+    num_hidden_layers: number;
+    num_attention_heads: number;
+    block_size: number;
+    dropout: number;
+    biasInLinear: boolean;
+    biasInLayerNorm: boolean;
+    mlpFactor: number;
+    useRope: boolean;
+}
+export interface Metadata {
+    version: string;
+    application: string;
+    name?: string;
+}
+export declare function loadOldModel(zipFile: zip): Promise<{
+    model: NanoGPT;
+    tokeniser: ITokeniser;
+}>;
 export declare function loadModel(data: Blob | Buffer | string): Promise<{
     model: NanoGPT;
     tokeniser: ITokeniser;
+    name?: string;
 }>;
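With these declarations, callers pass a URL string, Blob, or Buffer and receive the model, tokeniser, and the optional display name read from `meta.json`. A minimal usage sketch, assuming `loadModel` is re-exported from the package entry point:

import { loadModel } from '@genai-fi/nanogpt';

// A URL string is fetched internally; a Blob or Buffer is read directly.
// The URL below is a placeholder.
const { model, tokeniser, name } = await loadModel('https://example.com/model.zip');
console.log(name ?? 'unnamed model', model.config.gpt.blockSize);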
package/dist/utilities/load.js CHANGED

@@ -1,47 +1,99 @@
-import { j as
-import { importWeights as
-import
-import
-import { dummyPassAsync as
-import { d as
-import
-
-
-
-
-
+import { j as v } from "../jszip.min-CjP2V1VV.js";
+import { importWeights as F } from "./weights.js";
+import h from "../tokeniser/CharTokeniser.js";
+import b from "../NanoGPTModel.js";
+import { dummyPassAsync as u } from "./dummy.js";
+import { d as k } from "../index-bMBtI-WR.js";
+import j from "../tokeniser/bpe.js";
+import { load_safetensors as N } from "./safetensors.js";
+const I = 2;
+async function O(t) {
+const s = await fetch(t);
+if (!s.ok)
+throw new Error(`Failed to fetch ${t}: ${s.statusText}`);
+return s.arrayBuffer();
 }
-async function
-const
-if (!
+async function S(t) {
+const s = /* @__PURE__ */ new Map(), r = await t.file("manifest.json")?.async("string");
+if (!r)
 throw new Error("Manifest file not found in the zip archive");
-const
-for (const [
-
-const
-if (!
+const p = JSON.parse(r);
+for (const [o, a] of Object.entries(p.weightSpec))
+s.set(o, { spec: a, data: new Float32Array() });
+const e = await t.file("tokeniser.json")?.async("string");
+if (!e)
 throw new Error("Tokeniser file not found in the zip archive");
-const
-for (const
-if (
-const
-
-const
-
+const i = JSON.parse(e), c = (i.type ?? "char") === "char" ? new h(i.vocab) : new j(i.vocab, i.merges), d = /* @__PURE__ */ new Map();
+for (const o of Object.keys(t.files))
+if (o.endsWith(".bin")) {
+const a = o.replace(".bin", ""), w = await t.file(o).async("arraybuffer"), g = new Float32Array(w), l = s.get(a) || { spec: [], data: new Float32Array() };
+l.data = g, s.set(a, l);
+const n = await F(l);
+d.set(a, n);
 }
-
-const
-await
-const m = await
+k();
+const f = new b(p.config);
+await u(f), f.loadWeights(d);
+const m = await t.file("log.json")?.async("string");
 if (m)
 try {
-const
-
-} catch (
-throw console.error("Error parsing training log:",
+const o = JSON.parse(m);
+f.log = o;
+} catch (o) {
+throw console.error("Error parsing training log:", o), new Error(`Failed to parse training log: ${o}`);
 }
-return { model:
+return { model: f, tokeniser: c };
+}
+async function R(t) {
+const s = typeof t == "string" ? await O(t) : t, r = await v.loadAsync(s);
+if (r.file("manifest.json"))
+return S(r);
+{
+const p = await r.file("config.json")?.async("string");
+if (!p)
+throw new Error("Config file not found in the zip archive");
+const e = JSON.parse(p), i = {
+vocabSize: e.vocab_size,
+blockSize: e.block_size,
+nLayer: e.num_hidden_layers,
+nHead: e.num_attention_heads,
+nEmbed: e.hidden_size,
+dropout: e.dropout,
+biasInLinear: e.biasInLinear,
+biasInLayerNorm: e.biasInLayerNorm,
+mlpFactor: e.mlpFactor,
+useRope: e.useRope
+}, y = await r.file("tokeniser.json")?.async("string");
+if (!y)
+throw new Error("Tokeniser file not found in the zip archive");
+const c = JSON.parse(y), f = (c.type ?? "char") === "char" ? new h(c.vocab) : new j(c.vocab, c.merges), m = await N(await r.file("model.safetensors").async("arraybuffer")), o = /* @__PURE__ */ new Map();
+for (const [n, E] of Object.entries(m))
+o.set(n, [E]);
+k();
+const a = new b(i);
+await u(a), a.loadWeights(o);
+const w = await r.file("meta.json")?.async("string");
+let g;
+if (w)
+try {
+const n = JSON.parse(w);
+n.name && (g = n.name);
+} catch (n) {
+console.error("Error parsing meta file:", n);
+}
+const l = await r.file("log.json")?.async("string");
+if (l)
+try {
+const n = JSON.parse(l);
+a.log = n;
+} catch (n) {
+throw console.error("Error parsing training log:", n), new Error(`Failed to parse training log: ${n}`);
+}
+return { model: a, tokeniser: f, name: g };
+}
 }
 export {
-
+I as VERSION,
+R as loadModel,
+S as loadOldModel
 };
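The rewritten `loadModel` above branches on the archive contents: a zip containing `manifest.json` is routed to the legacy per-tensor `.bin` loader (`loadOldModel`), while the new layout is identified by `config.json` plus `model.safetensors` and decoded with `load_safetensors`. The same probe, as a hypothetical standalone helper against jszip:

import JSZip from 'jszip';

// Legacy (v1) archives carry manifest.json and one .bin file per tensor;
// v2 archives carry config.json and model.safetensors, plus optional meta.json/log.json.
async function detectFormat(data: Blob | ArrayBuffer): Promise<'legacy' | 'safetensors'> {
  const archive = await JSZip.loadAsync(data);
  return archive.file('manifest.json') ? 'legacy' : 'safetensors';
}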
package/dist/utilities/profile.js CHANGED

@@ -1,6 +1,6 @@
-import { m as s } from "../index-Du-bmOP8.js";
+import { m as s } from "../index-bMBtI-WR.js";
 const m = 1024 * 1024;
-class M {
+class l {
 log = /* @__PURE__ */ new Map();
 maxMemory = 0;
 maxLabel;
@@ -18,7 +18,7 @@ class M {
 return;
 }
 const o = s(), t = o.numBytes - (this.lastMemInfo.pop()?.numBytes || 0);
-this.log.set(e, Math.max(this.log.get(e) || 0, t)), t > this.maxMemory && (this.maxMemory = t, this.maxLabel = e), this.peakMemory = Math.max(this.peakMemory, o.numBytes);
+this.log.set(e, Math.max(this.log.get(e) || 0, t)), t > this.maxMemory && (this.maxMemory = t, this.maxLabel = e), this.peakMemory = Math.max(this.peakMemory, o.numBytesInGPUAllocated || o.numBytes);
 }
 printSummary() {
 console.log("Memory Usage Summary:");
@@ -28,5 +28,5 @@ class M {
 }
 }
 export {
-M as default
+l as default
 };
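The one functional change in this file is the `peakMemory` line: on the WebGL backend, `tf.memory()` additionally reports `numBytesInGPUAllocated` (texture memory actually held on the GPU), a truer peak than the CPU-side `numBytes`, and the `||` falls back to `numBytes` on backends without that field. A sketch of the same pattern against the public tfjs API:

import * as tf from '@tensorflow/tfjs';

// numBytesInGPUAllocated only exists on the WebGL backend's MemoryInfo,
// so fall back to the portable numBytes counter everywhere else.
function peakBytes(): number {
  const info = tf.memory() as { numBytes: number; numBytesInGPUAllocated?: number };
  return info.numBytesInGPUAllocated || info.numBytes;
}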
package/dist/utilities/safetensors.js ADDED

@@ -0,0 +1,83 @@
+import "../index-bMBtI-WR.js";
+import { t as y } from "../tensor-C15NA2LA.js";
+function l(t) {
+if (t === "float32") return "F32";
+if (t === "int32") return "I32";
+throw new Error(`Unsupported dtype: ${t}`);
+}
+function h(t) {
+if (t === "F32") return "float32";
+if (t === "I32") return "int32";
+throw new Error(`Unsupported dtype: ${t}`);
+}
+async function _(t) {
+const c = {};
+let a = 0;
+for (const [n, s] of Object.entries(t))
+c[n] = {
+dtype: l(s.dtype),
+shape: s.shape,
+data_offsets: [a, a + s.size * 4]
+}, a += s.size * 4;
+const p = JSON.stringify(c);
+let r = new TextEncoder().encode(p);
+if (r.length % 4 !== 0) {
+const n = 4 - r.length % 4, s = new Uint8Array(r.length + n);
+s.set(r);
+for (let w = r.length; w < s.length; w++)
+s[w] = 32;
+r = s;
+}
+const o = r.length, f = 8 + o + a, e = new ArrayBuffer(f);
+new DataView(e).setUint32(0, o, !0), new Uint8Array(e, 8, o).set(r);
+let d = 8 + o;
+for (const n of Object.values(t)) {
+if (n.size === 0) continue;
+const s = await n.data();
+if (n.dtype === "float32")
+new Float32Array(e, d, n.size).set(s), d += n.size * 4;
+else if (n.dtype === "int32")
+new Int32Array(e, d, n.size).set(s), d += n.size * 4;
+else
+throw new Error(`Unsupported dtype: ${n.dtype}`);
+}
+return e;
+}
+async function U(t) {
+const a = new DataView(t).getUint32(0, !0), p = new Uint8Array(t, 8, a), r = JSON.parse(new TextDecoder().decode(p)), o = {};
+for (const [f, e] of Object.entries(r)) {
+if (e.data_offsets[0] === e.data_offsets[1]) {
+o[f] = y([], e.shape, h(e.dtype));
+continue;
+}
+if (e.dtype === "F32") {
+const i = y(
+new Float32Array(
+t,
+e.data_offsets[0] + 8 + a,
+(e.data_offsets[1] - e.data_offsets[0]) / 4
+),
+e.shape,
+h(e.dtype)
+);
+o[f] = i;
+} else if (e.dtype === "I32") {
+const i = y(
+new Int32Array(
+t,
+e.data_offsets[0] + 8 + a,
+(e.data_offsets[1] - e.data_offsets[0]) / 4
+),
+e.shape,
+h(e.dtype)
+);
+o[f] = i;
+} else
+throw new Error(`Unsupported dtype: ${e.dtype}`);
+}
+return o;
+}
+export {
+U as load_safetensors,
+_ as save_safetensors
+};
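The new module is a self-contained safetensors codec: the buffer opens with the JSON header's byte length (written little-endian at offset 0, with the remaining header-size bytes left zero, which matches the spec's u64 for any realistic size), then the header itself, padded with spaces to a 4-byte boundary so the Float32Array/Int32Array views that follow stay aligned, then the raw tensor bytes at the recorded `data_offsets`. A hedged round-trip sketch; the deep import path is an assumption from the dist layout:

import * as tf from '@tensorflow/tfjs';
// Assumed import path, based on the file's location in dist/utilities.
import { save_safetensors, load_safetensors } from '@genai-fi/nanogpt/dist/utilities/safetensors.js';

// Round-trip a couple of tensors through the binary format.
const weights: Record<string, tf.Tensor> = {
  'wte/weight': tf.randomNormal([4, 8]),
  'lm_head/bias': tf.zeros([8], 'int32'),
};
const buffer = await save_safetensors(weights);  // ArrayBuffer: length word + JSON header + tensor bytes
const restored = await load_safetensors(buffer); // plain object of tf.Tensor, dtypes and shapes restored
console.log(restored['wte/weight'].shape);       // [4, 8]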