npm - @genai-fi/nanogpt - Versions diffs - 0.6.0 → 0.6.2 - Mend

@genai-fi/nanogpt 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121) hide show

package/dist/Generator.js +7 -7
package/dist/NanoGPTModel.js +70 -121
package/dist/RealDiv-BYViZwhN.js +540 -0
package/dist/Reshape-t7Kcikjk.js +127 -0
package/dist/TeachableLLM.d.ts +2 -0
package/dist/TeachableLLM.js +34 -27
package/dist/{TiedEmbedding-BhxWO8QR.js → TiedEmbedding-9WeDwvjO.js} +12 -13
package/dist/{axis_util-D17qZRQm.js → axis_util-Bu4h7XWV.js} +14 -12
package/dist/{broadcast_to-BMQLjvt_.js → broadcast_to-DARN-DBD.js} +2 -2
package/dist/{concat-DhZfF1GY.js → concat-5aPGqw3Z.js} +3 -3
package/dist/{dataset-oilnemHf.js → dataset-pgqp-YfL.js} +3 -3
package/dist/{dropout-CrMQPCeG.js → dropout-Bciw46HT.js} +7 -7
package/dist/{gather-DZCMHZuN.js → gather-DjyCjmOD.js} +1 -1
package/dist/gpgpu_math-CNslybmD.js +3115 -0
package/dist/{index-bMBtI-WR.js → index-BAzbokzv.js} +846 -649
package/dist/{kernel_funcs_utils-CNmjLWnB.js → kernel_funcs_utils-CUxJCg0g.js} +232 -138
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +13 -33
package/dist/layers/TiedEmbedding.js +6 -7
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/load.d.ts +13 -0
package/dist/loader/load.js +27 -0
package/dist/loader/loadHF.d.ts +7 -0
package/dist/loader/loadHF.js +22 -0
package/dist/{utilities/load.d.ts → loader/loadTransformers.d.ts} +11 -11
package/dist/loader/loadTransformers.js +28 -0
package/dist/loader/newZipLoad.d.ts +8 -0
package/dist/loader/newZipLoad.js +21 -0
package/dist/loader/oldZipLoad.d.ts +7 -0
package/dist/loader/oldZipLoad.js +76 -0
package/dist/{log_sum_exp-BHdkCb4s.js → log_sum_exp-YEo2h3gb.js} +14 -14
package/dist/main.js +23 -20
package/dist/{mat_mul-BsrLfy81.js → mat_mul-7121rsJk.js} +1 -1
package/dist/{max-DechV4Bc.js → max-DtlIuVeW.js} +1 -1
package/dist/mulmat_packed_gpu-D4nKF7Je.js +71 -0
package/dist/{norm-B9hWHZH1.js → norm-CzltS9Fz.js} +16 -16
package/dist/{ones-g0K8jVwm.js → ones-BBlSRqn1.js} +2 -2
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +6 -6
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +9 -9
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +1 -1
package/dist/ops/cpu/matMulMul.js +1 -1
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +17 -48
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +1 -1
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +4 -4
package/dist/ops/grads/gelu.js +1 -1
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.js +1 -1
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +8 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +29 -560
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMulGelu.js +46 -113
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/{ops-Mv7Ta72x.js → ops-C0sQEcPw.js} +117 -109
package/dist/{random_width-BBAWzDym.js → random_width-DWzaOgrn.js} +6925 -6291
package/dist/{range-DMaG9A3G.js → range-DYsrnfiy.js} +1 -1
package/dist/{gpgpu_math-Ctc31slO.js → reciprocal-CJQeasVa.js} +7 -5
package/dist/register_all_kernels-BfFCQAqs.js +21397 -0
package/dist/{reshape-T4yDEqoF.js → reshape-krWGKraP.js} +1 -1
package/dist/scatter_nd_util-93ln7Hut.js +46 -0
package/dist/selu_util-sntGesxr.js +740 -0
package/dist/{shared-XNAoXhOa.js → shared-Ca6iDobD.js} +1462 -1089
package/dist/{sin-EEhbrRO_.js → sin-D_h-qCSx.js} +1 -1
package/dist/{softmax-B2_IKPDR.js → softmax-fsdtf6JC.js} +1 -1
package/dist/{split-dcks18H1.js → split-eiktj-6L.js} +1 -1
package/dist/{stack-lpJ5kYvE.js → stack-dfEEz2OY.js} +2 -2
package/dist/{sum-CutF5lj2.js → sum-BE_Irnim.js} +1 -1
package/dist/{tensor-C15NA2LA.js → tensor-Xyi595sG.js} +1 -1
package/dist/{tensor2d-DZ_e5eKM.js → tensor2d-CPEkynbH.js} +1 -1
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +3 -3
package/dist/training/sparseCrossEntropy.js +5 -5
package/dist/utilities/dummy.d.ts +6 -0
package/dist/utilities/dummy.js +31 -10
package/dist/utilities/generate.js +3 -3
package/dist/utilities/profile.d.ts +5 -0
package/dist/utilities/profile.js +10 -7
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/save.js +1 -1
package/dist/utilities/weights.js +2 -2
package/dist/{variable-CdRKKp8x.js → variable-wSS22xj5.js} +1 -1
package/dist/{zeros-CAbHfODe.js → zeros-YJDE7oRb.js} +4 -4
package/package.json +2 -8
package/dist/Reshape-CLOrdpve.js +0 -212
package/dist/slice_util-Ddk0uxGJ.js +0 -49
package/dist/tfjs_backend-BDb8r9qx.js +0 -1010
package/dist/utilities/load.js +0 -99

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,17 +1,17 @@
-import { defaultConfig as h } from "./config.js";
+import { defaultConfig as d } from "./config.js";
 import l from "./NanoGPTModel.js";
-import { saveModel as d } from "./utilities/save.js";
-import { loadModel as f } from "./utilities/load.js";
-import u from "./Generator.js";
-import p from "./Trainer.js";
-import { E as _ } from "./index-Dwqa6Zy2.js";
-import { dummyPassAsync as m } from "./utilities/dummy.js";
-import c from "./tokeniser/CharTokeniser.js";
-import g from "./tokeniser/bpe.js";
+import { saveModel as f } from "./utilities/save.js";
+import { loadModel as u } from "./loader/load.js";
+import p from "./Generator.js";
+import _ from "./Trainer.js";
+import { E as c } from "./index-Dwqa6Zy2.js";
+import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
+import g from "./tokeniser/CharTokeniser.js";
+import w from "./tokeniser/bpe.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
 import "./jszip.min-CjP2V1VV.js";
-import "./index-bMBtI-WR.js";
+import "./index-BAzbokzv.js";
 import "./ops/cpu/scatterSub.js";
 import "./ops/webgl/scatterSub.js";
 import "./ops/cpu/gatherSub.js";
@@ -22,7 +22,9 @@ import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "@tensorflow/tfjs";
+import "./random_width-DWzaOgrn.js";
+import "./register_all_kernels-BfFCQAqs.js";
+import "./dataset-pgqp-YfL.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -41,13 +43,14 @@ import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
 import "./ops/webgl/log.js";
-import w from "./utilities/profile.js";
+import k from "./utilities/profile.js";
 class a {
-  ee = new _();
+  ee = new c();
   _config;
   _model;
   _tokeniser;
   _status = "loading";
+  _memoryRequirements;
   meta = {};
   constructor(t, e) {
     this._config = e?.config, this._tokeniser = t, this._model = e;
@@ -79,22 +82,26 @@ class a {
   get ready() {
     return this._status === "ready" && !!this._model && !!this._tokeniser && this.tokeniser.trained;
   }
+  estimateTrainingMemoryUsage(t) {
+    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, r = e.perBatch * t, o = e.gradients;
+    return r * 0.66 + o * 4;
+  }
   setStatus(t) {
     this._status !== t && (this._status = t, this.ee.emit("status", t));
   }
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    return d(this._model, this._tokeniser, {
+    return f(this._model, this._tokeniser, {
       ...t,
       name: t?.name || this.meta.name
     });
   }
   static loadModel(t) {
     const e = new a();
-    return f(t).then(({ model: r, tokeniser: s, name: o }) => {
-      e._model = r, e._tokeniser = s, e._config = r.config, o && (e.meta.name = o), e.setStatus("warmup"), m(r).then(() => {
-        e.setStatus("ready"), e.ee.emit("loaded");
+    return u(t).then(({ model: r, tokeniser: o, name: s }) => {
+      e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
+        e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
       }).catch((i) => {
         e.setStatus("error"), e.ee.emit("error", i);
       });
@@ -103,10 +110,10 @@ class a {
     }), e;
   }
   static create(t, e = {}) {
-    const r = { ...h, ...e }, s = t === "char" ? new c(r.vocabSize) : new g(r.vocabSize), o = new l(r), i = new a(s, o);
-    return i.setStatus("warmup"), m(o).then(() => {
-      i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (n) => {
-        n === "trained" && i.setStatus("ready");
+    const r = { ...d, ...e }, o = t === "char" ? new g(r.vocabSize) : new w(r.vocabSize), s = new l(r), i = new a(o, s);
+    return i.setStatus("warmup"), m(s).then((n) => {
+      i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
+        h === "trained" && i.setStatus("ready");
       }));
     }).catch((n) => {
       i.setStatus("error"), i.ee.emit("error", n);
@@ -122,7 +129,7 @@ class a {
     if (t) {
       if (!this._config)
         throw new Error("Model is not initialized.");
-      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
+      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new k());
     } else
       this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
   }
@@ -134,11 +141,11 @@ class a {
   trainer() {
     if (!this._model || !this._tokeniser)
       throw new Error("Model or tokeniser is not initialized.");
-    const t = new p(this._model, this._tokeniser);
+    const t = new _(this._model, this._tokeniser);
     return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
-      const s = this.ee.listeners("trainStep");
-      for (const o of s)
-        await o(e, r);
+      const o = this.ee.listeners("trainStep");
+      for (const s of o)
+        await s(e, r);
     }), t;
   }
   train(t, e) {
@@ -153,7 +160,7 @@ class a {
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new u(this._model, this._tokeniser);
+    const t = new p(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {

package/dist/{TiedEmbedding-BhxWO8QR.js → TiedEmbedding-9WeDwvjO.js} RENAMED Viewed

@@ -1,9 +1,8 @@
-import { R as a } from "./random_width-BBAWzDym.js";
-import "./index-bMBtI-WR.js";
-import { d as s } from "./tfjs_backend-BDb8r9qx.js";
+import { R as a, d as s } from "./random_width-DWzaOgrn.js";
+import "./index-BAzbokzv.js";
 import o from "./layers/BaseLayer.js";
-import { v as m } from "./variable-CdRKKp8x.js";
-import { g as d } from "./gather-DZCMHZuN.js";
+import { v as m } from "./variable-wSS22xj5.js";
+import { g as d } from "./gather-DjyCjmOD.js";
 /**
  * @license
  * Copyright 2018 Google LLC
@@ -13,16 +12,16 @@ import { g as d } from "./gather-DZCMHZuN.js";
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function n(e) {
-  return new a(e);
+function n(i) {
+  return new a(i);
 }
-class c extends o {
+class S extends o {
   vocabSize;
   embedDim;
   initializer;
   WEIGHTS;
-  constructor(t, i, r) {
-    super(t, r), this.WEIGHTS = i, this.vocabSize = t.gpt.vocabSize, this.embedDim = t.gpt.nEmbed, this.initializer = n({
+  constructor(t, e, r) {
+    super(t, r), this.WEIGHTS = e, this.vocabSize = t.gpt.vocabSize, this.embedDim = t.gpt.nEmbed, this.initializer = n({
       mean: 0,
       stddev: 0.02
     }), this.addVariable(this.WEIGHTS, m(this.initializer.apply([this.vocabSize, this.embedDim]), !0));
@@ -34,11 +33,11 @@ class c extends o {
     return s(t, this.getVariable(this.WEIGHTS).transpose());
   }
   // Dummy, should not be used.
-  forward(t, i) {
-    return this.project(i);
+  forward(t, e) {
+    return this.project(e);
   }
 }
 export {
-  c as T,
+  S as T,
   n as r
 };

package/dist/{axis_util-D17qZRQm.js → axis_util-Bu4h7XWV.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { l as c } from "./index-bMBtI-WR.js";
+import { i as c } from "./index-BAzbokzv.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -21,28 +21,28 @@ function i(e, n) {
       return !1;
   return !0;
 }
-function l(e, n, t) {
+function a(e, n, t) {
   const r = e.length + n.length, s = [];
   let o = 0, f = 0;
   for (let u = 0; u < r; u++)
     t.indexOf(u) === -1 ? s.push(e[o++]) : s.push(n[f++]);
   return s;
 }
-function a(e, n) {
+function l(e, n) {
   const t = [], r = e.length;
   for (let o = 0; o < r; o++)
     n.indexOf(o) === -1 && t.push(e[o]);
   const s = n.map((o) => e[o]);
   return [t, s];
 }
-function m(e, n) {
+function h(e, n) {
   const t = n.map((r) => 1);
-  return l(e, t, n);
+  return a(e, t, n);
 }
-function d(e, n, t) {
+function m(e, n, t) {
   c(i(n, t), () => `${e} supports only inner-most axes for now. Got axes ${n} and rank-${t} input.`);
 }
-function h(e, n) {
+function d(e, n) {
   if (i(e, n))
     return null;
   const t = [];
@@ -61,9 +61,11 @@ function x(e, n) {
 }
 export {
   x as a,
-  d as b,
-  a as c,
-  g as d,
-  m as e,
-  h as g
+  m as b,
+  l as c,
+  i as d,
+  h as e,
+  a as f,
+  d as g,
+  g as h
 };

package/dist/{broadcast_to-BMQLjvt_.js → broadcast_to-DARN-DBD.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { o as h, j as f, n as p, y as g, E as u, L as b } from "./index-bMBtI-WR.js";
-import { r as T } from "./reshape-T4yDEqoF.js";
+import { o as h, q as f, x as p, H as g, E as u, I as b } from "./index-BAzbokzv.js";
+import { r as T } from "./reshape-krWGKraP.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{concat-DhZfF1GY.js → concat-5aPGqw3Z.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as s, l as a, k as p, y as i, E as l, C as f } from "./index-bMBtI-WR.js";
+import { o as s, i as a, w as i, H as p, E as l, J as f } from "./index-BAzbokzv.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -17,13 +17,13 @@ import { o as s, l as a, k as p, y as i, E as l, C as f } from "./index-bMBtI-WR
  */
 function h(o, e = 0) {
   a(o.length >= 1, () => "Pass at least one tensor to concat");
-  const t = p(o, "tensors", "concat", "string_or_numeric");
+  const t = i(o, "tensors", "concat", "string_or_numeric");
   if (t[0].dtype === "complex64" && t.forEach((n) => {
     if (n.dtype !== "complex64")
       throw new Error(`Cannot concatenate complex64 tensors with a tensor
           with dtype ${n.dtype}. `);
   }), t.length === 1)
-    return i(t[0]);
+    return p(t[0]);
   const r = t, c = { axis: e };
   return l.runKernel(f, r, c);
 }

package/dist/{dataset-oilnemHf.js → dataset-pgqp-YfL.js} RENAMED Viewed

@@ -1,7 +1,7 @@
-import { af as S, T as h, N, a as v, ag as o, ah as p, ai as g, l as k, t as y } from "./index-bMBtI-WR.js";
+import { ab as S, T as h, ac as N, d as v, ad as o, ae as p, af as g, i as k, t as y } from "./index-BAzbokzv.js";
 import { s as R } from "./index-C4L8Cm77.js";
-import { s as $ } from "./stack-lpJ5kYvE.js";
-import { t as B } from "./tensor-C15NA2LA.js";
+import { s as $ } from "./stack-dfEEz2OY.js";
+import { t as B } from "./tensor-Xyi595sG.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{dropout-CrMQPCeG.js → dropout-Bciw46HT.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as l, j as h, E as m, aj as p, n as c, ak as d, ad as g, l as u, T as V, al as v, a8 as N, b as w } from "./index-bMBtI-WR.js";
+import { o as l, q as h, E as m, ag as p, x as c, ah as d, ai as g, i as u, T as V, aj as v, a7 as N, a as w } from "./index-BAzbokzv.js";
 import { s as f } from "./index-C4L8Cm77.js";
 /**
  * @license
@@ -16,11 +16,11 @@ import { s as f } from "./index-C4L8Cm77.js";
  * limitations under the License.
  * =============================================================================
  */
-function b(r) {
+function x(r) {
   const e = { x: h(r, "x", "floor", "float32") };
   return m.runKernel(p, e);
 }
-const x = /* @__PURE__ */ l({ floor_: b });
+const b = /* @__PURE__ */ l({ floor_: x });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -180,14 +180,14 @@ function R(r, t, e, s) {
   const n = h(r, "x", "dropout");
   if (u(n.dtype === "float32", () => `x has to be a floating point tensor since it's going to be scaled, but got a ${n.dtype} tensor instead.`), u(t >= 0 && t < 1, () => `rate must be a float in the range [0, 1), but got ${t}.`), t === 0)
     return r instanceof V ? n.clone() : n;
-  const o = E(n, e), a = 1 - t, i = v(x(N(D(o, 0, 1, "float32", s), a)), a);
+  const o = E(n, e), a = 1 - t, i = v(b(N(D(o, 0, 1, "float32", s), a)), a);
   return w(n, i);
 }
-const G = /* @__PURE__ */ l({ dropout_: R });
+const q = /* @__PURE__ */ l({ dropout_: R });
 export {
   T as M,
   D as a,
-  G as d,
-  x as f,
+  q as d,
+  b as f,
   _ as r
 };

package/dist/{gather-DZCMHZuN.js → gather-DjyCjmOD.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as g, j as t, E as h, G as p } from "./index-bMBtI-WR.js";
+import { o as g, q as t, E as h, G as p } from "./index-BAzbokzv.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.