npm - @genai-fi/nanogpt - Versions diffs - 0.6.2 → 0.7.0 - Mend

@genai-fi/nanogpt 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (142)

package/dist/Generator.js +11 -11
package/dist/NanoGPTModel.d.ts +2 -2
package/dist/NanoGPTModel.js +104 -136
package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} +26 -25
package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} +5 -5
package/dist/TeachableLLM.d.ts +3 -0
package/dist/TeachableLLM.js +50 -47
package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} +4 -4
package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} +3 -3
package/dist/backend.d.ts +1 -0
package/dist/backend.js +7 -0
package/dist/backend_util-DWiwsi2N.js +749 -0
package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-C4v-j9yA.js} +2 -2
package/dist/{concat-5aPGqw3Z.js → concat-CsHeR4zV.js} +8 -8
package/dist/{dataset-pgqp-YfL.js → dataset-JDyjG3QR.js} +3 -3
package/dist/{dropout-Bciw46HT.js → dropout-hpDwECTe.js} +7 -7
package/dist/{gather-DjyCjmOD.js → gather-D0_gPiBz.js} +4 -4
package/dist/gelu-uyHP1x1f.js +26 -0
package/dist/gpgpu_math-DJm3ZTAf.js +2371 -0
package/dist/index-BPPzKVdR.js +12099 -0
package/dist/{index-BAzbokzv.js → index-C0dhsYom.js} +405 -389
package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CwRTFqrc.js} +31 -30
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +5 -5
package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-D086OgZJ.js} +15 -15
package/dist/main.d.ts +2 -0
package/dist/main.js +9 -5
package/dist/{mat_mul-7121rsJk.js → mat_mul-1nwdPkQ_.js} +4 -4
package/dist/{max-DtlIuVeW.js → max-BQc2Aj-I.js} +4 -4
package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-Gzf3I9UV.js} +1 -1
package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
package/dist/{ones-BBlSRqn1.js → ones-D63HpSF_.js} +2 -2
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/appendCache.js +8 -8
package/dist/ops/cpu/attentionMask.js +9 -9
package/dist/ops/cpu/fusedSoftmax.js +17 -11
package/dist/ops/cpu/gatherSub.js +7 -7
package/dist/ops/cpu/gelu.js +13 -13
package/dist/ops/cpu/matMulGelu.js +36 -24
package/dist/ops/cpu/matMulMul.js +14 -8
package/dist/ops/cpu/mulDropout.js +9 -3
package/dist/ops/cpu/normRMS.js +5 -5
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +11 -11
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +3 -24
package/dist/ops/grads/matMulGelu.js +5 -5
package/dist/ops/grads/normRMS.js +6 -6
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +3 -3
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +4 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +5 -5
package/dist/ops/webgl/matMulGelu.js +17 -17
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +4 -4
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/appendCache.js +56 -0
package/dist/ops/webgpu/attentionMask.d.ts +1 -0
package/dist/ops/webgpu/attentionMask.js +64 -0
package/dist/ops/webgpu/gatherSub.d.ts +1 -0
package/dist/ops/webgpu/gatherSub.js +37 -0
package/dist/ops/webgpu/gelu.d.ts +14 -0
package/dist/ops/webgpu/gelu.js +86 -0
package/dist/ops/webgpu/index.d.ts +0 -0
package/dist/ops/webgpu/index.js +8 -0
package/dist/ops/webgpu/normRMS.d.ts +1 -0
package/dist/ops/webgpu/normRMS.js +115 -0
package/dist/ops/webgpu/qkv.d.ts +1 -0
package/dist/ops/webgpu/qkv.js +56 -0
package/dist/ops/webgpu/rope.d.ts +1 -0
package/dist/ops/webgpu/rope.js +68 -0
package/dist/ops/webgpu/scatterSub.d.ts +1 -0
package/dist/ops/webgpu/scatterSub.js +37 -0
package/dist/{ops-C0sQEcPw.js → ops-CIQLNshk.js} +452 -503
package/dist/{random_width-DWzaOgrn.js → random_width-DkYP8W8N.js} +143 -144
package/dist/{range-DYsrnfiy.js → range-CYzpQY53.js} +1 -1
package/dist/{reciprocal-CJQeasVa.js → reciprocal-_A9yv27J.js} +1 -1
package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-guvSxp7M.js} +202 -200
package/dist/{reshape-krWGKraP.js → reshape-BMUzc1UY.js} +3 -3
package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-IRBqKz_b.js} +3 -3
package/dist/{selu_util-sntGesxr.js → selu_util-Dt_iuXaq.js} +6 -6
package/dist/shared-BNa2q6jD.js +69 -0
package/dist/{shared-Ca6iDobD.js → shared-CDu9S76h.js} +541 -606
package/dist/{sin-D_h-qCSx.js → sin-Cocju-BY.js} +6 -6
package/dist/{softmax-fsdtf6JC.js → softmax-GPNK3o-U.js} +3 -3
package/dist/{split-eiktj-6L.js → split-CHzJjxDv.js} +4 -4
package/dist/{stack-dfEEz2OY.js → stack-Dpgg_1W1.js} +2 -2
package/dist/{sum-BE_Irnim.js → sum-B8wEpKsg.js} +5 -5
package/dist/{tensor-Xyi595sG.js → tensor-RvZVNmg0.js} +1 -1
package/dist/{tensor2d-CPEkynbH.js → tensor2d-B_kyod7_.js} +1 -1
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/Evaluator.js +1 -1
package/dist/training/FullTrainer.js +20 -20
package/dist/training/Trainer.d.ts +5 -6
package/dist/training/Trainer.js +59 -60
package/dist/training/sparseCrossEntropy.js +19 -26
package/dist/utilities/dummy.js +19 -19
package/dist/utilities/generate.js +15 -16
package/dist/utilities/multinomialCPU.d.ts +2 -0
package/dist/utilities/multinomialCPU.js +13 -0
package/dist/utilities/performance.d.ts +2 -0
package/dist/utilities/performance.js +16 -0
package/dist/utilities/profile.d.ts +1 -0
package/dist/utilities/profile.js +9 -6
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/weights.js +2 -2
package/dist/{variable-wSS22xj5.js → variable-DXEUOwew.js} +1 -1
package/dist/webgpu_util-g13LvDIv.js +625 -0
package/dist/{zeros-YJDE7oRb.js → zeros-DCPCdFGq.js} +8 -8
package/package.json +2 -1
package/dist/gpgpu_math-CNslybmD.js +0 -3115
package/dist/norm-CzltS9Fz.js +0 -86
package/dist/ops/node/sparseCrossEntropy.js +0 -11
package/dist/ops/{node/sparseCrossEntropy.d.ts → webgpu/appendCache.d.ts} +0 -0

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,17 +1,17 @@
-import { defaultConfig as d } from "./config.js";
-import l from "./NanoGPTModel.js";
-import { saveModel as f } from "./utilities/save.js";
-import { loadModel as u } from "./loader/load.js";
-import p from "./Generator.js";
-import _ from "./Trainer.js";
-import { E as c } from "./index-Dwqa6Zy2.js";
+import { defaultConfig as _ } from "./config.js";
+import f from "./NanoGPTModel.js";
+import { saveModel as u } from "./utilities/save.js";
+import { loadModel as d } from "./loader/load.js";
+import l from "./Generator.js";
+import p from "./Trainer.js";
+import { E as g } from "./index-Dwqa6Zy2.js";
 import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
-import g from "./tokeniser/CharTokeniser.js";
-import w from "./tokeniser/bpe.js";
+import c from "./tokeniser/CharTokeniser.js";
+import k from "./tokeniser/bpe.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
 import "./jszip.min-CjP2V1VV.js";
-import "./index-BAzbokzv.js";
+import "./index-C0dhsYom.js";
 import "./ops/cpu/scatterSub.js";
 import "./ops/webgl/scatterSub.js";
 import "./ops/cpu/gatherSub.js";
@@ -22,9 +22,9 @@ import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "./random_width-DWzaOgrn.js";
-import "./register_all_kernels-BfFCQAqs.js";
-import "./dataset-pgqp-YfL.js";
+import "./random_width-DkYP8W8N.js";
+import "./register_all_kernels-guvSxp7M.js";
+import "./dataset-JDyjG3QR.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -38,14 +38,14 @@ import "./ops/webgl/matMulGelu.js";
 import "./ops/grads/matMulGelu.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./ops/grads/gelu.js";
+import "./gelu-uyHP1x1f.js";
 import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
 import "./ops/webgl/log.js";
-import k from "./utilities/profile.js";
+import w from "./utilities/profile.js";
 class a {
-  ee = new c();
+  ee = new g();
   _config;
   _model;
   _tokeniser;
@@ -58,66 +58,71 @@ class a {
   get vocab() {
     return this._tokeniser?.getVocab() || [];
   }
+  /** Model is fully loaded */
   get loaded() {
     return !!this._model && !!this._tokeniser && !!this._config;
   }
   get config() {
     if (!this._config)
-      throw new Error("Model configuration is not initialized.");
+      throw new Error("configuration_not_initialized.");
     return this._config.gpt;
   }
   get model() {
     if (!this._model)
-      throw new Error("Model is not initialized.");
+      throw new Error("model_not_initialized.");
     return this._model;
   }
   get tokeniser() {
     if (!this._tokeniser)
-      throw new Error("Tokeniser is not initialized.");
+      throw new Error("tokeniser_not_initialized.");
     return this._tokeniser;
   }
   get status() {
     return this._status;
   }
+  /** Model is both ready and not busy */
   get ready() {
-    return this._status === "ready" && !!this._model && !!this._tokeniser && this.tokeniser.trained;
+    return this._status === "ready" && !!this._model && !!this._tokeniser;
+  }
+  get busy() {
+    return this._status === "busy" || this._status === "training";
   }
   estimateTrainingMemoryUsage(t) {
-    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, r = e.perBatch * t, o = e.gradients;
-    return r * 0.66 + o * 4;
+    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, i = e.perBatch * t, o = e.gradients;
+    return i * 0.66 + o * 4;
   }
   setStatus(t) {
     this._status !== t && (this._status = t, this.ee.emit("status", t));
   }
   saveModel(t) {
     if (!this._model || !this._tokeniser)
-      throw new Error("Model or tokeniser is not initialized.");
-    return f(this._model, this._tokeniser, {
+      throw new Error("model_or_tokeniser_not_initialized.");
+    return u(this._model, this._tokeniser, {
       ...t,
       name: t?.name || this.meta.name
     });
   }
   static loadModel(t) {
     const e = new a();
-    return u(t).then(({ model: r, tokeniser: o, name: s }) => {
-      e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
-        e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
-      }).catch((i) => {
-        e.setStatus("error"), e.ee.emit("error", i);
+    return d(t).then(({ model: i, tokeniser: o, name: s }) => {
+      e._model = i, e._tokeniser = o, e._config = i.config, s && (e.meta.name = s), e.setStatus("warmup"), m(i).then((r) => {
+        e._memoryRequirements = r, e.setStatus("ready"), e.ee.emit("loaded");
+      }).catch((r) => {
+        e.setStatus("error"), e.ee.emit("error", r);
       });
-    }).catch((r) => {
-      e.setStatus("error"), e.ee.emit("error", r);
+    }).catch((i) => {
+      e.setStatus("error"), e.ee.emit("error", i);
     }), e;
   }
   static create(t, e = {}) {
-    const r = { ...d, ...e }, o = t === "char" ? new g(r.vocabSize) : new w(r.vocabSize), s = new l(r), i = new a(o, s);
-    return i.setStatus("warmup"), m(s).then((n) => {
-      i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
-        h === "trained" && i.setStatus("ready");
+    const i = { ..._, ...e }, o = t === "char" ? new c(i.vocabSize) : new k(i.vocabSize), s = new f(i), r = new a(o, s);
+    return r.setStatus("warmup"), m(s).then((n) => {
+      r._memoryRequirements = n, r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (h) => {
+        h === "trained" && r.setStatus("ready");
       }));
     }).catch((n) => {
-      i.setStatus("error"), i.ee.emit("error", n);
-    }), i;
+      r.setStatus("error"), r.ee.emit("error", n);
+    }), r;
   }
   getProfiler() {
     return this._model?.getProfiler();
@@ -128,24 +133,22 @@ class a {
   set enableProfiler(t) {
     if (t) {
       if (!this._config)
-        throw new Error("Model is not initialized.");
-      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new k());
+        return;
+      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
     } else
       this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
   }
   getNumParams() {
-    if (!this._model)
-      throw new Error("Model is not initialized.");
-    return this._model.getNumParams();
+    return this._model ? this._model.getNumParams() : 0;
   }
   trainer() {
     if (!this._model || !this._tokeniser)
-      throw new Error("Model or tokeniser is not initialized.");
-    const t = new _(this._model, this._tokeniser);
-    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
+      throw new Error("model_or_tokeniser_not_initialized.");
+    const t = new p(this._model, this._tokeniser);
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, i) => {
       const o = this.ee.listeners("trainStep");
       for (const s of o)
-        await s(e, r);
+        await s(e, i);
     }), t;
   }
   train(t, e) {
@@ -160,7 +163,7 @@ class a {
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new p(this._model, this._tokeniser);
+    const t = new l(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {

package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} RENAMED Viewed

@@ -1,8 +1,8 @@
-import { R as a, d as s } from "./random_width-DWzaOgrn.js";
-import "./index-BAzbokzv.js";
+import { R as a, d as s } from "./random_width-DkYP8W8N.js";
+import "./index-C0dhsYom.js";
 import o from "./layers/BaseLayer.js";
-import { v as m } from "./variable-wSS22xj5.js";
-import { g as d } from "./gather-DjyCjmOD.js";
+import { v as m } from "./variable-DXEUOwew.js";
+import { g as d } from "./gather-D0_gPiBz.js";
 /**
  * @license
  * Copyright 2018 Google LLC

package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { i as c } from "./index-BAzbokzv.js";
+import { l as c } from "./index-C0dhsYom.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -28,7 +28,7 @@ function a(e, n, t) {
     t.indexOf(u) === -1 ? s.push(e[o++]) : s.push(n[f++]);
   return s;
 }
-function l(e, n) {
+function p(e, n) {
   const t = [], r = e.length;
   for (let o = 0; o < r; o++)
     n.indexOf(o) === -1 && t.push(e[o]);
@@ -62,7 +62,7 @@ function x(e, n) {
 export {
   x as a,
   m as b,
-  l as c,
+  p as c,
   i as d,
   h as e,
   a as f,

package/dist/backend.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function selectBackend(backendName: 'cpu' | 'webgl' | 'webgpu'): Promise<void>;

package/dist/backend.js ADDED Viewed

@@ -0,0 +1,7 @@
+import { g as a, s as i, r as o } from "./index-C0dhsYom.js";
+async function e(t) {
+  a() !== t && (t === "webgpu" && (await import("./index-BPPzKVdR.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
+}
+export {
+  e as selectBackend
+};