npm - @genai-fi/nanogpt - Versions diffs - 0.4.1 → 0.4.2 - Mend

@genai-fi/nanogpt 0.4.1 → 0.4.2

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (87)

package/dist/Generator.js +3 -3
package/dist/NanoGPTModel.js +83 -70
package/dist/TeachableLLM.js +1 -1
package/dist/{random_width-CMHmdbSu.js → TiedEmbedding-CnJ1bx4q.js} +760 -719
package/dist/{axis_util-DeydwOoC.js → axis_util-BgTGy5w8.js} +1 -1
package/dist/{concat-DS_qH7MI.js → concat-CuRsVY-K.js} +1 -1
package/dist/dropout-DfDdklfL.js +193 -0
package/dist/{gather-BUmJIS8n.js → gather-ZYRWhmXR.js} +1 -1
package/dist/gelu-CnCt17Lk.js +26 -0
package/dist/{index-XjBAhiFO.js → index-C4JCoBvj.js} +61 -61
package/dist/kernel_funcs_utils-CAd1h9X1.js +388 -0
package/dist/layers/CausalSelfAttention.js +71 -70
package/dist/layers/MLP.d.ts +3 -1
package/dist/layers/MLP.js +93 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +3 -3
package/dist/layers/TiedEmbedding.js +6 -46
package/dist/layers/TransformerBlock.js +2 -2
package/dist/{log_sum_exp-DJPkVZZn.js → log_sum_exp-BswFnwOb.js} +5 -5
package/dist/main.js +1 -1
package/dist/{mat_mul-CKwFEV1Q.js → mat_mul-415y5Qn2.js} +1 -1
package/dist/{max-DJvEiCAJ.js → max-CP_9O2Yd.js} +1 -1
package/dist/{moments-CrWRPcR3.js → moments-CjeIaVdp.js} +3 -3
package/dist/{norm-BzY929B_.js → norm-CZM380I3.js} +5 -5
package/dist/{ones-BO01zpJG.js → ones-Bf3YR48P.js} +2 -2
package/dist/ops/appendCache.js +1 -1
package/dist/ops/attentionMask.d.ts +1 -1
package/dist/ops/attentionMask.js +4 -4
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +13 -9
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +3 -3
package/dist/ops/cpu/gelu.d.ts +1 -0
package/dist/ops/cpu/gelu.js +40 -0
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +4 -4
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.d.ts +3 -0
package/dist/ops/gelu.js +8 -0
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.d.ts +2 -0
package/dist/ops/grads/gelu.js +5 -0
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +19 -18
package/dist/ops/webgl/fusedSoftmax.js +483 -782
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.d.ts +2 -0
package/dist/ops/webgl/gelu.js +50 -0
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/{range-DQMNzBWs.js → range-9AzeApCc.js} +1 -1
package/dist/{reshape-DFzh97Sc.js → reshape-Boe4DuIO.js} +1 -1
package/dist/{sin-BYM-U4Ut.js → sin-KmhiDuMa.js} +1 -1
package/dist/{slice_util-CnVNPQI-.js → slice_util-19zDNNSn.js} +2 -2
package/dist/{softmax-4DOn6cPq.js → softmax-Cujsg4ay.js} +1 -1
package/dist/{split-CkbeVdF8.js → split-DbcNm1-i.js} +1 -1
package/dist/{stack-DaIMO5iX.js → stack-D1YjmgKN.js} +1 -1
package/dist/{sum-C6u3xMi3.js → sum-R28pucR5.js} +1 -1
package/dist/{tensor-Cu1fU7H7.js → tensor-BVeHdl7V.js} +1 -1
package/dist/{tensor2d-D0CKdG6B.js → tensor2d-DqFGNs_K.js} +1 -1
package/dist/{tfjs_backend-Bzl2SrRo.js → tfjs_backend-Cug-PH75.js} +826 -1015
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +3 -3
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +5 -5
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +2 -2
package/dist/utilities/generate.js +3 -3
package/dist/utilities/load.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/weights.js +2 -2
package/dist/{variable-BS4AKqNU.js → variable-LJT9Ld63.js} +1 -1
package/dist/{zeros-CmJFiC84.js → zeros-dnQxFgAD.js} +1 -1
package/package.json +1 -1
package/dist/MLP-KHhikThU.js +0 -83

package/dist/training/AdamExt.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { A as r, b as c, f as h, s as g, e as o } from "../index-XjBAhiFO.js";
+import { A as r, b as c, f as h, s as g, e as o } from "../index-C4JCoBvj.js";
 class u extends r {
   constructor(t, e, s, a, i) {
     super(t, e, s, a), this.config = i, this.startLearningRate = t;

package/dist/training/DatasetBuilder.js CHANGED Viewed

@@ -1,7 +1,7 @@
-import { a9 as $, aa as m, ab as M, a as R, ac as f, ad as v, ae as z, j as _, t as x } from "../index-XjBAhiFO.js";
+import { ae as $, ac as m, af as M, a as R, ag as f, ah as v, ai as z, j as _, t as x } from "../index-C4JCoBvj.js";
 import { s as E } from "../index-C4L8Cm77.js";
-import { s as P } from "../stack-DaIMO5iX.js";
-import { t as D } from "../tensor-Cu1fU7H7.js";
+import { s as P } from "../stack-D1YjmgKN.js";
+import { t as D } from "../tensor-BVeHdl7V.js";
 import "../index-Tf7vU29b.js";
 /**
  * @license

package/dist/training/FullTrainer.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { generateText as v } from "../utilities/generate.js";
 import L from "./Trainer.js";
 import x from "./Evaluator.js";
-import { a as h } from "../index-XjBAhiFO.js";
+import { a as h } from "../index-C4JCoBvj.js";
 const D = {
   desiredLoss: 0.01,
   logInterval: 1,

package/dist/training/Trainer.js CHANGED Viewed

@@ -1,10 +1,10 @@
 import { DatasetBuilder as d } from "./DatasetBuilder.js";
 import h from "./AdamExt.js";
-import { t as g, v as u, a as o } from "../index-XjBAhiFO.js";
-import { m as y, n as f } from "../norm-BzY929B_.js";
-import { m as S, a as z } from "../moments-CrWRPcR3.js";
-import { m as b } from "../max-DJvEiCAJ.js";
-import { z as n } from "../zeros-CmJFiC84.js";
+import { t as g, v as u, a as o } from "../index-C4JCoBvj.js";
+import { m as y, n as f } from "../norm-CZM380I3.js";
+import { m as S, a as z } from "../moments-CjeIaVdp.js";
+import { m as b } from "../max-CP_9O2Yd.js";
+import { z as n } from "../zeros-dnQxFgAD.js";
 class G {
   constructor(t, s, e = 1e-3) {
     this.tokenizer = s, this.model = t, this.learningRate = e, this.resetOptimizer(), this.datasetBuilder = new d(s, t.config.gpt.blockSize);

package/dist/training/sparseCrossEntropy.js CHANGED Viewed

@@ -1,9 +1,9 @@
 import { gatherSub as L } from "../ops/gatherSub.js";
 import { scatterSub as y } from "../ops/scatterSub.js";
-import { e as u, c as i, z as S, t as f, s as G } from "../index-XjBAhiFO.js";
-import { s as v } from "../softmax-4DOn6cPq.js";
-import { m as z } from "../max-DJvEiCAJ.js";
-import { l as k } from "../log_sum_exp-DJPkVZZn.js";
+import { e as u, c as i, z as S, t as f, s as G } from "../index-C4JCoBvj.js";
+import { s as v } from "../softmax-Cujsg4ay.js";
+import { m as z } from "../max-CP_9O2Yd.js";
+import { l as k } from "../log_sum_exp-BswFnwOb.js";
 function F(a, s) {
   return f(() => {
     const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);

package/dist/utilities/dummy.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import "../index-XjBAhiFO.js";
-import { z as n } from "../zeros-CmJFiC84.js";
+import "../index-C4JCoBvj.js";
+import { z as n } from "../zeros-dnQxFgAD.js";
 async function a(s) {
   const o = n([1, s.config.gpt.blockSize], "int32"), { logits: t, loss: i } = s.forward(o, void 0, !1);
   await t.data(), t.dispose(), i && i.dispose(), o.dispose();

package/dist/utilities/generate.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { t as y } from "../index-XjBAhiFO.js";
-import { t as x } from "../tensor2d-D0CKdG6B.js";
-import { c as f } from "../concat-DS_qH7MI.js";
+import { t as y } from "../index-C4JCoBvj.js";
+import { t as x } from "../tensor2d-DqFGNs_K.js";
+import { c as f } from "../concat-CuRsVY-K.js";
 async function A(o, r, a, c, T) {
   if (c <= 0)
     throw new Error("Length must be a positive integer");

package/dist/utilities/load.js CHANGED Viewed

@@ -3,7 +3,7 @@ import { importWeights as b } from "./weights.js";
 import u from "../tokeniser/CharTokeniser.js";
 import F from "../NanoGPTModel.js";
 import { dummyPassAsync as j } from "./dummy.js";
-import { d as T } from "../index-XjBAhiFO.js";
+import { d as T } from "../index-C4JCoBvj.js";
 import E from "../tokeniser/bpe.js";
 async function A(t) {
   const o = await fetch(t);

package/dist/utilities/profile.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { m as s } from "../index-XjBAhiFO.js";
+import { m as s } from "../index-C4JCoBvj.js";
 const m = 1024 * 1024;
 class i {
   log = /* @__PURE__ */ new Map();

package/dist/utilities/weights.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import "../index-XjBAhiFO.js";
-import { t as p } from "../tensor-Cu1fU7H7.js";
+import "../index-C4JCoBvj.js";
+import { t as p } from "../tensor-BVeHdl7V.js";
 function h(n) {
   const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
   let t = 0;

package/dist/{variable-BS4AKqNU.js → variable-LJT9Ld63.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { E as i } from "./index-XjBAhiFO.js";
+import { E as i } from "./index-C4JCoBvj.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{zeros-CmJFiC84.js → zeros-dnQxFgAD.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as m, h as r, U as l, E as c, V as i, k as p, W as u, n as f } from "./index-XjBAhiFO.js";
+import { o as m, h as r, U as l, E as c, V as i, k as p, W as u, n as f } from "./index-C4JCoBvj.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@genai-fi/nanogpt",
-    "version": "0.4.1",
+    "version": "0.4.2",
     "type": "module",
     "main": "dist/main.js",
     "types": "dist/main.d.ts",

package/dist/MLP-KHhikThU.js DELETED Viewed

@@ -1,83 +0,0 @@
-import { t as d } from "./index-XjBAhiFO.js";
-import c from "./layers/BaseLayer.js";
-import { E as p, D as l, a as h, r as i } from "./random_width-CMHmdbSu.js";
-/**
- * @license
- * Copyright 2018 Google LLC
- *
- * Use of this source code is governed by an MIT-style
- * license that can be found in the LICENSE file or at
- * https://opensource.org/licenses/MIT.
- * =============================================================================
- */
-function r(s) {
-  return new h(s);
-}
-function u(s) {
-  return new l(s);
-}
-function g(s) {
-  return new p(s);
-}
-class P extends c {
-  cFc;
-  cProj;
-  dropout;
-  index;
-  _trainable = !0;
-  constructor(t, e) {
-    super(e), this.index = t, this.cFc = r({
-      units: e.gpt.mlpFactor * e.gpt.nEmbed,
-      activation: "gelu",
-      useBias: e.gpt.biasInLinear,
-      kernelInitializer: i({
-        mean: 0,
-        stddev: 0.02
-      }),
-      biasInitializer: "zeros",
-      name: `block_${t}_mlp_cFc`
-    }), this.cProj = r({
-      units: e.gpt.nEmbed,
-      useBias: e.gpt.biasInLinear,
-      kernelInitializer: i({
-        mean: 0,
-        stddev: 0.02 / Math.sqrt(2 * e.gpt.nLayer)
-      }),
-      biasInitializer: "zeros",
-      name: `block_${t}_mlp_cProj`
-    }), this.dropout = u({ rate: e.gpt.dropout });
-  }
-  get variables() {
-    return [
-      ...this.cFc.trainableWeights.map((t) => t.read()),
-      ...this.cProj.trainableWeights.map((t) => t.read())
-    ];
-  }
-  get trainable() {
-    return this._trainable;
-  }
-  set trainable(t) {
-    this._trainable = t, this.cFc.trainable = t, this.cProj.trainable = t;
-  }
-  saveWeights(t) {
-    t.set(`block_${this.index}_mlpHidden`, this.cFc.getWeights()), t.set(`block_${this.index}_mlpOut`, this.cProj.getWeights());
-  }
-  loadWeights(t) {
-    this.cFc.setWeights(t.get(`block_${this.index}_mlpHidden`) || []), this.cProj.setWeights(t.get(`block_${this.index}_mlpOut`) || []);
-  }
-  call(t, e = !1) {
-    return d(() => {
-      this.startMemory();
-      const a = this.cFc.apply(t), n = this.cProj.apply(a), o = this.dropout.apply(n, { training: e });
-      return this.endMemory("MLP"), o;
-    });
-  }
-  dispose() {
-    this.cFc.dispose(), this.cProj.dispose(), this.dropout.dispose();
-  }
-}
-export {
-  P as M,
-  u as d,
-  g as e
-};