npm - @genai-fi/nanogpt - Versions diffs - 0.2.8 → 0.2.9 - Mend

@genai-fi/nanogpt 0.2.8 → 0.2.9

This diff compares the contents of two publicly available versions of this package as released to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in that registry.

Files changed (13) hide show

package/dist/{complex-CeoYJn2o.js → complex-Cd8sqiBC.js} +1 -1
package/dist/{index-DQfEAU9u.js → index-Dsg28SG6.js} +304 -299
package/dist/layers/TiedEmbedding.js +21 -21
package/dist/{mat_mul-CuHB58-H.js → mat_mul-BAYDrXvE.js} +3 -3
package/dist/ops/attentionMask.js +40 -20
package/dist/ops/gatherSub.js +2 -2
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/scatterSub.js +5 -5
package/dist/{stack-C9cTkqpq.js → stack-1o648CP_.js} +5 -5
package/dist/{sum-B-O33dgG.js → sum-NWazHI7f.js} +1 -1
package/dist/training/AdamExt.js +1 -1
package/dist/training/sparseCrossEntropy.js +12 -12
package/package.json +1 -1

package/dist/layers/TiedEmbedding.js CHANGED Viewed

@@ -1,7 +1,7 @@
-import { o as h, c as i, E as o, y as V, D as X, I as Y, F as Z, N as ee, H as te, J as se, K as ne, O as re, Q as ue, g as L, x as ae, T as A, m as ie, U as oe, u as le, b as q, l as C, V as P, w as U, _ as H } from "../index-DQfEAU9u.js";
-import { s as ce, r as f } from "../sum-B-O33dgG.js";
-import { m } from "../mat_mul-CuHB58-H.js";
-import { c as pe } from "../complex-CeoYJn2o.js";
+import { o as h, c as i, E as o, D as V, F as X, I as Y, H as Z, N as ee, J as te, K as se, O as ne, Q as re, T as ue, h as L, y as ae, U as A, m as ie, V as oe, v as le, d as q, n as C, W as P, x as U, _ as H } from "../index-Dsg28SG6.js";
+import { s as ce, r as f } from "../sum-NWazHI7f.js";
+import { m } from "../mat_mul-BAYDrXvE.js";
+import { c as pe } from "../complex-Cd8sqiBC.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -169,7 +169,7 @@ function Me(t) {
   const s = { x: i(t, "x", "relu") };
   return o.runKernel(ne, s);
 }
-const we = /* @__PURE__ */ h({ relu_: Me });
+const We = /* @__PURE__ */ h({ relu_: Me });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -186,11 +186,11 @@ const we = /* @__PURE__ */ h({ relu_: Me });
  * limitations under the License.
  * =============================================================================
  */
-function We(t) {
+function we(t) {
   const s = { x: i(t, "x", "relu6") };
   return o.runKernel(re, s);
 }
-const ze = /* @__PURE__ */ h({ relu6_: We });
+const ze = /* @__PURE__ */ h({ relu6_: we });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -273,7 +273,7 @@ function Te(t, e, s, n) {
   if (e === "linear")
     return t;
   if (e === "relu")
-    return we(t);
+    return We(t);
   if (e === "elu")
     return me(t);
   if (e === "relu6")
@@ -310,14 +310,14 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
   }
   let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
   [u, a] = q(u, a);
-  const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], w = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], W = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
+  const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
   L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
-  const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([w, W]), F = s ? f(u, [B, D, w]) : f(u, [B, w, D]), R = n ? f(a, [N, W, b]) : f(a, [N, b, W]);
+  const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
   let S;
   r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
-  let G;
-  l != null && (G = i(l, "prelu weights", "fused matMul"));
-  const I = (x, M) => {
+  let v;
+  l != null && (v = i(l, "prelu weights", "fused matMul"));
+  const G = (x, M) => {
     const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
     let K, _;
     if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
@@ -325,24 +325,24 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
       return [K, _, Q];
     } else
       return [K, _];
-  }, v = {
+  }, I = {
     a: F,
     b: R,
     bias: S,
-    preluActivationWeights: G
+    preluActivationWeights: v
   }, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
   return r == null ? U((M, g, $) => {
     const k = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      o.runKernel(H, v, j)
+      o.runKernel(H, I, j)
     );
-    return $([M, g, k]), { value: f(k, O), gradFunc: I };
+    return $([M, g, k]), { value: f(k, O), gradFunc: G };
   })(F, R) : U((M, g, $, k) => {
     const z = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      o.runKernel(H, v, j)
+      o.runKernel(H, I, j)
     );
-    return k([M, g, z, $]), { value: f(z, O), gradFunc: I };
+    return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
   })(F, R, S);
 }
 const J = /* @__PURE__ */ h({ fusedMatMul_: Ne });
@@ -369,7 +369,7 @@ class E extends Error {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function Ge(t, e, s, n) {
+function ve(t, e, s, n) {
   if (t.rank < 2 || e.rank < 2)
     throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
   if (e.rank >= 3) {
@@ -425,7 +425,7 @@ class Pe {
     return this.tf.gather(this.tiedWeights, e, 0);
   }
   project(e) {
-    return Ge(e, this.tiedWeights.transpose());
+    return ve(e, this.tiedWeights.transpose());
   }
   getWeights() {
     return [this.tiedWeights];

package/dist/{mat_mul-CuHB58-H.js → mat_mul-BAYDrXvE.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as c, c as s, b as m, E as M, B as p } from "./index-DQfEAU9u.js";
+import { o as c, c as s, d as m, E as M, B as p } from "./index-Dsg28SG6.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as c, c as s, b as m, E as M, B as p } from "./index-DQfEAU9u.js";
  * limitations under the License.
  * =============================================================================
  */
-function b(e, o, n = !1, l = !1) {
+function f(e, o, n = !1, l = !1) {
   let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
   [a, t] = m(a, t);
   const r = { a, b: t }, u = { transposeA: n, transposeB: l };
   return M.runKernel(p, r, u);
 }
-const i = /* @__PURE__ */ c({ matMul_: b });
+const i = /* @__PURE__ */ c({ matMul_: f });
 export {
   i as m
 };

package/dist/ops/attentionMask.js CHANGED Viewed

@@ -1,14 +1,14 @@
-import { engine as d } from "@tensorflow/tfjs";
-import { r as k, s as u } from "../index-DQfEAU9u.js";
-import { m as l } from "../mat_mul-CuHB58-H.js";
-class p {
+import { engine as l } from "@tensorflow/tfjs";
+import { r as u, b as k, s as d } from "../index-Dsg28SG6.js";
+import { m as p } from "../mat_mul-BAYDrXvE.js";
+class f {
   variableNames = ["q", "k", "mask"];
   outputShape;
   userCode;
   // enableShapeUniforms = true;
   customUniforms = [{ name: "divisor", type: "float" }];
-  constructor(t, e, n, a) {
-    this.outputShape = [t, e, n, n], this.userCode = `
+  constructor(s, n, e, a) {
+    this.outputShape = [s, n, e, e], this.userCode = `
         void main() {
             ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
             int b = coords.x;
@@ -34,29 +34,49 @@ class p {
         `;
   }
 }
-function f(s) {
-  const { q: t, k: e, mask: n } = s.inputs, { divisor: a } = s.attrs, o = s.backend, c = t.shape[0], i = t.shape[2], r = t.shape[1], m = new p(c, r, i, t.shape[3]);
-  return o.runWebGLProgram(m, [t, e, n], "float32", [[a]]);
+function h(t) {
+  const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = t.backend, r = s.shape[0], i = s.shape[2], c = s.shape[1], m = new f(r, c, i, s.shape[3]);
+  return o.runWebGLProgram(m, [s, n, e], "float32", [[a]]);
 }
-const h = {
+const v = {
   kernelName: "AttentionMask",
   backendName: "webgl",
-  kernelFunc: f
+  kernelFunc: h
 };
-k(h);
-function b(s) {
-  const { q: t, k: e, mask: n } = s.inputs, { divisor: a } = s.attrs, o = t.shape[2], i = l(t, e, !1, !0).mul(u(a)), r = n.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
-  return i.add(r);
+u(v);
+function b(t) {
+  const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = s.shape[2], i = p(s, n, !1, !0).mul(d(a)), c = e.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
+  return i.add(c);
 }
-const v = {
+const M = {
   kernelName: "AttentionMask",
   backendName: "cpu",
   kernelFunc: b
 };
-k(v);
-function C(s, t, e, n) {
-  return d().runKernel("AttentionMask", { q: s, k: t, mask: e }, { divisor: n });
+u(M);
+function w(t, s, n, e) {
+  return l().runKernel("AttentionMask", { q: t, k: s, mask: n }, { divisor: e });
 }
+const g = {
+  kernelName: "AttentionMask",
+  inputsToSave: ["q", "k"],
+  outputsToSave: [],
+  gradFunc: (t, s, n) => {
+    if (Array.isArray(t))
+      throw new Error("Expected dy to be a single Tensor");
+    const [e, a] = s, { divisor: o } = n;
+    return {
+      q: () => t.matMul(a).mul(o),
+      k: () => e.transpose([0, 1, 3, 2]).matMul(t).mul(o).transpose([0, 1, 3, 2]),
+      mask: () => t,
+      divisor: () => {
+        const r = e.matMul(a, !1, !0);
+        return t.mul(r).sum();
+      }
+    };
+  }
+};
+k(g);
 export {
-  C as attentionMask
+  w as attentionMask
 };

package/dist/ops/gatherSub.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { engine as l } from "@tensorflow/tfjs";
-import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-DQfEAU9u.js";
-import { r as p, s as f } from "../stack-C9cTkqpq.js";
+import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-Dsg28SG6.js";
+import { r as p, s as f } from "../stack-1o648CP_.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/ops/node/sparseCrossEntropy.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as o } from "../../index-DQfEAU9u.js";
+import { r as o } from "../../index-Dsg28SG6.js";
 function r(e) {
   const { logits: t, labels: n } = e.inputs;
   return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);

package/dist/ops/scatterSub.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { engine as $ } from "@tensorflow/tfjs";
-import { j as u, k as S, l as p, E as f, n as E, o as N, c as l, p as y, r as h, a as D, m as x } from "../index-DQfEAU9u.js";
-import { c as m } from "../complex-CeoYJn2o.js";
-import { r as v, s as T } from "../stack-C9cTkqpq.js";
+import { k as u, l as S, n as p, E as f, p as E, o as N, c as l, q as y, r as h, a as D, m as x } from "../index-Dsg28SG6.js";
+import { c as m } from "../complex-Cd8sqiBC.js";
+import { r as v, s as T } from "../stack-1o648CP_.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -142,9 +142,9 @@ const F = {
   kernelFunc: A
 };
 h(F);
-function M(e, t, r) {
+function R(e, t, r) {
   return $().runKernel("EfficientScatterSub", { logits: e, labels: t, dy: r }, {});
 }
 export {
-  M as scatterSub
+  R as scatterSub
 };

package/dist/{stack-C9cTkqpq.js → stack-1o648CP_.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { E as e, R as c, o as f, f as u, g as a, P as i } from "./index-DQfEAU9u.js";
+import { E as e, R as c, o as f, g as u, h as a, P as i } from "./index-Dsg28SG6.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,7 +15,7 @@ import { E as e, R as c, o as f, f as u, g as a, P as i } from "./index-DQfEAU9u
  * limitations under the License.
  * =============================================================================
  */
-function l(n, s, t = 1, r = "float32") {
+function h(n, s, t = 1, r = "float32") {
   if (t === 0)
     throw new Error("Cannot have a step of zero");
   const o = { start: n, stop: s, step: t, dtype: r };
@@ -43,8 +43,8 @@ function k(n, s = 0) {
   const r = t, o = { axis: s };
   return e.runKernel(i, r, o);
 }
-const h = /* @__PURE__ */ f({ stack_: k });
+const l = /* @__PURE__ */ f({ stack_: k });
 export {
-  l as r,
-  h as s
+  h as r,
+  l as s
 };

package/dist/{sum-B-O33dgG.js → sum-NWazHI7f.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o, c as a, E as u, h as i, i as p, S as x } from "./index-DQfEAU9u.js";
+import { o, c as a, E as u, i, j as p, S as x } from "./index-Dsg28SG6.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/training/AdamExt.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { A as r, m as c, s as h, a as g, e as o } from "../index-DQfEAU9u.js";
+import { A as r, m as c, s as h, a as g, e as o } from "../index-Dsg28SG6.js";
 class u extends r {
   constructor(t, e, s, a, i) {
     super(t, e, s, a), this.config = i, this.startLearningRate = t;

package/dist/training/sparseCrossEntropy.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { gatherSub as w } from "../ops/gatherSub.js";
 import { scatterSub as K } from "../ops/scatterSub.js";
-import { o as l, c as d, E as f, M as _, q as z, L as I, t as N, a as E, u as M, v as T, e as m, w as g, x as $, z as S } from "../index-DQfEAU9u.js";
-import { s as F, r as b } from "../sum-B-O33dgG.js";
+import { o as l, c as d, E as f, M as _, t as z, L as I, u as N, a as E, v as M, w as T, e as m, x as g, y as $, z as S } from "../index-Dsg28SG6.js";
+import { s as F, r as b } from "../sum-NWazHI7f.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -25,7 +25,7 @@ function P(n, s, t) {
     t.indexOf(o) === -1 ? e.push(n[r++]) : e.push(s[c++]);
   return e;
 }
-function q(n, s) {
+function A(n, s) {
   const t = s.map((a) => 1);
   return P(n, t, s);
 }
@@ -45,11 +45,11 @@ function q(n, s) {
  * limitations under the License.
  * =============================================================================
  */
-function A(n, s = null, t = !1) {
+function D(n, s = null, t = !1) {
   const e = { x: d(n, "x", "max") }, r = { reductionIndices: s, keepDims: t };
   return f.runKernel(_, e, r);
 }
-const L = /* @__PURE__ */ l({ max_: A });
+const L = /* @__PURE__ */ l({ max_: D });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -66,11 +66,11 @@ const L = /* @__PURE__ */ l({ max_: A });
  * limitations under the License.
  * =============================================================================
  */
-function D(n) {
+function O(n) {
   const t = { x: d(n, "x", "exp") };
   return f.runKernel(z, t);
 }
-const O = /* @__PURE__ */ l({ exp_: D });
+const W = /* @__PURE__ */ l({ exp_: O });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -87,11 +87,11 @@ const O = /* @__PURE__ */ l({ exp_: D });
  * limitations under the License.
  * =============================================================================
  */
-function W(n) {
+function j(n) {
   const t = { x: d(n, "x", "log", "float32") };
   return f.runKernel(I, t);
 }
-const j = /* @__PURE__ */ l({ log_: W });
+const q = /* @__PURE__ */ l({ log_: j });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -114,9 +114,9 @@ function B(n, s = null, t = !1) {
     e,
     !0
     /* keepDims */
-  ), c = E(a, r), o = O(c), p = F(o, e), u = j(p), i = M(b(r, u.shape), u);
+  ), c = E(a, r), o = W(c), p = F(o, e), u = q(p), i = M(b(r, u.shape), u);
   if (t) {
-    const h = q(i.shape, e);
+    const h = A(i.shape, e);
     return b(i, h);
   }
   return i;
@@ -165,7 +165,7 @@ function ss() {
     (s, t, a) => {
       const e = s.shape[s.shape.length - 1], c = s.shape.slice(0, -1).reduce((h, x) => h * x, 1), o = s.reshape([c, e]), p = t.reshape([c]).cast("int32"), u = R(o, p);
       return a([o, p]), o.dispose(), p.dispose(), { value: u, gradFunc: (h, x) => $(() => {
-        const k = x[0], y = x[1], C = Q(k), G = K(C, y, h), v = S(t);
+        const y = x[0], k = x[1], C = Q(y), G = K(C, k, h), v = S(t);
         return [G.reshape(s.shape), v];
       }) };
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@genai-fi/nanogpt",
-    "version": "0.2.8",
+    "version": "0.2.9",
     "type": "module",
     "main": "dist/main.js",
     "types": "dist/main.d.ts",