npm - @genai-fi/nanogpt - Versions diffs - 0.2.7 → 0.2.9 - Mend

@genai-fi/nanogpt 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/TeachableLLM.js +1 -0
package/dist/{complex-D6Bq1XDf.js → complex-Cd8sqiBC.js} +1 -1
package/dist/{index-D1SlunD-.js → index-Dsg28SG6.js} +304 -299
package/dist/layers/CausalSelfAttention.js +40 -39
package/dist/layers/TiedEmbedding.js +106 -128
package/dist/main.js +15 -14
package/dist/mat_mul-BAYDrXvE.js +27 -0
package/dist/ops/attentionMask.d.ts +2 -0
package/dist/ops/attentionMask.js +82 -0
package/dist/ops/gatherSub.js +2 -2
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/scatterSub.js +9 -9
package/dist/{stack-DB2YLlAs.js → stack-1o648CP_.js} +5 -5
package/dist/{sum-02UQ5Eaq.js → sum-NWazHI7f.js} +3 -3
package/dist/training/AdamExt.js +1 -1
package/dist/training/sparseCrossEntropy.js +12 -12
package/package.json +1 -1

package/dist/ops/attentionMask.js ADDED Viewed

@@ -0,0 +1,82 @@
+import { engine as l } from "@tensorflow/tfjs";
+import { r as u, b as k, s as d } from "../index-Dsg28SG6.js";
+import { m as p } from "../mat_mul-BAYDrXvE.js";
+class f { // WebGL program description: lists shader inputs q, k, mask and builds GLSL that outputs (q . k) * divisor + mask per output element.
+  variableNames = ["q", "k", "mask"]; // matches the getQ / getK / getMask accessors called in the shader below
+  outputShape;
+  userCode;
+  // enableShapeUniforms = true;
+  customUniforms = [{ name: "divisor", type: "float" }]; // used as a multiplier in the shader (sum * divisor), despite the name
+  constructor(s, n, e, a) { // s, n, e -> outputShape [s, n, e, e]; a -> dot-product loop bound, inlined as a literal into the GLSL source
+    this.outputShape = [s, n, e, e], this.userCode = `
+        void main() {
+            ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
+            int b = coords.x;
+            int h = coords.y;
+            int t1 = coords.z;
+            int t2 = coords.w;
+            float sum = 0.0;
+            for (int i = 0; i < ${a}; ++i) {
+                float qv = getQ(b, h, t1, i);
+                float kv = getK(b, h, t2, i); // k is transposed on last two dims
+                sum += qv * kv;
+            }
+            // Scale by divisor
+            float scaled = sum * divisor;
+            // Add mask
+            float maskVal = getMask(t1, t2); // mask is [T,T]
+            setOutput(scaled + maskVal);
+        }
+        `;
+  }
+}
+function h(t) { // WebGL kernel function for "AttentionMask": builds program f from q's shape and runs it on the backend.
+  const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = t.backend, r = s.shape[0], i = s.shape[2], c = s.shape[1], m = new f(r, c, i, s.shape[3]); // f(q.shape[0], q.shape[1], q.shape[2], q.shape[3]) -> output shape [d0, d1, d2, d2]
+  return o.runWebGLProgram(m, [s, n, e], "float32", [[a]]); // inputs passed in variableNames order (q, k, mask); [[a]] carries the divisor attr, presumably the value of the "divisor" custom uniform - confirm
+}
+const v = { // kernel config: binds kernel name "AttentionMask" on backend "webgl" to function h
+  kernelName: "AttentionMask",
+  backendName: "webgl",
+  kernelFunc: h
+};
+u(v); // u is imported as `r` from ../index-Dsg28SG6.js; presumably the tfjs kernel-registration function - confirm
+function b(t) { // CPU kernel function for "AttentionMask": computes the result with tensor ops instead of a shader.
+  const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = s.shape[2], i = p(s, n, !1, !0).mul(d(a)), c = e.slice([0, 0], [o, o]).expandDims(0).expandDims(0); // i = p(q, k, false, true) * d(divisor); p/d are imported helpers, presumably matMul (transposing k) and scalar - confirm. c = top-left [o, o] slice of mask (o = q.shape[2]) with two leading unit dims added.
+  return i.add(c); // scaled product plus the sliced mask
+}
+const M = { // kernel config: binds kernel name "AttentionMask" on backend "cpu" to function b
+  kernelName: "AttentionMask",
+  backendName: "cpu",
+  kernelFunc: b
+};
+u(M); // same registration call as used for the webgl config above in this file
+function w(t, s, n, e) { // Public op, exported as `attentionMask`: t -> q, s -> k, n -> mask, e -> divisor attr.
+  return l().runKernel("AttentionMask", { q: t, k: s, mask: n }, { divisor: e }); // runs kernel "AttentionMask" through the @tensorflow/tfjs engine; q, k, mask go in as inputs, divisor as an attr
+}
+const g = { // Gradient config for kernel "AttentionMask"; passed to k(...) after the object literal.
+  kernelName: "AttentionMask",
+  inputsToSave: ["q", "k"], // these are destructured as [e, a] inside gradFunc
+  outputsToSave: [],
+  gradFunc: (t, s, n) => { // t = dy (per the error message below), s = saved tensors, n = attrs
+    if (Array.isArray(t))
+      throw new Error("Expected dy to be a single Tensor");
+    const [e, a] = s, { divisor: o } = n; // e, a presumably q, k following inputsToSave order; o = divisor
+    return {
+      q: () => t.matMul(a).mul(o), // dq = (dy . k) * divisor
+      k: () => e.transpose([0, 1, 3, 2]).matMul(t).mul(o).transpose([0, 1, 3, 2]), // dk = ((q^T . dy) * divisor)^T, where ^T swaps the last two of four axes
+      mask: () => t, // NOTE(review): returns dy unreduced, while the forward kernels index/slice mask as 2-D - confirm this gradient is never consumed, or reduce it to the mask's shape
+      divisor: () => { // NOTE(review): divisor is passed to runKernel as an attr, not an input - confirm this entry is ever used
+        const r = e.matMul(a, !1, !0); // q . k^T
+        return t.mul(r).sum(); // sum over all elements of dy * (q . k^T)
+      }
+    };
+  }
+};
+k(g); // k is imported as `b` from ../index-Dsg28SG6.js; presumably the tfjs gradient-registration function - confirm
+export {
+  w as attentionMask
+};

package/dist/ops/gatherSub.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { engine as l } from "@tensorflow/tfjs";
-import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-D1SlunD-.js";
-import { r as p, s as f } from "../stack-DB2YLlAs.js";
+import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-Dsg28SG6.js";
+import { r as p, s as f } from "../stack-1o648CP_.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/ops/node/sparseCrossEntropy.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as o } from "../../index-D1SlunD-.js";
+import { r as o } from "../../index-Dsg28SG6.js";
 function r(e) {
   const { logits: t, labels: n } = e.inputs;
   return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);

package/dist/ops/scatterSub.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { engine as $ } from "@tensorflow/tfjs";
-import { i as u, j as S, k as h, E as f, l as E, o as N, c as l, n as y, r as p, a as D, m as x } from "../index-D1SlunD-.js";
-import { c as m } from "../complex-D6Bq1XDf.js";
-import { r as v, s as T } from "../stack-DB2YLlAs.js";
+import { k as u, l as S, n as p, E as f, p as E, o as N, c as l, q as y, r as h, a as D, m as x } from "../index-Dsg28SG6.js";
+import { c as m } from "../complex-Cd8sqiBC.js";
+import { r as v, s as T } from "../stack-1o648CP_.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -23,7 +23,7 @@ function i(e, t = "float32") {
     const a = i(e, "float32"), o = i(e, "float32");
     return m(a, o);
   }
-  const r = S(h(e), t);
+  const r = S(p(e), t);
   return f.makeTensor(r, e, t);
 }
 /**
@@ -47,7 +47,7 @@ function d(e, t = "float32") {
     const a = d(e, "float32"), o = i(e, "float32");
     return m(a, o);
   }
-  const r = E(h(e), t);
+  const r = E(p(e), t);
   return f.makeTensor(r, e, t);
 }
 function C(e, t, r) {
@@ -131,7 +131,7 @@ const K = {
   backendName: "webgl",
   kernelFunc: P
 };
-p(K);
+h(K);
 function A(e) {
   const { logits: t, labels: r, dy: a } = e.inputs, o = r.shape[0], s = t.shape[1], n = v(0, o, 1, "int32"), c = T([n, r], 1), b = d([o]), g = I(c, b, [o, s]), k = D(t, g), w = a.reshape([o, 1]);
   return x(k, w);
@@ -141,10 +141,10 @@ const F = {
   backendName: "cpu",
   kernelFunc: A
 };
-p(F);
-function M(e, t, r) {
+h(F);
+function R(e, t, r) {
   return $().runKernel("EfficientScatterSub", { logits: e, labels: t, dy: r }, {});
 }
 export {
-  M as scatterSub
+  R as scatterSub
 };

package/dist/{stack-DB2YLlAs.js → stack-1o648CP_.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { E as e, R as c, o as f, d as u, f as a, P as i } from "./index-D1SlunD-.js";
+import { E as e, R as c, o as f, g as u, h as a, P as i } from "./index-Dsg28SG6.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,7 +15,7 @@ import { E as e, R as c, o as f, d as u, f as a, P as i } from "./index-D1SlunD-
  * limitations under the License.
  * =============================================================================
  */
-function g(n, s, t = 1, r = "float32") {
+function h(n, s, t = 1, r = "float32") {
   if (t === 0)
     throw new Error("Cannot have a step of zero");
   const o = { start: n, stop: s, step: t, dtype: r };
@@ -43,8 +43,8 @@ function k(n, s = 0) {
   const r = t, o = { axis: s };
   return e.runKernel(i, r, o);
 }
-const h = /* @__PURE__ */ f({ stack_: k });
+const l = /* @__PURE__ */ f({ stack_: k });
 export {
-  g as r,
-  h as s
+  h as r,
+  l as s
 };

package/dist/{sum-02UQ5Eaq.js → sum-NWazHI7f.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o, c as a, E as u, g as p, h as i, S as x } from "./index-D1SlunD-.js";
+import { o, c as a, E as u, i, j as p, S as x } from "./index-Dsg28SG6.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -17,7 +17,7 @@ import { o, c as a, E as u, g as p, h as i, S as x } from "./index-D1SlunD-.js";
  */
 function l(n, t) {
   const s = { x: a(n, "x", "reshape", "string_or_numeric") }, r = { shape: t };
-  return u.runKernel(p, s, r);
+  return u.runKernel(i, s, r);
 }
 const h = /* @__PURE__ */ o({ reshape_: l });
 /**
@@ -38,7 +38,7 @@ const h = /* @__PURE__ */ o({ reshape_: l });
  */
 function m(n, t = null, e = !1) {
   let s = a(n, "x", "sum");
-  s.dtype === "bool" && (s = i(s, "int32"));
+  s.dtype === "bool" && (s = p(s, "int32"));
   const r = { x: s }, c = { axis: t, keepDims: e };
   return u.runKernel(x, r, c);
 }

package/dist/training/AdamExt.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { A as r, m as c, s as h, a as g, e as o } from "../index-D1SlunD-.js";
+import { A as r, m as c, s as h, a as g, e as o } from "../index-Dsg28SG6.js";
 class u extends r {
   constructor(t, e, s, a, i) {
     super(t, e, s, a), this.config = i, this.startLearningRate = t;

package/dist/training/sparseCrossEntropy.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import { gatherSub as w } from "../ops/gatherSub.js";
 import { scatterSub as K } from "../ops/scatterSub.js";
-import { o as l, c as d, E as f, M as _, p as z, L as I, q as N, a as E, t as M, u as T, e as m, v as g, w as $, z as S } from "../index-D1SlunD-.js";
-import { s as F, r as b } from "../sum-02UQ5Eaq.js";
+import { o as l, c as d, E as f, M as _, t as z, L as I, u as N, a as E, v as M, w as T, e as m, x as g, y as $, z as S } from "../index-Dsg28SG6.js";
+import { s as F, r as b } from "../sum-NWazHI7f.js";
 /**
  * @license
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -25,7 +25,7 @@ function P(n, s, t) {
     t.indexOf(o) === -1 ? e.push(n[r++]) : e.push(s[c++]);
   return e;
 }
-function q(n, s) {
+function A(n, s) {
   const t = s.map((a) => 1);
   return P(n, t, s);
 }
@@ -45,11 +45,11 @@ function q(n, s) {
  * limitations under the License.
  * =============================================================================
  */
-function A(n, s = null, t = !1) {
+function D(n, s = null, t = !1) {
   const e = { x: d(n, "x", "max") }, r = { reductionIndices: s, keepDims: t };
   return f.runKernel(_, e, r);
 }
-const L = /* @__PURE__ */ l({ max_: A });
+const L = /* @__PURE__ */ l({ max_: D });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -66,11 +66,11 @@ const L = /* @__PURE__ */ l({ max_: A });
  * limitations under the License.
  * =============================================================================
  */
-function D(n) {
+function O(n) {
   const t = { x: d(n, "x", "exp") };
   return f.runKernel(z, t);
 }
-const O = /* @__PURE__ */ l({ exp_: D });
+const W = /* @__PURE__ */ l({ exp_: O });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -87,11 +87,11 @@ const O = /* @__PURE__ */ l({ exp_: D });
  * limitations under the License.
  * =============================================================================
  */
-function W(n) {
+function j(n) {
   const t = { x: d(n, "x", "log", "float32") };
   return f.runKernel(I, t);
 }
-const j = /* @__PURE__ */ l({ log_: W });
+const q = /* @__PURE__ */ l({ log_: j });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -114,9 +114,9 @@ function B(n, s = null, t = !1) {
     e,
     !0
     /* keepDims */
-  ), c = E(a, r), o = O(c), p = F(o, e), u = j(p), i = M(b(r, u.shape), u);
+  ), c = E(a, r), o = W(c), p = F(o, e), u = q(p), i = M(b(r, u.shape), u);
   if (t) {
-    const h = q(i.shape, e);
+    const h = A(i.shape, e);
     return b(i, h);
   }
   return i;
@@ -165,7 +165,7 @@ function ss() {
     (s, t, a) => {
       const e = s.shape[s.shape.length - 1], c = s.shape.slice(0, -1).reduce((h, x) => h * x, 1), o = s.reshape([c, e]), p = t.reshape([c]).cast("int32"), u = R(o, p);
       return a([o, p]), o.dispose(), p.dispose(), { value: u, gradFunc: (h, x) => $(() => {
-        const k = x[0], y = x[1], C = Q(k), G = K(C, y, h), v = S(t);
+        const y = x[0], k = x[1], C = Q(y), G = K(C, k, h), v = S(t);
         return [G.reshape(s.shape), v];
       }) };
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@genai-fi/nanogpt",
-    "version": "0.2.7",
+    "version": "0.2.9",
     "type": "module",
     "main": "dist/main.js",
     "types": "dist/main.d.ts",