npm - @genai-fi/nanogpt - Versions diffs - 0.4.4 → 0.5.0 - Mend

@genai-fi/nanogpt 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

package/dist/BaseLayer-BhrMN8JO.js +135 -0
package/dist/Generator.js +44 -41
package/dist/NanoGPTModel.d.ts +12 -16
package/dist/NanoGPTModel.js +128 -138
package/dist/{Reshape-CiAY8ltP.js → Reshape-BE5rA4rT.js} +8 -8
package/dist/TeachableLLM.js +8 -5
package/dist/{TiedEmbedding-DznFwzcB.js → TiedEmbedding-DsDRvLB0.js} +751 -768
package/dist/{axis_util-QP0LdI1v.js → axis_util-97KkkyRQ.js} +1 -1
package/dist/broadcast_to-CMlkG8NS.js +44 -0
package/dist/{concat-DvWM7HGZ.js → concat-Cxbo2sOz.js} +3 -3
package/dist/{dropout-DFEXTPV0.js → dropout-kbDY39Ci.js} +1 -1
package/dist/{gather-C5D8PxwA.js → gather-Bxe1Qip8.js} +4 -4
package/dist/{gpgpu_math-CUzjlO9A.js → gpgpu_math-C0zyxKFi.js} +1 -1
package/dist/{index--6vO-cOz.js → index-iNhkcAEQ.js} +82 -82
package/dist/{kernel_funcs_utils-C6YBCuOt.js → kernel_funcs_utils-C4eIk4fE.js} +20 -20
package/dist/layers/BaseLayer.d.ts +28 -4
package/dist/layers/BaseLayer.js +3 -16
package/dist/layers/CausalSelfAttention.d.ts +22 -24
package/dist/layers/CausalSelfAttention.js +73 -127
package/dist/layers/MLP.d.ts +8 -15
package/dist/layers/MLP.js +43 -81
package/dist/layers/RMSNorm.d.ts +5 -11
package/dist/layers/RMSNorm.js +13 -29
package/dist/layers/RoPECache.js +14 -12
package/dist/layers/TiedEmbedding.d.ts +6 -16
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.d.ts +12 -16
package/dist/layers/TransformerBlock.js +20 -41
package/dist/{log_sum_exp-CiEy1aUe.js → log_sum_exp-CkumwesB.js} +11 -11
package/dist/main.js +22 -19
package/dist/{mat_mul-BEHRPMh0.js → mat_mul-D0SifYfJ.js} +3 -3
package/dist/{max-BUShNgfh.js → max-CYaAjEEp.js} +3 -3
package/dist/{moments-DYOHXoRV.js → moments-B06NlR_V.js} +6 -6
package/dist/{norm-DSva3hI3.js → norm-D3676xIo.js} +7 -7
package/dist/{ones-D6kB8bdY.js → ones-BIeFnPHR.js} +2 -2
package/dist/ops/appendCache.js +4 -4
package/dist/ops/attentionMask.d.ts +1 -1
package/dist/ops/attentionMask.js +4 -4
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +14 -15
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +5 -5
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.js +1 -1
package/dist/ops/cpu/matMulMul.d.ts +1 -0
package/dist/ops/cpu/matMulMul.js +17 -0
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.d.ts +1 -0
package/dist/ops/cpu/normRMS.js +39 -0
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +8 -8
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +1 -1
package/dist/ops/grads/attentionMask.js +13 -9
package/dist/ops/grads/fusedSoftmax.js +12 -9
package/dist/ops/grads/gelu.js +1 -1
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/normRMS.d.ts +2 -0
package/dist/ops/grads/normRMS.js +20 -0
package/dist/ops/grads/qkv.js +19 -9
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.d.ts +2 -0
package/dist/ops/matMulMul.js +9 -0
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/normRMS.d.ts +2 -0
package/dist/ops/normRMS.js +10 -0
package/dist/ops/qkv.js +1 -1
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +13 -12
package/dist/ops/webgl/fusedSoftmax.js +43 -40
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/matMulGelu.d.ts +3 -2
package/dist/ops/webgl/matMulGelu.js +77 -75
package/dist/ops/webgl/matMulMul.d.ts +14 -0
package/dist/ops/webgl/matMulMul.js +28 -0
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.d.ts +1 -0
package/dist/ops/webgl/normRMS.js +86 -0
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops-ObfXLHYQ.js +1269 -0
package/dist/{range-C_vpUjBu.js → range-BsFU-SNG.js} +1 -1
package/dist/{reshape-z51Eu-re.js → reshape-DxTPgnwL.js} +3 -3
package/dist/{sin-H567uayl.js → sin-BOX-JVAj.js} +5 -5
package/dist/slice_util-D-kaD4ZV.js +49 -0
package/dist/{softmax-Dsxflvdl.js → softmax-BjsptB07.js} +2 -2
package/dist/{split-B_k_jwud.js → split-BCbrzthj.js} +4 -4
package/dist/{stack-CmqSdsfs.js → stack--cqr9Dgc.js} +2 -2
package/dist/{sum-DdkDf2MG.js → sum-B_92TaHD.js} +5 -5
package/dist/{tensor-BGYi41cj.js → tensor-CfiPXsW4.js} +1 -1
package/dist/{tensor2d-DUr_htjt.js → tensor2d-tSxWdFMH.js} +1 -1
package/dist/tfjs_backend-NucKez4s.js +1010 -0
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +44 -44
package/dist/training/Evaluator.js +6 -6
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +7 -7
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +10 -10
package/dist/utilities/generate.js +3 -3
package/dist/utilities/load.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/save.js +10 -8
package/dist/utilities/weights.js +2 -2
package/dist/{zeros-8xl-W2DC.js → zeros-NMYTayy7.js} +3 -3
package/package.json +1 -1
package/dist/slice_util-BdhYwFY_.js +0 -90
package/dist/tfjs_backend-DuKis_xG.js +0 -2271
package/dist/variable-BJTZ3jOy.js +0 -23

package/dist/ops/grads/fusedSoftmax.js CHANGED Viewed

@@ -1,17 +1,20 @@
-import { g as p, b as m, s as d } from "../../index--6vO-cOz.js";
-import { mulDrop as c } from "../mulDrop.js";
-import { s as f } from "../../sum-DdkDf2MG.js";
-const g = {
+import { h as d, b as u, s as f } from "../../index-iNhkcAEQ.js";
+import { mulDrop as l } from "../mulDrop.js";
+import { s as g } from "../../sum-B_92TaHD.js";
+const T = {
   kernelName: "FusedSoftmax",
   outputsToSave: [!0],
-  gradFunc: (s, a, u) => {
-    const [o] = a, { dim: i, dropoutRate: t, seed: r } = u, n = !0, e = t && r ? c(s, o, t, r) : m(s, o);
+  gradFunc: (o, i, n) => {
+    const [s] = i, { dim: a, dropoutRate: t, seed: e } = n, p = !0, r = t && e ? l(o, s, t, e) : u(o, s);
     return {
-      logits: () => d(e, m(f(e, [i], n), o))
+      logits: () => {
+        const m = g(r, [a], p), c = u(m, s);
+        return m.dispose(), f(r, c);
+      }
     };
   }
 };
-p(g);
+d(T);
 export {
-  g as softmaxGradConfig
+  T as softmaxGradConfig
 };

package/dist/ops/grads/gelu.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { g as t, e as n } from "../../index--6vO-cOz.js";
+import { h as t, e as n } from "../../index-iNhkcAEQ.js";
 import "../cpu/gelu.js";
 import "../webgl/gelu.js";
 const o = {

package/dist/ops/grads/matMulGelu.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { g as a, e as o } from "../../index--6vO-cOz.js";
+import { h as a, e as o } from "../../index-iNhkcAEQ.js";
 function s(e, n, r) {
   return o().runKernel("MatMulGeluGrad", { dy: e, x: n, kernel: r });
 }

package/dist/ops/grads/normRMS.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import { GradConfig } from '@tensorflow/tfjs-core';
2	+ export declare const normRMSGradConfig: GradConfig;

package/dist/ops/grads/normRMS.js ADDED Viewed

@@ -0,0 +1,20 @@
+import { h as t, e as g } from "../../index-iNhkcAEQ.js";
+function s(r, a, n) {
+  return g().runKernel("RMSNormGrad", { dy: r, x: a, gamma: n });
+}
+const u = {
+  kernelName: "RMSNorm",
+  inputsToSave: ["x", "gamma"],
+  outputsToSave: [],
+  gradFunc: (r, a) => {
+    const [n, e] = a, [m, o] = s(r, n, e);
+    return {
+      x: () => m,
+      gamma: () => o
+    };
+  }
+};
+t(u);
+export {
+  u as normRMSGradConfig
+};

package/dist/ops/grads/qkv.js CHANGED Viewed

@@ -1,20 +1,30 @@
-import { g as v } from "../../index--6vO-cOz.js";
-const g = {
+import { h as Q } from "../../index-iNhkcAEQ.js";
+const V = {
   kernelName: "QKV",
   inputsToSave: ["x", "kernel"],
   outputsToSave: [],
-  gradFunc: (k, m) => {
-    const [x, K, f] = k, [c, r] = m, [t, s, e] = c.shape, d = x.transpose([0, 2, 1, 3]).reshape([t * s, e]), l = K.transpose([0, 2, 1, 3]).reshape([t * s, e]), u = f.transpose([0, 2, 1, 3]).reshape([t * s, e]), i = r.slice([0, 0], [e, e]), h = r.slice([0, e], [e, e]), M = r.slice([0, 2 * e], [e, e]);
+  gradFunc: (x, K) => {
+    const [f, h, M] = x, [p, l] = K, [t, n, e] = p.shape, i = f.transpose([0, 2, 1, 3]).reshape([t * n, e]), u = h.transpose([0, 2, 1, 3]).reshape([t * n, e]), k = M.transpose([0, 2, 1, 3]).reshape([t * n, e]);
     return {
       x: () => {
-        const n = d.matMul(i, !1, !0), a = l.matMul(h, !1, !0), o = u.matMul(M, !1, !0);
-        return n.add(a).add(o).reshape([t, s, e]);
+        const s = l.slice([0, 0], [e, e]), o = i.matMul(s, !1, !0);
+        s.dispose();
+        const d = l.slice([0, e], [e, e]), r = u.matMul(d, !1, !0);
+        d.dispose();
+        const a = o.add(r);
+        o.dispose(), r.dispose();
+        const c = l.slice([0, 2 * e], [e, e]), m = k.matMul(c, !1, !0);
+        c.dispose();
+        const v = a.add(m).reshape([t, n, e]);
+        return a.dispose(), m.dispose(), v;
       },
       kernel: () => {
-        const n = c.reshape([t * s, e]), a = n.matMul(d, !0, !1), o = n.matMul(l, !0, !1), p = n.matMul(u, !0, !1);
-        return a.concat(o, 1).concat(p, 1);
+        const s = p.reshape([t * n, e]), o = s.matMul(i, !0, !1), d = s.matMul(u, !0, !1), r = o.concat(d, 1);
+        o.dispose(), d.dispose();
+        const a = s.matMul(k, !0, !1), c = r.concat(a, 1);
+        return r.dispose(), a.dispose(), s.dispose(), c;
       }
     };
   }
 };
-v(g);
+Q(V);

package/dist/ops/grads/rope.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { g as a, e as i } from "../../index--6vO-cOz.js";
+import { h as a, e as i } from "../../index-iNhkcAEQ.js";
 function p(n, e, s, o) {
   return i().runKernel("Rope", { x: n, sin: e, cos: s }, { pastLen: o });
 }

package/dist/ops/matMulGelu.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { e as u } from "../index--6vO-cOz.js";
+import { e as u } from "../index-iNhkcAEQ.js";
 import "./cpu/matMulGelu.js";
 import "./webgl/matMulGelu.js";
 import "./grads/matMulGelu.js";

package/dist/ops/matMulMul.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import { Tensor } from '@tensorflow/tfjs-core';
2	+ export declare function matMulMul(x: Tensor, kernel: Tensor, y: Tensor, transposeA?: boolean, transposeB?: boolean): Tensor;

package/dist/ops/matMulMul.js ADDED Viewed

@@ -0,0 +1,9 @@
+import { e as u } from "../index-iNhkcAEQ.js";
+import "./cpu/matMulMul.js";
+import "./webgl/matMulMul.js";
+function m(e, r, t, l = !1, n = !1) {
+  return u().runKernel("MatMulMul", { x: e, kernel: r, y: t }, { transposeA: l, transposeB: n });
+}
+export {
+  m as matMulMul
+};

package/dist/ops/mulDrop.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { e as t } from "../index--6vO-cOz.js";
+import { e as t } from "../index-iNhkcAEQ.js";
 import "./cpu/mulDropout.js";
 import "./webgl/mulDropout.js";
 function m(r, o, e, n) {

package/dist/ops/node/sparseCrossEntropy.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as o } from "../../index--6vO-cOz.js";
+import { r as o } from "../../index-iNhkcAEQ.js";
 function r(e) {
   const { logits: t, labels: n } = e.inputs;
   return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);

package/dist/ops/normRMS.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import { Tensor } from '@tensorflow/tfjs-core';
2	+ export declare function normRMS(x: Tensor, gamma: Tensor): Tensor;

package/dist/ops/normRMS.js ADDED Viewed

@@ -0,0 +1,10 @@
+import { e as n } from "../index-iNhkcAEQ.js";
+import "./cpu/normRMS.js";
+import "./webgl/normRMS.js";
+import "./grads/normRMS.js";
+function p(r, o) {
+  return n().runKernel("RMSNorm", { x: r, gamma: o });
+}
+export {
+  p as normRMS
+};

package/dist/ops/qkv.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { e as o } from "../index--6vO-cOz.js";
+import { e as o } from "../index-iNhkcAEQ.js";
 import "./cpu/qkv.js";
 import "./webgl/qkv.js";
 import "./grads/qkv.js";

package/dist/ops/scatterSub.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { e as i } from "../index--6vO-cOz.js";
+import { e as i } from "../index-iNhkcAEQ.js";
 import "./cpu/scatterSub.js";
 import "./webgl/scatterSub.js";
 function c(t, r, e) {

package/dist/ops/webgl/appendCache.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as p } from "../../index--6vO-cOz.js";
+import { r as p } from "../../index-iNhkcAEQ.js";
 class m {
   variableNames = ["cache", "item"];
   outputShape;

package/dist/ops/webgl/attentionMask.js CHANGED Viewed

@@ -1,14 +1,15 @@
-import { r as h } from "../../index--6vO-cOz.js";
-class l {
+import { r as m } from "../../index-iNhkcAEQ.js";
+class h {
   variableNames = ["q", "k"];
   outputShape;
   userCode;
   customUniforms = [
     { name: "divisor", type: "float" },
-    { name: "pastLen", type: "int" }
+    { name: "pastLen", type: "int" },
+    { name: "inf", type: "float" }
   ];
-  constructor(t, s, e, n, a) {
-    this.outputShape = [t, s, e, n], this.userCode = `
+  constructor(t, e, s, n, a) {
+    this.outputShape = [t, e, s, n], this.userCode = `
         void main() {
             ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
             int b = coords.x;
@@ -27,18 +28,18 @@ class l {
             float scaled = sum * divisor;
             // Mask out future positions
-            setOutput((t2 > t1 + pastLen) ? -1.0/0.0 : scaled);
+            setOutput((t2 > t1 + pastLen) ? inf : scaled);
         }
         `;
   }
 }
-function m(o) {
-  const { q: t, k: s } = o.inputs, { divisor: e, pastLen: n } = o.attrs, a = o.backend, i = t.shape[0], r = t.shape[2], c = s.shape[2], u = t.shape[1], p = t.shape[3], d = new l(i, u, r, c, p);
-  return a.runWebGLProgram(d, [t, s], "float32", [[e], [n]]);
+function l(o) {
+  const { q: t, k: e } = o.inputs, { divisor: s, pastLen: n } = o.attrs, a = o.backend, i = t.shape[0], r = t.shape[2], c = e.shape[2], u = t.shape[1], p = t.shape[3], d = new h(i, u, r, c, p);
+  return a.runWebGLProgram(d, [t, e], "float32", [[s], [n], [Number.NEGATIVE_INFINITY]]);
 }
-const k = {
+const f = {
   kernelName: "AttentionMask",
   backendName: "webgl",
-  kernelFunc: m
+  kernelFunc: l
 };
-h(k);
+m(f);

package/dist/ops/webgl/fusedSoftmax.js CHANGED Viewed

@@ -1,10 +1,11 @@
-import { a$ as qt, n as y, O as Ct, ac as J, b0 as nt, b1 as It, b2 as pt, b3 as ft, b4 as kt, Z as ot, ag as B, b5 as q, b6 as Ut, Q as Gt, j as jt, t as Zt, b7 as dt, ao as ct, b8 as tt, $ as ut, b9 as Bt, L as Ht, ba as Kt, r as Xt } from "../../index--6vO-cOz.js";
-import { f as $t, a as Qt, g as yt, b as Yt, c as Jt } from "../../kernel_funcs_utils-C6YBCuOt.js";
-import { c as mt, g as Tt, a as Vt, b as Mt, e as wt } from "../../axis_util-QP0LdI1v.js";
-import { b as te, i as ee, c as ne } from "../../slice_util-BdhYwFY_.js";
-import { r as oe } from "../../reshape-z51Eu-re.js";
+import { a$ as qt, p as y, N as Ct, ad as Q, b0 as nt, b1 as It, b2 as pt, b3 as ft, b4 as kt, Z as ot, ah as B, b5 as q, b6 as Ut, O as Gt, k as jt, t as Zt, b7 as dt, ao as ct, b8 as tt, a1 as ut, b9 as Bt, K as Ht, ba as Kt, r as Xt } from "../../index-iNhkcAEQ.js";
+import { f as $t, a as Yt, g as yt, b as Jt, c as Qt } from "../../kernel_funcs_utils-C4eIk4fE.js";
+import { c as mt, g as Tt, a as Vt, b as Mt, e as wt } from "../../axis_util-97KkkyRQ.js";
+import { b as te } from "../../broadcast_to-CMlkG8NS.js";
+import { r as ee } from "../../reshape-DxTPgnwL.js";
+import { i as ne, c as oe } from "../../slice_util-D-kaD4ZV.js";
 import { g as se } from "../../_commonjsHelpers-ByX85dGu.js";
-import { r as st } from "../../Reshape-CiAY8ltP.js";
+import { r as st } from "../../Reshape-BE5rA4rT.js";
 function re(t, e) {
   for (var n = 0; n < e.length; n++) {
     const o = e[n];
@@ -654,20 +655,20 @@ const _t = /* @__PURE__ */ se(Lt), ae = /* @__PURE__ */ re({
  * limitations under the License.
  * =============================================================================
  */
-const Y = (
+const J = (
   // tslint:disable-next-line
   _t || ae
 );
 function lt(t) {
-  return Y.fromString(t, !0, 16);
+  return J.fromString(t, !0, 16);
 }
-const Nt = lt("c3a5c85c97cb3127"), Q = lt("b492b66fbe98f273"), W = lt("9ae16a3b2f90404f");
+const Nt = lt("c3a5c85c97cb3127"), Y = lt("b492b66fbe98f273"), W = lt("9ae16a3b2f90404f");
 function gt(t) {
   return t.xor(t.shru(47));
 }
 function At(t, e, n) {
   const o = t.slice(e, e + n);
-  return Y.fromBytes(Array.from(o), !0, !0);
+  return J.fromBytes(Array.from(o), !0, !0);
 }
 function R(t, e) {
   return At(t, e, 8);
@@ -708,7 +709,7 @@ function le(t, e = t.length) {
   return W;
 }
 function ce(t, e = t.length) {
-  const n = W.add(e * 2), o = R(t, 0).mul(Q), s = R(t, 8), r = R(t, e - 8).mul(n), a = R(t, e - 16).mul(W);
+  const n = W.add(e * 2), o = R(t, 0).mul(Y), s = R(t, 8), r = R(t, e - 8).mul(n), a = R(t, e - 16).mul(W);
   return X(A(o.add(s), 43).add(A(r, 30)).add(a), o.add(A(s.add(W), 18)).add(r), n);
 }
 function he(t, e = t.length) {
@@ -716,19 +717,19 @@ function he(t, e = t.length) {
   return X(A(c.add(h), 43).add(A(f, 30)).add(w), c.add(A(h.add(o), 18)).add(f), n);
 }
 function fe(t, e = t.length) {
-  const n = Y.fromNumber(81, !0);
+  const n = J.fromNumber(81, !0);
   if (e <= 32)
     return e <= 16 ? le(t, e) : ce(t, e);
   if (e <= 64)
     return he(t, e);
-  let o = n, s = n.mul(Q).add(113), r = gt(s.mul(W).add(113)).mul(W), a = [Y.UZERO, Y.UZERO], i = [Y.UZERO, Y.UZERO];
+  let o = n, s = n.mul(Y).add(113), r = gt(s.mul(W).add(113)).mul(W), a = [J.UZERO, J.UZERO], i = [J.UZERO, J.UZERO];
   o = o.mul(W).add(R(t, 0));
   let u = 0;
   const c = (e - 1 >> 6) * 64, h = c + (e - 1 & 63) - 63;
   do
-    o = A(o.add(s).add(a[0]).add(R(t, u + 8)), 37).mul(Q), s = A(s.add(a[1]).add(R(t, u + 48)), 42).mul(Q), o = o.xor(i[1]), s = s.add(a[0]).add(R(t, u + 40)), r = A(r.add(i[0]), 33).mul(Q), a = it(t, u, a[1].mul(Q), o.add(i[0])), i = it(t, u + 32, r.add(i[1]), s.add(R(t, u + 16))), [r, o] = [o, r], u += 64;
+    o = A(o.add(s).add(a[0]).add(R(t, u + 8)), 37).mul(Y), s = A(s.add(a[1]).add(R(t, u + 48)), 42).mul(Y), o = o.xor(i[1]), s = s.add(a[0]).add(R(t, u + 40)), r = A(r.add(i[0]), 33).mul(Y), a = it(t, u, a[1].mul(Y), o.add(i[0])), i = it(t, u + 32, r.add(i[1]), s.add(R(t, u + 16))), [r, o] = [o, r], u += 64;
   while (u !== c);
-  const f = Q.add(r.and(255).shl(1));
+  const f = Y.add(r.and(255).shl(1));
   return u = h, i[0] = i[0].add(e - 1 & 63), a[0] = a[0].add(i[0]), i[0] = i[0].add(a[0]), o = A(o.add(s).add(a[0]).add(R(t, u + 8)), 37).mul(f), s = A(s.add(a[1]).add(R(t, u + 48)), 42).mul(f), o = o.xor(i[1].mul(9)), s = s.add(a[0].mul(9).add(R(t, u + 40))), r = A(r.add(i[0]), 33).mul(f), a = it(t, u, a[1].mul(f), o.add(i[0])), i = it(t, u + 32, r.add(i[1]), s.add(R(t, u + 16))), [r, o] = [o, r], X(X(a[0], i[0], f).add(gt(s).mul(Nt)).add(r), X(a[1], i[1], f).add(o), f);
 }
 /**
@@ -954,7 +955,7 @@ function Ve(t) {
  */
 function C(t) {
   return (e, n, o, s, r) => {
-    const a = Ct(e, n), i = a.length, u = J(a), c = y(a), h = nt(r, c), f = e.length, w = n.length, p = J(e), m = J(n), b = It(e, a), d = It(n, a);
+    const a = Ct(e, n), i = a.length, u = Q(a), c = y(a), h = nt(r, c), f = e.length, w = n.length, p = Q(e), m = Q(n), b = It(e, a), d = It(n, a);
     if (b.length + d.length === 0)
       for (let g = 0; g < h.length; ++g)
         h[g] = t(o[g % o.length], s[g % s.length]);
@@ -1415,7 +1416,7 @@ const Xe = K((t) => Math.log(t));
  * limitations under the License.
  * =============================================================================
  */
-function Qe(t, e, n, o) {
+function Ye(t, e, n, o) {
   const s = nt(o, y(n));
   for (let r = 0; r < s.length; ++r) {
     const a = r * e;
@@ -1444,7 +1445,7 @@ function Qe(t, e, n, o) {
  * limitations under the License.
  * =============================================================================
  */
-const Ye = C((t, e) => Math.max(t, e));
+const Je = C((t, e) => Math.max(t, e));
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1461,7 +1462,7 @@ const Ye = C((t, e) => Math.max(t, e));
  * limitations under the License.
  * =============================================================================
  */
-const Je = C((t, e) => Math.min(t, e));
+const Qe = C((t, e) => Math.min(t, e));
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1533,7 +1534,7 @@ const en = C((t, e) => t !== e ? 1 : 0);
  * =============================================================================
  */
 function nn(t, e, n, o, s) {
-  const r = e.length, a = y(e), i = J(e), u = J(s), c = nt(n, y(s));
+  const r = e.length, a = y(e), i = Q(e), u = Q(s), c = nt(n, y(s));
   for (let h = 0; h < a; ++h) {
     const f = pt(h, r, i), w = new Array(f.length);
     for (let m = 0; m < w.length; m++)
@@ -1589,7 +1590,7 @@ function on(t, e, n, o) {
 function sn(t, e, n) {
   t.forEach((o, s) => {
     if (o < 0 || o >= n) {
-      const r = pt(s, e.length, J(e)).join(",");
+      const r = pt(s, e.length, Q(e)).join(",");
       throw new Error(`indices[${r}] = ${o} is not in [0, ${n})`);
     }
   });
@@ -1944,7 +1945,7 @@ class at {
     if (h.length !== u && h.length !== 1) {
       const m = this.defaultValueShape;
       Zt(() => {
-        const b = oe(h, m);
+        const b = ee(h, m);
         h = te(b, i).dataSync();
       });
     }
@@ -2109,9 +2110,9 @@ const wn = K((t) => 1 / (1 + Math.exp(-t)));
  * =============================================================================
  */
 function In(t, e, n, o, s) {
-  const r = ee(o, e, n), a = y(n), i = J(o);
+  const r = ne(o, e, n), a = y(n), i = Q(o);
   if (r) {
-    const f = ne(e, i);
+    const f = oe(e, i);
     return s === "string" ? t.slice(f, f + a) : t.subarray(f, f + a);
   }
   const u = s === "string" ? $t(t) : t, c = B(o, s, u), h = B(n, s);
@@ -2119,7 +2120,7 @@ function In(t, e, n, o, s) {
     const w = h.indexToLoc(f), p = w.map((m, b) => m + e[b]);
     h.set(c.get(...p), ...w);
   }
-  return s === "string" ? Qt(h.values) : h.values;
+  return s === "string" ? Yt(h.values) : h.values;
 }
 /**
  * @license
@@ -2790,9 +2791,9 @@ const An = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
   lessImpl: Be,
   linSpaceImpl: Ke,
   logImpl: Xe,
-  maxImpl: Qe,
-  maximumImpl: Ye,
-  minimumImpl: Je,
+  maxImpl: Ye,
+  maximumImpl: Je,
+  minimumImpl: Qe,
   multiplyImpl: Pt,
   negImpl: tn,
   notEqualImpl: en,
@@ -3170,7 +3171,7 @@ class Un {
       o[h] = e[n[h]];
     if (this.outputShape = o, this.rank = o.length, this.rank > 6)
       throw Error(`Packed transpose for rank ${this.rank} is not yet supported.`);
-    const s = yt(this.rank), r = Yt("rc", this.rank), a = new Array(this.rank);
+    const s = yt(this.rank), r = Jt("rc", this.rank), a = new Array(this.rank);
     for (let h = 0; h < n.length; h++)
       a[n[h]] = r[h];
     const i = `vec2(${a.slice(-2).join()})`, u = `++${r[this.rank - 1]} < ${o[this.rank - 1]}`, c = `getChannel(getA(${a.join()}), ${i})`;
@@ -3348,7 +3349,7 @@ return a / b;`, Hn = `
   }
   return result;
-`, Kn = Jt({ opSnippet: Bn, packedOpSnippet: Hn, checkOutOfBounds: !0 });
+`, Kn = Qt({ opSnippet: Bn, packedOpSnippet: Hn, checkOutOfBounds: !0 });
 class Xn {
   variableNames = ["logits", "maxLogits"];
   outputShape;
@@ -3368,7 +3369,7 @@ class Xn {
         `;
   }
 }
-class Qn {
+class Yn {
   variableNames = ["exp", "sum"];
   outputShape;
   userCode;
@@ -3395,7 +3396,7 @@ class Qn {
         `;
   }
 }
-function Yn(t) {
+function Jn(t) {
   const { inputs: e, attrs: n } = t, { logits: o } = e, { dim: s, dropoutRate: r, seed: a } = n, i = t.backend;
   if (!o)
     throw new Error("Error in softmax: input logits is null");
@@ -3403,23 +3404,25 @@ function Yn(t) {
     inputs: { x: o },
     backend: i,
     attrs: { reductionIndices: u, keepDims: !1 }
-  }), h = wt(c.shape, u), f = new Xn(o.shape), w = i.runWebGLProgram(f, [o, c], "float32"), p = Zn({ inputs: { x: w }, backend: i, attrs: { axis: u, keepDims: !1 } }), m = st({ inputs: { x: p }, backend: i, attrs: { shape: h } });
+  }), h = wt(c.shape, u), f = new Xn(o.shape), w = i.runWebGLProgram(f, [o, c], "float32");
+  i.disposeIntermediateTensorInfo(c);
+  const p = Zn({ inputs: { x: w }, backend: i, attrs: { axis: u, keepDims: !1 } }), m = st({ inputs: { x: p }, backend: i, attrs: { shape: h } });
   if (r !== void 0 && r > 0) {
-    const d = new Qn(o.shape), g = i.runWebGLProgram(d, [w, m], "float32", [
+    const d = new Yn(o.shape), g = i.runWebGLProgram(d, [w, m], "float32", [
       [r],
       [a ?? Math.random() * 1e4]
     ]);
-    return i.disposeIntermediateTensorInfo(c), i.disposeIntermediateTensorInfo(w), i.disposeIntermediateTensorInfo(p), i.disposeIntermediateTensorInfo(m), g;
+    return i.disposeIntermediateTensorInfo(w), i.disposeIntermediateTensorInfo(p), i.disposeIntermediateTensorInfo(m), g;
   }
   const b = Kn({ inputs: { a: w, b: m }, backend: i });
-  return i.disposeIntermediateTensorInfo(c), i.disposeIntermediateTensorInfo(w), i.disposeIntermediateTensorInfo(p), i.disposeIntermediateTensorInfo(m), b;
+  return i.disposeIntermediateTensorInfo(w), i.disposeIntermediateTensorInfo(p), i.disposeIntermediateTensorInfo(m), b;
 }
-const Jn = {
+const Qn = {
   kernelName: "FusedSoftmax",
   backendName: "webgl",
-  kernelFunc: Yn
+  kernelFunc: Jn
 };
-Xt(Jn);
+Xt(Qn);
 export {
-  Yn as softmax
+  Jn as softmax
 };

package/dist/ops/webgl/gatherSub.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as l } from "../../index--6vO-cOz.js";
+import { r as l } from "../../index-iNhkcAEQ.js";
 class u {
   variableNames = ["labels", "logits", "values"];
   outputShape;

package/dist/ops/webgl/gelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { r as a } from "../../index--6vO-cOz.js";
-import { u as s, C as x } from "../../kernel_funcs_utils-C6YBCuOt.js";
+import { r as a } from "../../index-iNhkcAEQ.js";
+import { u as s, C as x } from "../../kernel_funcs_utils-C4eIk4fE.js";
 const t = 0.7978845608028654, r = 0.044715, c = x + `
     float x3 = x * x * x;
     float inner = x + ${r} * x3;

package/dist/ops/webgl/matMulGelu.d.ts CHANGED Viewed

@@ -7,9 +7,10 @@ type BatchMatMulConfig = {
     transposeA: boolean;
     transposeB: boolean;
     backend: MathBackendWebGL;
-    activationSnippet: string;
+    activationSnippet?: string;
+    multiplier?: TensorInfo;
 };
-export declare function batchMatMulGeluImpl({ a, b, transposeA, transposeB, backend, activationSnippet, }: BatchMatMulConfig): TensorInfo;
+export declare function batchMatMulGeluImpl({ a, b, transposeA, transposeB, backend, activationSnippet, multiplier, }: BatchMatMulConfig): TensorInfo;
 export declare function batchMatMulKernel(args: {
     inputs: {
         x: TensorInfo;