npm - @genai-fi/nanogpt - Versions diffs - 0.6.3 → 0.7.0 - Mend

@genai-fi/nanogpt 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/dist/Generator.js +11 -11
package/dist/NanoGPTModel.d.ts +2 -2
package/dist/NanoGPTModel.js +104 -136
package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} +26 -25
package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} +5 -5
package/dist/TeachableLLM.js +5 -5
package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} +4 -4
package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} +3 -3
package/dist/backend.d.ts +1 -0
package/dist/backend.js +7 -0
package/dist/backend_util-DWiwsi2N.js +749 -0
package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-C4v-j9yA.js} +2 -2
package/dist/{concat-5aPGqw3Z.js → concat-CsHeR4zV.js} +8 -8
package/dist/{dataset-pgqp-YfL.js → dataset-JDyjG3QR.js} +3 -3
package/dist/{dropout-Bciw46HT.js → dropout-hpDwECTe.js} +7 -7
package/dist/{gather-DjyCjmOD.js → gather-D0_gPiBz.js} +4 -4
package/dist/gelu-uyHP1x1f.js +26 -0
package/dist/gpgpu_math-DJm3ZTAf.js +2371 -0
package/dist/index-BPPzKVdR.js +12099 -0
package/dist/{index-BAzbokzv.js → index-C0dhsYom.js} +405 -389
package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CwRTFqrc.js} +31 -30
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +5 -5
package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-D086OgZJ.js} +15 -15
package/dist/main.d.ts +2 -0
package/dist/main.js +9 -5
package/dist/{mat_mul-7121rsJk.js → mat_mul-1nwdPkQ_.js} +4 -4
package/dist/{max-DtlIuVeW.js → max-BQc2Aj-I.js} +4 -4
package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-Gzf3I9UV.js} +1 -1
package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
package/dist/{ones-BBlSRqn1.js → ones-D63HpSF_.js} +2 -2
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/appendCache.js +8 -8
package/dist/ops/cpu/attentionMask.js +9 -9
package/dist/ops/cpu/fusedSoftmax.js +17 -11
package/dist/ops/cpu/gatherSub.js +7 -7
package/dist/ops/cpu/gelu.js +13 -13
package/dist/ops/cpu/matMulGelu.js +36 -24
package/dist/ops/cpu/matMulMul.js +14 -8
package/dist/ops/cpu/mulDropout.js +9 -3
package/dist/ops/cpu/normRMS.js +5 -5
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +11 -11
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +3 -24
package/dist/ops/grads/matMulGelu.js +5 -5
package/dist/ops/grads/normRMS.js +6 -6
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +3 -3
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +4 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +5 -5
package/dist/ops/webgl/matMulGelu.js +17 -17
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +4 -4
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/appendCache.d.ts +1 -0
package/dist/ops/webgpu/appendCache.js +56 -0
package/dist/ops/webgpu/attentionMask.d.ts +1 -0
package/dist/ops/webgpu/attentionMask.js +64 -0
package/dist/ops/webgpu/gatherSub.d.ts +1 -0
package/dist/ops/webgpu/gatherSub.js +37 -0
package/dist/ops/webgpu/gelu.d.ts +14 -0
package/dist/ops/webgpu/gelu.js +86 -0
package/dist/ops/webgpu/index.d.ts +0 -0
package/dist/ops/webgpu/index.js +8 -0
package/dist/ops/webgpu/normRMS.d.ts +1 -0
package/dist/ops/webgpu/normRMS.js +115 -0
package/dist/ops/webgpu/qkv.d.ts +1 -0
package/dist/ops/webgpu/qkv.js +56 -0
package/dist/ops/webgpu/rope.d.ts +1 -0
package/dist/ops/webgpu/rope.js +68 -0
package/dist/ops/webgpu/scatterSub.d.ts +1 -0
package/dist/ops/webgpu/scatterSub.js +37 -0
package/dist/{ops-C0sQEcPw.js → ops-CIQLNshk.js} +452 -503
package/dist/{random_width-DWzaOgrn.js → random_width-DkYP8W8N.js} +143 -144
package/dist/{range-DYsrnfiy.js → range-CYzpQY53.js} +1 -1
package/dist/{reciprocal-CJQeasVa.js → reciprocal-_A9yv27J.js} +1 -1
package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-guvSxp7M.js} +202 -200
package/dist/{reshape-krWGKraP.js → reshape-BMUzc1UY.js} +3 -3
package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-IRBqKz_b.js} +3 -3
package/dist/{selu_util-sntGesxr.js → selu_util-Dt_iuXaq.js} +6 -6
package/dist/shared-BNa2q6jD.js +69 -0
package/dist/{shared-Ca6iDobD.js → shared-CDu9S76h.js} +541 -606
package/dist/{sin-D_h-qCSx.js → sin-Cocju-BY.js} +6 -6
package/dist/{softmax-fsdtf6JC.js → softmax-GPNK3o-U.js} +3 -3
package/dist/{split-eiktj-6L.js → split-CHzJjxDv.js} +4 -4
package/dist/{stack-dfEEz2OY.js → stack-Dpgg_1W1.js} +2 -2
package/dist/{sum-BE_Irnim.js → sum-B8wEpKsg.js} +5 -5
package/dist/{tensor-Xyi595sG.js → tensor-RvZVNmg0.js} +1 -1
package/dist/{tensor2d-CPEkynbH.js → tensor2d-B_kyod7_.js} +1 -1
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/Evaluator.js +1 -1
package/dist/training/FullTrainer.js +20 -20
package/dist/training/Trainer.d.ts +5 -6
package/dist/training/Trainer.js +59 -60
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +19 -19
package/dist/utilities/generate.js +15 -16
package/dist/utilities/multinomialCPU.d.ts +2 -0
package/dist/utilities/multinomialCPU.js +13 -0
package/dist/utilities/performance.d.ts +2 -0
package/dist/utilities/performance.js +16 -0
package/dist/utilities/profile.d.ts +1 -0
package/dist/utilities/profile.js +9 -6
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/weights.js +2 -2
package/dist/{variable-wSS22xj5.js → variable-DXEUOwew.js} +1 -1
package/dist/webgpu_util-g13LvDIv.js +625 -0
package/dist/{zeros-YJDE7oRb.js → zeros-DCPCdFGq.js} +8 -8
package/package.json +2 -1
package/dist/gpgpu_math-CNslybmD.js +0 -3115
package/dist/norm-CzltS9Fz.js +0 -86

package/dist/ops/webgl/gelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { r as a } from "../../index-BAzbokzv.js";
-import { u as s, C as x } from "../../kernel_funcs_utils-CUxJCg0g.js";
+import { f as a } from "../../index-C0dhsYom.js";
+import { u as s, C as x } from "../../kernel_funcs_utils-CwRTFqrc.js";
 const t = 0.7978845608028654, r = 0.044715, c = x + `
     float x3 = x * x * x;
     float inner = x + ${r} * x3;

package/dist/ops/webgl/log.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { r, a9 as e } from "../../index-BAzbokzv.js";
-import { u as s, l as N } from "../../kernel_funcs_utils-CUxJCg0g.js";
-import { aG as l } from "../../shared-Ca6iDobD.js";
+import { f as e, a8 as r } from "../../index-C0dhsYom.js";
+import { u as s, l as N } from "../../kernel_funcs_utils-CwRTFqrc.js";
+import { y as l } from "../../shared-BNa2q6jD.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -32,8 +32,8 @@ const a = N + `
   packedOpSnippet: t,
   cpuKernelImpl: l
 }), o = {
-  kernelName: e,
+  kernelName: r,
   backendName: "webgl",
   kernelFunc: n
 };
-r(o);
+e(o);

package/dist/ops/webgl/matMulGelu.js CHANGED Viewed

@@ -1,8 +1,8 @@
-import { r as _, t as R, e as C, g as A, h as N, i as H, u as O } from "../../index-BAzbokzv.js";
-import { r as f } from "../../Reshape-t7Kcikjk.js";
-import { M as U } from "../../mulmat_packed_gpu-D4nKF7Je.js";
-import { m as E } from "../../mat_mul-7121rsJk.js";
-const M = 0.7978845608028654, x = 0.044715, q = `
+import { f as _, t as R, e as C, j as A, k as N, l as H, u as O } from "../../index-C0dhsYom.js";
+import { r as f } from "../../Reshape-BLijOA8h.js";
+import { M as U } from "../../mulmat_packed_gpu-Gzf3I9UV.js";
+import { m as E } from "../../mat_mul-1nwdPkQ_.js";
+const M = 0.7978845608028654, x = 0.044715, j = `
     vec4 x3 = x * x * x;
     vec4 inner = x + ${x} * x3;
     inner = ${M} * inner;
@@ -10,7 +10,7 @@ const M = 0.7978845608028654, x = 0.044715, q = `
     inner = 0.5 * (1.0 + inner);
     vec4 result = x * inner;
     return result;
-`, z = `
+`, q = `
     vec4 a2 = a * a;
     vec4 a3 = a2 * a;
     vec4 u  = ${M} * (a + ${x} * a3);
@@ -34,7 +34,7 @@ function w({
     i === p,
     () => `Error in matMul: inner shapes (${i}) and (${p}) of Tensors with shapes ${e.shape} and ${t.shape} and transposeA=${s} and transposeB=${n} must match.`
   );
-  const v = s ? [d, i, h] : [d, h, i], S = n ? [m, l, p] : [m, p, l], g = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), D = f({ inputs: { x: t }, backend: a, attrs: { shape: S } }), G = [g, D], y = Math.max(d, m), L = c, B = O(e.dtype, t.dtype), F = new U(
+  const v = s ? [d, i, h] : [d, h, i], S = n ? [m, l, p] : [m, p, l], k = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), D = f({ inputs: { x: t }, backend: a, attrs: { shape: S } }), G = [k, D], y = Math.max(d, m), L = c, B = O(e.dtype, t.dtype), F = new U(
     v,
     S,
     [y, h, l],
@@ -44,15 +44,15 @@ function w({
     L,
     !!o,
     !1
-  ), k = [g, D];
-  o && k.push(o);
-  const $ = a.runWebGLProgram(F, k, B), I = f({ inputs: { x: $ }, backend: a, attrs: { shape: b } });
+  ), g = [k, D];
+  o && g.push(o);
+  const $ = a.runWebGLProgram(F, g, B), I = f({ inputs: { x: $ }, backend: a, attrs: { shape: b } });
   G.push($);
   for (const P of G)
     a.disposeIntermediateTensorInfo(P);
   return I;
 }
-function W(e) {
+function z(e) {
   const { inputs: t, backend: s } = e, { x: n, kernel: a } = t;
   if (n === void 0 || a === void 0)
     throw new Error("BatchMatMul requires two input tensors.");
@@ -62,15 +62,15 @@ function W(e) {
     transposeA: !1,
     transposeB: !1,
     backend: s,
-    activationSnippet: q
+    activationSnippet: j
   });
 }
-const j = {
+const W = {
   kernelName: "MatMulGelu",
   backendName: "webgl",
-  kernelFunc: W
+  kernelFunc: z
 };
-_(j);
+_(W);
 function J(e) {
   const { dy: t, x: s, kernel: n } = e.inputs, a = e.backend;
   return R(() => {
@@ -81,7 +81,7 @@ function J(e) {
         transposeA: !1,
         transposeB: !1,
         backend: a,
-        activationSnippet: z,
+        activationSnippet: q,
         multiplier: t
       })
     ), o = E(c, n, !1, !0), r = E(s, c, !0, !1);
@@ -97,5 +97,5 @@ _(Q);
 export {
   te as MATMUL_SHARED_DIM_THRESHOLD,
   w as batchMatMulGeluImpl,
-  W as batchMatMulKernel
+  z as batchMatMulKernel
 };

package/dist/ops/webgl/matMulMul.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as u } from "../../index-BAzbokzv.js";
+import { f as u } from "../../index-C0dhsYom.js";
 import { batchMatMulGeluImpl as c } from "./matMulGelu.js";
 const M = `
     return a * b;

package/dist/ops/webgl/mulDropout.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as m } from "../../index-BAzbokzv.js";
+import { f as m } from "../../index-C0dhsYom.js";
 class f {
   variableNames = ["a", "b"];
   outputShape;
@@ -7,8 +7,8 @@ class f {
     { name: "dropoutRate", type: "float" },
     { name: "seed", type: "float" }
   ];
-  constructor(r, t, o) {
-    this.outputShape = [r, t, o, o], this.userCode = `
+  constructor(t, r, o) {
+    this.outputShape = [t, r, o, o], this.userCode = `
         float random(ivec4 coords) {
             float x = float(coords.x * 4096 + coords.y * 256 + coords.z * 16 + coords.w);
             return fract(sin(seed + x) * 43758.5453123);
@@ -27,7 +27,7 @@ class f {
   }
 }
 function b(e) {
-  const { inputs: r, attrs: t } = e, { a: o, b: s } = r, { dropoutRate: a, seed: c } = t, n = e.backend, d = o.shape[0], u = o.shape[2], p = o.shape[1], l = new f(d, p, u);
+  const { inputs: t, attrs: r } = e, { a: o, b: s } = t, { dropoutRate: a, seed: c } = r, n = e.backend, d = o.shape[0], u = o.shape[2], p = o.shape[1], l = new f(d, p, u);
   return n.runWebGLProgram(l, [o, s], "float32", [
     [a ?? 0],
     [c ?? Math.random() * 1e4]

package/dist/ops/webgl/normRMS.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { r as p, e as G } from "../../index-BAzbokzv.js";
-import { s as x } from "../../sum-BE_Irnim.js";
+import { f as p, e as G } from "../../index-C0dhsYom.js";
+import { s as x } from "../../sum-B8wEpKsg.js";
 class y {
   variableNames = ["x", "meanSquare", "gamma"];
   outputShape;

package/dist/ops/webgl/qkv.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as i } from "../../index-BAzbokzv.js";
+import { f as i } from "../../index-C0dhsYom.js";
 class l {
   variableNames = ["x", "kernel"];
   outputShape;

package/dist/ops/webgl/rope.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as u } from "../../index-BAzbokzv.js";
+import { f as u } from "../../index-C0dhsYom.js";
 class l {
   variableNames = ["x", "sin", "cos"];
   outputShape;

package/dist/ops/webgl/scatterSub.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as i } from "../../index-BAzbokzv.js";
+import { f as i } from "../../index-C0dhsYom.js";
 class u {
   variableNames = ["labels", "softmaxProbs", "dy"];
   outputShape;

package/dist/ops/webgpu/appendCache.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/ops/webgpu/appendCache.js ADDED Viewed

@@ -0,0 +1,56 @@
+import { f as u, c as d, g as l } from "../../webgpu_util-g13LvDIv.js";
+import { f as m } from "../../index-C0dhsYom.js";
+class f {
+  variableNames = ["cache", "item"];
+  outputShape;
+  shaderKey = "AppendCache";
+  dispatchLayout;
+  dispatch;
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  uniforms = "cacheT: i32";
+  constructor(t, a, s, o, c) {
+    const i = Math.min(s + 1, c);
+    this.outputShape = [t, a, i, o], this.dispatchLayout = u(this.outputShape), this.dispatch = d(this.dispatchLayout, this.outputShape, this.workgroupSize);
+  }
+  getUserCode() {
+    const t = this.outputShape[2];
+    return `
+        ${l("index")} {
+            if (index < uniforms.size) {
+                let coords = getCoordsFromIndex(index); // [b, h, t, d]
+                let b = coords[0];
+                let h = coords[1];
+                let t = coords[2];
+                let d = coords[3];
+                let itemT = 1;
+                let maxSize = ${t};
+                let totalT = uniforms.cacheT + itemT;
+                let start = select(0, 1, totalT >= maxSize);
+                let srcT = t + start;
+                var val = 0.0;
+                if (srcT < uniforms.cacheT) {
+                    val = getCache(b, h, srcT, d);
+                } else if (srcT == uniforms.cacheT) {
+                    val = getItem(b, h, 0, d);
+                } else {
+                    val = 0.0;
+                }
+                setOutputAtIndex(index, val);
+            }
+        }
+        `;
+  }
+}
+function T(e) {
+  const { cache: t, item: a } = e.inputs, { maxSize: s, pastLen: o } = e.attrs, c = e.backend, i = t.shape[0], r = t.shape[2], n = t.shape[1], h = new f(i, n, r, a.shape[3], s), p = [{ type: "int32", data: [o] }];
+  return c.runWebGPUProgram(h, [t, a], "float32", p);
+}
+const g = {
+  kernelName: "AppendCache",
+  backendName: "webgpu",
+  kernelFunc: T
+};
+m(g);

package/dist/ops/webgpu/attentionMask.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/ops/webgpu/attentionMask.js ADDED Viewed

@@ -0,0 +1,64 @@
+import { f } from "../../index-C0dhsYom.js";
+import { f as m, c as k, g as l } from "../../webgpu_util-g13LvDIv.js";
+class g {
+  variableNames = ["q", "k"];
+  outputShape;
+  shaderKey = "AttentionMask";
+  dispatchLayout;
+  dispatch;
+  uniforms = "divisor: f32, pastLen: i32, inf: f32";
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  hs;
+  nh;
+  T1;
+  T2;
+  constructor(t, e, o, i, a) {
+    if (this.outputShape = [t, e, o, i], this.hs = a, this.nh = e, this.T1 = o, this.T2 = i, this.dispatchLayout = m(this.outputShape), this.dispatch = k(this.dispatchLayout, this.outputShape, this.workgroupSize), a % 4 !== 0)
+      throw new Error("Head size must be a multiple of 4 for AttentionMaskProgram");
+  }
+  getUserCode() {
+    return `
+            ${l("index")} {
+                let coords = getCoordsFromIndex(index);
+                let b = coords[0];
+                let h = coords[1];
+                let t1 = coords[2];
+                let t2 = coords[3];
+                if (index < uniforms.size) {
+                    if (t2 > t1 + uniforms.pastLen) {
+                        setOutputAtIndex(index, uniforms.inf);
+                        return;
+                    }
+                    var sum: f32 = 0.0;
+                    for (var i: i32 = 0; i < ${this.hs}; i = i + 4) {
+                        let q0 = getIndexFromCoords4D(vec4<i32>(b, h, t1, i), uniforms.qShape);
+                        let qv = vec4<f32>(q[q0], q[q0 + 1], q[q0 + 2], q[q0 + 3]);
+                        let k0 = getIndexFromCoords4D(vec4<i32>(b, h, t2, i), uniforms.kShape);
+                        let kv = vec4<f32>(k[k0], k[k0 + 1], k[k0 + 2], k[k0 + 3]);
+                        sum = sum + dot(qv, kv);
+                    }
+                    let scaled = sum * uniforms.divisor;
+                    setOutputAtIndex(index, scaled);
+                }
+            }
+        `;
+  }
+}
+function q(s) {
+  const { q: t, k: e } = s.inputs, { divisor: o, pastLen: i } = s.attrs, a = s.backend, n = t.shape[0], r = t.shape[2], u = e.shape[2], c = t.shape[1], d = t.shape[3], h = new g(n, c, r, u, d), p = [
+    { type: "float32", data: [o] },
+    { type: "int32", data: [i] },
+    { type: "float32", data: [Number.NEGATIVE_INFINITY] }
+  ];
+  return a.runWebGPUProgram(h, [t, e], "float32", p);
+}
+const v = {
+  kernelName: "AttentionMask",
+  backendName: "webgpu",
+  kernelFunc: q
+};
+f(v);

package/dist/ops/webgpu/gatherSub.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/ops/webgpu/gatherSub.js ADDED Viewed

@@ -0,0 +1,37 @@
+import { f as u, c as n, g as c } from "../../webgpu_util-g13LvDIv.js";
+import { f as p } from "../../index-C0dhsYom.js";
+class d {
+  variableNames = ["labels", "logits", "values"];
+  outputShape;
+  shaderKey = "GatherSub";
+  dispatchLayout;
+  dispatch;
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  constructor(e) {
+    this.outputShape = [e], this.dispatchLayout = u(this.outputShape), this.dispatch = n(this.dispatchLayout, this.outputShape, this.workgroupSize);
+  }
+  getUserCode() {
+    return `
+        ${c("index")} {
+            if (index < uniforms.size) {
+                let coords = getCoordsFromIndex(index);
+                let idx = i32(getLabelsByOutputIndex(index));
+                let val = getValuesByOutputIndex(index);
+                let logit = getLogits(coords, idx);
+                setOutputAtIndex(index, val - logit);
+            }
+        }
+    `;
+  }
+}
+function l(t) {
+  const { logits: e, labels: a, values: s } = t.inputs, i = t.backend, o = a.shape[0], r = new d(o);
+  return i.runWebGPUProgram(r, [a, e, s], "float32");
+}
+const h = {
+  kernelName: "EfficientGatherSub",
+  backendName: "webgpu",
+  kernelFunc: l
+};
+p(h);

package/dist/ops/webgpu/gelu.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import { WebGPUProgram } from '@tensorflow/tfjs-backend-webgpu';
+export declare class GeluProgram implements WebGPUProgram {
+    outputShape: number[];
+    shaderKey: string;
+    dispatchLayout: {
+        x: number[];
+    };
+    dispatch: [number, number, number];
+    variableNames: string[];
+    workgroupSize: [number, number, number];
+    size: boolean;
+    constructor(outputShape: number[]);
+    getUserCode(): string;
+}

package/dist/ops/webgpu/gelu.js ADDED Viewed

@@ -0,0 +1,86 @@
+import { f as i } from "../../index-C0dhsYom.js";
+import { f as o, c as s, g as p } from "../../webgpu_util-g13LvDIv.js";
+const u = 0.7978845608028654, a = 0.044715;
+class c {
+  outputShape;
+  shaderKey;
+  dispatchLayout;
+  dispatch;
+  variableNames = ["A"];
+  workgroupSize;
+  size = !0;
+  constructor(e) {
+    this.workgroupSize = [128, 1, 1], this.outputShape = e, this.dispatchLayout = o(this.outputShape), this.dispatch = s(this.dispatchLayout, this.outputShape, this.workgroupSize), this.shaderKey = "unary_gelu";
+  }
+  getUserCode() {
+    return `
+      fn unaryOperation(x : f32) -> f32 {
+        let x3 = x * x * x;
+        var inner = fma(${a}, x3, x);
+        inner = ${u} * inner;
+        inner = tanh(inner);
+        inner = 0.5 * (1.0 + inner);
+        return x * inner;
+      }
+      ${p("index")} {
+        if (index < uniforms.size) {
+          let a = getAByOutputIndex(index);
+          setOutputAtIndex(index, unaryOperation(a));
+        }
+      }
+      `;
+  }
+}
+function h(t) {
+  const { x: e } = t.inputs, n = t.backend, r = new c(e.shape);
+  return n.runWebGPUProgram(r, [e], "float32");
+}
+const l = {
+  kernelName: "Gelu",
+  backendName: "webgpu",
+  kernelFunc: h
+};
+i(l);
+class x {
+  // Inputs: dy, x
+  variableNames = ["dy", "x"];
+  outputShape;
+  shaderKey = "GeluGrad";
+  dispatchLayout;
+  dispatch;
+  workgroupSize = [128, 1, 1];
+  size = !0;
+  constructor(e) {
+    this.outputShape = e, this.dispatchLayout = o(this.outputShape), this.dispatch = s(this.dispatchLayout, this.outputShape, this.workgroupSize);
+  }
+  getUserCode() {
+    return `
+            ${p("index")} {
+                if (index < uniforms.size) {
+                    let X  = getXByOutputIndex(index);
+                    let x2 = X * X;
+                    let x3 = x2 * X;
+                    let u  = ${u} * (X + ${a} * x3);
+                    let t  = tanh(u);
+                    let sech2 = 1.0 - t * t;
+                    let du_dx = ${u} * (1.0 + 3.0 * ${a} * x2);
+                    let dgelu = 0.5 * (1.0 + t) + 0.5 * X * sech2 * du_dx;
+                    let DY = getDyByOutputIndex(index);
+                    setOutputAtIndex(index, DY * dgelu);
+                }
+            }`;
+  }
+}
+function g(t) {
+  const { dy: e, x: n } = t.inputs, r = t.backend, d = new x(n.shape);
+  return r.runWebGPUProgram(d, [e, n], "float32");
+}
+const m = {
+  kernelName: "GeluGrad",
+  backendName: "webgpu",
+  kernelFunc: g
+};
+i(m);
+export {
+  c as GeluProgram
+};

package/dist/ops/webgpu/index.d.ts ADDED Viewed

File without changes

package/dist/ops/webgpu/index.js ADDED Viewed

@@ -0,0 +1,8 @@
+import "./attentionMask.js";
+import "./normRMS.js";
+import "./rope.js";
+import "./appendCache.js";
+import "./scatterSub.js";
+import "./gatherSub.js";
+import "./qkv.js";
+import "./gelu.js";

package/dist/ops/webgpu/normRMS.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/ops/webgpu/normRMS.js ADDED Viewed

@@ -0,0 +1,115 @@
+import { f as m, c as p, g as c } from "../../webgpu_util-g13LvDIv.js";
+import { f as l, e as k } from "../../index-C0dhsYom.js";
+import { s as M } from "../../sum-B8wEpKsg.js";
+class N {
+  variableNames = ["x", "meanSquare", "gamma"];
+  outputShape;
+  shaderKey = "RMSNorm";
+  dispatchLayout;
+  dispatch;
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  constructor(t, e, a) {
+    this.outputShape = [t, e, a], this.dispatchLayout = m(this.outputShape), this.dispatch = p(this.dispatchLayout, this.outputShape, this.workgroupSize);
+  }
+  getUserCode() {
+    return `
+        ${c("index")} {
+            if (index < uniforms.size) {
+                let coords = getCoordsFromIndex(index);
+                let x = getXByOutputIndex(index);
+                let meanSquare = getMeanSquare(coords[0], coords[1], 0);
+                let gamma = getGammaByOutputIndex(index);
+                let invRms = inverseSqrt(meanSquare + 1e-8);
+                let normalized = x * invRms;
+                let outVal = normalized * gamma;
+                setOutputAtIndex(index, outVal);
+            }
+        }
+        `;
+  }
+}
+function b(s) {
+  const { x: t, gamma: e } = s.inputs, a = s.backend, o = t.shape[0], n = t.shape[1], i = t.shape[2], u = t.square().mean(-1, !0), r = new N(o, n, i);
+  return a.runWebGPUProgram(r, [t, u, e], "float32");
+}
+const z = {
+  kernelName: "RMSNorm",
+  backendName: "webgpu",
+  kernelFunc: b
+};
+l(z);
+class R {
+  variableNames = ["x", "meanSquare", "dyGamma", "dyXMean"];
+  outputShape;
+  shaderKey = "RMSNormGradX";
+  dispatchLayout;
+  dispatch;
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  C;
+  constructor(t, e, a) {
+    this.outputShape = [t, e, a], this.dispatchLayout = m(this.outputShape), this.dispatch = p(this.dispatchLayout, this.outputShape, this.workgroupSize), this.C = a;
+  }
+  getUserCode() {
+    return `
+        ${c("index")} {
+            if (index < uniforms.size) {
+                let coords = getCoordsFromIndex(index);
+                let x = getXByOutputIndex(index);
+                let meanSquare = getMeanSquare(coords[0], coords[1], 0) + 1e-8;
+                let dyGamma = getDyGammaByOutputIndex(index);
+                let dyXMean = getDyXMean(coords[0], coords[1], 0) / ${this.C}.0;
+                let invRms = inverseSqrt(meanSquare);
+                let dx = dyGamma * invRms - x * dyXMean * invRms / meanSquare;
+                setOutputAtIndex(index, dx);
+            }
+        }
+        `;
+  }
+}
+class v {
+  variableNames = ["x", "meanSquare", "dy"];
+  outputShape;
+  shaderKey = "RMSNormGradGamma";
+  dispatchLayout;
+  dispatch;
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  constructor(t, e, a) {
+    this.outputShape = [t, e, a], this.dispatchLayout = m(this.outputShape), this.dispatch = p(this.dispatchLayout, this.outputShape, this.workgroupSize);
+  }
+  getUserCode() {
+    return `
+        ${c("index")} {
+            if (index < uniforms.size) {
+                let coords = getCoordsFromIndex(index);
+                let x = getXByOutputIndex(index);
+                let meanSquare = getMeanSquare(coords[0], coords[1], 0) + 1e-8;
+                let dy = getDyByOutputIndex(index);
+                let invRms = inverseSqrt(meanSquare);
+                let dGamma = dy * (x * invRms);
+                setOutputAtIndex(index,dGamma);
+            }
+        }
+        `;
+  }
+}
+function I(s) {
+  const { dy: t, x: e, gamma: a } = s.inputs, o = s.backend, n = e.shape[0], i = e.shape[1], u = e.shape[2], r = t.mul(a), h = r.mul(e), g = h.sum(-1, !0);
+  h.dispose();
+  const S = e.square(), d = S.mean(-1, !0);
+  S.dispose();
+  const y = new R(n, i, u), G = o.runWebGPUProgram(y, [e, d, r, g], "float32");
+  r.dispose(), g.dispose();
+  const q = new v(n, i, u), x = o.runWebGPUProgram(q, [e, d, t], "float32");
+  d.dispose();
+  const f = M(k().makeTensorFromTensorInfo(x), [0, 1]);
+  return o.disposeData(x), [G, f];
+}
+const P = {
+  kernelName: "RMSNormGrad",
+  backendName: "webgpu",
+  kernelFunc: I
+};
+l(P);

package/dist/ops/webgpu/qkv.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/ops/webgpu/qkv.js ADDED Viewed

@@ -0,0 +1,56 @@
+import { f as c, c as d, g as h } from "../../webgpu_util-g13LvDIv.js";
+import { f as p } from "../../index-C0dhsYom.js";
+class l {
+  variableNames = ["x", "kernel"];
+  outputShape;
+  shaderKey = "QKV";
+  dispatchLayout;
+  dispatch;
+  uniforms = "mode: i32";
+  workgroupSize = [64, 1, 1];
+  size = !0;
+  constructor(t, e, o, s) {
+    const r = s / e;
+    this.outputShape = [t, e, o, r], this.dispatchLayout = c(this.outputShape), this.dispatch = d(this.dispatchLayout, this.outputShape, this.workgroupSize);
+  }
+  getUserCode() {
+    const t = this.outputShape[1], e = this.outputShape[3], o = t * e;
+    return `
+        ${h("index")} {
+            if (index < uniforms.size) {
+                let coords = getCoordsFromIndex(index); // [b, h, t, d]
+                let b = coords[0];
+                let h = coords[1];
+                let t = coords[2];
+                let d = coords[3];
+                // Compute output channel index in fused kernel
+                let out_offset = uniforms.mode * ${t} * ${e} + h * ${e} + d;
+                var sum = 0.0;
+                for (var c = 0; c < ${o}; c += 1) {
+                    let xval = getX(b, t, c); // fetch from x
+                    let kval = getKernel(c, out_offset); // fetch from kernel
+                    sum += xval * kval;
+                }
+                setOutputAtIndex(index, sum);
+            }
+        }
+        `;
+  }
+}
+function m(a) {
+  const { x: t, kernel: e } = a.inputs, { heads: o } = a.attrs, s = a.backend, r = t.shape[0], i = t.shape[1], u = t.shape[2], n = new l(r, o, i, u);
+  return [
+    s.runWebGPUProgram(n, [t, e], "float32", [{ type: "int32", data: [0] }]),
+    s.runWebGPUProgram(n, [t, e], "float32", [{ type: "int32", data: [1] }]),
+    s.runWebGPUProgram(n, [t, e], "float32", [{ type: "int32", data: [2] }])
+  ];
+}
+const f = {
+  kernelName: "QKV",
+  backendName: "webgpu",
+  kernelFunc: m
+};
+p(f);

package/dist/ops/webgpu/rope.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};