@genai-fi/nanogpt 0.10.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +11761 -171
- package/dist/{RealDiv-DgA3z9oO.js → RealDiv-KAPDe8zB.js} +28 -30
- package/dist/Reshape-BYkmUnAv.js +14 -0
- package/dist/{Reshape-_kILl6tK.js → Reshape-Zt6eb7yh.js} +18 -20
- package/dist/TeachableLLM.js +10 -11
- package/dist/{axis_util-BvHEw88j.js → axis_util-BaG7mf5A.js} +3 -3
- package/dist/backend.js +2 -2
- package/dist/{backend_util-D-rUb2ty.js → backend_util-RCe-rHaj.js} +59 -60
- package/dist/{backend_webgpu-B0u2ndUn.js → backend_webgpu-DE3ACOLx.js} +45 -47
- package/dist/broadcast_to-B3eYlZm7.js +28 -0
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +7 -11
- package/dist/checks/normRMS.js +9 -9
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +12 -13
- package/dist/checks/rope.js +2 -2
- package/dist/clip_by_value-BnO7-a88.js +12 -0
- package/dist/complex-DjxcVmoX.js +11 -0
- package/dist/concat-BV8bt5H-.js +17 -0
- package/dist/{concat_util-DcJk7YHS.js → concat_util-DpW8mL_l.js} +1 -1
- package/dist/{dataset-0xP8GjwI.js → dataset-BcwmTGYc.js} +137 -139
- package/dist/dropout-BcvN9JYi.js +92 -0
- package/dist/expand_dims-DT4tEPwA.js +11 -0
- package/dist/{exports_initializers-xuidcwI4.js → exports_initializers-Hta_rEnm.js} +1 -1
- package/dist/floor-D5QdR_le.js +9 -0
- package/dist/gather-D3JcZUaI.js +9 -0
- package/dist/{gelu-CNLFZWea.js → gelu-CjNPL4OH.js} +10 -11
- package/dist/{gpgpu_math-DDVJCn6-.js → gpgpu_math-DAOmgtXR.js} +841 -1015
- package/dist/{index-CjOj7j-u.js → index-BwexR4lA.js} +262 -263
- package/dist/index-DOvlwCh-.js +3520 -0
- package/dist/{kernel_funcs_utils-Dg_-E44D.js → kernel_funcs_utils-CCzYdUZg.js} +129 -131
- package/dist/layers/BaseLayer.js +14 -15
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +7 -7
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +9 -9
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +13 -14
- package/dist/log_sum_exp-ngO0-4pK.js +39 -0
- package/dist/main.js +49 -50
- package/dist/{matMul16--R5hOwDG.js → matMul16-BWRSOCWB.js} +14 -15
- package/dist/matMulGelu-CzfgT6Wq.js +163 -0
- package/dist/mat_mul-SjpJRLyL.js +11 -0
- package/dist/mod-AnXEvvpo.js +11 -0
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/model.js +13 -14
- package/dist/ones-D2rT0xk2.js +14 -0
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +13 -14
- package/dist/ops/cpu/adamMoments.js +6 -7
- package/dist/ops/cpu/appendCache.js +7 -8
- package/dist/ops/cpu/attentionMask.js +7 -7
- package/dist/ops/cpu/fusedSoftmax.js +10 -11
- package/dist/ops/cpu/gatherSub.js +9 -10
- package/dist/ops/cpu/gelu.js +9 -10
- package/dist/ops/cpu/matMul16.js +6 -7
- package/dist/ops/cpu/matMulGelu.js +5 -6
- package/dist/ops/cpu/matMulMul.js +3 -4
- package/dist/ops/cpu/mulDropout.js +3 -4
- package/dist/ops/cpu/normRMS.js +10 -11
- package/dist/ops/cpu/qkv.js +8 -9
- package/dist/ops/cpu/rope.js +5 -6
- package/dist/ops/cpu/scatterSub.js +17 -19
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/add16.js +11 -12
- package/dist/ops/grads/attentionMask.js +5 -6
- package/dist/ops/grads/gelu.js +3 -4
- package/dist/ops/grads/matMul16.js +4 -5
- package/dist/ops/grads/matMulGelu.js +9 -10
- package/dist/ops/grads/normRMS.js +7 -8
- package/dist/ops/grads/pack16.js +4 -5
- package/dist/ops/grads/qkv.js +17 -19
- package/dist/ops/grads/rope.js +3 -5
- package/dist/ops/grads/softmax16.js +3 -4
- package/dist/ops/grads/unpack16.js +3 -4
- package/dist/ops/grads/utils.d.ts +1 -0
- package/dist/ops/grads/utils.js +8 -4
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +3 -4
- package/dist/ops/qkv.js +4 -8
- package/dist/ops/reshape16.js +14 -16
- package/dist/ops/rope.d.ts +1 -1
- package/dist/ops/rope.js +3 -8
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +5 -8
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +23 -24
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -3
- package/dist/ops/webgl/adamMoments.js +1 -2
- package/dist/ops/webgl/appendCache.js +1 -2
- package/dist/ops/webgl/attentionMask.js +4 -5
- package/dist/ops/webgl/fusedSoftmax.js +4 -6
- package/dist/ops/webgl/gatherSub.js +6 -7
- package/dist/ops/webgl/gelu.js +2 -3
- package/dist/ops/webgl/log.js +11 -12
- package/dist/ops/webgl/matMul16.js +10 -11
- package/dist/ops/webgl/matMulGelu.js +7 -111
- package/dist/ops/webgl/matMulMul.js +9 -10
- package/dist/ops/webgl/mulDropout.js +8 -9
- package/dist/ops/webgl/normRMS.js +2 -3
- package/dist/ops/webgl/qkv.js +5 -6
- package/dist/ops/webgl/rope.js +7 -8
- package/dist/ops/webgl/scatterSub.js +5 -6
- package/dist/ops/webgpu/adamAdjust.js +10 -12
- package/dist/ops/webgpu/adamMoments.js +8 -10
- package/dist/ops/webgpu/add16.js +8 -9
- package/dist/ops/webgpu/appendCache.js +23 -25
- package/dist/ops/webgpu/attentionMask.js +8 -10
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/concat16.js +12 -14
- package/dist/ops/webgpu/gatherSub.js +11 -13
- package/dist/ops/webgpu/gelu.js +28 -29
- package/dist/ops/webgpu/matMul16.js +26 -28
- package/dist/ops/webgpu/matMul16_program.js +4 -5
- package/dist/ops/webgpu/mul16.js +9 -10
- package/dist/ops/webgpu/normRMS.js +15 -17
- package/dist/ops/webgpu/normRMSGrad.js +21 -28
- package/dist/ops/webgpu/pack16.js +12 -13
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +16 -18
- package/dist/ops/webgpu/rope.js +25 -27
- package/dist/ops/webgpu/scatterSub.js +7 -9
- package/dist/ops/webgpu/slice16.js +21 -23
- package/dist/ops/webgpu/softmax16.js +17 -19
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +7 -8
- package/dist/ops/webgpu/sub16.js +7 -8
- package/dist/ops/webgpu/sum16.js +18 -20
- package/dist/ops/webgpu/transpose16.js +19 -20
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
- package/dist/ops/webgpu/unpack16.js +3 -4
- package/dist/ops/webgpu/utils/binary_op.js +7 -8
- package/dist/ops/webgpu/utils/reductions.js +14 -22
- package/dist/ops-B5yanEdW.js +476 -0
- package/dist/pack16-nQ6JaLo-.js +39 -0
- package/dist/patches/webgpu_backend.js +19 -20
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +21 -22
- package/dist/{random_width-DY6Kk2Dl.js → random_width-or-CEftb.js} +2506 -2761
- package/dist/range-BklejeeW.js +10 -0
- package/dist/relu-CP0ZcxWO.js +9 -0
- package/dist/reshape-ByE68wS9.js +9 -0
- package/dist/resize_nearest_neighbor-B19mCEg2.js +175 -0
- package/dist/rope-Ir4mTyD1.js +24 -0
- package/dist/{scatter_nd_util-5EL-8VAQ.js → scatter_nd_util-lvSiX8q4.js} +1 -1
- package/dist/selu_util-kbhpTdYD.js +44 -0
- package/dist/{shared-BRksrJb3.js → shared-DT1TkE6w.js} +1 -1
- package/dist/{shared-BuAXb4CI.js → shared-dntlHIDQ.js} +343 -345
- package/dist/slice-BfEGSH82.js +12 -0
- package/dist/{slice_util-DtEldBfK.js → slice_util-uTKwiEpW.js} +1 -1
- package/dist/{softmax-ZHVebtR1.js → softmax-CA5jFsLR.js} +4 -5
- package/dist/split-CVLc0w--.js +9 -0
- package/dist/squeeze-C7Z2srUo.js +10 -0
- package/dist/stack-Cf4n9h0N.js +11 -0
- package/dist/step-CINUs5QB.js +261 -0
- package/dist/sum-DWAtNGez.js +11 -0
- package/dist/tensor-DJoc7gJU.js +8 -0
- package/dist/tensor1d-D11P_7Dp.js +11 -0
- package/dist/{tensor2d-G4Ys2GxX.js → tensor2d-Bs9wZRc7.js} +6 -7
- package/dist/{tensor4d-B8roDgtc.js → tensor4d-BARPdTaS.js} +6 -7
- package/dist/{tfjs_backend-kNyO5L2d.js → tfjs_backend-y1cvNhLA.js} +244 -253
- package/dist/tile-mbfagpsB.js +11 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +5 -5
- package/dist/transpose-ClWiBS_b.js +36 -0
- package/dist/unsorted_segment_sum-BDDhB_E6.js +277 -0
- package/dist/utilities/dummy.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.d.ts +1 -4
- package/dist/utilities/packed.js +10 -711
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-Bhn5bHYv.js → variable-WawDEaAb.js} +1 -1
- package/dist/{webgpu_program-Cigz-7RF.js → webgpu_program-DuOXPQol.js} +178 -172
- package/dist/{webgpu_util-BBCnKm2X.js → webgpu_util-RxEF33Rj.js} +34 -35
- package/dist/zeros-KnWaWf-X.js +13 -0
- package/dist/zeros_like-DvE73F4e.js +721 -0
- package/package.json +4 -2
- package/dist/Reshape-CF6odzV4.js +0 -16
- package/dist/broadcast_to-CwF7XIeu.js +0 -30
- package/dist/complex-CSlYz-2T.js +0 -13
- package/dist/concat-BHlIJeyT.js +0 -19
- package/dist/dropout-C1pM3f11.js +0 -99
- package/dist/expand_dims-BPG4fwBP.js +0 -13
- package/dist/gather-DykLGqmW.js +0 -10
- package/dist/index-ZyQhjEPo.js +0 -2157
- package/dist/log_sum_exp-DWI-76TI.js +0 -41
- package/dist/mat_mul-DeAh4uTH.js +0 -12
- package/dist/mod-Gt1rMB4n.js +0 -12
- package/dist/mulmat_packed_gpu-BMFhLwta.js +0 -55
- package/dist/ones-CAMiP4I2.js +0 -15
- package/dist/ops-CNI3TwqM.js +0 -645
- package/dist/pack16-CFUqumar.js +0 -41
- package/dist/patches/PackedTensor.d.ts +0 -12
- package/dist/patches/PackedTensor.js +0 -11
- package/dist/patches/engine.d.ts +0 -261
- package/dist/patches/engine.js +0 -10
- package/dist/patches/tape.d.ts +0 -12
- package/dist/patches/tape.js +0 -5
- package/dist/range-BMS52eQi.js +0 -11
- package/dist/reciprocal-CTmshQ9J.js +0 -10
- package/dist/register_all_kernels-Bwu1PTuU.js +0 -12307
- package/dist/relu-yZ2-7WxU.js +0 -10
- package/dist/reshape-DevtBWtf.js +0 -10
- package/dist/rope-B5UUMsPi.js +0 -32
- package/dist/selu_util-D1w6yyTO.js +0 -303
- package/dist/sin-BGfy2HZo.js +0 -16
- package/dist/slice-D_gkkqZK.js +0 -13
- package/dist/split-DrfihRpZ.js +0 -10
- package/dist/squeeze-DZEpeblb.js +0 -11
- package/dist/stack-yOIAalTq.js +0 -13
- package/dist/sum-_fzj5ZTB.js +0 -12
- package/dist/tensor-DdQUJZlz.js +0 -909
- package/dist/tensor-f35l8Odg.js +0 -8
- package/dist/tensor1d-CeZuc-Rv.js +0 -12
- package/dist/tensor_util-DV-FP5Q3.js +0 -523
- package/dist/tile-BzyEiF-F.js +0 -13
- package/dist/transpose-DKELTqhe.js +0 -38
- package/dist/zeros-2gldETuK.js +0 -14
package/dist/ops/webgl/matMulGelu.js
CHANGED
@@ -1,113 +1,9 @@
-import
-import
-import {
-import
-import { m as $ } from "../../mat_mul-DeAh4uTH.js";
-import { s as A, a as U } from "../../tensor-DdQUJZlz.js";
-const M = 0.7978845608028654, g = 0.044715, j = `
-vec4 x3 = x * x * x;
-vec4 inner = x + ${g} * x3;
-inner = ${M} * inner;
-inner = vec4(
-abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
-abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
-abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
-abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
-);
-inner = 0.5 * (1.0 + inner);
-vec4 result = x * inner;
-return result;
-`, q = `
-vec4 a2 = a * a;
-vec4 a3 = a2 * a;
-vec4 u = ${M} * (a + ${g} * a3);
-vec4 t = vec4(
-abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
-abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
-abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
-abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
-);
-vec4 sech2 = 1.0 - t * t;
-vec4 du_dx = ${M} * (1.0 + 3.0 * ${g} * a2);
-vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
-return dgelu * b;
-`, se = 1e3;
-function _({
-a: e,
-b: n,
-transposeA: s,
-transposeB: t,
-backend: a,
-activationSnippet: i,
-multiplier: o
-}) {
-const r = e.shape.length, c = n.shape.length, u = s ? e.shape[r - 2] : e.shape[r - 1], p = t ? n.shape[c - 1] : n.shape[c - 2], h = s ? e.shape[r - 1] : e.shape[r - 2], l = t ? n.shape[c - 2] : n.shape[c - 1], w = e.shape.slice(0, -2), K = n.shape.slice(0, -2), m = A(w), d = A(K), T = N(e.shape.slice(0, -2), n.shape.slice(0, -2)).concat([h, l]);
-U(
-u === p,
-() => `Error in matMul: inner shapes (${u}) and (${p}) of Tensors with shapes ${e.shape} and ${n.shape} and transposeA=${s} and transposeB=${t} must match.`
-);
-const v = s ? [m, u, h] : [m, h, u], x = t ? [d, l, p] : [d, p, l], S = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), b = f({ inputs: { x: n }, backend: a, attrs: { shape: x } }), D = [S, b], y = Math.max(m, d), L = i, B = O(e.dtype, n.dtype), F = new H(
-v,
-x,
-[y, h, l],
-s,
-t,
-!1,
-L,
-!!o,
-!1
-), G = [S, b];
-o && G.push(o);
-const k = a.runWebGLProgram(F, G, B), I = f({ inputs: { x: k }, backend: a, attrs: { shape: T } });
-D.push(k);
-for (const P of D)
-a.disposeIntermediateTensorInfo(P);
-return I;
-}
-function z(e) {
-const { inputs: n, backend: s } = e, { x: t, kernel: a } = n;
-if (t === void 0 || a === void 0)
-throw new Error("BatchMatMul requires two input tensors.");
-return _({
-a: t,
-b: a,
-transposeA: !1,
-transposeB: !1,
-backend: s,
-activationSnippet: j
-});
-}
-const W = {
-kernelName: "MatMulGelu",
-backendName: "webgl",
-kernelFunc: z
-};
-E(W);
-function J(e) {
-const { dy: n, x: s, kernel: t } = e.inputs, a = e.backend;
-return R(() => {
-const i = C().makeTensorFromTensorInfo(
-_({
-a: s,
-b: t,
-transposeA: !1,
-transposeB: !1,
-backend: a,
-activationSnippet: q,
-multiplier: n
-})
-), o = $(i, t, !1, !0), r = $(s, i, !0, !1);
-return [o, r];
-});
-}
-const Q = {
-kernelName: "MatMulGeluGrad",
-backendName: "webgl",
-kernelFunc: J
-};
-E(Q);
+import "../../index-DOvlwCh-.js";
+import "../../Reshape-Zt6eb7yh.js";
+import { a as m, b as o, c as p } from "../../matMulGelu-CzfgT6Wq.js";
+import "../../mat_mul-SjpJRLyL.js";
 export {
-
-
-
+m as MATMUL_SHARED_DIM_THRESHOLD,
+o as batchMatMulGeluImpl,
+p as batchMatMulKernel
 };
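Note: the deleted GLSL snippets implement the tanh approximation of GELU and its derivative (0.7978845608028654 ≈ √(2/π), with 0.044715 as the cubic coefficient); the shared implementation now lives in the `matMulGelu-CzfgT6Wq.js` chunk. For reference, a plain-JavaScript transcription of the same math, including the shader's tanh-overflow clamp (not part of the package itself):

```js
const SQRT_2_OVER_PI = 0.7978845608028654;
const GELU_COEF = 0.044715;

function gelu(x) {
  // inner = sqrt(2/pi) * (x + 0.044715 * x^3), clamped via sign() for |inner| > 15
  const inner = SQRT_2_OVER_PI * (x + GELU_COEF * x * x * x);
  const t = Math.abs(inner) > 15 ? Math.sign(inner) : Math.tanh(inner);
  return x * 0.5 * (1 + t);
}

function geluGrad(a, b) {
  // Mirrors the removed gradient snippet: d/da gelu(a), scaled by b (= dy).
  const a2 = a * a;
  const u = SQRT_2_OVER_PI * (a + GELU_COEF * a2 * a);
  const t = Math.abs(u) > 15 ? Math.sign(u) : Math.tanh(u);
  const sech2 = 1 - t * t;          // tanh'(u) = sech^2(u)
  const du_dx = SQRT_2_OVER_PI * (1 + 3 * GELU_COEF * a2);
  return (0.5 * (1 + t) + 0.5 * a * sech2 * du_dx) * b;
}
```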
package/dist/ops/webgl/matMulMul.js
CHANGED
@@ -1,21 +1,20 @@
-import "../../index-
-import {
-import { r as c } from "../../tensor_util-DV-FP5Q3.js";
+import { p as u } from "../../index-DOvlwCh-.js";
+import { b as c } from "../../matMulGelu-CzfgT6Wq.js";
 const p = `
 return a * b;
 `;
 function M(r) {
-const { inputs: n, backend:
+const { inputs: n, backend: a, attrs: o } = r, { x: t, kernel: e, y: l } = n, { transposeA: s, transposeB: i } = o;
 if (t === void 0 || e === void 0)
 throw new Error("BatchMatMul requires two input tensors.");
-return
+return c({
 a: t,
 b: e,
-transposeA:
-transposeB:
-backend:
+transposeA: s,
+transposeB: i,
+backend: a,
 activationSnippet: p,
-multiplier:
+multiplier: l
 });
 }
 const m = {
@@ -23,7 +22,7 @@ const m = {
 backendName: "webgl",
 kernelFunc: M
 };
-
+u(m);
 export {
 M as batchMatMulKernel
 };
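Note: the surviving snippet (`return a * b;`) is the epilogue of a fused matmul-multiply: the batched matmul runs once and each output element is scaled by the extra `multiplier` tensor in the same pass. A hedged plain-JS sketch of those semantics (row-major layout assumed; `matMulMulRef` is illustrative, not the package API):

```js
// out = (x · kernel) ∘ y — matmul with an elementwise-multiply epilogue.
function matMulMulRef(x, kernel, y, n, k, m) {
  // x: n×k, kernel: k×m, y: n×m, all flat Float32Arrays.
  const out = new Float32Array(n * m);
  for (let i = 0; i < n; i++) {
    for (let j = 0; j < m; j++) {
      let acc = 0;
      for (let q = 0; q < k; q++) acc += x[i * k + q] * kernel[q * m + j];
      out[i * m + j] = acc * y[i * m + j]; // the `return a * b;` snippet
    }
  }
  return out;
}
```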
package/dist/ops/webgl/mulDropout.js
CHANGED
@@ -1,5 +1,4 @@
-import "../../index-
-import { r as m } from "../../tensor_util-DV-FP5Q3.js";
+import { p as m } from "../../index-DOvlwCh-.js";
 class f {
 variableNames = ["a", "b"];
 outputShape;
@@ -8,8 +7,8 @@ class f {
 { name: "dropoutRate", type: "float" },
 { name: "seed", type: "float" }
 ];
-constructor(
-this.outputShape = [
+constructor(t, r, o) {
+this.outputShape = [t, r, o, o], this.userCode = `
 float random(ivec4 coords) {
 float x = float(coords.x * 4096 + coords.y * 256 + coords.z * 16 + coords.w);
 return fract(sin(seed + x) * 43758.5453123);
@@ -27,16 +26,16 @@ class f {
 `;
 }
 }
-function
-const { inputs:
+function b(e) {
+const { inputs: t, attrs: r } = e, { a: o, b: s } = t, { dropoutRate: a, seed: c } = r, n = e.backend, d = o.shape[0], u = o.shape[2], p = o.shape[1], l = new f(d, p, u);
 return n.runWebGLProgram(l, [o, s], "float32", [
 [a ?? 0],
 [c ?? Math.random() * 1e4]
 ]);
 }
-const
+const i = {
 kernelName: "MulDropout",
 backendName: "webgl",
-kernelFunc:
+kernelFunc: b
 };
-m(
+m(i);
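Note: only the shader's RNG is visible in this hunk; how the dropout mask is applied lives in the elided body. The hash is the classic `fract(sin(...))` trick, transcribed here for reference (JS `Math.sin` is not bit-identical to GLSL `sin`, so this is orientation, not a reproduction):

```js
// Fold a 4-D output coordinate into a scalar, then hash it to [0, 1).
function shaderRandom(seed, x, y, z, w) {
  const v = x * 4096 + y * 256 + z * 16 + w;
  const s = Math.sin(seed + v) * 43758.5453123;
  return s - Math.floor(s); // fract()
}
```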
package/dist/ops/webgl/normRMS.js
CHANGED
@@ -1,6 +1,5 @@
-import { e as G } from "../../index-
-import {
-import { s as x } from "../../sum-_fzj5ZTB.js";
+import { p as g, e as G } from "../../index-DOvlwCh-.js";
+import { s as x } from "../../sum-DWAtNGez.js";
 class y {
 variableNames = ["x", "meanSquare", "gamma"];
 outputShape;
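Note: given the inputs `["x", "meanSquare", "gamma"]`, this is an RMSNorm kernel; only its imports change here. A hedged reference of the usual computation (the epsilon value and its placement are assumptions, not read from this hunk):

```js
// y = x / sqrt(mean(x^2) + eps) * gamma, per normalized row.
function rmsNormRef(x, gamma, eps = 1e-6) {
  const meanSquare = x.reduce((s, v) => s + v * v, 0) / x.length;
  const inv = 1 / Math.sqrt(meanSquare + eps);
  return x.map((v, i) => v * inv * gamma[i]);
}
```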
package/dist/ops/webgl/qkv.js
CHANGED
@@ -1,6 +1,5 @@
-import "../../index-
-
-class m {
+import { p as i } from "../../index-DOvlwCh-.js";
+class l {
 variableNames = ["x", "kernel"];
 outputShape;
 userCode;
@@ -31,8 +30,8 @@ class m {
 `;
 }
 }
-function
-const { x: e, kernel: t } = r.inputs, { heads: s } = r.attrs, o = r.backend, n = e.shape[0], c = e.shape[1], u = e.shape[2], a = new
+function m(r) {
+const { x: e, kernel: t } = r.inputs, { heads: s } = r.attrs, o = r.backend, n = e.shape[0], c = e.shape[1], u = e.shape[2], a = new l(n, s, c, u);
 return [
 o.runWebGLProgram(a, [e, t], "float32", [[0]]),
 o.runWebGLProgram(a, [e, t], "float32", [[1]]),
@@ -42,6 +41,6 @@ function l(r) {
 const d = {
 kernelName: "QKV",
 backendName: "webgl",
-kernelFunc:
+kernelFunc: m
 };
 i(d);
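Note: the kernel function dispatches the same program three times, with the `[[0]]`/`[[1]]` (and presumably `[[2]]`, elided by the hunk boundary) uniform selecting which projection to emit from one fused weight. A hedged sketch of that reading — the kernel layout (`C × 3C`, with Q|K|V side by side) is an assumption, not read from the shader:

```js
// One fused projection weight, read three times; `which` ∈ {0,1,2} = Q,K,V.
function qkvRef(x, kernel, T, C, which) {
  // x: T×C, kernel: C×(3C), both flat Float32Arrays.
  const out = new Float32Array(T * C);
  for (let t = 0; t < T; t++)
    for (let j = 0; j < C; j++) {
      let acc = 0;
      for (let c = 0; c < C; c++)
        acc += x[t * C + c] * kernel[c * 3 * C + which * C + j];
      out[t * C + j] = acc;
    }
  return out;
}
```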
package/dist/ops/webgl/rope.js
CHANGED
@@ -1,5 +1,4 @@
-import "../../index-
-import { r as h } from "../../tensor_util-DV-FP5Q3.js";
+import { p as h } from "../../index-DOvlwCh-.js";
 class g {
 variableNames = ["x", "sin", "cos"];
 outputShape;
@@ -45,13 +44,13 @@ class g {
 `;
 }
 }
-function
-const { x: t } = o.inputs, { pastLen: s, ropeCache: e, negSin: n } = o.attrs, a = n ? e.getNegSin() : e.getSin(), r = e.getCos(),
-return
+function f(o) {
+const { x: t } = o.inputs, { pastLen: s, ropeCache: e, negSin: n } = o.attrs, a = n ? e.getNegSin() : e.getSin(), r = e.getCos(), d = o.backend, i = t.shape[0], c = t.shape[1], p = t.shape[2], u = t.shape[3], l = new g(i, c, p, u);
+return d.runWebGLProgram(l, [t, a, r], "float32", [[s]]);
 }
-const
+const m = {
 kernelName: "Rope",
 backendName: "webgl",
-kernelFunc:
+kernelFunc: f
 };
-h(m);
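Note: this hunk only shows the dispatch plumbing (sin/cos tables from `ropeCache`, a `pastLen` offset uniform); the rotation itself is in the elided shader. For orientation, standard RoPE rotates each feature pair by a precomputed angle — a minimal sketch, with the pairing scheme assumed:

```js
// Rotate one 2-D feature pair (x1, x2) by the cached angle for its position.
function ropeRef(pair, sin, cos) {
  const [x1, x2] = pair;
  return [x1 * cos - x2 * sin, x1 * sin + x2 * cos];
}
```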
package/dist/ops/webgl/scatterSub.js
CHANGED
@@ -1,11 +1,10 @@
-import "../../index-
-import { r as i } from "../../tensor_util-DV-FP5Q3.js";
+import { p as i } from "../../index-DOvlwCh-.js";
 class u {
 variableNames = ["labels", "softmaxProbs", "dy"];
 outputShape;
 userCode;
-constructor(
-this.outputShape = [
+constructor(e, t) {
+this.outputShape = [e, t], this.userCode = `
 void main() {
 ivec2 coords = getOutputCoords();
 int index = int(getLabels(coords.x));
@@ -17,8 +16,8 @@ class u {
 }
 }
 function d(o) {
-const { logits:
-return s.runWebGLProgram(c, [
+const { logits: e, labels: t, dy: r } = o.inputs, s = o.backend, n = t.shape[0], a = e.shape[1], c = new u(n, a);
+return s.runWebGLProgram(c, [t, e, r], "float32");
 }
 const b = {
 kernelName: "EfficientScatterSub",
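Note: `EfficientScatterSub` takes `labels`, `softmaxProbs` and `dy`, which is the shape of the sparse cross-entropy backward pass. A hedged sketch of the usual form of that gradient (softmax minus a one-hot of the label, scaled by the incoming gradient); the exact shader body is elided, so treat this as the expected semantics, not a transcription:

```js
function scatterSubRef(labels, probs, dy, batch, numClasses) {
  const grad = new Float32Array(batch * numClasses);
  for (let i = 0; i < batch; i++)
    for (let j = 0; j < numClasses; j++) {
      const oneHot = j === labels[i] ? 1 : 0;
      grad[i * numClasses + j] = (probs[i * numClasses + j] - oneHot) * dy[i];
    }
  return grad;
}
```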
package/dist/ops/webgpu/adamAdjust.js
CHANGED
@@ -1,8 +1,6 @@
-import { e as p } from "../../webgpu_program-
-import { f as d, c as l } from "../../webgpu_util-
-import "../../index-
-import { j as f } from "../../tensor-DdQUJZlz.js";
-import { r as c } from "../../tensor_util-DV-FP5Q3.js";
+import { e as p } from "../../webgpu_program-DuOXPQol.js";
+import { f as d, c as l } from "../../webgpu_util-RxEF33Rj.js";
+import { p as f, ab as c } from "../../index-DOvlwCh-.js";
 class h {
 variableNames = ["moments", "value"];
 outputShape;
@@ -38,19 +36,19 @@ class h {
 }
 }
 function v(t) {
-const { moments: e, value: a } = t.inputs, { beta1: n, beta2:
-
-const
+const { moments: e, value: a } = t.inputs, { beta1: n, beta2: s, learningRate: o, epsilon: i } = t.attrs, r = t.backend;
+c(e.shape, [...a.shape, 2], "Error in AdamAdjust: ");
+const u = new h(a.shape), m = [
 { type: "float32", data: [1 / n] },
-{ type: "float32", data: [1 /
-{ type: "float32", data: [
+{ type: "float32", data: [1 / s] },
+{ type: "float32", data: [o] },
 { type: "float32", data: [i] }
 ];
-return r.runWebGPUProgram(
+return r.runWebGPUProgram(u, [e, a], "float32", m);
 }
 const b = {
 kernelName: "AdamAdjust",
 backendName: "webgpu",
 kernelFunc: v
 };
-
+f(b);
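Note: the uniforms (1/beta1, 1/beta2, learningRate, epsilon) and the interleaved `[m, v]` moments tensor suggest a fused Adam parameter update. A hedged sketch of that reading — whether `beta1`/`beta2` here are the raw betas or the bias-correction terms `1 − βᵗ` is not visible in this hunk:

```js
function adamAdjustRef(moments, value, beta1, beta2, lr, eps) {
  const out = new Float32Array(value.length);
  for (let i = 0; i < value.length; i++) {
    const mHat = moments[2 * i] / beta1;     // uniform carries 1/beta1
    const vHat = moments[2 * i + 1] / beta2; // uniform carries 1/beta2
    out[i] = value[i] - lr * mHat / (Math.sqrt(vHat) + eps);
  }
  return out;
}
```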
package/dist/ops/webgpu/adamMoments.js
CHANGED
@@ -1,8 +1,6 @@
-import { e as
-import { f as
-import "../../index-
-import { j as f } from "../../tensor-DdQUJZlz.js";
-import { r as c } from "../../tensor_util-DV-FP5Q3.js";
+import { e as u } from "../../webgpu_program-DuOXPQol.js";
+import { f as p, c as d } from "../../webgpu_util-RxEF33Rj.js";
+import { p as c, ab as f } from "../../index-DOvlwCh-.js";
 class l {
 variableNames = ["moments", "gradient"];
 outputShape;
@@ -15,7 +13,7 @@ class l {
 outputComponent = 2;
 variableComponents = [2, 1];
 constructor(t) {
-this.outputShape = t, this.dispatchLayout =
+this.outputShape = t, this.dispatchLayout = p(this.outputShape.slice(0, -1)), this.dispatch = d(
 this.dispatchLayout,
 this.outputShape.slice(0, -1),
 this.workgroupSize,
@@ -24,7 +22,7 @@ class l {
 }
 getUserCode() {
 return `
-${
+${u("index")} {
 if (index < uniforms.size) {
 let m: vec2<f32> = moments[index];
 
@@ -41,7 +39,7 @@ class l {
 }
 }
 function h(e) {
-const { moments: t, gradient: a } = e.inputs, { beta1: n, beta2: o, lossScaling:
+const { moments: t, gradient: a } = e.inputs, { beta1: n, beta2: o, lossScaling: s } = e.attrs, r = e.backend;
 if (a.dtype !== "float32")
 throw new Error(`Gradient must be float32, but got ${a.dtype}`);
 if (f(t.shape, [...a.shape, 2], "Error in AdamMoments: "), n < 0 || n >= 1)
@@ -51,9 +49,9 @@ function h(e) {
 const i = new l(t.shape), m = [
 { type: "float32", data: [n] },
 { type: "float32", data: [o] },
-{ type: "float32", data: [1 /
+{ type: "float32", data: [1 / s] }
 ];
-return
+return r.runWebGPUProgram(i, [t, a], "float32", m);
 }
 const g = {
 kernelName: "AdamMoments",
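Note: `AdamMoments` folds the gradient into running first/second moments stored as interleaved vec2 pairs (`outputComponent = 2`), unscaling it by `1 / lossScaling` first (the third uniform). A hedged reference of the standard update it appears to implement:

```js
function adamMomentsRef(moments, grad, beta1, beta2, lossScaling) {
  const out = new Float32Array(moments.length);
  for (let i = 0; i < grad.length; i++) {
    const g = grad[i] / lossScaling; // undo mixed-precision loss scaling
    out[2 * i] = beta1 * moments[2 * i] + (1 - beta1) * g;         // m
    out[2 * i + 1] = beta2 * moments[2 * i + 1] + (1 - beta2) * g * g; // v
  }
  return out;
}
```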
package/dist/ops/webgpu/add16.js
CHANGED
@@ -1,14 +1,13 @@
-import "../../index-
+import { p as t } from "../../index-DOvlwCh-.js";
 import { BinaryOpProgram as p } from "./utils/binary_op.js";
-import { B as
-
-
-
-return o.packed = !0, o;
+import { B as s } from "../../binary_op_util-pKXltfxI.js";
+function c(e) {
+const { a: r, b: n } = e.inputs, a = e.backend, o = new p(s.ADD, r.shape, n.shape);
+return a.runWebGPUProgram(o, [r, n], "packedF16");
 }
-const
+const m = {
 kernelName: "Add16",
 backendName: "webgpu",
-kernelFunc:
+kernelFunc: c
 };
-
+t(m);
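Note: `Add16` runs a stock `BinaryOpProgram` but reads and writes the `packedF16` dtype: two IEEE half floats share one 32-bit word, halving memory traffic for 16-bit tensors. An illustrative decoder for that layout (not taken from the kernel source; the low half is assumed to be the first lane, as in WGSL's `unpack2x16float`):

```js
// Decode one IEEE binary16 value from its 16-bit pattern.
function f16ToNumber(h) {
  const s = h & 0x8000 ? -1 : 1;
  const e = (h >> 10) & 0x1f;
  const m = h & 0x3ff;
  if (e === 0) return s * m * 2 ** -24;          // subnormal
  if (e === 31) return m ? NaN : s * Infinity;   // inf / NaN
  return s * (1 + m / 1024) * 2 ** (e - 15);     // normal
}

// Split a packed 32-bit word into its two f16 lanes.
function unpackHalf2x16(word) {
  return [f16ToNumber(word & 0xffff), f16ToNumber(word >>> 16)];
}
```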
package/dist/ops/webgpu/appendCache.js
CHANGED
@@ -1,10 +1,8 @@
-import { isPackedTensor as
-import { e as
-import { f as
-import "../../index-
-
-import { r as x } from "../../tensor_util-DV-FP5Q3.js";
-class b {
+import { isPackedTensor as T } from "../../utilities/packed.js";
+import { e as p } from "../../webgpu_program-DuOXPQol.js";
+import { f as d, c as u } from "../../webgpu_util-RxEF33Rj.js";
+import { p as S, ab as g } from "../../index-DOvlwCh-.js";
+class x {
 variableNames = ["cache", "item"];
 outputShape;
 shaderKey = "AppendCache";
@@ -13,14 +11,14 @@ class b {
 workgroupSize = [64, 1, 1];
 size = !0;
 uniforms = "cacheT: i32";
-constructor(e, t,
-const
-this.shaderKey = `AppendCache_${
+constructor(e, t, a, s, i) {
+const o = Math.min(a + 1, i);
+this.shaderKey = `AppendCache_${o}`, this.outputShape = [e, t, o, s], this.dispatchLayout = d(this.outputShape), this.dispatch = u(this.dispatchLayout, this.outputShape, this.workgroupSize);
 }
 getUserCode() {
 const e = this.outputShape[2];
 return `
-${
+${p("index")} {
 if (index < uniforms.size) {
 let coords = getCoordsFromIndex(index); // [b, h, t, d]
 let b = coords[0];
@@ -48,7 +46,7 @@ class b {
 `;
 }
 }
-class
+class b {
 variableNames = ["cache", "item"];
 outputShape;
 shaderKey = "AppendCache";
@@ -57,14 +55,14 @@ class C {
 workgroupSize = [64, 1, 1];
 size = !0;
 uniforms = "cacheT: i32";
-constructor(e, t,
-const
-this.shaderKey = `AppendCache_${
+constructor(e, t, a, s, i) {
+const o = Math.min(a + 1, i);
+this.shaderKey = `AppendCache_${o}`, this.outputShape = [e, t, o, s], this.dispatchLayout = d(this.outputShape), this.dispatch = u(this.dispatchLayout, this.outputShape, this.workgroupSize);
 }
 getUserCode() {
 const e = this.outputShape[2];
 return `
-${
+${p("index")} {
 if (index < uniforms.size) {
 let coords = getCoordsFromIndex(index); // [b, h, t, d]
 let b = coords[0];
@@ -92,16 +90,16 @@ class C {
 `;
 }
 }
-function
-const { cache: e, item: t } = r.inputs, { maxSize:
-if (g(t.shape, [c, h, 1, t.shape[3]], "Error in AppendCache: "),
-throw new Error(`Invalid pastLen value: ${
-const
-return
+function C(r) {
+const { cache: e, item: t } = r.inputs, { maxSize: a, pastLen: s } = r.attrs, i = r.backend, o = T(e), c = e.shape[0], n = e.shape[2], h = e.shape[1];
+if (g(t.shape, [c, h, 1, t.shape[3]], "Error in AppendCache: "), s < 0 || s > a)
+throw new Error(`Invalid pastLen value: ${s}. Must be in the range [0, ${a}].`);
+const m = o ? new b(c, h, n, t.shape[3], a) : new x(c, h, n, t.shape[3], a), l = [{ type: "int32", data: [s] }], f = o ? "packedF16" : e.dtype;
+return i.runWebGPUProgram(m, [e, t], f, l);
 }
-const
+const v = {
 kernelName: "AppendCache",
 backendName: "webgpu",
-kernelFunc:
+kernelFunc: C
 };
-
+S(v);
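Note: `AppendCache` writes one new timestep into a `[batch, heads, time, dim]` KV cache, choosing the packed-f16 or float program by `isPackedTensor`. The output time length is `min(pastLen + 1, maxSize)` (the `Math.min(a + 1, i)` in both constructors). A hedged sketch of the non-overflowing case along the time axis; how the kernel behaves once the cache is full lives in the elided shader body:

```js
// One (batch, head, dim) slot viewed along the time axis.
function appendCacheRef(cacheRow, itemValue, pastLen, maxSize) {
  const outT = Math.min(pastLen + 1, maxSize); // mirrors Math.min(a + 1, i)
  const out = cacheRow.slice(0, outT);         // keep existing timesteps
  if (pastLen < maxSize) out[pastLen] = itemValue; // write the new step
  return out;
}
```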
package/dist/ops/webgpu/attentionMask.js
CHANGED
@@ -1,19 +1,17 @@
-import "../../index-
-import { j as d } from "../../tensor-DdQUJZlz.js";
+import { p as d, ab as b } from "../../index-DOvlwCh-.js";
 import { isPackedTensor as p } from "../../utilities/packed.js";
-import { b } from "../../matMul16
-import
-import { r as M } from "../../tensor_util-DV-FP5Q3.js";
+import { b as l } from "../../matMul16-BWRSOCWB.js";
+import M from "./attentionMask32_program.js";
 function w(n) {
 const { q: t, k: e } = n.inputs, { divisor: a, pastLen: o } = n.attrs, m = n.backend;
 if (p(t) && p(e))
-return
-const
-if (
+return l(t, e, !1, !0, { causalMask: !0, pastLen: o, scale: a });
+const s = t.shape[0], k = t.shape[2], r = e.shape[2], i = t.shape[1], c = t.shape[3];
+if (b(e.shape, [s, i, r, c], "Error in AttentionMask: "), a === 0)
 throw new Error("Divisor must be non-zero in AttentionMask");
 if (o < 0)
 throw new Error("pastLen must be non-negative in AttentionMask");
-const u = new
+const u = new M(s, i, k, r, c), f = [
 { type: "float32", data: [a] },
 { type: "int32", data: [o] },
 { type: "float32", data: [Number.NEGATIVE_INFINITY] }
@@ -25,4 +23,4 @@ const A = {
 backendName: "webgpu",
 kernelFunc: w
 };
-
+d(A);
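Note: when both inputs are packed, the kernel now defers to the fused `matMul16` path with a causal mask and scale; otherwise it dispatches the `attentionMask32_program` with uniforms for the divisor, `pastLen`, and a `-Infinity` fill. A hedged per-element reference of the scaled causal score those uniforms describe:

```js
// Score for one (query, key) pair of head vectors; positions are absolute,
// with the query at position pastLen + qPos.
function attentionScoreRef(q, k, divisor, pastLen, qPos, kPos) {
  if (kPos > pastLen + qPos) return Number.NEGATIVE_INFINITY; // future key: masked
  let dot = 0;
  for (let i = 0; i < q.length; i++) dot += q[i] * k[i];
  return dot / divisor;
}
```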
package/dist/ops/webgpu/attentionMask32_program.js
CHANGED
@@ -1,5 +1,5 @@
-import { e as r } from "../../webgpu_program-
-import { f as a, c as u } from "../../webgpu_util-
+import { e as r } from "../../webgpu_program-DuOXPQol.js";
+import { f as a, c as u } from "../../webgpu_util-RxEF33Rj.js";
 class p {
 variableNames = ["q", "k"];
 outputShape;

package/dist/ops/webgpu/concat16.js
CHANGED
@@ -1,10 +1,8 @@
-import "../../index-
-import { e as
-import { f as
-import { r as
-import {
-import { p as F, s as c } from "../../tensor-DdQUJZlz.js";
-import { a as L, c as d } from "../../concat_util-DcJk7YHS.js";
+import { p as x, af as I, h as c } from "../../index-DOvlwCh-.js";
+import { e as D } from "../../webgpu_program-DuOXPQol.js";
+import { f as $, c as F } from "../../webgpu_util-RxEF33Rj.js";
+import { r as g } from "../../Reshape-BYkmUnAv.js";
+import { a as L, c as d } from "../../concat_util-DpW8mL_l.js";
 class T {
 outputShape;
 shaderKey;
@@ -21,7 +19,7 @@ class T {
 t,
 1
 /* axis */
-), this.variableNames = t.map((e, a) => `T${a}`), this.dispatchLayout =
+), this.variableNames = t.map((e, a) => `T${a}`), this.dispatchLayout = $(this.outputShape), this.dispatch = F(this.dispatchLayout, this.outputShape, this.workgroupSize, [
 this.workPerThread,
 1,
 1
@@ -49,7 +47,7 @@ class T {
 "result[getIndexFromCoords2D(coords, uniforms.outShape)] = T0[getIndexFromCoords2D(vec2<i32>(yR, yC), uniforms.t0Shape)];"
 );
 return `
-${
+${D("index")} {
 for(var i = 0; i < ${this.workPerThread}; i = i + 1) {
 let flatIndex = index * ${this.workPerThread} + i;
 if(flatIndex < uniforms.size) {
@@ -86,8 +84,8 @@ function m(n, t, e) {
 }
 const l = e.runWebGPUProgram(u, i, i[0].dtype, f);
 i.forEach((o) => e.disposeData(o.dataId));
-const
-return e.disposeData(l.dataId),
+const y = g({ inputs: { x: l }, backend: e, attrs: { shape: s } });
+return e.disposeData(l.dataId), y;
 }
 function P(n, t, e) {
 const a = d(
@@ -95,7 +93,7 @@ function P(n, t, e) {
 t
 );
 return { tensors2D: n.map(
-(s) =>
+(s) => g({
 inputs: { x: s },
 backend: e,
 attrs: {
@@ -105,7 +103,7 @@
 ), outShape: a };
 }
 function w(n) {
-const { inputs: t, backend: e, attrs: a } = n, { axis: i } = a, s =
+const { inputs: t, backend: e, attrs: a } = n, { axis: i } = a, s = I(i, t[0].shape)[0], h = t.map((r) => r.shape);
 L(h, s);
 const u = d(
 t.map((r) => r.shape),
@@ -121,7 +119,7 @@ const v = {
 backendName: "webgpu",
 kernelFunc: w
 };
-
+x(v);
 export {
 T as ConcatProgram,
 v as concatConfig
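Note: the strategy visible in this file: every input is reshaped to 2-D (the `g` reshape import), concatenated along the second axis by `ConcatProgram`, and the 2-D result is reshaped back to the real output shape. A minimal sketch of that 2-D step, under those assumptions:

```js
// tensors2D: array of 2-D tensors (each an array of Float32Array rows)
// sharing the same row count; concatenate along axis 1.
function concat2dRef(tensors2D, rows) {
  const width = tensors2D.reduce((s, t) => s + t[0].length, 0);
  const out = Array.from({ length: rows }, () => new Float32Array(width));
  let offset = 0;
  for (const t of tensors2D) {
    for (let r = 0; r < rows; r++) out[r].set(t[r], offset);
    offset += t[0].length;
  }
  return out;
}
```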
package/dist/ops/webgpu/gatherSub.js
CHANGED
@@ -1,8 +1,6 @@
-import { e as u } from "../../webgpu_program-
-import { f as
-import "../../index-
-import { j as s } from "../../tensor-DdQUJZlz.js";
-import { r as c } from "../../tensor_util-DV-FP5Q3.js";
+import { e as u } from "../../webgpu_program-DuOXPQol.js";
+import { f as h, c as p } from "../../webgpu_util-RxEF33Rj.js";
+import { p as c, ab as r } from "../../index-DOvlwCh-.js";
 class l {
 variableNames = ["labels", "logits", "values"];
 outputShape;
@@ -11,8 +9,8 @@ class l {
 dispatch;
 workgroupSize = [64, 1, 1];
 size = !0;
-constructor(
-this.outputShape = [
+constructor(t) {
+this.outputShape = [t], this.dispatchLayout = h(this.outputShape), this.dispatch = p(this.dispatchLayout, this.outputShape, this.workgroupSize);
 }
 getUserCode() {
 return `
@@ -27,15 +25,15 @@ class l {
 `;
 }
 }
-function d(
-const { logits:
-s
+function d(e) {
+const { logits: t, labels: a, values: s } = e.inputs, o = e.backend, i = a.shape[0];
+r(s.shape, [i], "Error in EfficientGatherSub: "), r(a.shape, [i], "Error in EfficientGatherSub: ");
 const n = new l(i);
-return o.runWebGPUProgram(n, [a,
+return o.runWebGPUProgram(n, [a, t, s], "float32");
 }
-const
+const b = {
 kernelName: "EfficientGatherSub",
 backendName: "webgpu",
 kernelFunc: d
 };
-c(
+c(b);
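Note: a hedged guess at the forward computation, based only on the kernel name, its inputs (`labels`, `logits`, `values`) and the `[batch]` output shape: gather each row's label logit and subtract it from that row's value — the shape of a log-sum-exp cross-entropy, `values[i] - logits[i][labels[i]]`. The shader body is elided, so this is a reading, not a transcription:

```js
function gatherSubRef(labels, logits, values, numClasses) {
  // labels, values: length batch; logits: flat batch×numClasses.
  return values.map((v, i) => v - logits[i * numClasses + labels[i]]);
}
```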