npm - @genai-fi/nanogpt - Versions diffs - 0.10.2 → 0.10.3 - Mend

@genai-fi/nanogpt 0.10.2 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (249) hide show

package/dist/Generator.js +11761 -171
package/dist/{RealDiv-zz7FpkKX.js → RealDiv-KAPDe8zB.js} +23 -25
package/dist/Reshape-BYkmUnAv.js +14 -0
package/dist/{Reshape-CHdUjC72.js → Reshape-Zt6eb7yh.js} +18 -20
package/dist/TeachableLLM.js +10 -11
package/dist/{axis_util-BsIr9ZNu.js → axis_util-BaG7mf5A.js} +3 -3
package/dist/backend.js +2 -2
package/dist/{backend_util-B1XRLuq9.js → backend_util-RCe-rHaj.js} +72 -73
package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-DE3ACOLx.js} +45 -47
package/dist/broadcast_to-B3eYlZm7.js +28 -0
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +3 -3
package/dist/checks/gelu.js +2 -2
package/dist/checks/matMulGelu.js +7 -11
package/dist/checks/normRMS.js +9 -9
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/packUnpack.js +2 -2
package/dist/checks/qkv.js +12 -13
package/dist/checks/rope.js +2 -2
package/dist/clip_by_value-BnO7-a88.js +12 -0
package/dist/complex-DjxcVmoX.js +11 -0
package/dist/concat-BV8bt5H-.js +17 -0
package/dist/{concat_util-iBYIyuQe.js → concat_util-DpW8mL_l.js} +1 -1
package/dist/{dataset-D2P7rHAw.js → dataset-BcwmTGYc.js} +137 -139
package/dist/dropout-BcvN9JYi.js +92 -0
package/dist/expand_dims-DT4tEPwA.js +11 -0
package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-Hta_rEnm.js} +1 -1
package/dist/floor-D5QdR_le.js +9 -0
package/dist/gather-D3JcZUaI.js +9 -0
package/dist/{gelu-Bmhopi0J.js → gelu-CjNPL4OH.js} +10 -11
package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-DAOmgtXR.js} +841 -1015
package/dist/{index-DRyE072i.js → index-BwexR4lA.js} +262 -263
package/dist/index-DOvlwCh-.js +3520 -0
package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CCzYdUZg.js} +130 -132
package/dist/layers/BaseLayer.js +15 -16
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +4 -4
package/dist/layers/PositionEmbedding.js +7 -7
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +9 -9
package/dist/layers/TiedEmbedding.js +6 -6
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +13 -14
package/dist/log_sum_exp-ngO0-4pK.js +39 -0
package/dist/main.js +49 -50
package/dist/{matMul16-fEAJ4smh.js → matMul16-BWRSOCWB.js} +14 -15
package/dist/matMulGelu-CzfgT6Wq.js +163 -0
package/dist/mat_mul-SjpJRLyL.js +11 -0
package/dist/mod-AnXEvvpo.js +11 -0
package/dist/models/NanoGPTV1.js +2 -2
package/dist/models/model.js +13 -14
package/dist/ones-D2rT0xk2.js +14 -0
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/add16.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/concat16.js +2 -2
package/dist/ops/cpu/adamAdjust.js +13 -14
package/dist/ops/cpu/adamMoments.js +6 -7
package/dist/ops/cpu/appendCache.js +7 -8
package/dist/ops/cpu/attentionMask.js +7 -7
package/dist/ops/cpu/fusedSoftmax.js +10 -11
package/dist/ops/cpu/gatherSub.js +9 -10
package/dist/ops/cpu/gelu.js +9 -10
package/dist/ops/cpu/matMul16.js +6 -7
package/dist/ops/cpu/matMulGelu.js +5 -6
package/dist/ops/cpu/matMulMul.js +3 -4
package/dist/ops/cpu/mulDropout.js +3 -4
package/dist/ops/cpu/normRMS.js +10 -11
package/dist/ops/cpu/qkv.js +8 -9
package/dist/ops/cpu/rope.js +5 -6
package/dist/ops/cpu/scatterSub.js +17 -19
package/dist/ops/dot16.js +2 -2
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/add16.js +11 -12
package/dist/ops/grads/attentionMask.js +5 -6
package/dist/ops/grads/gelu.js +3 -4
package/dist/ops/grads/matMul16.js +4 -5
package/dist/ops/grads/matMulGelu.js +9 -10
package/dist/ops/grads/normRMS.js +7 -8
package/dist/ops/grads/pack16.js +4 -5
package/dist/ops/grads/qkv.js +17 -19
package/dist/ops/grads/rope.js +3 -5
package/dist/ops/grads/softmax16.js +3 -4
package/dist/ops/grads/unpack16.js +3 -4
package/dist/ops/grads/utils.d.ts +1 -0
package/dist/ops/grads/utils.js +8 -4
package/dist/ops/matMul16.js +3 -3
package/dist/ops/matMulGelu.js +2 -2
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mul16.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/pack16.js +3 -4
package/dist/ops/qkv.js +4 -8
package/dist/ops/reshape16.js +14 -16
package/dist/ops/rope.d.ts +1 -1
package/dist/ops/rope.js +3 -8
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/slice16.js +2 -2
package/dist/ops/softmax16.js +5 -8
package/dist/ops/sub16.js +1 -1
package/dist/ops/sum16.js +2 -2
package/dist/ops/transpose16.js +23 -24
package/dist/ops/unpack16.js +2 -2
package/dist/ops/webgl/adamAdjust.js +2 -3
package/dist/ops/webgl/adamMoments.js +1 -2
package/dist/ops/webgl/appendCache.js +1 -2
package/dist/ops/webgl/attentionMask.js +4 -5
package/dist/ops/webgl/fusedSoftmax.js +4 -6
package/dist/ops/webgl/gatherSub.js +6 -7
package/dist/ops/webgl/gelu.js +2 -3
package/dist/ops/webgl/log.js +11 -12
package/dist/ops/webgl/matMul16.js +10 -11
package/dist/ops/webgl/matMulGelu.js +7 -111
package/dist/ops/webgl/matMulMul.js +9 -10
package/dist/ops/webgl/mulDropout.js +8 -9
package/dist/ops/webgl/normRMS.js +2 -3
package/dist/ops/webgl/qkv.js +5 -6
package/dist/ops/webgl/rope.js +7 -8
package/dist/ops/webgl/scatterSub.js +5 -6
package/dist/ops/webgpu/adamAdjust.js +10 -12
package/dist/ops/webgpu/adamMoments.js +8 -10
package/dist/ops/webgpu/add16.js +8 -9
package/dist/ops/webgpu/appendCache.js +23 -25
package/dist/ops/webgpu/attentionMask.js +8 -10
package/dist/ops/webgpu/attentionMask32_program.js +2 -2
package/dist/ops/webgpu/concat16.js +12 -14
package/dist/ops/webgpu/gatherSub.js +11 -13
package/dist/ops/webgpu/gelu.js +28 -29
package/dist/ops/webgpu/matMul16.js +26 -28
package/dist/ops/webgpu/matMul16_program.js +4 -5
package/dist/ops/webgpu/mul16.js +9 -10
package/dist/ops/webgpu/normRMS.js +15 -17
package/dist/ops/webgpu/normRMSGrad.js +21 -28
package/dist/ops/webgpu/pack16.js +12 -13
package/dist/ops/webgpu/pack16_program.js +2 -2
package/dist/ops/webgpu/qkv.js +16 -18
package/dist/ops/webgpu/rope.js +25 -27
package/dist/ops/webgpu/scatterSub.js +7 -9
package/dist/ops/webgpu/slice16.js +21 -23
package/dist/ops/webgpu/softmax16.js +17 -19
package/dist/ops/webgpu/softmax16_program.js +2 -2
package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
package/dist/ops/webgpu/softmax16grad.js +7 -8
package/dist/ops/webgpu/sub16.js +7 -8
package/dist/ops/webgpu/sum16.js +18 -20
package/dist/ops/webgpu/transpose16.js +19 -20
package/dist/ops/webgpu/transpose16_program.js +2 -2
package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
package/dist/ops/webgpu/unpack16.js +3 -4
package/dist/ops/webgpu/utils/binary_op.js +7 -8
package/dist/ops/webgpu/utils/reductions.js +14 -22
package/dist/ops-B5yanEdW.js +476 -0
package/dist/pack16-nQ6JaLo-.js +39 -0
package/dist/patches/webgpu_backend.js +19 -20
package/dist/patches/webgpu_base.js +1 -1
package/dist/patches/webgpu_program.js +21 -22
package/dist/{random_width-BVV9HveY.js → random_width-or-CEftb.js} +2506 -2761
package/dist/range-BklejeeW.js +10 -0
package/dist/relu-CP0ZcxWO.js +9 -0
package/dist/reshape-ByE68wS9.js +9 -0
package/dist/resize_nearest_neighbor-B19mCEg2.js +175 -0
package/dist/rope-Ir4mTyD1.js +24 -0
package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-lvSiX8q4.js} +1 -1
package/dist/selu_util-kbhpTdYD.js +44 -0
package/dist/{shared-CHhxz-O5.js → shared-DT1TkE6w.js} +1 -1
package/dist/{shared-D2NP_CpY.js → shared-dntlHIDQ.js} +343 -345
package/dist/slice-BfEGSH82.js +12 -0
package/dist/{slice_util-DyjSAD0u.js → slice_util-uTKwiEpW.js} +1 -1
package/dist/{softmax-C9JQEtnO.js → softmax-CA5jFsLR.js} +4 -5
package/dist/split-CVLc0w--.js +9 -0
package/dist/squeeze-C7Z2srUo.js +10 -0
package/dist/stack-Cf4n9h0N.js +11 -0
package/dist/step-CINUs5QB.js +261 -0
package/dist/sum-DWAtNGez.js +11 -0
package/dist/tensor-DJoc7gJU.js +8 -0
package/dist/tensor1d-D11P_7Dp.js +11 -0
package/dist/{tensor2d-CSB4KOb0.js → tensor2d-Bs9wZRc7.js} +6 -7
package/dist/{tensor4d-D7bLqGqz.js → tensor4d-BARPdTaS.js} +6 -7
package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-y1cvNhLA.js} +255 -264
package/dist/tile-mbfagpsB.js +11 -0
package/dist/training/Adam.js +2 -2
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +2 -2
package/dist/training/sparseCrossEntropy.js +5 -5
package/dist/transpose-ClWiBS_b.js +36 -0
package/dist/unsorted_segment_sum-BDDhB_E6.js +277 -0
package/dist/utilities/dummy.js +3 -3
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/packed.d.ts +1 -4
package/dist/utilities/packed.js +10 -745
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/sentences.js +5 -5
package/dist/utilities/weights.js +2 -2
package/dist/{variable-DzfrwYuP.js → variable-WawDEaAb.js} +1 -1
package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-DuOXPQol.js} +178 -172
package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-RxEF33Rj.js} +34 -35
package/dist/zeros-KnWaWf-X.js +13 -0
package/dist/zeros_like-DvE73F4e.js +721 -0
package/package.json +4 -2
package/dist/Reshape-CDVLyVfz.js +0 -16
package/dist/broadcast_to-B0ChcDaz.js +0 -30
package/dist/complex-BBiRlsVq.js +0 -13
package/dist/concat-DmBLPVGC.js +0 -19
package/dist/dropout-B1x1kYMa.js +0 -99
package/dist/expand_dims-ouvfxQ1n.js +0 -13
package/dist/gather-CH9sdacz.js +0 -10
package/dist/index-D6Q1lPZO.js +0 -2157
package/dist/log_sum_exp-D3ftBNY5.js +0 -41
package/dist/mat_mul-C59XWcJd.js +0 -12
package/dist/mod-DESSvHIU.js +0 -12
package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
package/dist/ones-jU9jlQvM.js +0 -15
package/dist/ops-BFDtP6th.js +0 -645
package/dist/pack16-CmVZs6af.js +0 -41
package/dist/patches/PackedTensor.d.ts +0 -12
package/dist/patches/PackedTensor.js +0 -11
package/dist/patches/engine.d.ts +0 -261
package/dist/patches/engine.js +0 -12
package/dist/patches/tape.d.ts +0 -12
package/dist/patches/tape.js +0 -5
package/dist/range-ZZZD60Fx.js +0 -11
package/dist/reciprocal-CrYlsAGD.js +0 -10
package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
package/dist/relu-BYDneVPn.js +0 -10
package/dist/reshape-CaPQzFvz.js +0 -10
package/dist/rope-s4W2XO9B.js +0 -32
package/dist/selu_util-BGPXmd4B.js +0 -303
package/dist/sin-Djs4aQiu.js +0 -16
package/dist/slice-DvovR5wq.js +0 -13
package/dist/split-DBck65sX.js +0 -10
package/dist/squeeze-C00Ipm_7.js +0 -11
package/dist/stack-ChnHwRpX.js +0 -13
package/dist/sum-ywRJj3Zr.js +0 -12
package/dist/tensor-0r5yOo2R.js +0 -8
package/dist/tensor-CzmOBsdf.js +0 -909
package/dist/tensor1d-BlUT89BP.js +0 -12
package/dist/tensor_util-DfwaWayG.js +0 -523
package/dist/tile-CR074jmp.js +0 -13
package/dist/transpose-DH4gmHvu.js +0 -38
package/dist/zeros-DBFVbpv5.js +0 -14

package/dist/{index-DRyE072i.js → index-BwexR4lA.js} RENAMED Viewed

@@ -1,27 +1,26 @@
-import { W as Mt } from "./backend_webgpu-CqpfEImu.js";
-import { f as Et, j, J as ke } from "./index-D6Q1lPZO.js";
-import { i as Ut, a as Ht, c as b, f as v, M as Y, b as at, d as rt, e as nt } from "./webgpu_util-0_ubCEHJ.js";
-import { e as X, a as L, N as Gt, Z as Xt, s as D, l as Ye, b as De, p as te, Y as Kt, g as qt, i as ut, j as Yt, z as dt, f as jt } from "./tensor-CzmOBsdf.js";
+import { W as Mt } from "./backend_webgpu-DE3ACOLx.js";
+import { f as Et, j as X, l as L, de as Ut, df as Ht, bZ as Gt, h as D, a3 as j, aX as Xt, ag as Ye, aQ as Kt, ac as qt, ak as fe, bR as Yt, c9 as jt, ca as Qt, bX as Zt, cQ as Jt, as as es, n as De, af as te, aS as ts, bo as ss, bp as os, bq as is, cb as as, cc as rs, cd as ns, ce as us, cf as ds, cg as ls, am as cs, b7 as hs, br as ps, cA as fs, cR as ms, cS as gs, M as xs, S as Cs, bt as ws, bf as ys, dg as Ss, b9 as bs, ar as vs, bU as ks, bV as Is, i as Rs, b_ as Ps, F as $s, cU as Ds, ap as Ns, H as zs, bv as As, cF as Fs, bw as Ws, cB as Ls, cV as Vs, cC as Bs, bx as Ts, by as _s, bh as Os, bz as Ms, bA as Es, cD as Us, ch as Hs, bB as Gs, cH as Xs, cI as Ks, dh as qs, ci as Ys, cW as js, cX as Qs, di as Zs, c2 as Js, Y as eo, bg as to, aI as so, cY as oo, bC as io, bD as ao, an as ro, I as no, b$ as uo, cr as lo, bi as co, J as ho, c0 as po, dj as fo, ad as at, bu as mo, cG as go, dk as xo, aj as Co, K as wo, at as ke, b1 as yo, b2 as So, cs as bo, cj as vo, ck as ko, cl as Io, aJ as Ro, b3 as Po, b4 as $o, dl as Do, ao as No, b5 as zo, b6 as Ao, bF as Fo, cn as Wo, cm as Lo, c_ as Vo, c1 as Bo, bG as To, cE as _o, c$ as Oo, d0 as Mo, dm as Eo, a$ as Uo, b8 as Ho, co as Go, N as Xo, Q as Ko, dn as qo, aq as Yo, bk as jo, bl as Qo, bH as Zo, d5 as Jo, bI as ei, W as ti, ab as si, bJ as oi, d1 as ii, aK as ai, c3 as ri, a2 as ni, aT as ui, cp as di, P as li, aL as ci, bd as hi, d2 as pi, be as fi, d3 as mi, bL as gi, bj as xi, ba as Ci, bM as wi, ai as yi, dp as Si, a_ as bi, bN as vi, aH as ki, cq as Ii, bO as Ri, bP as Pi, bE as $i, bK as Di, dq as Ni, dr as zi, T as Ai, ax as rt, ds as Fi, Z as Wi, U as Li, c5 as Vi, d4 as Bi, bb as Ti, aM as _i, ct as Oi, dt as Mi, c7 as Ei, cu as Ui, bs as Hi, du as Gi, cv as Xi, bn as Ki, bc as qi, bQ as Yi, p as ji } from "./index-DOvlwCh-.js";
+import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-RxEF33Rj.js";
 import { g as _e, B as F } from "./binary_op_util-pKXltfxI.js";
-import { S as Qt, a as Zt, h as Ce, i as Ne, j as we, d as Q, e as Oe, g as Me, k as lt } from "./selu_util-BGPXmd4B.js";
-import { E as Jt, t as es, u as ts, w as ss, x as os, y as is, f as je, z as ct, A as ht, B as pt, C as as, D as rs, F as ns, G as us, H as ds, I as ls, J as cs, K as hs, L as ps, M as fs, N as ms, O as gs } from "./backend_util-B1XRLuq9.js";
-import { t as W, e as S, h as Z, b as G, c as Ie, P as ft, d as xs, a as Cs } from "./webgpu_program-DzaQiqel.js";
-import { aa as ws, a2 as ys, I as Ss, h as bs, u as fe, a9 as vs, bi as ks, bj as Is, A as Rs, bk as Ps, H as $s, _ as Ds, aM as Ns, aN as zs, aO as As, bl as Fs, bm as Ws, bn as Ls, bp as Vs, bo as Bs, bq as Ts, y as _s, aq as Os, aP as Ms, aQ as Es, br as Us, bs as Hs, B as Gs, f as Xs, aS as Ks, ag as qs, bN as Ys, as as js, F as Qs, x as Zs, aG as Js, a1 as eo, a8 as to, D as so, C as oo, aU as io, be as ao, aV as ro, aW as no, bu as uo, aX as lo, l as co, aY as ho, aw as po, aZ as fo, a_ as mo, a$ as go, bO as xo, b0 as Co, bg as wo, bh as yo, bP as So, bv as bo, bw as vo, bx as ko, bQ as Io, a7 as Ro, g as Po, ai as $o, V as Do, by as No, aH as zo, b1 as Ao, z as Fo, E as Wo, aI as Lo, bR as Vo, ax as Bo, Q as To, a6 as _o, bS as Oo, aT as Mo, bf as Eo, bT as Uo, k as Ho, G as Go, ak as Xo, al as Ko, bU as qo, bz as Yo, bA as jo, bB as Qo, W as Zo, am as Jo, an as ei, bV as ti, L as si, ao as oi, ap as ii, b3 as ai, bW as ri, bE as ni, bD as ui, af as di, b4 as li, b5 as ci, bF as hi, bG as pi, bX as fi, aj as mi, ar as gi, bH as xi, M as Ci, S as wi, i as yi, N as Si, az as bi, aA as vi, b6 as ki, ab as Ii, b7 as Ri, P as Pi, b8 as $i, ac as Di, X as Ni, aJ as zi, R as Ai, $ as Fi, d as Wi, e as Li, Y as Vi, aC as Bi, bI as Ti, aD as _i, bJ as Oi, ba as Mi, ay as Ei, at as Ui, aK as Hi, j as Gi, bY as Xi, ah as Ki, bb as qi, U as Yi, bK as ji, n as Qi, bc as Zi, b2 as Ji, b9 as ea, bZ as ta, b_ as sa, T as oa, b$ as ia, c as aa, ad as ra, bL as na, au as ua, Z as da, c0 as la, c1 as ca, ae as ha, bM as pa, aR as fa, c2 as ma, c3 as ga, aE as xa, av as Ca, bd as wa, r as ya } from "./tensor_util-DfwaWayG.js";
-import { r as R, a as Sa } from "./Reshape-CDVLyVfz.js";
-import { s as ba } from "./shared-D2NP_CpY.js";
-import { c as Ee, a as ye, b as Se, d as Ue, e as va, g as mt } from "./axis_util-BsIr9ZNu.js";
-import { p as ka, a as Ia, b as Ra, d as Pa } from "./slice_util-DyjSAD0u.js";
-import { z as $a } from "./zeros-DBFVbpv5.js";
-import { c as me, a as Da } from "./concat_util-iBYIyuQe.js";
+import { S as Ji, a as ea } from "./selu_util-kbhpTdYD.js";
+import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga, N as xa, O as Ca } from "./backend_util-RCe-rHaj.js";
+import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as wa, a as ya } from "./webgpu_program-DuOXPQol.js";
+import { r as R, a as Sa } from "./Reshape-BYkmUnAv.js";
+import { s as ba } from "./shared-dntlHIDQ.js";
+import { c as Oe, a as Ce, b as we, d as Me, e as va, g as ft } from "./axis_util-BaG7mf5A.js";
+import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-CINUs5QB.js";
+import { p as ka, a as Ia, b as Ra, d as Pa } from "./slice_util-uTKwiEpW.js";
+import { z as $a } from "./zeros-KnWaWf-X.js";
+import { c as me, a as Da } from "./concat_util-DpW8mL_l.js";
 import { n as Na, a as za } from "./non_max_suppression_impl-B2W7YjZB.js";
-import { c as He } from "./scatter_nd_util-C7zXRT_h.js";
-Ut() && Et(
+import { c as He } from "./scatter_nd_util-lvSiX8q4.js";
+Qi() && Et(
   "webgpu",
   async () => {
     const o = {
       powerPreference: X().get("WEBGPU_USE_LOW_POWER_GPU") ? "low-power" : "high-performance"
     }, t = await navigator.gpu.requestAdapter(o), e = {}, i = [];
-    t.features.has("timestamp-query") && i.push("timestamp-query"), t.features.has("bgra8unorm-storage") && i.push(["bgra8unorm-storage"]), e.requiredFeatures = i;
+    t.features.has("timestamp-query") && i.push("timestamp-query"), t.features.has("bgra8unorm-storage") && i.push(["bgra8unorm-storage"]), t.features.has("subgroups") && i.push("subgroups"), e.requiredFeatures = i;
     const s = t.limits;
     e.requiredLimits = {
       maxComputeWorkgroupStorageSize: s.maxComputeWorkgroupStorageSize,
@@ -94,12 +93,12 @@ const Aa = "return abs(a);", Fa = `
   // Error function is calculated approximately with elementary function.
   // See "Handbook of Mathematical Functions with Formulas,
   // Graphs, and Mathematical Tables", Abramowitz and Stegun.
-  let p = ${Jt};
-  let a1 = ${es};
-  let a2 = ${ts};
-  let a3 = ${ss};
-  let a4 = ${os};
-  let a5 = ${is};
+  let p = ${ta};
+  let a1 = ${sa};
+  let a2 = ${oa};
+  let a3 = ${ia};
+  let a4 = ${aa};
+  let a5 = ${ra};
   let sign = sign(a);
   let absA = abs(a);
@@ -116,9 +115,9 @@ const Aa = "return abs(a);", Fa = `
   return select(a, vec4<f32>(0.0), a < vec4<f32>(0.0));
 `, dr = "return round(a);", lr = "return inverseSqrt(a);", cr = `
   if (a >= 0.0) {
-    return ${Qt} * a;
+    return ${Ji} * a;
   } else {
-    return ${Zt} * (exp(a) - 1.0);
+    return ${ea} * (exp(a) - 1.0);
   }
 `, hr = "return 1.0 / (1.0 + exp(-1.0 * a));", pr = "return sign(a);", fr = "return sin(a);", mr = `
   let e2x = exp(a);
@@ -604,7 +603,7 @@ class $r {
     if (this.isVec4 = (d % 4 === 0 && !i || e[1] % 4 === 0 && i) && e[2] % 4 === 0 && !s, this.outputComponent = this.isVec4 ? 4 : 1, this.isVectorA = e[1] === 1 && !i, !this.isVec4 && this.isVectorA)
       this.elementsPerThread = [1, 1, 1], this.workgroupSize = [32, 1, 1];
     else {
-      const c = Ht(e[1], d, e[2], i);
+      const c = Zi(e[1], d, e[2], i);
       this.workgroupSize = c.workgroupSize, this.elementsPerThread = c.elementsPerThread;
     }
     this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread);
@@ -773,7 +772,7 @@ class Fr {
           // The problem is that we should initialize output to zero before using.
           // Otherwise, the original value will be added to the result.
           for (var i = 0; i < ${t}; i = i + 1) {
-            ${Z("&result[flatIndex + i]", `${t > 1 ? "value[i]" : "value"}`, "float32")}
+            ${Q("&result[flatIndex + i]", `${t > 1 ? "value[i]" : "value"}`, "float32")}
           }
         }
       }
@@ -816,8 +815,8 @@ class Lr {
 function M(o) {
   const { backend: t, attrs: e } = o, { shape: i, value: s } = e;
   let { dtype: a } = e;
-  if (a = a || Gt(s), a === "string") {
-    const r = Xt(a, D(i));
+  if (a = a || Ht(s), a === "string") {
+    const r = Gt(a, D(i));
     return r.fill(s), t.makeTensorInfo(i, a, r);
   } else {
     const r = new Lr(i), n = [{ type: "float32", data: [s] }];
@@ -825,7 +824,7 @@ function M(o) {
   }
 }
 const Vr = {
-  kernelName: ws,
+  kernelName: Ut,
   backendName: "webgpu",
   kernelFunc: M
 };
@@ -897,7 +896,7 @@ function Br(o) {
   });
 }
 const Tr = {
-  kernelName: ys,
+  kernelName: Xt,
   backendName: "webgpu",
   kernelFunc: Br
 };
@@ -983,7 +982,7 @@ function U(o) {
   return o.backend.incRef(e.dataId), { dataId: e.dataId, shape: e.shape, dtype: e.dtype };
 }
 const _r = {
-  kernelName: Ss,
+  kernelName: Kt,
   backendName: "webgpu",
   kernelFunc: U
 };
@@ -992,7 +991,7 @@ function oe(o) {
   return r.complexTensorInfos = { real: n, imag: u }, a;
 }
 const Or = {
-  kernelName: bs,
+  kernelName: qt,
   backendName: "webgpu",
   kernelFunc: oe
 };
@@ -1094,22 +1093,22 @@ function V({ opType: o, cpuKernelImpl: t, supportsComplex: e = !1, dtype: i }) {
 }
 const { addImpl: Mr, castImpl: Er, ceilImpl: Ur, concatImpl: Hr, equalImpl: Gr, expImpl: Xr, expm1Impl: Kr, floorImpl: qr, floorDivImpl: Yr, gatherNdImpl: jr, gatherV2Impl: Qr, greaterEqualImpl: Zr, greaterImpl: Jr, lessEqualImpl: en, lessImpl: tn, logImpl: sn, maxImpl: on, maximumImpl: an, minimumImpl: rn, multiplyImpl: nn, negImpl: un, notEqualImpl: dn, prodImpl: ln, rangeImpl: cn, rsqrtImpl: hn, scatterImpl: pn, simpleAbsImpl: fn, sliceImpl: mn, stridedSliceImpl: gn, stringNGramsImpl: xn, subImpl: Cn, tileImpl: wn, topKImpl: yn, transposeImpl: Sn } = ba;
 const bn = N({ opType: y.ABS, cpuKernelImpl: fn }), vn = {
-  kernelName: vs,
+  kernelName: Yt,
   backendName: "webgpu",
   kernelFunc: bn
 };
 const kn = N({ opType: y.ACOS }), In = {
-  kernelName: ks,
+  kernelName: jt,
   backendName: "webgpu",
   kernelFunc: kn
 };
 const Rn = N({ opType: y.ACOSH }), Pn = {
-  kernelName: Is,
+  kernelName: Qt,
   backendName: "webgpu",
   kernelFunc: Rn
 };
 const $n = V({ opType: F.ADD, cpuKernelImpl: Mr, supportsComplex: !0 }), Dn = {
-  kernelName: Rs,
+  kernelName: Zt,
   backendName: "webgpu",
   kernelFunc: $n
 };
@@ -1146,7 +1145,7 @@ function zn(o) {
   return e.runWebGPUProgram(r, i, s);
 }
 const An = {
-  kernelName: Ps,
+  kernelName: Jt,
   backendName: "webgpu",
   kernelFunc: zn
 };
@@ -1232,14 +1231,14 @@ function K(o) {
   return r.runWebGPUProgram(d, [s], s.dtype);
 }
 const Ln = {
-  kernelName: $s,
+  kernelName: es,
   backendName: "webgpu",
   kernelFunc: K
 };
 class Vn {
   constructor(t, e, i) {
     this.variableNames = ["x"], this.uniforms = "reduceSize : i32,", this.size = !0, this.inputShape = [t.batchSize, t.inSize];
-    const [s] = Ee(this.inputShape, [1]);
+    const [s] = Oe(this.inputShape, [1]);
     this.outputShape = s.length === 0 ? [1] : s, t.inSize >= 32768 && i >= 512 ? this.workgroupSize = [512, 1, 1] : t.inSize >= 4096 ? this.workgroupSize = [256, 1, 1] : this.workgroupSize = [64, 1, 1], this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, [1, 1, 1]), this.reduceType = e, this.shaderKey = `reduce_${e}`;
   }
   getUserCode() {
@@ -1309,10 +1308,10 @@ const Bn = {
 function ie(o, t, e, i, s) {
   const a = o.shape.length, r = [], n = te(t, o.shape);
   let u = n;
-  const d = ye(u, a);
+  const d = Ce(u, a);
   let h = o;
-  d != null && (h = K({ inputs: { x: o }, attrs: { perm: d }, backend: s }), u = Se(u.length, a), r.push(h)), Ue(i, u, a);
-  const [l, c] = Ee(h.shape, u);
+  d != null && (h = K({ inputs: { x: o }, attrs: { perm: d }, backend: s }), u = we(u.length, a), r.push(h)), Me(i, u, a);
+  const [l, c] = Oe(h.shape, u);
   let p = l;
   e && (p = va(l, n));
   let f;
@@ -1331,7 +1330,7 @@ function ie(o, t, e, i, s) {
         throw new Error(`${i} CPU implementation is not yet supported.`);
     }
   } else {
-    const m = D(c), x = D(h.shape) / m, C = { windowSize: m, inSize: m, batchSize: x, outSize: 1 }, w = Bn[i] || Ds(o.dtype), k = [
+    const m = D(c), x = D(h.shape) / m, C = { windowSize: m, inSize: m, batchSize: x, outSize: 1 }, w = Bn[i] || ts(o.dtype), k = [
       { type: "int32", data: [m] }
     ], I = new Vn(C, i, s.device.limits.maxComputeWorkgroupSizeX), P = s.runWebGPUProgram(I, [h], w, k);
     r.push(P), f = R({ inputs: { x: P }, attrs: { shape: p }, backend: s });
@@ -1343,7 +1342,7 @@ function Tn(o) {
   return ie(s, r, a, "all", e);
 }
 const _n = {
-  kernelName: Ns,
+  kernelName: ss,
   backendName: "webgpu",
   kernelFunc: Tn
 };
@@ -1352,7 +1351,7 @@ function On(o) {
   return ie(s, r, a, "any", e);
 }
 const Mn = {
-  kernelName: zs,
+  kernelName: os,
   backendName: "webgpu",
   kernelFunc: On
 };
@@ -1361,7 +1360,7 @@ class Ct {
     this.workgroupSize = [64, 1, 1], this.variableNames = ["x"], this.uniforms = "infinityValue : f32,", this.size = !0;
     const s = [e];
     this.op = i === "min" ? "<" : ">";
-    const [a, r] = Ee(t, s);
+    const [a, r] = Oe(t, s);
     this.outputShape = a.length === 0 ? [1] : a, this.dispatchLayout = v(this.outputShape), D(r) < 32 ? (this.type = "plain", this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize)) : (this.type = "shared", this.dispatch = b(this.dispatchLayout, this.outputShape, [1, 1, 1])), this.inputShape = t, this.shaderKey = `argMinMax_${this.op}_${this.type}`;
   }
   getUserCode() {
@@ -1446,55 +1445,55 @@ class Ct {
 function En(o) {
   const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { axis: a } = i;
   let r = te(a, s.shape);
-  const n = ye(r, s.shape.length);
+  const n = Ce(r, s.shape.length);
   let u = s;
   const d = [];
-  n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r = Se(r.length, u.shape.length)), Ue("argMax", [r[0]], u.shape.length);
+  n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r = we(r.length, u.shape.length)), Me("argMax", [r[0]], u.shape.length);
   const h = new Ct(u.shape, r[0], "max"), l = [{ type: "float32", data: [Number.NEGATIVE_INFINITY] }], c = e.runWebGPUProgram(h, [u], "int32", l);
   return d.forEach((p) => e.disposeData(p.dataId)), c;
 }
 const Un = {
-  kernelName: As,
+  kernelName: is,
   backendName: "webgpu",
   kernelFunc: En
 };
 function Hn(o) {
   const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { axis: a } = i;
   let r = te(a, s.shape);
-  const n = ye(r, s.shape.length);
+  const n = Ce(r, s.shape.length);
   let u = s;
   const d = [];
-  n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r = Se(r.length, u.shape.length)), Ue("argMin", [r[0]], u.shape.length);
+  n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r = we(r.length, u.shape.length)), Me("argMin", [r[0]], u.shape.length);
   const h = new Ct(u.shape, r[0], "min"), l = [{ type: "float32", data: [Number.POSITIVE_INFINITY] }], c = e.runWebGPUProgram(h, [u], "int32", l);
   return d.forEach((p) => e.disposeData(p.dataId)), c;
 }
 const Gn = {
-  kernelName: Fs,
+  kernelName: as,
   backendName: "webgpu",
   kernelFunc: Hn
 };
 const Xn = N({ opType: y.ASIN }), Kn = {
-  kernelName: Ws,
+  kernelName: rs,
   backendName: "webgpu",
   kernelFunc: Xn
 };
 const qn = N({ opType: y.ASINH }), Yn = {
-  kernelName: Ls,
+  kernelName: ns,
   backendName: "webgpu",
   kernelFunc: qn
 };
 const jn = N({ opType: y.ATAN }), Qn = {
-  kernelName: Vs,
+  kernelName: us,
   backendName: "webgpu",
   kernelFunc: jn
 };
 const Zn = V({ opType: F.ATAN2 }), Jn = {
-  kernelName: Bs,
+  kernelName: ds,
   backendName: "webgpu",
   kernelFunc: Zn
 };
 const eu = N({ opType: y.ATANH }), tu = {
-  kernelName: Ts,
+  kernelName: ls,
   backendName: "webgpu",
   kernelFunc: eu
 };
@@ -1642,7 +1641,7 @@ function wt(o) {
   return ie(s, a, r, "max", e);
 }
 const ou = {
-  kernelName: _s,
+  kernelName: cs,
   backendName: "webgpu",
   kernelFunc: wt
 };
@@ -1651,7 +1650,7 @@ function yt(o) {
   return ie(s, r, a, "mean", e);
 }
 const iu = {
-  kernelName: Os,
+  kernelName: hs,
   backendName: "webgpu",
   kernelFunc: yt
 };
@@ -1690,11 +1689,11 @@ function St(o, t, e, i) {
   })), i.runWebGPUProgram(s, [o], o.dtype, a);
 }
 function au(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h = Ce(s.shape, a, r, 1, n, u);
+  const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h = ye(s.shape, a, r, 1, n, u);
   return St(s, h, "avg", e);
 }
 const ru = {
-  kernelName: Ms,
+  kernelName: ps,
   backendName: "webgpu",
   kernelFunc: au
 };
@@ -1724,7 +1723,7 @@ function nu(o) {
   return e.runWebGPUProgram(c, [s], s.dtype, p);
 }
 const uu = {
-  kernelName: Es,
+  kernelName: fs,
   backendName: "webgpu",
   kernelFunc: nu
 };
@@ -1862,14 +1861,14 @@ function cu(o) {
   return e.runWebGPUProgram(c, [s], r.dtype, f);
 }
 const hu = {
-  kernelName: Us,
+  kernelName: ms,
   backendName: "webgpu",
   kernelFunc: cu
 };
 function pu(o) {
   const { inputs: t, backend: e, attrs: i } = o, { dy: s, input: a } = t, r = a;
-  at([s, a], "avgPoolGrad");
-  const { filterSize: n, strides: u, pad: d } = i, h = Ce(r.shape, n, u, 1, d), l = new du(h), c = 1 / (h.filterHeight * h.filterWidth), p = [
+  nt([s, a], "avgPoolGrad");
+  const { filterSize: n, strides: u, pad: d } = i, h = ye(r.shape, n, u, 1, d), l = new du(h), c = 1 / (h.filterHeight * h.filterWidth), p = [
     { type: "int32", data: [h.strideHeight, h.strideWidth] },
     {
       type: "int32",
@@ -1890,7 +1889,7 @@ function pu(o) {
   return e.runWebGPUProgram(l, [s], r.dtype, p);
 }
 const fu = {
-  kernelName: Hs,
+  kernelName: gs,
   backendName: "webgpu",
   kernelFunc: pu
 };
@@ -1899,7 +1898,7 @@ function mu(o) {
   return Fe({ a: s, b: a, transposeA: r, transposeB: n, backend: e });
 }
 const gu = {
-  kernelName: Gs,
+  kernelName: xs,
   backendName: "webgpu",
   kernelFunc: mu
 };
@@ -1943,14 +1942,14 @@ function de(o) {
   return e.runWebGPUProgram(d, [s], s.dtype, h);
 }
 const wu = {
-  kernelName: Xs,
+  kernelName: Cs,
   backendName: "webgpu",
   kernelFunc: de
 };
 const yu = (o) => {
   const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { blockShape: a, crops: r } = i;
   L(s.shape.length <= 4, () => "batchToSpaceND for rank > 4 with a WebGPU backend not implemented yet");
-  const n = a.reduce((C, w) => C * w), u = ct(s.shape, a, n), d = ht(u.length, a.length), h = pt(s.shape, a, n), l = as(r, a.length), c = rs(h, r, a.length), p = [], f = R({ inputs: { x: s }, backend: e, attrs: { shape: u } }), m = K({ inputs: { x: f }, backend: e, attrs: { perm: d } }), g = R({
+  const n = a.reduce((C, w) => C * w), u = lt(s.shape, a, n), d = ct(u.length, a.length), h = ht(s.shape, a, n), l = na(r, a.length), c = ua(h, r, a.length), p = [], f = R({ inputs: { x: s }, backend: e, attrs: { shape: u } }), m = K({ inputs: { x: f }, backend: e, attrs: { perm: d } }), g = R({
     inputs: { x: m },
     backend: e,
     attrs: { shape: h }
@@ -1961,13 +1960,13 @@ const yu = (o) => {
   });
   return p.push(f), p.push(m), p.push(g), p.forEach((C) => e.disposeData(C.dataId)), x;
 }, Su = {
-  kernelName: Ks,
+  kernelName: ws,
   backendName: "webgpu",
   kernelFunc: yu
 };
 const bu = `
   fn bincount_write(index: i32, value: f32) {
-    ${Z("&result[index]", "value", "float32")}
+    ${Q("&result[index]", "value", "float32")}
   }
 `, vu = `
   fn bincount_write(index: i32, value: f32) {
@@ -2005,7 +2004,7 @@ function ku(o) {
   return e.runWebGPUProgram(p, m, l, f, c);
 }
 const Iu = {
-  kernelName: qs,
+  kernelName: ys,
   backendName: "webgpu",
   kernelFunc: ku
 };
@@ -2052,7 +2051,7 @@ function Pu(o) {
   return e.runWebGPUProgram(u, [i, s], "int32", d);
 }
 const $u = {
-  kernelName: Ys,
+  kernelName: Ss,
   backendName: "webgpu",
   kernelFunc: Pu
 };
@@ -2061,7 +2060,7 @@ const vt = V({
   dtype: "bool",
   cpuKernelImpl: dn
 }), Du = {
-  kernelName: js,
+  kernelName: bs,
   backendName: "webgpu",
   kernelFunc: vt
 };
@@ -2070,7 +2069,7 @@ function be(o) {
   return U({ inputs: { x: s.complexTensorInfos.real }, backend: e });
 }
 const Nu = {
-  kernelName: Qs,
+  kernelName: vs,
   backendName: "webgpu",
   kernelFunc: be
 };
@@ -2090,7 +2089,7 @@ function Be(o) {
     const r = be({ inputs: { input: s }, backend: e }), n = Be({ inputs: { x: r }, backend: e, attrs: { dtype: a } });
     return e.disposeData(r.dataId), n;
   }
-  if (!Kt(s.dtype, a)) {
+  if (!Is(s.dtype, a)) {
     const r = U({ inputs: { x: s }, backend: e });
     return { dataId: r.dataId, shape: r.shape, dtype: a };
   }
@@ -2101,18 +2100,18 @@ function Be(o) {
   if (a === "int32")
     return zu(s, e);
   if (a === "bool") {
-    const r = e.makeTensorInfo([], "bool", qt("bool", 1)), u = vt({ inputs: { a: s, b: r }, backend: e });
+    const r = e.makeTensorInfo([], "bool", Rs("bool", 1)), u = vt({ inputs: { a: s, b: r }, backend: e });
     return e.disposeData(r.dataId), u;
   }
   throw new Error(`Error in Cast: failed to cast ${s.dtype} to ${a}`);
 }
 const Au = {
-  kernelName: Zs,
+  kernelName: ks,
   backendName: "webgpu",
   kernelFunc: Be
 };
 const Fu = N({ opType: y.CEIL, cpuKernelImpl: Ur }), Wu = {
-  kernelName: Js,
+  kernelName: Ps,
   backendName: "webgpu",
   kernelFunc: Fu
 };
@@ -2163,7 +2162,7 @@ function Bu(o) {
   return D(s.shape) % 4 === 0 ? n = new Lu(s.shape) : n = new Vu(s.shape), e.runWebGPUProgram(n, [s], s.dtype, u);
 }
 const Tu = {
-  kernelName: eo,
+  kernelName: $s,
   backendName: "webgpu",
   kernelFunc: Bu
 };
@@ -2202,7 +2201,7 @@ function Ou(o) {
   return e.runWebGPUProgram(a, r, r[0].dtype);
 }
 const Mu = {
-  kernelName: to,
+  kernelName: Ds,
   backendName: "webgpu",
   kernelFunc: Ou
 };
@@ -2249,7 +2248,7 @@ function We(o) {
   return U({ inputs: { x: s.complexTensorInfos.imag }, backend: e });
 }
 const Uu = {
-  kernelName: so,
+  kernelName: Ns,
   backendName: "webgpu",
   kernelFunc: We
 };
@@ -2317,7 +2316,7 @@ function kt(o) {
   return u.length === 1 ? U({ inputs: { x: u[0] }, backend: e }) : he(u, a, e);
 }
 const Gu = {
-  kernelName: oo,
+  kernelName: zs,
   backendName: "webgpu",
   kernelFunc: kt
 };
@@ -2411,7 +2410,7 @@ function Xu(o, t, e, i, s = !1, a = null, r = !1, n = 4, u = 4, d = 4) {
 }
 class Ku {
   constructor(t, e, i, s, a = !1, r = null, n = !1, u = !1) {
-    this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, dilations : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.outShape, this.isChannelsLast = t.dataFormat === "channelsLast", this.isVec4 = ((t.inChannels % 4 === 0 || t.inChannels % 3 === 0) && this.isChannelsLast || t.outWidth % 4 === 0 && !this.isChannelsLast) && t.outChannels % 4 === 0, this.dispatchLayout = this.isChannelsLast ? { x: [3], y: [1, 2], z: [0] } : { x: [2, 3], y: [1], z: [0] }, this.workgroupSize = rt(this.dispatchLayout, this.outputShape, this.isVec4), this.elementsPerThread = nt(this.dispatchLayout, this.outputShape, this.isVec4), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread), this.isVec4 ? (this.outputComponent = 4, this.isChannelsLast && t.inChannels % 4 !== 0 ? (this.innerElementSize = 3, this.variableComponents = [1, 4]) : (this.innerElementSize = 4, this.variableComponents = [4, 4]), a && (this.variableNames.push("bias"), this.variableComponents.push(4)), n && (this.variableNames.push("preluActivationWeights"), this.variableComponents.push(4))) : (this.innerElementSize = this.elementsPerThread[0], a && this.variableNames.push("bias"), n && this.variableNames.push("preluActivationWeights")), this.sequentialAccessByThreads = u, this.addBias = a, this.activation = r, this.hasPreluActivationWeights = n, this.tileAOuter = this.workgroupSize[1] * this.elementsPerThread[1], this.tileBOuter = this.workgroupSize[0] * this.elementsPerThread[0], this.tileInner = Math.max(this.workgroupSize[0] * this.innerElementSize, this.workgroupSize[1]), this.fitAOuter = e % this.tileAOuter === 0, this.fitBOuter = i % this.tileBOuter === 0, this.fitInner = s % this.tileInner === 0, this.shaderKey = `conv2DMM_${this.elementsPerThread}_${this.activation}}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.innerElementSize}_${this.isChannelsLast}_${this.sequentialAccessByThreads}`;
+    this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, dilations : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.outShape, this.isChannelsLast = t.dataFormat === "channelsLast", this.isVec4 = ((t.inChannels % 4 === 0 || t.inChannels % 3 === 0) && this.isChannelsLast || t.outWidth % 4 === 0 && !this.isChannelsLast) && t.outChannels % 4 === 0, this.dispatchLayout = this.isChannelsLast ? { x: [3], y: [1, 2], z: [0] } : { x: [2, 3], y: [1], z: [0] }, this.workgroupSize = ut(this.dispatchLayout, this.outputShape, this.isVec4), this.elementsPerThread = dt(this.dispatchLayout, this.outputShape, this.isVec4), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread), this.isVec4 ? (this.outputComponent = 4, this.isChannelsLast && t.inChannels % 4 !== 0 ? (this.innerElementSize = 3, this.variableComponents = [1, 4]) : (this.innerElementSize = 4, this.variableComponents = [4, 4]), a && (this.variableNames.push("bias"), this.variableComponents.push(4)), n && (this.variableNames.push("preluActivationWeights"), this.variableComponents.push(4))) : (this.innerElementSize = this.elementsPerThread[0], a && this.variableNames.push("bias"), n && this.variableNames.push("preluActivationWeights")), this.sequentialAccessByThreads = u, this.addBias = a, this.activation = r, this.hasPreluActivationWeights = n, this.tileAOuter = this.workgroupSize[1] * this.elementsPerThread[1], this.tileBOuter = this.workgroupSize[0] * this.elementsPerThread[0], this.tileInner = Math.max(this.workgroupSize[0] * this.innerElementSize, this.workgroupSize[1]), this.fitAOuter = e % this.tileAOuter === 0, this.fitBOuter = i % this.tileBOuter === 0, this.fitInner = s % this.tileInner === 0, this.shaderKey = `conv2DMM_${this.elementsPerThread}_${this.activation}}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.innerElementSize}_${this.isChannelsLast}_${this.sequentialAccessByThreads}`;
   }
   getUserCode() {
     const t = this.isVec4 ? ze(this.elementsPerThread, this.workgroupSize, !this.isChannelsLast, this.tileInner) : Ae(this.elementsPerThread, this.workgroupSize, !this.isChannelsLast, this.tileInner, !1, null, this.sequentialAccessByThreads), e = this.isVec4 ? [this.innerElementSize, 4, 4] : [1, 1, 1];
@@ -2675,11 +2674,11 @@ function It({ x: o, filter: t, convInfo: e, backend: i, bias: s = null, preluAct
   return I;
 }
 function Zu(o) {
-  const { inputs: t, attrs: e, backend: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = e, l = we(u), c = Q(s.shape, a.shape, r, d, n, h, !1, l);
+  const { inputs: t, attrs: e, backend: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = e, l = Se(u), c = Z(s.shape, a.shape, r, d, n, h, !1, l);
   return It({ x: s, filter: a, convInfo: c, backend: i });
 }
 const Ju = {
-  kernelName: io,
+  kernelName: As,
   backendName: "webgpu",
   kernelFunc: Zu
 };
@@ -2996,7 +2995,7 @@ class od {
   }
 }
 function id(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, dataFormat: u, dimRoundingMode: d, filterShape: h } = i, l = we(u), c = Q(s.shape, h, r, 1, n, d, !1, l), p = new td(c), f = [
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, dataFormat: u, dimRoundingMode: d, filterShape: h } = i, l = Se(u), c = Z(s.shape, h, r, 1, n, d, !1, l), p = new td(c), f = [
     { type: "int32", data: [c.padInfo.top, c.padInfo.left] },
     { type: "int32", data: [c.strideHeight, c.strideWidth] },
     { type: "int32", data: [c.batchSize] },
@@ -3008,7 +3007,7 @@ function id(o) {
   return e.runWebGPUProgram(p, [s, a], s.dtype, f);
 }
 const ad = {
-  kernelName: ao,
+  kernelName: Fs,
   backendName: "webgpu",
   kernelFunc: id
 };
@@ -3087,7 +3086,7 @@ function rd(o = 4) {
 }
 class nd {
   constructor(t) {
-    this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.inShape, L(t.dataFormat === "channelsLast", () => "TODO: NCHW is unimplemented"), this.isVec4 = t.inChannels % 4 === 0 && t.outChannels % 4 === 0, this.dispatchLayout = { x: [3], y: [1, 2], z: [0] }, this.workgroupSize = rt(this.dispatchLayout, this.outputShape, this.isVec4), this.elementsPerThread = nt(this.dispatchLayout, this.outputShape, this.isVec4), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread), this.isVec4 && (this.outputComponent = 4, this.variableComponents = [4, 1]), this.shaderKey = `conv2DDerInputMM_${this.isVec4}_${this.elementsPerThread}`;
+    this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.inShape, L(t.dataFormat === "channelsLast", () => "TODO: NCHW is unimplemented"), this.isVec4 = t.inChannels % 4 === 0 && t.outChannels % 4 === 0, this.dispatchLayout = { x: [3], y: [1, 2], z: [0] }, this.workgroupSize = ut(this.dispatchLayout, this.outputShape, this.isVec4), this.elementsPerThread = dt(this.dispatchLayout, this.outputShape, this.isVec4), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread), this.isVec4 && (this.outputComponent = 4, this.variableComponents = [4, 1]), this.shaderKey = `conv2DDerInputMM_${this.isVec4}_${this.elementsPerThread}`;
   }
   getUserCode() {
     const t = this.isVec4 ? ze(this.elementsPerThread, this.workgroupSize) : Ae(this.elementsPerThread, this.workgroupSize);
@@ -3098,7 +3097,7 @@ class nd {
   }
 }
 function ud(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { inputShape: r, strides: n, pad: u, dataFormat: d, dimRoundingMode: h } = i, l = we(d), c = Q(r, a.shape, n, 1, u, h, !1, l), p = [
+  const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { inputShape: r, strides: n, pad: u, dataFormat: d, dimRoundingMode: h } = i, l = Se(d), c = Z(r, a.shape, n, 1, u, h, !1, l), p = [
     { type: "int32", data: [c.filterHeight, c.filterWidth] },
     {
       type: "int32",
@@ -3129,7 +3128,7 @@ function ud(o) {
   return e.runWebGPUProgram(f, [s, a], "float32", p);
 }
 const dd = {
-  kernelName: ro,
+  kernelName: Ws,
   backendName: "webgpu",
   kernelFunc: ud
 };
@@ -3224,7 +3223,7 @@ class ld {
   }
 }
 function cd(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d = Oe(s.shape, a.shape, r, u, n), h = [d.padInfo.front, d.padInfo.top, d.padInfo.left], l = [
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d = Ee(s.shape, a.shape, r, u, n), h = [d.padInfo.front, d.padInfo.top, d.padInfo.left], l = [
     {
       type: "int32",
       data: [d.filterDepth, d.filterHeight, d.filterWidth]
@@ -3246,12 +3245,12 @@ function cd(o) {
   return e.runWebGPUProgram(c, [s, a], p, l);
 }
 const hd = {
-  kernelName: no,
+  kernelName: Ls,
   backendName: "webgpu",
   kernelFunc: cd
 };
 function pd(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, filterShape: u } = i, d = Oe(s.shape, u, r, 1, n), h = new sd(d), l = [
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, filterShape: u } = i, d = Ee(s.shape, u, r, 1, n), h = new sd(d), l = [
     {
       type: "int32",
       data: [d.padInfo.front, d.padInfo.top, d.padInfo.left]
@@ -3271,12 +3270,12 @@ function pd(o) {
   return e.runWebGPUProgram(h, [s, a], a.dtype, l);
 }
 const fd = {
-  kernelName: uo,
+  kernelName: Vs,
   backendName: "webgpu",
   kernelFunc: pd
 };
 function md(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, pad: n, inputShape: u } = i, d = Oe(u, a.shape, r, 1, n), h = new od(d), l = [
+  const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, pad: n, inputShape: u } = i, d = Ee(u, a.shape, r, 1, n), h = new od(d), l = [
     {
       type: "int32",
       data: [d.filterDepth, d.filterHeight, d.filterWidth]
@@ -3301,17 +3300,17 @@ function md(o) {
   return e.runWebGPUProgram(h, [s, a], s.dtype, l);
 }
 const gd = {
-  kernelName: lo,
+  kernelName: Bs,
   backendName: "webgpu",
   kernelFunc: md
 };
 const xd = N({ opType: y.COS }), Cd = {
-  kernelName: co,
+  kernelName: Ts,
   backendName: "webgpu",
   kernelFunc: xd
 };
 const wd = N({ opType: y.COSH }), yd = {
-  kernelName: ho,
+  kernelName: _s,
   backendName: "webgpu",
   kernelFunc: wd
 };
@@ -3402,7 +3401,7 @@ const bd = (o) => {
   const { inputs: t, backend: e, attrs: i } = o, { image: s, boxes: a, boxInd: r } = t, { cropSize: n, method: u, extrapolationValue: d } = i, h = new Sd(s.shape[3], a.shape, n, u), l = [{ type: "float32", data: [d] }];
   return e.runWebGPUProgram(h, [s, a, r], "float32", l);
 }, vd = {
-  kernelName: po,
+  kernelName: Os,
   backendName: "webgpu",
   kernelFunc: bd
 };
@@ -3459,10 +3458,10 @@ function st(o, t, e) {
   throw Error(`Cumulative ${e} for rank ${o} is not yet supported`);
 }
 function Rt(o, t, e, i, s, a) {
-  const r = t.shape.length, n = ye([i], r);
+  const r = t.shape.length, n = Ce([i], r);
   let u = t;
   n != null && (u = K({ inputs: { x: t }, backend: e, attrs: { perm: n } }));
-  const d = Se(1, r)[0];
+  const d = we(1, r)[0];
   if (d !== r - 1)
     throw new Error(`WebGPU cumprod shader expects an inner-most axis=${t.shape.length - 1} but got axis=${i}`);
   const h = u.shape[d];
@@ -3476,7 +3475,7 @@ function Rt(o, t, e, i, s, a) {
     l = e.runWebGPUProgram(c, [l], l.dtype, f), e.disposeData(p.dataId);
   }
   if (n != null) {
-    const c = mt(n), p = K({ inputs: { x: l }, backend: e, attrs: { perm: c } });
+    const c = ft(n), p = K({ inputs: { x: l }, backend: e, attrs: { perm: c } });
     return e.disposeData(l.dataId), e.disposeData(u.dataId), p;
   }
   return l;
@@ -3486,7 +3485,7 @@ function kd(o) {
   return Rt(xe.Prod, s, e, a, r, n);
 }
 const Id = {
-  kernelName: fo,
+  kernelName: Ms,
   backendName: "webgpu",
   kernelFunc: kd
 };
@@ -3495,7 +3494,7 @@ function Rd(o) {
   return Rt(xe.Sum, s, e, a, r, n);
 }
 const Pd = {
-  kernelName: mo,
+  kernelName: Es,
   backendName: "webgpu",
   kernelFunc: Rd
 };
@@ -3504,7 +3503,7 @@ function $d(o) {
   return e.runWebGPUProgram(m, x, l, g, f);
 }
 const Dd = {
-  kernelName: go,
+  kernelName: Us,
   backendName: "webgpu",
   kernelFunc: $d
 };
@@ -3558,7 +3557,7 @@ function zd(o) {
   return e.runWebGPUProgram(g, [s], s.dtype, m);
 }
 const Ad = {
-  kernelName: xo,
+  kernelName: Hs,
   backendName: "webgpu",
   kernelFunc: zd
 };
@@ -3781,10 +3780,10 @@ class $t {
   }
 }
 function Wd(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = i, l = we(u);
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = i, l = Se(u);
   let c = d;
   c == null && (c = [1, 1]);
-  const p = Q(s.shape, a.shape, r, c, n, h, !0, l), f = [
+  const p = Z(s.shape, a.shape, r, c, n, h, !0, l), f = [
     { type: "int32", data: [p.padInfo.top, p.padInfo.left] },
     { type: "int32", data: [p.inHeight, p.inWidth] }
   ], m = p.dataFormat === "channelsLast";
@@ -3795,7 +3794,7 @@ function Wd(o) {
   })), e.runWebGPUProgram(g, [s, a], s.dtype, f);
 }
 const Ld = {
-  kernelName: Co,
+  kernelName: Gs,
   backendName: "webgpu",
   kernelFunc: Wd
 };
@@ -3895,7 +3894,7 @@ class Bd {
   }
 }
 function Td(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, filterShape: h } = i, l = Q(
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, filterShape: h } = i, l = Z(
     s.shape,
     h,
     r,
@@ -3918,12 +3917,12 @@ function Td(o) {
   return e.runWebGPUProgram(c, [s, a], "float32", p);
 }
 const _d = {
-  kernelName: wo,
+  kernelName: Xs,
   backendName: "webgpu",
   kernelFunc: Td
 };
 function Od(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, inputShape: h } = i, l = Q(
+  const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, inputShape: h } = i, l = Z(
     h,
     a.shape,
     r,
@@ -3949,7 +3948,7 @@ function Od(o) {
   return e.runWebGPUProgram(c, [s, a], s.dtype, p);
 }
 const Md = {
-  kernelName: yo,
+  kernelName: Ks,
   backendName: "webgpu",
   kernelFunc: Od
 };
@@ -3974,7 +3973,7 @@ function Ud(o) {
   return e.disposeData(r.dataId), e.disposeData(u.dataId), d;
 }
 const Hd = {
-  kernelName: So,
+  kernelName: qs,
   backendName: "webgpu",
   kernelFunc: Ud
 };
@@ -4019,7 +4018,7 @@ class Gd {
   }
 }
 function Xd(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d = Me(s.shape, a.shape, r, n, "NHWC", u), h = [d.padInfo.top, d.padInfo.left], l = [
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d = Ue(s.shape, a.shape, r, n, "NHWC", u), h = [d.padInfo.top, d.padInfo.left], l = [
     { type: "int32", data: [d.filterHeight, d.filterWidth] },
     { type: "int32", data: [...h] },
     { type: "int32", data: [d.strideHeight, d.strideWidth] },
@@ -4028,7 +4027,7 @@ function Xd(o) {
   return e.runWebGPUProgram(c, [s, a], s.dtype, l);
 }
 const Kd = {
-  kernelName: bo,
+  kernelName: Ys,
   backendName: "webgpu",
   kernelFunc: Xd
 };
@@ -4080,7 +4079,7 @@ class qd {
            let flatIndexIn = d + uniforms.xShape[3] *
                (xCMax + uniforms.xShape[2] * (xRMax + uniforms.xShape[1] * b));
            let value = getDy(b, r, c, d);
-           ${Z("&result[flatIndexIn]", "value", this.type)}
+           ${Q("&result[flatIndexIn]", "value", this.type)}
          }
        }
      `;
@@ -4133,14 +4132,14 @@ class Yd {
            let flatIndexIn = d + uniforms.wShape[2] * (wCMax + wRMax * uniforms.wShape[1]);
            let value = getDy(b, r, c, d);
-           ${Z("&result[flatIndexIn]", "value", this.type)}
+           ${Q("&result[flatIndexIn]", "value", this.type)}
          }
        }
      `;
   }
 }
 function jd(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h = Me(s.shape, a.shape, n, u, "NHWC", d), l = a.dtype, c = new Yd(h, a.shape, l), p = [
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h = Ue(s.shape, a.shape, n, u, "NHWC", d), l = a.dtype, c = new Yd(h, a.shape, l), p = [
     { type: "int32", data: [h.filterHeight, h.filterWidth] },
     { type: "int32", data: [h.padInfo.top, h.padInfo.left] },
     { type: "int32", data: [h.strideHeight, h.strideWidth] },
@@ -4150,12 +4149,12 @@ function jd(o) {
   return e.runWebGPUProgram(c, [s, a, r], l, p, f);
 }
 const Qd = {
-  kernelName: vo,
+  kernelName: js,
   backendName: "webgpu",
   kernelFunc: jd
 };
 function Zd(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h = Me(s.shape, a.shape, n, u, "NHWC", d), l = s.dtype, c = new qd(h, l), p = [
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h = Ue(s.shape, a.shape, n, u, "NHWC", d), l = s.dtype, c = new qd(h, l), p = [
     { type: "int32", data: [h.filterHeight, h.filterWidth] },
     { type: "int32", data: [h.padInfo.top, h.padInfo.left] },
     { type: "int32", data: [h.strideHeight, h.strideWidth] },
@@ -4165,13 +4164,13 @@ function Zd(o) {
   return e.runWebGPUProgram(c, [s, a, r], l, p, f);
 }
 const Jd = {
-  kernelName: ko,
+  kernelName: Qs,
   backendName: "webgpu",
   kernelFunc: Zd
 };
 class el {
   constructor(t, e, i) {
-    this.variableNames = ["Image"], this.uniforms = "alpha: f32,", this.workgroupSize = [64, 1, 1], this.pixelsOpType = ft.DRAW, this.size = !0, this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize), this.type = e, this.textureFormat = i, this.shaderKey = `draw_${e}_${i}`;
+    this.variableNames = ["Image"], this.uniforms = "alpha: f32,", this.workgroupSize = [64, 1, 1], this.pixelsOpType = pt.DRAW, this.size = !0, this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize), this.type = e, this.textureFormat = i, this.shaderKey = `draw_${e}_${i}`;
   }
   getUserCode() {
     let t;
@@ -4227,7 +4226,7 @@ function tl(o) {
   return e.disposeData(w.dataId), s;
 }
 const sl = {
-  kernelName: Io,
+  kernelName: Zs,
   backendName: "webgpu",
   kernelFunc: tl
 };
@@ -4236,7 +4235,7 @@ const Dt = V({
   cpuKernelImpl: nn,
   supportsComplex: !0
 }), ol = {
-  kernelName: Ro,
+  kernelName: Js,
   backendName: "webgpu",
   kernelFunc: Dt
 };
@@ -4245,21 +4244,21 @@ function Nt(o) {
   return ie(s, a, r, "sum", e);
 }
 const il = {
-  kernelName: Po,
+  kernelName: eo,
   backendName: "webgpu",
   kernelFunc: Nt
 };
 function al(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { equation: s } = i, a = t, { allDims: r, summedDims: n, idDims: u } = ns(s, a.length);
-  us(r.length, u, a);
-  const { path: d, steps: h } = ds(n, u), l = h.length;
+  const { inputs: t, backend: e, attrs: i } = o, { equation: s } = i, a = t, { allDims: r, summedDims: n, idDims: u } = da(s, a.length);
+  la(r.length, u, a);
+  const { path: d, steps: h } = ca(n, u), l = h.length;
   let c = null, p = r.length;
   const f = [];
   for (let m = 0; m < l; ++m) {
     for (const g of h[m]) {
-      const { permutationIndices: x, expandDims: C } = ls(p, u[g]);
+      const { permutationIndices: x, expandDims: C } = ha(p, u[g]);
       let w;
-      cs(x) ? w = a[g] : (w = K({ inputs: { x: a[g] }, backend: e, attrs: { perm: x } }), f.push(w));
+      pa(x) ? w = a[g] : (w = K({ inputs: { x: a[g] }, backend: e, attrs: { perm: x } }), f.push(w));
       const k = w.shape.slice();
       for (let I = 0; I < C.length; ++I)
         k.splice(C[I], 0, 1);
@@ -4279,12 +4278,12 @@ function al(o) {
   return c;
 }
 const rl = {
-  kernelName: $o,
+  kernelName: to,
   backendName: "webgpu",
   kernelFunc: al
 };
 const nl = N({ opType: y.ELU }), ul = {
-  kernelName: Do,
+  kernelName: so,
   backendName: "webgpu",
   kernelFunc: nl
 };
@@ -4292,17 +4291,17 @@ const dl = (o) => {
   const { inputs: t, backend: e } = o, { dy: i, y: s } = t, a = new Re(F.ELU_DER, i.shape, s.shape);
   return e.runWebGPUProgram(a, [i, s], i.dtype);
 }, ll = {
-  kernelName: No,
+  kernelName: oo,
   backendName: "webgpu",
   kernelFunc: dl
 };
 const cl = V({ opType: F.EQUAL, dtype: "bool", cpuKernelImpl: Gr }), hl = {
-  kernelName: zo,
+  kernelName: io,
   backendName: "webgpu",
   kernelFunc: cl
 };
 const pl = N({ opType: y.ERF }), fl = {
-  kernelName: Ao,
+  kernelName: ao,
   backendName: "webgpu",
   kernelFunc: pl
 };
@@ -4311,7 +4310,7 @@ const ml = N({
   cpuKernelImpl: Xr,
   dtype: "float32"
 }), gl = {
-  kernelName: Fo,
+  kernelName: ro,
   backendName: "webgpu",
   kernelFunc: ml
 };
@@ -4321,12 +4320,12 @@ function Te(o) {
   return s < 0 && (L(-(r + 1) <= s, () => `Axis must be in the interval [${-(r + 1)}, ${r}]`), u = r + s + 1), n.splice(u, 0, 1), R({ inputs: { x: a }, backend: i, attrs: { shape: n } });
 }
 const xl = {
-  kernelName: Wo,
+  kernelName: no,
   backendName: "webgpu",
   kernelFunc: Te
 };
 const Cl = N({ opType: y.EXPM1, cpuKernelImpl: Kr }), wl = {
-  kernelName: Lo,
+  kernelName: uo,
   backendName: "webgpu",
   kernelFunc: Cl
 };
@@ -4402,7 +4401,7 @@ function yl(o) {
   return zt(i, !1, e);
 }
 const Sl = {
-  kernelName: Vo,
+  kernelName: lo,
   backendName: "webgpu",
   kernelFunc: yl
 };
@@ -4424,7 +4423,7 @@ class bl {
   }
 }
 const vl = {
-  kernelName: Bo,
+  kernelName: co,
   backendName: "webgpu",
   kernelFunc: ({ inputs: o, backend: t }) => {
     const { image: e } = o, i = t, s = new bl(e.shape);
@@ -4432,7 +4431,7 @@ const vl = {
   }
 };
 const kl = N({ opType: y.FLOOR, cpuKernelImpl: qr }), Il = {
-  kernelName: To,
+  kernelName: ho,
   backendName: "webgpu",
   kernelFunc: kl
 };
@@ -4441,13 +4440,13 @@ const Rl = V({
   cpuKernelImpl: Yr,
   dtype: "int32"
 }), Pl = {
-  kernelName: _o,
+  kernelName: po,
   backendName: "webgpu",
   kernelFunc: Rl
 };
 class $l {
   constructor(t, e, i = !1) {
-    this.pixelsOpType = ft.FROM_PIXELS, this.outputShape = [0], this.variableNames = [], this.workgroupSize = [256, 1, 1], this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, [e, 1, 1]), this.importVideo = i, this.shaderKey = `fromPixels_${this.importVideo}`;
+    this.pixelsOpType = pt.FROM_PIXELS, this.outputShape = [0], this.variableNames = [], this.workgroupSize = [256, 1, 1], this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, [e, 1, 1]), this.importVideo = i, this.shaderKey = `fromPixels_${this.importVideo}`;
   }
   getUserCode() {
     const t = this.importVideo ? "textureLoad(src, vec2<i32>(coords.yx));" : "textureLoad(src, vec2<i32>(coords.yx), 0)";
@@ -4467,7 +4466,7 @@ class $l {
   }
 }
 const Dl = {
-  kernelName: Oo,
+  kernelName: fo,
   backendName: "webgpu",
   kernelFunc: Nl
 };
@@ -4494,7 +4493,7 @@ function Nl(o) {
       const B = GPUTextureUsage.COPY_DST | GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING, H = e.textureManager.acquireTexture(c[1], c[0], "rgba8unorm", B);
       e.queue.copyExternalImageToTexture({ source: s }, { texture: H }, [c[1], c[0]]), C = H;
     }
-    const w = D(c), k = ut(c), I = new $l(c, a, p), P = [
+    const w = D(c), k = at(c), I = new $l(c, a, p), P = [
       { type: "uint32", data: [w] },
       { type: "uint32", data: [a] },
       { type: "uint32", data: [...k] }
@@ -4540,7 +4539,7 @@ class zl {
   }
 }
 const Al = {
-  kernelName: Mo,
+  kernelName: mo,
   backendName: "webgpu",
   kernelFunc: ({ inputs: o, attrs: t, backend: e }) => {
     const { x: i, scale: s, offset: a, mean: r, variance: n } = o, { varianceEpsilon: u } = t, d = e, h = [i, r, n];
@@ -4553,7 +4552,7 @@ const Al = {
   }
 };
 function Fl(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, bias: r, preluActivationWeights: n } = t, { strides: u, pad: d, dataFormat: h, dilations: l, dimRoundingMode: c, activation: p, leakyreluAlpha: f } = i, m = we(h), g = Q(s.shape, a.shape, u, l, d, c, !1, m);
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, bias: r, preluActivationWeights: n } = t, { strides: u, pad: d, dataFormat: h, dilations: l, dimRoundingMode: c, activation: p, leakyreluAlpha: f } = i, m = Se(h), g = Z(s.shape, a.shape, u, l, d, c, !1, m);
   return It({
     x: s,
     filter: a,
@@ -4566,15 +4565,15 @@ function Fl(o) {
   });
 }
 const Wl = {
-  kernelName: Eo,
+  kernelName: go,
   backendName: "webgpu",
   kernelFunc: Fl
 };
 function Ll(o) {
   const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, bias: r, preluActivationWeights: n } = t, { strides: u, pad: d, dilations: h, dimRoundingMode: l, activation: c, leakyreluAlpha: p } = i;
   let f = h;
-  f == null && (f = [1, 1]), L(lt(u, f), () => `Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${u} and dilations '${f}'`);
-  const m = Q(
+  f == null && (f = [1, 1]), L(mt(u, f), () => `Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${u} and dilations '${f}'`);
+  const m = Z(
     s.shape,
     a.shape,
     u,
@@ -4596,7 +4595,7 @@ function Ll(o) {
   })), c === "leakyrelu" && (w.push({ type: "float32", data: [p] }), k.uniforms += " alpha : f32,"), e.runWebGPUProgram(k, g, "float32", w);
 }
 const Vl = {
-  kernelName: Uo,
+  kernelName: xo,
   backendName: "webgpu",
   kernelFunc: Ll
 };
@@ -4624,7 +4623,7 @@ class Bl {
   }
 }
 function Tl(o) {
-  const { inputs: t, backend: e } = o, { params: i, indices: s } = t, a = s.shape, r = a[a.length - 1], n = D(i.shape), [u, d, h, l] = hs(i, s), c = R({ inputs: { x: s }, backend: e, attrs: { shape: [d, r] } }), p = R({
+  const { inputs: t, backend: e } = o, { params: i, indices: s } = t, a = s.shape, r = a[a.length - 1], n = D(i.shape), [u, d, h, l] = fa(i, s), c = R({ inputs: { x: s }, backend: e, attrs: { shape: [d, r] } }), p = R({
     inputs: { x: i },
     backend: e,
     attrs: { shape: [D(i.shape) / h, h] }
@@ -4637,7 +4636,7 @@ function Tl(o) {
   return e.disposeData(c.dataId), e.disposeData(p.dataId), e.disposeData(g.dataId), x;
 }
 const _l = {
-  kernelName: Ho,
+  kernelName: Co,
   backendName: "webgpu",
   kernelFunc: Tl
 };
@@ -4666,7 +4665,7 @@ function Ml(o) {
   return e.join();
 }
 function At(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s, indices: a } = t, { axis: r, batchDims: n } = i, u = te(r, s.shape)[0], d = ps(s, a, u, n), h = D(a.shape), l = [], c = R({
+  const { inputs: t, backend: e, attrs: i } = o, { x: s, indices: a } = t, { axis: r, batchDims: n } = i, u = te(r, s.shape)[0], d = ma(s, a, u, n), h = D(a.shape), l = [], c = R({
     inputs: { x: s },
     backend: e,
     attrs: {
@@ -4699,7 +4698,7 @@ function At(o) {
   return l.forEach((C) => e.disposeData(C.dataId)), x;
 }
 const El = {
-  kernelName: Go,
+  kernelName: wo,
   backendName: "webgpu",
   kernelFunc: At
 };
@@ -4708,7 +4707,7 @@ const Ul = V({
   cpuKernelImpl: Jr,
   dtype: "bool"
 }), Hl = {
-  kernelName: Xo,
+  kernelName: yo,
   backendName: "webgpu",
   kernelFunc: Ul
 };
@@ -4717,7 +4716,7 @@ const Gl = V({
   dtype: "bool",
   cpuKernelImpl: Zr
 }), Xl = {
-  kernelName: Ko,
+  kernelName: So,
   backendName: "webgpu",
   kernelFunc: Gl
 };
@@ -4726,22 +4725,22 @@ function Kl(o) {
   return zt(i, !0, e);
 }
 const ql = {
-  kernelName: qo,
+  kernelName: bo,
   backendName: "webgpu",
   kernelFunc: Kl
 };
 const Yl = N({ opType: y.IS_FINITE, dtype: "bool" }), jl = {
-  kernelName: Yo,
+  kernelName: vo,
   backendName: "webgpu",
   kernelFunc: Yl
 };
 const Ql = N({ opType: y.IS_INF, dtype: "bool" }), Zl = {
-  kernelName: jo,
+  kernelName: ko,
   backendName: "webgpu",
   kernelFunc: Ql
 };
 const Jl = N({ opType: y.IS_NAN, dtype: "bool" }), ec = {
-  kernelName: Qo,
+  kernelName: Io,
   backendName: "webgpu",
   kernelFunc: Jl
 };
@@ -4750,12 +4749,12 @@ function tc(o) {
   return e.runWebGPUProgram(n, [s], "float32", r);
 }
 const sc = {
-  kernelName: Zo,
+  kernelName: Ro,
   backendName: "webgpu",
   kernelFunc: tc
 };
 const oc = V({ opType: F.LESS, dtype: "bool", cpuKernelImpl: tn }), ic = {
-  kernelName: Jo,
+  kernelName: Po,
   backendName: "webgpu",
   kernelFunc: oc
 };
@@ -4764,7 +4763,7 @@ const ac = V({
   dtype: "bool",
   cpuKernelImpl: en
 }), rc = {
-  kernelName: ei,
+  kernelName: $o,
   backendName: "webgpu",
   kernelFunc: ac
 };
@@ -4787,32 +4786,32 @@ function uc(o) {
   return t.runWebGPUProgram(n, [], "float32", u);
 }
 const dc = {
-  kernelName: ti,
+  kernelName: Do,
   backendName: "webgpu",
   kernelFunc: uc
 };
 const lc = N({ opType: y.LOG, cpuKernelImpl: sn }), cc = {
-  kernelName: si,
+  kernelName: No,
   backendName: "webgpu",
   kernelFunc: lc
 };
 const hc = N({ opType: y.LOG1P }), pc = {
-  kernelName: oi,
+  kernelName: zo,
   backendName: "webgpu",
   kernelFunc: hc
 };
 const fc = V({ opType: F.LOGICAL_AND, dtype: "bool" }), mc = {
-  kernelName: ii,
+  kernelName: Ao,
   backendName: "webgpu",
   kernelFunc: fc
 };
 const gc = N({ opType: y.LOGICAL_NOT }), xc = {
-  kernelName: ai,
+  kernelName: Fo,
   backendName: "webgpu",
   kernelFunc: gc
 };
 const Cc = V({ opType: F.LOGICAL_OR }), wc = {
-  kernelName: ri,
+  kernelName: Wo,
   backendName: "webgpu",
   kernelFunc: Cc
 };
@@ -4915,7 +4914,7 @@ function bc(o) {
   return e.runWebGPUProgram(d, [s], s.dtype, h);
 }
 const vc = {
-  kernelName: ni,
+  kernelName: Lo,
   backendName: "webgpu",
   kernelFunc: bc
 };
@@ -4987,7 +4986,7 @@ function Ic(o) {
   return e.runWebGPUProgram(l, [s, a, r], s.dtype, c);
 }
 const Rc = {
-  kernelName: ui,
+  kernelName: Vo,
   backendName: "webgpu",
   kernelFunc: Ic
 };
@@ -4995,16 +4994,16 @@ const Pc = V({
   opType: F.MAX,
   cpuKernelImpl: an
 }), $c = {
-  kernelName: di,
+  kernelName: Bo,
   backendName: "webgpu",
   kernelFunc: Pc
 };
 function Dc(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h = Ce(s.shape, a, r, 1, n, u);
+  const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h = ye(s.shape, a, r, 1, n, u);
   return St(s, h, "max", e);
 }
 const Nc = {
-  kernelName: li,
+  kernelName: To,
   backendName: "webgpu",
   kernelFunc: Dc
 };
@@ -5034,7 +5033,7 @@ function zc(o) {
   return e.runWebGPUProgram(c, [s], s.dtype, p);
 }
 const Ac = {
-  kernelName: ci,
+  kernelName: _o,
   backendName: "webgpu",
   kernelFunc: zc
 };
@@ -5216,14 +5215,14 @@ function Lc(o) {
   return e.disposeData(m.dataId), x;
 }
 const Vc = {
-  kernelName: hi,
+  kernelName: Oo,
   backendName: "webgpu",
   kernelFunc: Lc
 };
 function Bc(o) {
   const { inputs: t, backend: e, attrs: i } = o, { dy: s, input: a, output: r } = t, n = a;
-  at([a, r], "maxPoolGrad");
-  const { filterSize: u, strides: d, pad: h, dimRoundingMode: l } = i, c = Ce(n.shape, u, d, 1, h, l), p = new ge(c, "max", !0);
+  nt([a, r], "maxPoolGrad");
+  const { filterSize: u, strides: d, pad: h, dimRoundingMode: l } = i, c = ye(n.shape, u, d, 1, h, l), p = new ge(c, "max", !0);
   let f = [
     { type: "int32", data: [c.strideHeight, c.strideWidth] },
     { type: "int32", data: [c.padInfo.top, c.padInfo.left] },
@@ -5256,7 +5255,7 @@ function Bc(o) {
   return e.disposeData(m.dataId), x;
 }
 const Tc = {
-  kernelName: pi,
+  kernelName: Mo,
   backendName: "webgpu",
   kernelFunc: Bc
 };
@@ -5264,8 +5263,8 @@ function _c(o) {
   const { inputs: t, backend: e, attrs: i } = o, { filterSize: s, strides: a, pad: r, includeBatchInIndex: n } = i, { x: u } = t;
   L(u.shape.length === 4, () => `Error in maxPool: input must be rank 4 but got rank ${u.shape.length}.`);
   const d = [1, 1];
-  L(lt(a, d), () => `Error in maxPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${d}'`);
-  const h = Ce(u.shape, s, a, d, r), l = [
+  L(mt(a, d), () => `Error in maxPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${d}'`);
+  const h = ye(u.shape, s, a, d, r), l = [
     { type: "int32", data: [h.strideHeight, h.strideWidth] },
     { type: "int32", data: [h.padInfo.top, h.padInfo.left] },
     { type: "int32", data: [h.dilationHeight, h.dilationWidth] },
@@ -5282,7 +5281,7 @@ function _c(o) {
   return [p, f];
 }
 const Oc = {
-  kernelName: fi,
+  kernelName: Eo,
   backendName: "webgpu",
   kernelFunc: _c
 };
@@ -5291,7 +5290,7 @@ function Mc(o) {
   return ie(s, a, r, "min", e);
 }
 const Ec = {
-  kernelName: mi,
+  kernelName: Uo,
   backendName: "webgpu",
   kernelFunc: Mc
 };
@@ -5299,7 +5298,7 @@ const Uc = V({
   opType: F.MIN,
   cpuKernelImpl: rn
 }), Hc = {
-  kernelName: gi,
+  kernelName: Ho,
   backendName: "webgpu",
   kernelFunc: Uc
 };
@@ -5335,7 +5334,7 @@ class Gc {
   }
 }
 const Xc = {
-  kernelName: xi,
+  kernelName: Go,
   backendName: "webgpu",
   kernelFunc: ({ inputs: o, attrs: t, backend: e }) => {
     const { x: i } = o, { paddings: s, mode: a } = t, r = e, n = s.map((h) => ({ type: "int32", data: [h[0], h[1]] })), u = new Gc(i.shape, s, a);
@@ -5343,7 +5342,7 @@ const Xc = {
   }
 };
 const Kc = V({ opType: F.MOD }), qc = {
-  kernelName: Ci,
+  kernelName: Xo,
   backendName: "webgpu",
   kernelFunc: Kc
 };
@@ -5469,7 +5468,7 @@ function Wt(o) {
   return e.disposeData(r.dataId), e.disposeData(u.dataId), d;
 }
 const Qc = {
-  kernelName: wi,
+  kernelName: Ko,
   backendName: "webgpu",
   kernelFunc: Wt
 };
@@ -5478,7 +5477,7 @@ function Zc(o) {
   return n || e.disposeData(u.dataId), p;
 }
 const Jc = {
-  kernelName: yi,
+  kernelName: qo,
   backendName: "webgpu",
   kernelFunc: Zc
 };
@@ -5492,7 +5491,7 @@ function eh(o) {
   return e.runWebGPUProgram(s, [i], i.dtype);
 }
 const th = {
-  kernelName: Si,
+  kernelName: Yo,
   backendName: "webgpu",
   kernelFunc: eh
 };
@@ -5502,7 +5501,7 @@ function sh(o) {
   return e.makeTensorInfo([l.length], "int32", new Int32Array(l));
 }
 const oh = {
-  kernelName: bi,
+  kernelName: jo,
   backendName: "webgpu",
   kernelFunc: sh
 };
@@ -5515,7 +5514,7 @@ function ih(o) {
   ];
 }
 const ah = {
-  kernelName: vi,
+  kernelName: Qo,
   backendName: "webgpu",
   kernelFunc: ih
 };
@@ -5542,7 +5541,7 @@ function nh(o) {
   return e.disposeData(p.dataId), m;
 }
 const uh = {
-  kernelName: ki,
+  kernelName: Zo,
   backendName: "webgpu",
   kernelFunc: nh
 };
@@ -5562,7 +5561,7 @@ function $e(o) {
     });
 }
 const dh = {
-  kernelName: Ii,
+  kernelName: Jo,
   backendName: "webgpu",
   kernelFunc: $e
 };
@@ -5577,7 +5576,7 @@ function Lt(o) {
     return M({ attrs: { shape: i.shape, dtype: i.dtype, value: 1 }, backend: e });
 }
 const lh = {
-  kernelName: Ri,
+  kernelName: ei,
   backendName: "webgpu",
   kernelFunc: Lt
 };
@@ -5587,7 +5586,7 @@ function ch(o) {
     return Te({ inputs: { input: t[0] }, backend: e, attrs: { dim: s } });
   const a = t[0].shape, r = t[0].dtype;
   t.forEach((h) => {
-    Yt(a, h.shape, "All tensors passed to stack must have matching shapes"), L(r === h.dtype, () => "All tensors passed to stack must have matching dtypes");
+    si(a, h.shape, "All tensors passed to stack must have matching shapes"), L(r === h.dtype, () => "All tensors passed to stack must have matching dtypes");
   });
   const n = [], u = t.map((h) => {
     const l = Te({ inputs: { input: h }, backend: e, attrs: { dim: s } });
@@ -5596,7 +5595,7 @@ function ch(o) {
   return n.forEach((h) => e.disposeData(h.dataId)), d;
 }
 const hh = {
-  kernelName: Pi,
+  kernelName: ti,
   backendName: "webgpu",
   kernelFunc: ch
 };
@@ -5652,14 +5651,14 @@ const fh = (o) => {
   const u = new ph(s.shape, a);
   return e.runWebGPUProgram(u, [s], s.dtype, n);
 }, mh = {
-  kernelName: $i,
+  kernelName: oi,
   backendName: "webgpu",
   kernelFunc: fh
 };
 const gh = V({
   opType: F.POW
 }), xh = {
-  kernelName: Di,
+  kernelName: ii,
   backendName: "webgpu",
   kernelFunc: gh
 };
@@ -5668,7 +5667,7 @@ function Ch(o) {
   return e.runWebGPUProgram(a, [i, s], "float32");
 }
 const wh = {
-  kernelName: Ni,
+  kernelName: ai,
   backendName: "webgpu",
   kernelFunc: Ch
 };
@@ -5677,7 +5676,7 @@ function yh(o) {
   return ie(s, a, r, "prod", e);
 }
 const Sh = {
-  kernelName: zi,
+  kernelName: ri,
   backendName: "webgpu",
   kernelFunc: yh
 };
@@ -5685,27 +5684,27 @@ const bh = (o) => {
   const { backend: t, attrs: e } = o, { start: i, stop: s, step: a, dtype: r } = e, n = cn(i, s, a, r);
   return t.makeTensorInfo([n.length], r, n);
 }, vh = {
-  kernelName: Ai,
+  kernelName: ni,
   backendName: "webgpu",
   kernelFunc: bh
 };
 const kh = V({ opType: F.DIV }), Ih = {
-  kernelName: Fi,
+  kernelName: ui,
   backendName: "webgpu",
   kernelFunc: kh
 };
 const Rh = N({ opType: y.RECIPROCAL }), Ph = {
-  kernelName: Wi,
+  kernelName: di,
   backendName: "webgpu",
   kernelFunc: Rh
 };
 const $h = N({ opType: y.RELU }), Dh = {
-  kernelName: Li,
+  kernelName: li,
   backendName: "webgpu",
   kernelFunc: $h
 };
 const Nh = N({ opType: y.RELU6 }), zh = {
-  kernelName: Vi,
+  kernelName: ci,
   backendName: "webgpu",
   kernelFunc: Nh
 };
@@ -5768,7 +5767,7 @@ function Fh(o) {
   return e.runWebGPUProgram(f, [s], "float32", p);
 }
 const Wh = {
-  kernelName: Bi,
+  kernelName: hi,
   backendName: "webgpu",
   kernelFunc: Fh
 };
@@ -5875,7 +5874,7 @@ function Vh(o) {
   return e.runWebGPUProgram(w, [a], a.dtype, k);
 }
 const Bh = {
-  kernelName: Ti,
+  kernelName: pi,
   backendName: "webgpu",
   kernelFunc: Vh
 };
@@ -5927,7 +5926,7 @@ function _h(o) {
   return e.runWebGPUProgram(f, [s], s.dtype, p);
 }
 const Oh = {
-  kernelName: _i,
+  kernelName: fi,
   backendName: "webgpu",
   kernelFunc: _h
 };
@@ -6017,7 +6016,7 @@ function Eh(o) {
   return e.runWebGPUProgram(w, [a], a.dtype, k);
 }
 const Uh = {
-  kernelName: Oi,
+  kernelName: mi,
   backendName: "webgpu",
   kernelFunc: Eh
 };
@@ -6079,7 +6078,7 @@ function Gh(o) {
   return e.disposeData(f.dataId), m;
 }
 const Xh = {
-  kernelName: Mi,
+  kernelName: gi,
   backendName: "webgpu",
   kernelFunc: Gh
 };
@@ -6113,10 +6112,10 @@ class Kh {
   }
 }
 const qh = {
-  kernelName: Ei,
+  kernelName: xi,
   backendName: "webgpu",
   kernelFunc: ({ inputs: o, attrs: t, backend: e }) => {
-    const { image: i } = o, { radians: s, fillValue: a, center: r } = t, n = e, u = new Kh(i.shape, a), [d, h] = fs(r, i.shape[1], i.shape[2]), l = [
+    const { image: i } = o, { radians: s, fillValue: a, center: r } = t, n = e, u = new Kh(i.shape, a), [d, h] = ga(r, i.shape[1], i.shape[2]), l = [
       { type: "float32", data: [d] },
       { type: "float32", data: [h] },
       { type: "float32", data: [Math.sin(s)] },
@@ -6126,12 +6125,12 @@ const qh = {
   }
 };
 const Yh = N({ opType: y.ROUND }), jh = {
-  kernelName: Ui,
+  kernelName: Ci,
   backendName: "webgpu",
   kernelFunc: Yh
 };
 const Qh = N({ opType: y.RSQRT, cpuKernelImpl: hn }), Zh = {
-  kernelName: Hi,
+  kernelName: wi,
   backendName: "webgpu",
   kernelFunc: Qh
 };
@@ -6174,10 +6173,10 @@ class pe {
             flattenedIndex = flattenedIndex + indexInside * ${i};
           }
           let updateValue =
-              ${xs(this.type)}(${n});
+              ${wa(this.type)}(${n});
           let flatIndex = getOutputIndexFromCoords(${s});
-          ${this.sumDupeIndices ? Z("&result[flatIndex]", "updateValue", this.type) : "atomicStore(&result[flatIndex], bitcast<i32>(updateValue));"}
+          ${this.sumDupeIndices ? Q("&result[flatIndex]", "updateValue", this.type) : "atomicStore(&result[flatIndex], bitcast<i32>(updateValue));"}
         }
       }`;
   }
@@ -6194,7 +6193,7 @@ function Jh(o) {
   return e.disposeData(p.dataId), e.disposeData(f.dataId), e.disposeData(k.dataId), I;
 }
 const ep = {
-  kernelName: Gi,
+  kernelName: yi,
   backendName: "webgpu",
   kernelFunc: Jh
 };
@@ -6233,7 +6232,7 @@ function sp(o) {
   return e.runWebGPUProgram(n, [s, a], "int32", u);
 }
 const op = {
-  kernelName: Xi,
+  kernelName: Si,
   backendName: "webgpu",
   kernelFunc: sp
 };
@@ -6273,37 +6272,37 @@ function ap(o) {
   return e.runWebGPUProgram(r, [i, s, a], fe(s.dtype, a.dtype));
 }
 const rp = {
-  kernelName: Ki,
+  kernelName: bi,
   backendName: "webgpu",
   kernelFunc: ap
 };
 const np = N({ opType: y.SELU }), up = {
-  kernelName: qi,
+  kernelName: vi,
   backendName: "webgpu",
   kernelFunc: np
 };
 const dp = N({ opType: y.SIGMOID }), lp = {
-  kernelName: Yi,
+  kernelName: ki,
   backendName: "webgpu",
   kernelFunc: dp
 };
 const cp = N({ opType: y.SIGN }), hp = {
-  kernelName: ji,
+  kernelName: Ii,
   backendName: "webgpu",
   kernelFunc: cp
 };
 const pp = N({ opType: y.SIN }), fp = {
-  kernelName: Qi,
+  kernelName: Ri,
   backendName: "webgpu",
   kernelFunc: pp
 };
 const mp = N({ opType: y.SINH }), gp = {
-  kernelName: Zi,
+  kernelName: Pi,
   backendName: "webgpu",
   kernelFunc: mp
 };
 const xp = N({ opType: y.SOFTPLUS }), Cp = {
-  kernelName: Ji,
+  kernelName: $i,
   backendName: "webgpu",
   kernelFunc: xp
 };
@@ -6320,7 +6319,7 @@ class wp {
   getUserCode() {
     const t = G(this.outputShape.length), e = xt(this.newDim);
     return `
-      ${Cs(this.paddedXShape, "PaddedX")}
+      ${ya(this.paddedXShape, "PaddedX")}
       ${S("index")} {
         if(index < uniforms.size) {
           let coords = getCoordsFromIndex(index);
@@ -6342,7 +6341,7 @@ const yp = (o) => {
   const d = u.map(
     (C, w) => C[0] + s.shape[w] + C[1]
     /* afterPad */
-  ), h = ct(d, a, n, !1), l = ht(h.length, a.length, !1), c = pt(d, a, n, !1), p = ut(d), f = new wp(s.shape, d, u, h, l, p.length), m = [
+  ), h = lt(d, a, n, !1), l = ct(h.length, a.length, !1), c = ht(d, a, n, !1), p = at(d), f = new wp(s.shape, d, u, h, l, p.length), m = [
     { type: "int32", data: h },
     { type: "int32", data: p }
   ];
@@ -6350,7 +6349,7 @@ const yp = (o) => {
   const g = e.runWebGPUProgram(f, [s], s.dtype, m), x = R({ inputs: { x: g }, backend: e, attrs: { shape: c } });
   return e.disposeData(g.dataId), x;
 }, Sp = {
-  kernelName: ea,
+  kernelName: Di,
   backendName: "webgpu",
   kernelFunc: yp
 };
@@ -6369,7 +6368,7 @@ class bp {
         let value = input[indexInInput * uniforms.segmentSize + indexInSegment];
         let outIndex = segmentId * uniforms.segmentSize + indexInSegment;
-        ${Z("&result[outIndex]", "value", this.type)}
+        ${Q("&result[outIndex]", "value", this.type)}
       }
     }
   `;
@@ -6384,7 +6383,7 @@ class vp {
     ${S("index")} {
       if (index < uniforms.segmentIdsShape) {
         let segmentId = segmentIds[index];
-        ${Z("&result[segmentId]", "1", "int32")}
+        ${Q("&result[segmentId]", "1", "int32")}
       }
     }
   `;
@@ -6434,7 +6433,7 @@ function Ip(o) {
   return Bt(i, s, a, !1, e);
 }
 const Rp = {
-  kernelName: ta,
+  kernelName: Ni,
   backendName: "webgpu",
   kernelFunc: Ip
 };
@@ -6443,7 +6442,7 @@ function Pp(o) {
   return Bt(i, s, a, !0, e);
 }
 const $p = {
-  kernelName: sa,
+  kernelName: zi,
   backendName: "webgpu",
   kernelFunc: Pp
 };
@@ -6480,21 +6479,21 @@ function Np(o, t = "") {
 function Ke(o) {
   const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { reps: a } = i;
   if (e.shouldExecuteOnCPU([s]) || s.dtype === "string" || s.shape.length >= 5) {
-    const u = e.readSync(s.dataId), d = s.dtype === "string" ? u.map((c) => dt(c)) : u, h = ke(s.shape, s.dtype, d), l = wn(h, a);
+    const u = e.readSync(s.dataId), d = s.dtype === "string" ? u.map((c) => rt(c)) : u, h = ke(s.shape, s.dtype, d), l = wn(h, a);
     return e.makeTensorInfo(l.shape, l.dtype, l.values);
   }
   const r = new Dp(s.shape, a);
   return e.runWebGPUProgram(r, [s], s.dtype);
 }
 const zp = {
-  kernelName: oa,
+  kernelName: Ai,
   backendName: "webgpu",
   kernelFunc: Ke
 };
 function Ap(o) {
   const { inputs: t, backend: e, attrs: i } = o, { sparseIndices: s, sparseValues: a, defaultValue: r } = t, { outputShape: n } = i, { sliceRank: u, numUpdates: d, sliceSize: h, strides: l, outputSize: c } = He(a, s, n), p = !1;
   if (a.dtype === "string") {
-    const A = e.bufferSync(s), z = e.bufferSync(a), B = dt(e.readSync(r.dataId)[0]), T = pn(A, z, n, c, h, d, u, l, B, p);
+    const A = e.bufferSync(s), z = e.bufferSync(a), B = rt(e.readSync(r.dataId)[0]), T = pn(A, z, n, c, h, d, u, l, B, p);
     return e.makeTensorInfo(n, T.dtype, T.values);
   }
   const f = [c / h, h], m = R({
@@ -6505,7 +6504,7 @@ function Ap(o) {
     inputs: { x: a },
     backend: e,
     attrs: { shape: [d, h] }
-  }) : U({ inputs: { x: a }, backend: e }), x = g.dtype, C = e.makeTensorInfo([], x, jt(1, x)), w = R({
+  }) : U({ inputs: { x: a }, backend: e }), x = g.dtype, C = e.makeTensorInfo([], x, Wi(1, x)), w = R({
     inputs: { x: r },
     backend: e,
     attrs: { shape: Array(f.length).fill(1) }
@@ -6537,12 +6536,12 @@ function Ap(o) {
   return e.disposeData(m.dataId), e.disposeData(g.dataId), e.disposeData(w.dataId), e.disposeData(C.dataId), e.disposeData(k.dataId), $;
 }
 const Fp = {
-  kernelName: ia,
+  kernelName: Fi,
   backendName: "webgpu",
   kernelFunc: Ap
 };
 function Wp(o) {
-  const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { numOrSizeSplits: a, axis: r } = i, n = te(r, s.shape)[0], u = ms(s, a, n), d = s.shape.length, h = new Array(d).fill(0), l = s.shape.slice();
+  const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { numOrSizeSplits: a, axis: r } = i, n = te(r, s.shape)[0], u = xa(s, a, n), d = s.shape.length, h = new Array(d).fill(0), l = s.shape.slice();
   return u.map((c) => {
     const p = [...l];
     p[n] = c;
@@ -6551,17 +6550,17 @@ function Wp(o) {
   });
 }
 const Lp = {
-  kernelName: aa,
+  kernelName: Li,
   backendName: "webgpu",
   kernelFunc: Wp
 };
 const Vp = N({ opType: y.SQRT }), Bp = {
-  kernelName: ra,
+  kernelName: Vi,
   backendName: "webgpu",
   kernelFunc: Vp
 };
 const Tp = {
-  kernelName: na,
+  kernelName: Bi,
   backendName: "webgpu",
   kernelFunc: ({ inputs: o, backend: t }) => {
     const { x: e } = o, i = t, s = new ue(e.shape, y.SQUARE);
@@ -6571,7 +6570,7 @@ const Tp = {
 const _p = V({
   opType: F.SQUARED_DIFFERENCE
 }), Op = {
-  kernelName: ua,
+  kernelName: Ti,
   backendName: "webgpu",
   kernelFunc: _p
 };
@@ -6580,7 +6579,7 @@ function Mp({ inputs: o, attrs: t, backend: e }) {
   return e.runWebGPUProgram(s, [i], i.dtype, a);
 }
 const Ep = {
-  kernelName: da,
+  kernelName: _i,
   backendName: "webgpu",
   kernelFunc: Mp
 };
@@ -6628,7 +6627,7 @@ function Hp(o) {
   return I;
 }
 const Gp = {
-  kernelName: la,
+  kernelName: Oi,
   backendName: "webgpu",
   kernelFunc: Hp
 };
@@ -6640,22 +6639,22 @@ function Xp(o) {
   ];
 }
 const Kp = {
-  kernelName: ca,
+  kernelName: Mi,
   backendName: "webgpu",
   kernelFunc: Xp
 };
 const qp = V({ opType: F.SUB, cpuKernelImpl: Cn, supportsComplex: !0 }), Yp = {
-  kernelName: ha,
+  kernelName: Ei,
   backendName: "webgpu",
   kernelFunc: qp
 };
 const jp = N({ opType: y.TAN }), Qp = {
-  kernelName: pa,
+  kernelName: Ui,
   backendName: "webgpu",
   kernelFunc: jp
 };
 const Zp = N({ opType: y.TANH }), Jp = {
-  kernelName: fa,
+  kernelName: Hi,
   backendName: "webgpu",
   kernelFunc: Zp
 };
@@ -6683,7 +6682,7 @@ function ef(o) {
   return p.forEach(($) => e.disposeData($.dataId)), P;
 }
 const tf = {
-  kernelName: ma,
+  kernelName: Gi,
   backendName: "webgpu",
   kernelFunc: ef
 };
@@ -6896,7 +6895,7 @@ function af(o) {
   return C = R({ inputs: { x: C }, attrs: { shape: w }, backend: e }), ne(e, k), [C, f];
 }
 const rf = {
-  kernelName: ga,
+  kernelName: Xi,
   backendName: "webgpu",
   kernelFunc: af
 };
@@ -7057,7 +7056,7 @@ function uf(o) {
   return e.runWebGPUProgram(x, [s, a], "float32", k);
 }
 const df = {
-  kernelName: xa,
+  kernelName: Ki,
   backendName: "webgpu",
   kernelFunc: uf
 };
@@ -7080,7 +7079,7 @@ function lf(o) {
   return l.forEach((m) => e.disposeData(m.dataId)), f;
 }
 const cf = {
-  kernelName: Ca,
+  kernelName: qi,
   backendName: "webgpu",
   kernelFunc: lf
 };
@@ -7104,7 +7103,7 @@ class hf {
           let flatIndex = b * uniforms.numSegments + segmentId % uniforms.numSegments;
           let value = getX(b, inCol);
-          ${Z("&result[flatIndex]", "value", this.type)}
+          ${Q("&result[flatIndex]", "value", this.type)}
         }
       }
     }
@@ -7114,10 +7113,10 @@ class hf {
 function pf(o) {
   const { inputs: t, backend: e, attrs: i } = o, { x: s, segmentIds: a } = t, { numSegments: r } = i, n = s.shape.length, u = [];
   let d = 0;
-  const h = ye([d], n);
+  const h = Ce([d], n);
   let l = s;
-  h != null && (l = K({ inputs: { x: s }, backend: e, attrs: { perm: h } }), u.push(l), d = Se(1, n)[0]);
-  const c = gs(l.shape, d, r), p = D([l.shape[d]]), f = R({ inputs: { x: l }, backend: e, attrs: { shape: [-1, p] } });
+  h != null && (l = K({ inputs: { x: s }, backend: e, attrs: { perm: h } }), u.push(l), d = we(1, n)[0]);
+  const c = Ca(l.shape, d, r), p = D([l.shape[d]]), f = R({ inputs: { x: l }, backend: e, attrs: { shape: [-1, p] } });
   u.push(f);
   const m = s.dtype, g = [f.shape[0], r], x = M({ backend: e, attrs: { shape: g, value: 0, dtype: m } }), C = new hf(f.shape, g, m), w = [
     { type: "int32", data: [r] },
@@ -7127,13 +7126,13 @@ function pf(o) {
   let P = I;
   if (h != null) {
     u.push(I);
-    const $ = mt(h);
+    const $ = ft(h);
     P = K({ inputs: { x: P }, backend: e, attrs: { perm: $ } });
   }
   return u.forEach(($) => e.disposeData($.dataId)), P;
 }
 const ff = {
-  kernelName: wa,
+  kernelName: Yi,
   backendName: "webgpu",
   kernelFunc: pf
 };
@@ -7302,7 +7301,7 @@ const mf = [
   dh
 ];
 for (const o of mf)
-  ya(o);
+  ji(o);
 export {
   Mt as WebGPUBackend
 };