@genai-fi/nanogpt 0.10.2 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +11761 -171
- package/dist/{RealDiv-zz7FpkKX.js → RealDiv-KAPDe8zB.js} +23 -25
- package/dist/Reshape-BYkmUnAv.js +14 -0
- package/dist/{Reshape-CHdUjC72.js → Reshape-Zt6eb7yh.js} +18 -20
- package/dist/TeachableLLM.js +10 -11
- package/dist/{axis_util-BsIr9ZNu.js → axis_util-BaG7mf5A.js} +3 -3
- package/dist/backend.js +2 -2
- package/dist/{backend_util-B1XRLuq9.js → backend_util-RCe-rHaj.js} +72 -73
- package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-DE3ACOLx.js} +45 -47
- package/dist/broadcast_to-B3eYlZm7.js +28 -0
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +7 -11
- package/dist/checks/normRMS.js +9 -9
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +12 -13
- package/dist/checks/rope.js +2 -2
- package/dist/clip_by_value-BnO7-a88.js +12 -0
- package/dist/complex-DjxcVmoX.js +11 -0
- package/dist/concat-BV8bt5H-.js +17 -0
- package/dist/{concat_util-iBYIyuQe.js → concat_util-DpW8mL_l.js} +1 -1
- package/dist/{dataset-D2P7rHAw.js → dataset-BcwmTGYc.js} +137 -139
- package/dist/dropout-BcvN9JYi.js +92 -0
- package/dist/expand_dims-DT4tEPwA.js +11 -0
- package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-Hta_rEnm.js} +1 -1
- package/dist/floor-D5QdR_le.js +9 -0
- package/dist/gather-D3JcZUaI.js +9 -0
- package/dist/{gelu-Bmhopi0J.js → gelu-CjNPL4OH.js} +10 -11
- package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-DAOmgtXR.js} +841 -1015
- package/dist/{index-DRyE072i.js → index-BwexR4lA.js} +262 -263
- package/dist/index-DOvlwCh-.js +3520 -0
- package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CCzYdUZg.js} +130 -132
- package/dist/layers/BaseLayer.js +15 -16
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +7 -7
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +9 -9
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +13 -14
- package/dist/log_sum_exp-ngO0-4pK.js +39 -0
- package/dist/main.js +49 -50
- package/dist/{matMul16-fEAJ4smh.js → matMul16-BWRSOCWB.js} +14 -15
- package/dist/matMulGelu-CzfgT6Wq.js +163 -0
- package/dist/mat_mul-SjpJRLyL.js +11 -0
- package/dist/mod-AnXEvvpo.js +11 -0
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/model.js +13 -14
- package/dist/ones-D2rT0xk2.js +14 -0
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +13 -14
- package/dist/ops/cpu/adamMoments.js +6 -7
- package/dist/ops/cpu/appendCache.js +7 -8
- package/dist/ops/cpu/attentionMask.js +7 -7
- package/dist/ops/cpu/fusedSoftmax.js +10 -11
- package/dist/ops/cpu/gatherSub.js +9 -10
- package/dist/ops/cpu/gelu.js +9 -10
- package/dist/ops/cpu/matMul16.js +6 -7
- package/dist/ops/cpu/matMulGelu.js +5 -6
- package/dist/ops/cpu/matMulMul.js +3 -4
- package/dist/ops/cpu/mulDropout.js +3 -4
- package/dist/ops/cpu/normRMS.js +10 -11
- package/dist/ops/cpu/qkv.js +8 -9
- package/dist/ops/cpu/rope.js +5 -6
- package/dist/ops/cpu/scatterSub.js +17 -19
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/add16.js +11 -12
- package/dist/ops/grads/attentionMask.js +5 -6
- package/dist/ops/grads/gelu.js +3 -4
- package/dist/ops/grads/matMul16.js +4 -5
- package/dist/ops/grads/matMulGelu.js +9 -10
- package/dist/ops/grads/normRMS.js +7 -8
- package/dist/ops/grads/pack16.js +4 -5
- package/dist/ops/grads/qkv.js +17 -19
- package/dist/ops/grads/rope.js +3 -5
- package/dist/ops/grads/softmax16.js +3 -4
- package/dist/ops/grads/unpack16.js +3 -4
- package/dist/ops/grads/utils.d.ts +1 -0
- package/dist/ops/grads/utils.js +8 -4
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +3 -4
- package/dist/ops/qkv.js +4 -8
- package/dist/ops/reshape16.js +14 -16
- package/dist/ops/rope.d.ts +1 -1
- package/dist/ops/rope.js +3 -8
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +5 -8
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +23 -24
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -3
- package/dist/ops/webgl/adamMoments.js +1 -2
- package/dist/ops/webgl/appendCache.js +1 -2
- package/dist/ops/webgl/attentionMask.js +4 -5
- package/dist/ops/webgl/fusedSoftmax.js +4 -6
- package/dist/ops/webgl/gatherSub.js +6 -7
- package/dist/ops/webgl/gelu.js +2 -3
- package/dist/ops/webgl/log.js +11 -12
- package/dist/ops/webgl/matMul16.js +10 -11
- package/dist/ops/webgl/matMulGelu.js +7 -111
- package/dist/ops/webgl/matMulMul.js +9 -10
- package/dist/ops/webgl/mulDropout.js +8 -9
- package/dist/ops/webgl/normRMS.js +2 -3
- package/dist/ops/webgl/qkv.js +5 -6
- package/dist/ops/webgl/rope.js +7 -8
- package/dist/ops/webgl/scatterSub.js +5 -6
- package/dist/ops/webgpu/adamAdjust.js +10 -12
- package/dist/ops/webgpu/adamMoments.js +8 -10
- package/dist/ops/webgpu/add16.js +8 -9
- package/dist/ops/webgpu/appendCache.js +23 -25
- package/dist/ops/webgpu/attentionMask.js +8 -10
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/concat16.js +12 -14
- package/dist/ops/webgpu/gatherSub.js +11 -13
- package/dist/ops/webgpu/gelu.js +28 -29
- package/dist/ops/webgpu/matMul16.js +26 -28
- package/dist/ops/webgpu/matMul16_program.js +4 -5
- package/dist/ops/webgpu/mul16.js +9 -10
- package/dist/ops/webgpu/normRMS.js +15 -17
- package/dist/ops/webgpu/normRMSGrad.js +21 -28
- package/dist/ops/webgpu/pack16.js +12 -13
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +16 -18
- package/dist/ops/webgpu/rope.js +25 -27
- package/dist/ops/webgpu/scatterSub.js +7 -9
- package/dist/ops/webgpu/slice16.js +21 -23
- package/dist/ops/webgpu/softmax16.js +17 -19
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +7 -8
- package/dist/ops/webgpu/sub16.js +7 -8
- package/dist/ops/webgpu/sum16.js +18 -20
- package/dist/ops/webgpu/transpose16.js +19 -20
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
- package/dist/ops/webgpu/unpack16.js +3 -4
- package/dist/ops/webgpu/utils/binary_op.js +7 -8
- package/dist/ops/webgpu/utils/reductions.js +14 -22
- package/dist/ops-B5yanEdW.js +476 -0
- package/dist/pack16-nQ6JaLo-.js +39 -0
- package/dist/patches/webgpu_backend.js +19 -20
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +21 -22
- package/dist/{random_width-BVV9HveY.js → random_width-or-CEftb.js} +2506 -2761
- package/dist/range-BklejeeW.js +10 -0
- package/dist/relu-CP0ZcxWO.js +9 -0
- package/dist/reshape-ByE68wS9.js +9 -0
- package/dist/resize_nearest_neighbor-B19mCEg2.js +175 -0
- package/dist/rope-Ir4mTyD1.js +24 -0
- package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-lvSiX8q4.js} +1 -1
- package/dist/selu_util-kbhpTdYD.js +44 -0
- package/dist/{shared-CHhxz-O5.js → shared-DT1TkE6w.js} +1 -1
- package/dist/{shared-D2NP_CpY.js → shared-dntlHIDQ.js} +343 -345
- package/dist/slice-BfEGSH82.js +12 -0
- package/dist/{slice_util-DyjSAD0u.js → slice_util-uTKwiEpW.js} +1 -1
- package/dist/{softmax-C9JQEtnO.js → softmax-CA5jFsLR.js} +4 -5
- package/dist/split-CVLc0w--.js +9 -0
- package/dist/squeeze-C7Z2srUo.js +10 -0
- package/dist/stack-Cf4n9h0N.js +11 -0
- package/dist/step-CINUs5QB.js +261 -0
- package/dist/sum-DWAtNGez.js +11 -0
- package/dist/tensor-DJoc7gJU.js +8 -0
- package/dist/tensor1d-D11P_7Dp.js +11 -0
- package/dist/{tensor2d-CSB4KOb0.js → tensor2d-Bs9wZRc7.js} +6 -7
- package/dist/{tensor4d-D7bLqGqz.js → tensor4d-BARPdTaS.js} +6 -7
- package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-y1cvNhLA.js} +255 -264
- package/dist/tile-mbfagpsB.js +11 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +5 -5
- package/dist/transpose-ClWiBS_b.js +36 -0
- package/dist/unsorted_segment_sum-BDDhB_E6.js +277 -0
- package/dist/utilities/dummy.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.d.ts +1 -4
- package/dist/utilities/packed.js +10 -745
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DzfrwYuP.js → variable-WawDEaAb.js} +1 -1
- package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-DuOXPQol.js} +178 -172
- package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-RxEF33Rj.js} +34 -35
- package/dist/zeros-KnWaWf-X.js +13 -0
- package/dist/zeros_like-DvE73F4e.js +721 -0
- package/package.json +4 -2
- package/dist/Reshape-CDVLyVfz.js +0 -16
- package/dist/broadcast_to-B0ChcDaz.js +0 -30
- package/dist/complex-BBiRlsVq.js +0 -13
- package/dist/concat-DmBLPVGC.js +0 -19
- package/dist/dropout-B1x1kYMa.js +0 -99
- package/dist/expand_dims-ouvfxQ1n.js +0 -13
- package/dist/gather-CH9sdacz.js +0 -10
- package/dist/index-D6Q1lPZO.js +0 -2157
- package/dist/log_sum_exp-D3ftBNY5.js +0 -41
- package/dist/mat_mul-C59XWcJd.js +0 -12
- package/dist/mod-DESSvHIU.js +0 -12
- package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
- package/dist/ones-jU9jlQvM.js +0 -15
- package/dist/ops-BFDtP6th.js +0 -645
- package/dist/pack16-CmVZs6af.js +0 -41
- package/dist/patches/PackedTensor.d.ts +0 -12
- package/dist/patches/PackedTensor.js +0 -11
- package/dist/patches/engine.d.ts +0 -261
- package/dist/patches/engine.js +0 -12
- package/dist/patches/tape.d.ts +0 -12
- package/dist/patches/tape.js +0 -5
- package/dist/range-ZZZD60Fx.js +0 -11
- package/dist/reciprocal-CrYlsAGD.js +0 -10
- package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
- package/dist/relu-BYDneVPn.js +0 -10
- package/dist/reshape-CaPQzFvz.js +0 -10
- package/dist/rope-s4W2XO9B.js +0 -32
- package/dist/selu_util-BGPXmd4B.js +0 -303
- package/dist/sin-Djs4aQiu.js +0 -16
- package/dist/slice-DvovR5wq.js +0 -13
- package/dist/split-DBck65sX.js +0 -10
- package/dist/squeeze-C00Ipm_7.js +0 -11
- package/dist/stack-ChnHwRpX.js +0 -13
- package/dist/sum-ywRJj3Zr.js +0 -12
- package/dist/tensor-0r5yOo2R.js +0 -8
- package/dist/tensor-CzmOBsdf.js +0 -909
- package/dist/tensor1d-BlUT89BP.js +0 -12
- package/dist/tensor_util-DfwaWayG.js +0 -523
- package/dist/tile-CR074jmp.js +0 -13
- package/dist/transpose-DH4gmHvu.js +0 -38
- package/dist/zeros-DBFVbpv5.js +0 -14
|
@@ -1,27 +1,26 @@
|
|
|
1
|
-
import { W as Mt } from "./backend_webgpu-
|
|
2
|
-
import { f as Et, j, J as ke } from "./index-
|
|
3
|
-
import { i as
|
|
4
|
-
import { e as X, a as L, N as Gt, Z as Xt, s as D, l as Ye, b as De, p as te, Y as Kt, g as qt, i as ut, j as Yt, z as dt, f as jt } from "./tensor-CzmOBsdf.js";
|
|
1
|
+
import { W as Mt } from "./backend_webgpu-DE3ACOLx.js";
|
|
2
|
+
import { f as Et, j as X, l as L, de as Ut, df as Ht, bZ as Gt, h as D, a3 as j, aX as Xt, ag as Ye, aQ as Kt, ac as qt, ak as fe, bR as Yt, c9 as jt, ca as Qt, bX as Zt, cQ as Jt, as as es, n as De, af as te, aS as ts, bo as ss, bp as os, bq as is, cb as as, cc as rs, cd as ns, ce as us, cf as ds, cg as ls, am as cs, b7 as hs, br as ps, cA as fs, cR as ms, cS as gs, M as xs, S as Cs, bt as ws, bf as ys, dg as Ss, b9 as bs, ar as vs, bU as ks, bV as Is, i as Rs, b_ as Ps, F as $s, cU as Ds, ap as Ns, H as zs, bv as As, cF as Fs, bw as Ws, cB as Ls, cV as Vs, cC as Bs, bx as Ts, by as _s, bh as Os, bz as Ms, bA as Es, cD as Us, ch as Hs, bB as Gs, cH as Xs, cI as Ks, dh as qs, ci as Ys, cW as js, cX as Qs, di as Zs, c2 as Js, Y as eo, bg as to, aI as so, cY as oo, bC as io, bD as ao, an as ro, I as no, b$ as uo, cr as lo, bi as co, J as ho, c0 as po, dj as fo, ad as at, bu as mo, cG as go, dk as xo, aj as Co, K as wo, at as ke, b1 as yo, b2 as So, cs as bo, cj as vo, ck as ko, cl as Io, aJ as Ro, b3 as Po, b4 as $o, dl as Do, ao as No, b5 as zo, b6 as Ao, bF as Fo, cn as Wo, cm as Lo, c_ as Vo, c1 as Bo, bG as To, cE as _o, c$ as Oo, d0 as Mo, dm as Eo, a$ as Uo, b8 as Ho, co as Go, N as Xo, Q as Ko, dn as qo, aq as Yo, bk as jo, bl as Qo, bH as Zo, d5 as Jo, bI as ei, W as ti, ab as si, bJ as oi, d1 as ii, aK as ai, c3 as ri, a2 as ni, aT as ui, cp as di, P as li, aL as ci, bd as hi, d2 as pi, be as fi, d3 as mi, bL as gi, bj as xi, ba as Ci, bM as wi, ai as yi, dp as Si, a_ as bi, bN as vi, aH as ki, cq as Ii, bO as Ri, bP as Pi, bE as $i, bK as Di, dq as Ni, dr as zi, T as Ai, ax as rt, ds as Fi, Z as Wi, U as Li, c5 as Vi, d4 as Bi, bb as Ti, aM as _i, ct as Oi, dt as Mi, c7 as Ei, cu as Ui, bs as Hi, du as Gi, cv as Xi, bn as Ki, bc as qi, bQ as Yi, p as ji } from "./index-DOvlwCh-.js";
|
|
3
|
+
import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-RxEF33Rj.js";
|
|
5
4
|
import { g as _e, B as F } from "./binary_op_util-pKXltfxI.js";
|
|
6
|
-
import { S as
|
|
7
|
-
import { E as
|
|
8
|
-
import { t as W, e as S, h as
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import { p as ka, a as Ia, b as Ra, d as Pa } from "./slice_util-
|
|
14
|
-
import { z as $a } from "./zeros-
|
|
15
|
-
import { c as me, a as Da } from "./concat_util-
|
|
5
|
+
import { S as Ji, a as ea } from "./selu_util-kbhpTdYD.js";
|
|
6
|
+
import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga, N as xa, O as Ca } from "./backend_util-RCe-rHaj.js";
|
|
7
|
+
import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as wa, a as ya } from "./webgpu_program-DuOXPQol.js";
|
|
8
|
+
import { r as R, a as Sa } from "./Reshape-BYkmUnAv.js";
|
|
9
|
+
import { s as ba } from "./shared-dntlHIDQ.js";
|
|
10
|
+
import { c as Oe, a as Ce, b as we, d as Me, e as va, g as ft } from "./axis_util-BaG7mf5A.js";
|
|
11
|
+
import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-CINUs5QB.js";
|
|
12
|
+
import { p as ka, a as Ia, b as Ra, d as Pa } from "./slice_util-uTKwiEpW.js";
|
|
13
|
+
import { z as $a } from "./zeros-KnWaWf-X.js";
|
|
14
|
+
import { c as me, a as Da } from "./concat_util-DpW8mL_l.js";
|
|
16
15
|
import { n as Na, a as za } from "./non_max_suppression_impl-B2W7YjZB.js";
|
|
17
|
-
import { c as He } from "./scatter_nd_util-
|
|
18
|
-
|
|
16
|
+
import { c as He } from "./scatter_nd_util-lvSiX8q4.js";
|
|
17
|
+
Qi() && Et(
|
|
19
18
|
"webgpu",
|
|
20
19
|
async () => {
|
|
21
20
|
const o = {
|
|
22
21
|
powerPreference: X().get("WEBGPU_USE_LOW_POWER_GPU") ? "low-power" : "high-performance"
|
|
23
22
|
}, t = await navigator.gpu.requestAdapter(o), e = {}, i = [];
|
|
24
|
-
t.features.has("timestamp-query") && i.push("timestamp-query"), t.features.has("bgra8unorm-storage") && i.push(["bgra8unorm-storage"]), e.requiredFeatures = i;
|
|
23
|
+
t.features.has("timestamp-query") && i.push("timestamp-query"), t.features.has("bgra8unorm-storage") && i.push(["bgra8unorm-storage"]), t.features.has("subgroups") && i.push("subgroups"), e.requiredFeatures = i;
|
|
25
24
|
const s = t.limits;
|
|
26
25
|
e.requiredLimits = {
|
|
27
26
|
maxComputeWorkgroupStorageSize: s.maxComputeWorkgroupStorageSize,
|
|
@@ -94,12 +93,12 @@ const Aa = "return abs(a);", Fa = `
|
|
|
94
93
|
// Error function is calculated approximately with elementary function.
|
|
95
94
|
// See "Handbook of Mathematical Functions with Formulas,
|
|
96
95
|
// Graphs, and Mathematical Tables", Abramowitz and Stegun.
|
|
97
|
-
let p = ${
|
|
98
|
-
let a1 = ${
|
|
99
|
-
let a2 = ${
|
|
100
|
-
let a3 = ${
|
|
101
|
-
let a4 = ${
|
|
102
|
-
let a5 = ${
|
|
96
|
+
let p = ${ta};
|
|
97
|
+
let a1 = ${sa};
|
|
98
|
+
let a2 = ${oa};
|
|
99
|
+
let a3 = ${ia};
|
|
100
|
+
let a4 = ${aa};
|
|
101
|
+
let a5 = ${ra};
|
|
103
102
|
|
|
104
103
|
let sign = sign(a);
|
|
105
104
|
let absA = abs(a);
|
|
@@ -116,9 +115,9 @@ const Aa = "return abs(a);", Fa = `
|
|
|
116
115
|
return select(a, vec4<f32>(0.0), a < vec4<f32>(0.0));
|
|
117
116
|
`, dr = "return round(a);", lr = "return inverseSqrt(a);", cr = `
|
|
118
117
|
if (a >= 0.0) {
|
|
119
|
-
return ${
|
|
118
|
+
return ${Ji} * a;
|
|
120
119
|
} else {
|
|
121
|
-
return ${
|
|
120
|
+
return ${ea} * (exp(a) - 1.0);
|
|
122
121
|
}
|
|
123
122
|
`, hr = "return 1.0 / (1.0 + exp(-1.0 * a));", pr = "return sign(a);", fr = "return sin(a);", mr = `
|
|
124
123
|
let e2x = exp(a);
|
|
@@ -604,7 +603,7 @@ class $r {
|
|
|
604
603
|
if (this.isVec4 = (d % 4 === 0 && !i || e[1] % 4 === 0 && i) && e[2] % 4 === 0 && !s, this.outputComponent = this.isVec4 ? 4 : 1, this.isVectorA = e[1] === 1 && !i, !this.isVec4 && this.isVectorA)
|
|
605
604
|
this.elementsPerThread = [1, 1, 1], this.workgroupSize = [32, 1, 1];
|
|
606
605
|
else {
|
|
607
|
-
const c =
|
|
606
|
+
const c = Zi(e[1], d, e[2], i);
|
|
608
607
|
this.workgroupSize = c.workgroupSize, this.elementsPerThread = c.elementsPerThread;
|
|
609
608
|
}
|
|
610
609
|
this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread);
|
|
@@ -773,7 +772,7 @@ class Fr {
|
|
|
773
772
|
// The problem is that we should initialize output to zero before using.
|
|
774
773
|
// Otherwise, the original value will be added to the result.
|
|
775
774
|
for (var i = 0; i < ${t}; i = i + 1) {
|
|
776
|
-
${
|
|
775
|
+
${Q("&result[flatIndex + i]", `${t > 1 ? "value[i]" : "value"}`, "float32")}
|
|
777
776
|
}
|
|
778
777
|
}
|
|
779
778
|
}
|
|
@@ -816,8 +815,8 @@ class Lr {
|
|
|
816
815
|
function M(o) {
|
|
817
816
|
const { backend: t, attrs: e } = o, { shape: i, value: s } = e;
|
|
818
817
|
let { dtype: a } = e;
|
|
819
|
-
if (a = a ||
|
|
820
|
-
const r =
|
|
818
|
+
if (a = a || Ht(s), a === "string") {
|
|
819
|
+
const r = Gt(a, D(i));
|
|
821
820
|
return r.fill(s), t.makeTensorInfo(i, a, r);
|
|
822
821
|
} else {
|
|
823
822
|
const r = new Lr(i), n = [{ type: "float32", data: [s] }];
|
|
@@ -825,7 +824,7 @@ function M(o) {
|
|
|
825
824
|
}
|
|
826
825
|
}
|
|
827
826
|
const Vr = {
|
|
828
|
-
kernelName:
|
|
827
|
+
kernelName: Ut,
|
|
829
828
|
backendName: "webgpu",
|
|
830
829
|
kernelFunc: M
|
|
831
830
|
};
|
|
@@ -897,7 +896,7 @@ function Br(o) {
|
|
|
897
896
|
});
|
|
898
897
|
}
|
|
899
898
|
const Tr = {
|
|
900
|
-
kernelName:
|
|
899
|
+
kernelName: Xt,
|
|
901
900
|
backendName: "webgpu",
|
|
902
901
|
kernelFunc: Br
|
|
903
902
|
};
|
|
@@ -983,7 +982,7 @@ function U(o) {
|
|
|
983
982
|
return o.backend.incRef(e.dataId), { dataId: e.dataId, shape: e.shape, dtype: e.dtype };
|
|
984
983
|
}
|
|
985
984
|
const _r = {
|
|
986
|
-
kernelName:
|
|
985
|
+
kernelName: Kt,
|
|
987
986
|
backendName: "webgpu",
|
|
988
987
|
kernelFunc: U
|
|
989
988
|
};
|
|
@@ -992,7 +991,7 @@ function oe(o) {
|
|
|
992
991
|
return r.complexTensorInfos = { real: n, imag: u }, a;
|
|
993
992
|
}
|
|
994
993
|
const Or = {
|
|
995
|
-
kernelName:
|
|
994
|
+
kernelName: qt,
|
|
996
995
|
backendName: "webgpu",
|
|
997
996
|
kernelFunc: oe
|
|
998
997
|
};
|
|
@@ -1094,22 +1093,22 @@ function V({ opType: o, cpuKernelImpl: t, supportsComplex: e = !1, dtype: i }) {
|
|
|
1094
1093
|
}
|
|
1095
1094
|
const { addImpl: Mr, castImpl: Er, ceilImpl: Ur, concatImpl: Hr, equalImpl: Gr, expImpl: Xr, expm1Impl: Kr, floorImpl: qr, floorDivImpl: Yr, gatherNdImpl: jr, gatherV2Impl: Qr, greaterEqualImpl: Zr, greaterImpl: Jr, lessEqualImpl: en, lessImpl: tn, logImpl: sn, maxImpl: on, maximumImpl: an, minimumImpl: rn, multiplyImpl: nn, negImpl: un, notEqualImpl: dn, prodImpl: ln, rangeImpl: cn, rsqrtImpl: hn, scatterImpl: pn, simpleAbsImpl: fn, sliceImpl: mn, stridedSliceImpl: gn, stringNGramsImpl: xn, subImpl: Cn, tileImpl: wn, topKImpl: yn, transposeImpl: Sn } = ba;
|
|
1096
1095
|
const bn = N({ opType: y.ABS, cpuKernelImpl: fn }), vn = {
|
|
1097
|
-
kernelName:
|
|
1096
|
+
kernelName: Yt,
|
|
1098
1097
|
backendName: "webgpu",
|
|
1099
1098
|
kernelFunc: bn
|
|
1100
1099
|
};
|
|
1101
1100
|
const kn = N({ opType: y.ACOS }), In = {
|
|
1102
|
-
kernelName:
|
|
1101
|
+
kernelName: jt,
|
|
1103
1102
|
backendName: "webgpu",
|
|
1104
1103
|
kernelFunc: kn
|
|
1105
1104
|
};
|
|
1106
1105
|
const Rn = N({ opType: y.ACOSH }), Pn = {
|
|
1107
|
-
kernelName:
|
|
1106
|
+
kernelName: Qt,
|
|
1108
1107
|
backendName: "webgpu",
|
|
1109
1108
|
kernelFunc: Rn
|
|
1110
1109
|
};
|
|
1111
1110
|
const $n = V({ opType: F.ADD, cpuKernelImpl: Mr, supportsComplex: !0 }), Dn = {
|
|
1112
|
-
kernelName:
|
|
1111
|
+
kernelName: Zt,
|
|
1113
1112
|
backendName: "webgpu",
|
|
1114
1113
|
kernelFunc: $n
|
|
1115
1114
|
};
|
|
@@ -1146,7 +1145,7 @@ function zn(o) {
|
|
|
1146
1145
|
return e.runWebGPUProgram(r, i, s);
|
|
1147
1146
|
}
|
|
1148
1147
|
const An = {
|
|
1149
|
-
kernelName:
|
|
1148
|
+
kernelName: Jt,
|
|
1150
1149
|
backendName: "webgpu",
|
|
1151
1150
|
kernelFunc: zn
|
|
1152
1151
|
};
|
|
@@ -1232,14 +1231,14 @@ function K(o) {
|
|
|
1232
1231
|
return r.runWebGPUProgram(d, [s], s.dtype);
|
|
1233
1232
|
}
|
|
1234
1233
|
const Ln = {
|
|
1235
|
-
kernelName:
|
|
1234
|
+
kernelName: es,
|
|
1236
1235
|
backendName: "webgpu",
|
|
1237
1236
|
kernelFunc: K
|
|
1238
1237
|
};
|
|
1239
1238
|
class Vn {
|
|
1240
1239
|
constructor(t, e, i) {
|
|
1241
1240
|
this.variableNames = ["x"], this.uniforms = "reduceSize : i32,", this.size = !0, this.inputShape = [t.batchSize, t.inSize];
|
|
1242
|
-
const [s] =
|
|
1241
|
+
const [s] = Oe(this.inputShape, [1]);
|
|
1243
1242
|
this.outputShape = s.length === 0 ? [1] : s, t.inSize >= 32768 && i >= 512 ? this.workgroupSize = [512, 1, 1] : t.inSize >= 4096 ? this.workgroupSize = [256, 1, 1] : this.workgroupSize = [64, 1, 1], this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, [1, 1, 1]), this.reduceType = e, this.shaderKey = `reduce_${e}`;
|
|
1244
1243
|
}
|
|
1245
1244
|
getUserCode() {
|
|
@@ -1309,10 +1308,10 @@ const Bn = {
|
|
|
1309
1308
|
function ie(o, t, e, i, s) {
|
|
1310
1309
|
const a = o.shape.length, r = [], n = te(t, o.shape);
|
|
1311
1310
|
let u = n;
|
|
1312
|
-
const d =
|
|
1311
|
+
const d = Ce(u, a);
|
|
1313
1312
|
let h = o;
|
|
1314
|
-
d != null && (h = K({ inputs: { x: o }, attrs: { perm: d }, backend: s }), u =
|
|
1315
|
-
const [l, c] =
|
|
1313
|
+
d != null && (h = K({ inputs: { x: o }, attrs: { perm: d }, backend: s }), u = we(u.length, a), r.push(h)), Me(i, u, a);
|
|
1314
|
+
const [l, c] = Oe(h.shape, u);
|
|
1316
1315
|
let p = l;
|
|
1317
1316
|
e && (p = va(l, n));
|
|
1318
1317
|
let f;
|
|
@@ -1331,7 +1330,7 @@ function ie(o, t, e, i, s) {
|
|
|
1331
1330
|
throw new Error(`${i} CPU implementation is not yet supported.`);
|
|
1332
1331
|
}
|
|
1333
1332
|
} else {
|
|
1334
|
-
const m = D(c), x = D(h.shape) / m, C = { windowSize: m, inSize: m, batchSize: x, outSize: 1 }, w = Bn[i] ||
|
|
1333
|
+
const m = D(c), x = D(h.shape) / m, C = { windowSize: m, inSize: m, batchSize: x, outSize: 1 }, w = Bn[i] || ts(o.dtype), k = [
|
|
1335
1334
|
{ type: "int32", data: [m] }
|
|
1336
1335
|
], I = new Vn(C, i, s.device.limits.maxComputeWorkgroupSizeX), P = s.runWebGPUProgram(I, [h], w, k);
|
|
1337
1336
|
r.push(P), f = R({ inputs: { x: P }, attrs: { shape: p }, backend: s });
|
|
@@ -1343,7 +1342,7 @@ function Tn(o) {
|
|
|
1343
1342
|
return ie(s, r, a, "all", e);
|
|
1344
1343
|
}
|
|
1345
1344
|
const _n = {
|
|
1346
|
-
kernelName:
|
|
1345
|
+
kernelName: ss,
|
|
1347
1346
|
backendName: "webgpu",
|
|
1348
1347
|
kernelFunc: Tn
|
|
1349
1348
|
};
|
|
@@ -1352,7 +1351,7 @@ function On(o) {
|
|
|
1352
1351
|
return ie(s, r, a, "any", e);
|
|
1353
1352
|
}
|
|
1354
1353
|
const Mn = {
|
|
1355
|
-
kernelName:
|
|
1354
|
+
kernelName: os,
|
|
1356
1355
|
backendName: "webgpu",
|
|
1357
1356
|
kernelFunc: On
|
|
1358
1357
|
};
|
|
@@ -1361,7 +1360,7 @@ class Ct {
|
|
|
1361
1360
|
this.workgroupSize = [64, 1, 1], this.variableNames = ["x"], this.uniforms = "infinityValue : f32,", this.size = !0;
|
|
1362
1361
|
const s = [e];
|
|
1363
1362
|
this.op = i === "min" ? "<" : ">";
|
|
1364
|
-
const [a, r] =
|
|
1363
|
+
const [a, r] = Oe(t, s);
|
|
1365
1364
|
this.outputShape = a.length === 0 ? [1] : a, this.dispatchLayout = v(this.outputShape), D(r) < 32 ? (this.type = "plain", this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize)) : (this.type = "shared", this.dispatch = b(this.dispatchLayout, this.outputShape, [1, 1, 1])), this.inputShape = t, this.shaderKey = `argMinMax_${this.op}_${this.type}`;
|
|
1366
1365
|
}
|
|
1367
1366
|
getUserCode() {
|
|
@@ -1446,55 +1445,55 @@ class Ct {
|
|
|
1446
1445
|
function En(o) {
|
|
1447
1446
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { axis: a } = i;
|
|
1448
1447
|
let r = te(a, s.shape);
|
|
1449
|
-
const n =
|
|
1448
|
+
const n = Ce(r, s.shape.length);
|
|
1450
1449
|
let u = s;
|
|
1451
1450
|
const d = [];
|
|
1452
|
-
n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r =
|
|
1451
|
+
n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r = we(r.length, u.shape.length)), Me("argMax", [r[0]], u.shape.length);
|
|
1453
1452
|
const h = new Ct(u.shape, r[0], "max"), l = [{ type: "float32", data: [Number.NEGATIVE_INFINITY] }], c = e.runWebGPUProgram(h, [u], "int32", l);
|
|
1454
1453
|
return d.forEach((p) => e.disposeData(p.dataId)), c;
|
|
1455
1454
|
}
|
|
1456
1455
|
const Un = {
|
|
1457
|
-
kernelName:
|
|
1456
|
+
kernelName: is,
|
|
1458
1457
|
backendName: "webgpu",
|
|
1459
1458
|
kernelFunc: En
|
|
1460
1459
|
};
|
|
1461
1460
|
function Hn(o) {
|
|
1462
1461
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { axis: a } = i;
|
|
1463
1462
|
let r = te(a, s.shape);
|
|
1464
|
-
const n =
|
|
1463
|
+
const n = Ce(r, s.shape.length);
|
|
1465
1464
|
let u = s;
|
|
1466
1465
|
const d = [];
|
|
1467
|
-
n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r =
|
|
1466
|
+
n != null && (u = K({ inputs: { x: s }, backend: e, attrs: { perm: n } }), d.push(u), r = we(r.length, u.shape.length)), Me("argMin", [r[0]], u.shape.length);
|
|
1468
1467
|
const h = new Ct(u.shape, r[0], "min"), l = [{ type: "float32", data: [Number.POSITIVE_INFINITY] }], c = e.runWebGPUProgram(h, [u], "int32", l);
|
|
1469
1468
|
return d.forEach((p) => e.disposeData(p.dataId)), c;
|
|
1470
1469
|
}
|
|
1471
1470
|
const Gn = {
|
|
1472
|
-
kernelName:
|
|
1471
|
+
kernelName: as,
|
|
1473
1472
|
backendName: "webgpu",
|
|
1474
1473
|
kernelFunc: Hn
|
|
1475
1474
|
};
|
|
1476
1475
|
const Xn = N({ opType: y.ASIN }), Kn = {
|
|
1477
|
-
kernelName:
|
|
1476
|
+
kernelName: rs,
|
|
1478
1477
|
backendName: "webgpu",
|
|
1479
1478
|
kernelFunc: Xn
|
|
1480
1479
|
};
|
|
1481
1480
|
const qn = N({ opType: y.ASINH }), Yn = {
|
|
1482
|
-
kernelName:
|
|
1481
|
+
kernelName: ns,
|
|
1483
1482
|
backendName: "webgpu",
|
|
1484
1483
|
kernelFunc: qn
|
|
1485
1484
|
};
|
|
1486
1485
|
const jn = N({ opType: y.ATAN }), Qn = {
|
|
1487
|
-
kernelName:
|
|
1486
|
+
kernelName: us,
|
|
1488
1487
|
backendName: "webgpu",
|
|
1489
1488
|
kernelFunc: jn
|
|
1490
1489
|
};
|
|
1491
1490
|
const Zn = V({ opType: F.ATAN2 }), Jn = {
|
|
1492
|
-
kernelName:
|
|
1491
|
+
kernelName: ds,
|
|
1493
1492
|
backendName: "webgpu",
|
|
1494
1493
|
kernelFunc: Zn
|
|
1495
1494
|
};
|
|
1496
1495
|
const eu = N({ opType: y.ATANH }), tu = {
|
|
1497
|
-
kernelName:
|
|
1496
|
+
kernelName: ls,
|
|
1498
1497
|
backendName: "webgpu",
|
|
1499
1498
|
kernelFunc: eu
|
|
1500
1499
|
};
|
|
@@ -1642,7 +1641,7 @@ function wt(o) {
|
|
|
1642
1641
|
return ie(s, a, r, "max", e);
|
|
1643
1642
|
}
|
|
1644
1643
|
const ou = {
|
|
1645
|
-
kernelName:
|
|
1644
|
+
kernelName: cs,
|
|
1646
1645
|
backendName: "webgpu",
|
|
1647
1646
|
kernelFunc: wt
|
|
1648
1647
|
};
|
|
@@ -1651,7 +1650,7 @@ function yt(o) {
|
|
|
1651
1650
|
return ie(s, r, a, "mean", e);
|
|
1652
1651
|
}
|
|
1653
1652
|
const iu = {
|
|
1654
|
-
kernelName:
|
|
1653
|
+
kernelName: hs,
|
|
1655
1654
|
backendName: "webgpu",
|
|
1656
1655
|
kernelFunc: yt
|
|
1657
1656
|
};
|
|
@@ -1690,11 +1689,11 @@ function St(o, t, e, i) {
|
|
|
1690
1689
|
})), i.runWebGPUProgram(s, [o], o.dtype, a);
|
|
1691
1690
|
}
|
|
1692
1691
|
function au(o) {
|
|
1693
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h =
|
|
1692
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h = ye(s.shape, a, r, 1, n, u);
|
|
1694
1693
|
return St(s, h, "avg", e);
|
|
1695
1694
|
}
|
|
1696
1695
|
const ru = {
|
|
1697
|
-
kernelName:
|
|
1696
|
+
kernelName: ps,
|
|
1698
1697
|
backendName: "webgpu",
|
|
1699
1698
|
kernelFunc: au
|
|
1700
1699
|
};
|
|
@@ -1724,7 +1723,7 @@ function nu(o) {
|
|
|
1724
1723
|
return e.runWebGPUProgram(c, [s], s.dtype, p);
|
|
1725
1724
|
}
|
|
1726
1725
|
const uu = {
|
|
1727
|
-
kernelName:
|
|
1726
|
+
kernelName: fs,
|
|
1728
1727
|
backendName: "webgpu",
|
|
1729
1728
|
kernelFunc: nu
|
|
1730
1729
|
};
|
|
@@ -1862,14 +1861,14 @@ function cu(o) {
|
|
|
1862
1861
|
return e.runWebGPUProgram(c, [s], r.dtype, f);
|
|
1863
1862
|
}
|
|
1864
1863
|
const hu = {
|
|
1865
|
-
kernelName:
|
|
1864
|
+
kernelName: ms,
|
|
1866
1865
|
backendName: "webgpu",
|
|
1867
1866
|
kernelFunc: cu
|
|
1868
1867
|
};
|
|
1869
1868
|
function pu(o) {
|
|
1870
1869
|
const { inputs: t, backend: e, attrs: i } = o, { dy: s, input: a } = t, r = a;
|
|
1871
|
-
|
|
1872
|
-
const { filterSize: n, strides: u, pad: d } = i, h =
|
|
1870
|
+
nt([s, a], "avgPoolGrad");
|
|
1871
|
+
const { filterSize: n, strides: u, pad: d } = i, h = ye(r.shape, n, u, 1, d), l = new du(h), c = 1 / (h.filterHeight * h.filterWidth), p = [
|
|
1873
1872
|
{ type: "int32", data: [h.strideHeight, h.strideWidth] },
|
|
1874
1873
|
{
|
|
1875
1874
|
type: "int32",
|
|
@@ -1890,7 +1889,7 @@ function pu(o) {
|
|
|
1890
1889
|
return e.runWebGPUProgram(l, [s], r.dtype, p);
|
|
1891
1890
|
}
|
|
1892
1891
|
const fu = {
|
|
1893
|
-
kernelName:
|
|
1892
|
+
kernelName: gs,
|
|
1894
1893
|
backendName: "webgpu",
|
|
1895
1894
|
kernelFunc: pu
|
|
1896
1895
|
};
|
|
@@ -1899,7 +1898,7 @@ function mu(o) {
|
|
|
1899
1898
|
return Fe({ a: s, b: a, transposeA: r, transposeB: n, backend: e });
|
|
1900
1899
|
}
|
|
1901
1900
|
const gu = {
|
|
1902
|
-
kernelName:
|
|
1901
|
+
kernelName: xs,
|
|
1903
1902
|
backendName: "webgpu",
|
|
1904
1903
|
kernelFunc: mu
|
|
1905
1904
|
};
|
|
@@ -1943,14 +1942,14 @@ function de(o) {
|
|
|
1943
1942
|
return e.runWebGPUProgram(d, [s], s.dtype, h);
|
|
1944
1943
|
}
|
|
1945
1944
|
const wu = {
|
|
1946
|
-
kernelName:
|
|
1945
|
+
kernelName: Cs,
|
|
1947
1946
|
backendName: "webgpu",
|
|
1948
1947
|
kernelFunc: de
|
|
1949
1948
|
};
|
|
1950
1949
|
const yu = (o) => {
|
|
1951
1950
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { blockShape: a, crops: r } = i;
|
|
1952
1951
|
L(s.shape.length <= 4, () => "batchToSpaceND for rank > 4 with a WebGPU backend not implemented yet");
|
|
1953
|
-
const n = a.reduce((C, w) => C * w), u =
|
|
1952
|
+
const n = a.reduce((C, w) => C * w), u = lt(s.shape, a, n), d = ct(u.length, a.length), h = ht(s.shape, a, n), l = na(r, a.length), c = ua(h, r, a.length), p = [], f = R({ inputs: { x: s }, backend: e, attrs: { shape: u } }), m = K({ inputs: { x: f }, backend: e, attrs: { perm: d } }), g = R({
|
|
1954
1953
|
inputs: { x: m },
|
|
1955
1954
|
backend: e,
|
|
1956
1955
|
attrs: { shape: h }
|
|
@@ -1961,13 +1960,13 @@ const yu = (o) => {
|
|
|
1961
1960
|
});
|
|
1962
1961
|
return p.push(f), p.push(m), p.push(g), p.forEach((C) => e.disposeData(C.dataId)), x;
|
|
1963
1962
|
}, Su = {
|
|
1964
|
-
kernelName:
|
|
1963
|
+
kernelName: ws,
|
|
1965
1964
|
backendName: "webgpu",
|
|
1966
1965
|
kernelFunc: yu
|
|
1967
1966
|
};
|
|
1968
1967
|
const bu = `
|
|
1969
1968
|
fn bincount_write(index: i32, value: f32) {
|
|
1970
|
-
${
|
|
1969
|
+
${Q("&result[index]", "value", "float32")}
|
|
1971
1970
|
}
|
|
1972
1971
|
`, vu = `
|
|
1973
1972
|
fn bincount_write(index: i32, value: f32) {
|
|
@@ -2005,7 +2004,7 @@ function ku(o) {
|
|
|
2005
2004
|
return e.runWebGPUProgram(p, m, l, f, c);
|
|
2006
2005
|
}
|
|
2007
2006
|
const Iu = {
|
|
2008
|
-
kernelName:
|
|
2007
|
+
kernelName: ys,
|
|
2009
2008
|
backendName: "webgpu",
|
|
2010
2009
|
kernelFunc: ku
|
|
2011
2010
|
};
|
|
@@ -2052,7 +2051,7 @@ function Pu(o) {
|
|
|
2052
2051
|
return e.runWebGPUProgram(u, [i, s], "int32", d);
|
|
2053
2052
|
}
|
|
2054
2053
|
const $u = {
|
|
2055
|
-
kernelName:
|
|
2054
|
+
kernelName: Ss,
|
|
2056
2055
|
backendName: "webgpu",
|
|
2057
2056
|
kernelFunc: Pu
|
|
2058
2057
|
};
|
|
@@ -2061,7 +2060,7 @@ const vt = V({
|
|
|
2061
2060
|
dtype: "bool",
|
|
2062
2061
|
cpuKernelImpl: dn
|
|
2063
2062
|
}), Du = {
|
|
2064
|
-
kernelName:
|
|
2063
|
+
kernelName: bs,
|
|
2065
2064
|
backendName: "webgpu",
|
|
2066
2065
|
kernelFunc: vt
|
|
2067
2066
|
};
|
|
@@ -2070,7 +2069,7 @@ function be(o) {
|
|
|
2070
2069
|
return U({ inputs: { x: s.complexTensorInfos.real }, backend: e });
|
|
2071
2070
|
}
|
|
2072
2071
|
const Nu = {
|
|
2073
|
-
kernelName:
|
|
2072
|
+
kernelName: vs,
|
|
2074
2073
|
backendName: "webgpu",
|
|
2075
2074
|
kernelFunc: be
|
|
2076
2075
|
};
|
|
@@ -2090,7 +2089,7 @@ function Be(o) {
|
|
|
2090
2089
|
const r = be({ inputs: { input: s }, backend: e }), n = Be({ inputs: { x: r }, backend: e, attrs: { dtype: a } });
|
|
2091
2090
|
return e.disposeData(r.dataId), n;
|
|
2092
2091
|
}
|
|
2093
|
-
if (!
|
|
2092
|
+
if (!Is(s.dtype, a)) {
|
|
2094
2093
|
const r = U({ inputs: { x: s }, backend: e });
|
|
2095
2094
|
return { dataId: r.dataId, shape: r.shape, dtype: a };
|
|
2096
2095
|
}
|
|
@@ -2101,18 +2100,18 @@ function Be(o) {
|
|
|
2101
2100
|
if (a === "int32")
|
|
2102
2101
|
return zu(s, e);
|
|
2103
2102
|
if (a === "bool") {
|
|
2104
|
-
const r = e.makeTensorInfo([], "bool",
|
|
2103
|
+
const r = e.makeTensorInfo([], "bool", Rs("bool", 1)), u = vt({ inputs: { a: s, b: r }, backend: e });
|
|
2105
2104
|
return e.disposeData(r.dataId), u;
|
|
2106
2105
|
}
|
|
2107
2106
|
throw new Error(`Error in Cast: failed to cast ${s.dtype} to ${a}`);
|
|
2108
2107
|
}
|
|
2109
2108
|
const Au = {
|
|
2110
|
-
kernelName:
|
|
2109
|
+
kernelName: ks,
|
|
2111
2110
|
backendName: "webgpu",
|
|
2112
2111
|
kernelFunc: Be
|
|
2113
2112
|
};
|
|
2114
2113
|
const Fu = N({ opType: y.CEIL, cpuKernelImpl: Ur }), Wu = {
|
|
2115
|
-
kernelName:
|
|
2114
|
+
kernelName: Ps,
|
|
2116
2115
|
backendName: "webgpu",
|
|
2117
2116
|
kernelFunc: Fu
|
|
2118
2117
|
};
|
|
@@ -2163,7 +2162,7 @@ function Bu(o) {
|
|
|
2163
2162
|
return D(s.shape) % 4 === 0 ? n = new Lu(s.shape) : n = new Vu(s.shape), e.runWebGPUProgram(n, [s], s.dtype, u);
|
|
2164
2163
|
}
|
|
2165
2164
|
const Tu = {
|
|
2166
|
-
kernelName:
|
|
2165
|
+
kernelName: $s,
|
|
2167
2166
|
backendName: "webgpu",
|
|
2168
2167
|
kernelFunc: Bu
|
|
2169
2168
|
};
|
|
@@ -2202,7 +2201,7 @@ function Ou(o) {
|
|
|
2202
2201
|
return e.runWebGPUProgram(a, r, r[0].dtype);
|
|
2203
2202
|
}
|
|
2204
2203
|
const Mu = {
|
|
2205
|
-
kernelName:
|
|
2204
|
+
kernelName: Ds,
|
|
2206
2205
|
backendName: "webgpu",
|
|
2207
2206
|
kernelFunc: Ou
|
|
2208
2207
|
};
|
|
@@ -2249,7 +2248,7 @@ function We(o) {
|
|
|
2249
2248
|
return U({ inputs: { x: s.complexTensorInfos.imag }, backend: e });
|
|
2250
2249
|
}
|
|
2251
2250
|
const Uu = {
|
|
2252
|
-
kernelName:
|
|
2251
|
+
kernelName: Ns,
|
|
2253
2252
|
backendName: "webgpu",
|
|
2254
2253
|
kernelFunc: We
|
|
2255
2254
|
};
|
|
@@ -2317,7 +2316,7 @@ function kt(o) {
|
|
|
2317
2316
|
return u.length === 1 ? U({ inputs: { x: u[0] }, backend: e }) : he(u, a, e);
|
|
2318
2317
|
}
|
|
2319
2318
|
const Gu = {
|
|
2320
|
-
kernelName:
|
|
2319
|
+
kernelName: zs,
|
|
2321
2320
|
backendName: "webgpu",
|
|
2322
2321
|
kernelFunc: kt
|
|
2323
2322
|
};
|
|
@@ -2411,7 +2410,7 @@ function Xu(o, t, e, i, s = !1, a = null, r = !1, n = 4, u = 4, d = 4) {
|
|
|
2411
2410
|
}
|
|
2412
2411
|
class Ku {
|
|
2413
2412
|
constructor(t, e, i, s, a = !1, r = null, n = !1, u = !1) {
|
|
2414
|
-
this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, dilations : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.outShape, this.isChannelsLast = t.dataFormat === "channelsLast", this.isVec4 = ((t.inChannels % 4 === 0 || t.inChannels % 3 === 0) && this.isChannelsLast || t.outWidth % 4 === 0 && !this.isChannelsLast) && t.outChannels % 4 === 0, this.dispatchLayout = this.isChannelsLast ? { x: [3], y: [1, 2], z: [0] } : { x: [2, 3], y: [1], z: [0] }, this.workgroupSize =
|
|
2413
|
+
this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, dilations : vec2<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.outShape, this.isChannelsLast = t.dataFormat === "channelsLast", this.isVec4 = ((t.inChannels % 4 === 0 || t.inChannels % 3 === 0) && this.isChannelsLast || t.outWidth % 4 === 0 && !this.isChannelsLast) && t.outChannels % 4 === 0, this.dispatchLayout = this.isChannelsLast ? { x: [3], y: [1, 2], z: [0] } : { x: [2, 3], y: [1], z: [0] }, this.workgroupSize = ut(this.dispatchLayout, this.outputShape, this.isVec4), this.elementsPerThread = dt(this.dispatchLayout, this.outputShape, this.isVec4), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread), this.isVec4 ? (this.outputComponent = 4, this.isChannelsLast && t.inChannels % 4 !== 0 ? (this.innerElementSize = 3, this.variableComponents = [1, 4]) : (this.innerElementSize = 4, this.variableComponents = [4, 4]), a && (this.variableNames.push("bias"), this.variableComponents.push(4)), n && (this.variableNames.push("preluActivationWeights"), this.variableComponents.push(4))) : (this.innerElementSize = this.elementsPerThread[0], a && this.variableNames.push("bias"), n && this.variableNames.push("preluActivationWeights")), this.sequentialAccessByThreads = u, this.addBias = a, this.activation = r, this.hasPreluActivationWeights = n, this.tileAOuter = this.workgroupSize[1] * this.elementsPerThread[1], this.tileBOuter = this.workgroupSize[0] * this.elementsPerThread[0], this.tileInner = Math.max(this.workgroupSize[0] * this.innerElementSize, this.workgroupSize[1]), this.fitAOuter = e % this.tileAOuter === 0, this.fitBOuter = i % this.tileBOuter === 0, this.fitInner = s % this.tileInner === 0, this.shaderKey = `conv2DMM_${this.elementsPerThread}_${this.activation}}_${this.fitAOuter}_${this.fitBOuter}_${this.fitInner}_${this.isVec4}_${this.innerElementSize}_${this.isChannelsLast}_${this.sequentialAccessByThreads}`;
|
|
2415
2414
|
}
|
|
2416
2415
|
getUserCode() {
|
|
2417
2416
|
const t = this.isVec4 ? ze(this.elementsPerThread, this.workgroupSize, !this.isChannelsLast, this.tileInner) : Ae(this.elementsPerThread, this.workgroupSize, !this.isChannelsLast, this.tileInner, !1, null, this.sequentialAccessByThreads), e = this.isVec4 ? [this.innerElementSize, 4, 4] : [1, 1, 1];
|
|
@@ -2675,11 +2674,11 @@ function It({ x: o, filter: t, convInfo: e, backend: i, bias: s = null, preluAct
|
|
|
2675
2674
|
return I;
|
|
2676
2675
|
}
|
|
2677
2676
|
function Zu(o) {
|
|
2678
|
-
const { inputs: t, attrs: e, backend: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = e, l =
|
|
2677
|
+
const { inputs: t, attrs: e, backend: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = e, l = Se(u), c = Z(s.shape, a.shape, r, d, n, h, !1, l);
|
|
2679
2678
|
return It({ x: s, filter: a, convInfo: c, backend: i });
|
|
2680
2679
|
}
|
|
2681
2680
|
const Ju = {
|
|
2682
|
-
kernelName:
|
|
2681
|
+
kernelName: As,
|
|
2683
2682
|
backendName: "webgpu",
|
|
2684
2683
|
kernelFunc: Zu
|
|
2685
2684
|
};
|
|
@@ -2996,7 +2995,7 @@ class od {
|
|
|
2996
2995
|
}
|
|
2997
2996
|
}
|
|
2998
2997
|
function id(o) {
|
|
2999
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, dataFormat: u, dimRoundingMode: d, filterShape: h } = i, l =
|
|
2998
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, dataFormat: u, dimRoundingMode: d, filterShape: h } = i, l = Se(u), c = Z(s.shape, h, r, 1, n, d, !1, l), p = new td(c), f = [
|
|
3000
2999
|
{ type: "int32", data: [c.padInfo.top, c.padInfo.left] },
|
|
3001
3000
|
{ type: "int32", data: [c.strideHeight, c.strideWidth] },
|
|
3002
3001
|
{ type: "int32", data: [c.batchSize] },
|
|
@@ -3008,7 +3007,7 @@ function id(o) {
|
|
|
3008
3007
|
return e.runWebGPUProgram(p, [s, a], s.dtype, f);
|
|
3009
3008
|
}
|
|
3010
3009
|
const ad = {
|
|
3011
|
-
kernelName:
|
|
3010
|
+
kernelName: Fs,
|
|
3012
3011
|
backendName: "webgpu",
|
|
3013
3012
|
kernelFunc: id
|
|
3014
3013
|
};
|
|
@@ -3087,7 +3086,7 @@ function rd(o = 4) {
|
|
|
3087
3086
|
}
|
|
3088
3087
|
class nd {
|
|
3089
3088
|
constructor(t) {
|
|
3090
|
-
this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.inShape, L(t.dataFormat === "channelsLast", () => "TODO: NCHW is unimplemented"), this.isVec4 = t.inChannels % 4 === 0 && t.outChannels % 4 === 0, this.dispatchLayout = { x: [3], y: [1, 2], z: [0] }, this.workgroupSize =
|
|
3089
|
+
this.variableNames = ["x", "W"], this.uniforms = "filterDims : vec2<i32>, pads : vec2<i32>, strides : vec2<i32>, outBackprop : vec4<i32>, dimAOuter : i32, dimBOuter : i32, dimInner : i32,", this.outputShape = t.inShape, L(t.dataFormat === "channelsLast", () => "TODO: NCHW is unimplemented"), this.isVec4 = t.inChannels % 4 === 0 && t.outChannels % 4 === 0, this.dispatchLayout = { x: [3], y: [1, 2], z: [0] }, this.workgroupSize = ut(this.dispatchLayout, this.outputShape, this.isVec4), this.elementsPerThread = dt(this.dispatchLayout, this.outputShape, this.isVec4), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, this.elementsPerThread), this.isVec4 && (this.outputComponent = 4, this.variableComponents = [4, 1]), this.shaderKey = `conv2DDerInputMM_${this.isVec4}_${this.elementsPerThread}`;
|
|
3091
3090
|
}
|
|
3092
3091
|
getUserCode() {
|
|
3093
3092
|
const t = this.isVec4 ? ze(this.elementsPerThread, this.workgroupSize) : Ae(this.elementsPerThread, this.workgroupSize);
|
|
@@ -3098,7 +3097,7 @@ class nd {
|
|
|
3098
3097
|
}
|
|
3099
3098
|
}
|
|
3100
3099
|
function ud(o) {
|
|
3101
|
-
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { inputShape: r, strides: n, pad: u, dataFormat: d, dimRoundingMode: h } = i, l =
|
|
3100
|
+
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { inputShape: r, strides: n, pad: u, dataFormat: d, dimRoundingMode: h } = i, l = Se(d), c = Z(r, a.shape, n, 1, u, h, !1, l), p = [
|
|
3102
3101
|
{ type: "int32", data: [c.filterHeight, c.filterWidth] },
|
|
3103
3102
|
{
|
|
3104
3103
|
type: "int32",
|
|
@@ -3129,7 +3128,7 @@ function ud(o) {
|
|
|
3129
3128
|
return e.runWebGPUProgram(f, [s, a], "float32", p);
|
|
3130
3129
|
}
|
|
3131
3130
|
const dd = {
|
|
3132
|
-
kernelName:
|
|
3131
|
+
kernelName: Ws,
|
|
3133
3132
|
backendName: "webgpu",
|
|
3134
3133
|
kernelFunc: ud
|
|
3135
3134
|
};
|
|
@@ -3224,7 +3223,7 @@ class ld {
|
|
|
3224
3223
|
}
|
|
3225
3224
|
}
|
|
3226
3225
|
function cd(o) {
|
|
3227
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d =
|
|
3226
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d = Ee(s.shape, a.shape, r, u, n), h = [d.padInfo.front, d.padInfo.top, d.padInfo.left], l = [
|
|
3228
3227
|
{
|
|
3229
3228
|
type: "int32",
|
|
3230
3229
|
data: [d.filterDepth, d.filterHeight, d.filterWidth]
|
|
@@ -3246,12 +3245,12 @@ function cd(o) {
|
|
|
3246
3245
|
return e.runWebGPUProgram(c, [s, a], p, l);
|
|
3247
3246
|
}
|
|
3248
3247
|
const hd = {
|
|
3249
|
-
kernelName:
|
|
3248
|
+
kernelName: Ls,
|
|
3250
3249
|
backendName: "webgpu",
|
|
3251
3250
|
kernelFunc: cd
|
|
3252
3251
|
};
|
|
3253
3252
|
function pd(o) {
|
|
3254
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, filterShape: u } = i, d =
|
|
3253
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, pad: n, filterShape: u } = i, d = Ee(s.shape, u, r, 1, n), h = new sd(d), l = [
|
|
3255
3254
|
{
|
|
3256
3255
|
type: "int32",
|
|
3257
3256
|
data: [d.padInfo.front, d.padInfo.top, d.padInfo.left]
|
|
@@ -3271,12 +3270,12 @@ function pd(o) {
|
|
|
3271
3270
|
return e.runWebGPUProgram(h, [s, a], a.dtype, l);
|
|
3272
3271
|
}
|
|
3273
3272
|
const fd = {
|
|
3274
|
-
kernelName:
|
|
3273
|
+
kernelName: Vs,
|
|
3275
3274
|
backendName: "webgpu",
|
|
3276
3275
|
kernelFunc: pd
|
|
3277
3276
|
};
|
|
3278
3277
|
function md(o) {
|
|
3279
|
-
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, pad: n, inputShape: u } = i, d =
|
|
3278
|
+
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, pad: n, inputShape: u } = i, d = Ee(u, a.shape, r, 1, n), h = new od(d), l = [
|
|
3280
3279
|
{
|
|
3281
3280
|
type: "int32",
|
|
3282
3281
|
data: [d.filterDepth, d.filterHeight, d.filterWidth]
|
|
@@ -3301,17 +3300,17 @@ function md(o) {
|
|
|
3301
3300
|
return e.runWebGPUProgram(h, [s, a], s.dtype, l);
|
|
3302
3301
|
}
|
|
3303
3302
|
const gd = {
|
|
3304
|
-
kernelName:
|
|
3303
|
+
kernelName: Bs,
|
|
3305
3304
|
backendName: "webgpu",
|
|
3306
3305
|
kernelFunc: md
|
|
3307
3306
|
};
|
|
3308
3307
|
const xd = N({ opType: y.COS }), Cd = {
|
|
3309
|
-
kernelName:
|
|
3308
|
+
kernelName: Ts,
|
|
3310
3309
|
backendName: "webgpu",
|
|
3311
3310
|
kernelFunc: xd
|
|
3312
3311
|
};
|
|
3313
3312
|
const wd = N({ opType: y.COSH }), yd = {
|
|
3314
|
-
kernelName:
|
|
3313
|
+
kernelName: _s,
|
|
3315
3314
|
backendName: "webgpu",
|
|
3316
3315
|
kernelFunc: wd
|
|
3317
3316
|
};
|
|
@@ -3402,7 +3401,7 @@ const bd = (o) => {
|
|
|
3402
3401
|
const { inputs: t, backend: e, attrs: i } = o, { image: s, boxes: a, boxInd: r } = t, { cropSize: n, method: u, extrapolationValue: d } = i, h = new Sd(s.shape[3], a.shape, n, u), l = [{ type: "float32", data: [d] }];
|
|
3403
3402
|
return e.runWebGPUProgram(h, [s, a, r], "float32", l);
|
|
3404
3403
|
}, vd = {
|
|
3405
|
-
kernelName:
|
|
3404
|
+
kernelName: Os,
|
|
3406
3405
|
backendName: "webgpu",
|
|
3407
3406
|
kernelFunc: bd
|
|
3408
3407
|
};
|
|
@@ -3459,10 +3458,10 @@ function st(o, t, e) {
|
|
|
3459
3458
|
throw Error(`Cumulative ${e} for rank ${o} is not yet supported`);
|
|
3460
3459
|
}
|
|
3461
3460
|
function Rt(o, t, e, i, s, a) {
|
|
3462
|
-
const r = t.shape.length, n =
|
|
3461
|
+
const r = t.shape.length, n = Ce([i], r);
|
|
3463
3462
|
let u = t;
|
|
3464
3463
|
n != null && (u = K({ inputs: { x: t }, backend: e, attrs: { perm: n } }));
|
|
3465
|
-
const d =
|
|
3464
|
+
const d = we(1, r)[0];
|
|
3466
3465
|
if (d !== r - 1)
|
|
3467
3466
|
throw new Error(`WebGPU cumprod shader expects an inner-most axis=${t.shape.length - 1} but got axis=${i}`);
|
|
3468
3467
|
const h = u.shape[d];
|
|
@@ -3476,7 +3475,7 @@ function Rt(o, t, e, i, s, a) {
|
|
|
3476
3475
|
l = e.runWebGPUProgram(c, [l], l.dtype, f), e.disposeData(p.dataId);
|
|
3477
3476
|
}
|
|
3478
3477
|
if (n != null) {
|
|
3479
|
-
const c =
|
|
3478
|
+
const c = ft(n), p = K({ inputs: { x: l }, backend: e, attrs: { perm: c } });
|
|
3480
3479
|
return e.disposeData(l.dataId), e.disposeData(u.dataId), p;
|
|
3481
3480
|
}
|
|
3482
3481
|
return l;
|
|
@@ -3486,7 +3485,7 @@ function kd(o) {
|
|
|
3486
3485
|
return Rt(xe.Prod, s, e, a, r, n);
|
|
3487
3486
|
}
|
|
3488
3487
|
const Id = {
|
|
3489
|
-
kernelName:
|
|
3488
|
+
kernelName: Ms,
|
|
3490
3489
|
backendName: "webgpu",
|
|
3491
3490
|
kernelFunc: kd
|
|
3492
3491
|
};
|
|
@@ -3495,7 +3494,7 @@ function Rd(o) {
|
|
|
3495
3494
|
return Rt(xe.Sum, s, e, a, r, n);
|
|
3496
3495
|
}
|
|
3497
3496
|
const Pd = {
|
|
3498
|
-
kernelName:
|
|
3497
|
+
kernelName: Es,
|
|
3499
3498
|
backendName: "webgpu",
|
|
3500
3499
|
kernelFunc: Rd
|
|
3501
3500
|
};
|
|
@@ -3504,7 +3503,7 @@ function $d(o) {
|
|
|
3504
3503
|
return e.runWebGPUProgram(m, x, l, g, f);
|
|
3505
3504
|
}
|
|
3506
3505
|
const Dd = {
|
|
3507
|
-
kernelName:
|
|
3506
|
+
kernelName: Us,
|
|
3508
3507
|
backendName: "webgpu",
|
|
3509
3508
|
kernelFunc: $d
|
|
3510
3509
|
};
|
|
@@ -3558,7 +3557,7 @@ function zd(o) {
|
|
|
3558
3557
|
return e.runWebGPUProgram(g, [s], s.dtype, m);
|
|
3559
3558
|
}
|
|
3560
3559
|
const Ad = {
|
|
3561
|
-
kernelName:
|
|
3560
|
+
kernelName: Hs,
|
|
3562
3561
|
backendName: "webgpu",
|
|
3563
3562
|
kernelFunc: zd
|
|
3564
3563
|
};
|
|
@@ -3781,10 +3780,10 @@ class $t {
|
|
|
3781
3780
|
}
|
|
3782
3781
|
}
|
|
3783
3782
|
function Wd(o) {
|
|
3784
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = i, l =
|
|
3783
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dataFormat: u, dilations: d, dimRoundingMode: h } = i, l = Se(u);
|
|
3785
3784
|
let c = d;
|
|
3786
3785
|
c == null && (c = [1, 1]);
|
|
3787
|
-
const p =
|
|
3786
|
+
const p = Z(s.shape, a.shape, r, c, n, h, !0, l), f = [
|
|
3788
3787
|
{ type: "int32", data: [p.padInfo.top, p.padInfo.left] },
|
|
3789
3788
|
{ type: "int32", data: [p.inHeight, p.inWidth] }
|
|
3790
3789
|
], m = p.dataFormat === "channelsLast";
|
|
@@ -3795,7 +3794,7 @@ function Wd(o) {
|
|
|
3795
3794
|
})), e.runWebGPUProgram(g, [s, a], s.dtype, f);
|
|
3796
3795
|
}
|
|
3797
3796
|
const Ld = {
|
|
3798
|
-
kernelName:
|
|
3797
|
+
kernelName: Gs,
|
|
3799
3798
|
backendName: "webgpu",
|
|
3800
3799
|
kernelFunc: Wd
|
|
3801
3800
|
};
|
|
@@ -3895,7 +3894,7 @@ class Bd {
|
|
|
3895
3894
|
}
|
|
3896
3895
|
}
|
|
3897
3896
|
function Td(o) {
|
|
3898
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, filterShape: h } = i, l =
|
|
3897
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, filterShape: h } = i, l = Z(
|
|
3899
3898
|
s.shape,
|
|
3900
3899
|
h,
|
|
3901
3900
|
r,
|
|
@@ -3918,12 +3917,12 @@ function Td(o) {
|
|
|
3918
3917
|
return e.runWebGPUProgram(c, [s, a], "float32", p);
|
|
3919
3918
|
}
|
|
3920
3919
|
const _d = {
|
|
3921
|
-
kernelName:
|
|
3920
|
+
kernelName: Xs,
|
|
3922
3921
|
backendName: "webgpu",
|
|
3923
3922
|
kernelFunc: Td
|
|
3924
3923
|
};
|
|
3925
3924
|
function Od(o) {
|
|
3926
|
-
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, inputShape: h } = i, l =
|
|
3925
|
+
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, inputShape: h } = i, l = Z(
|
|
3927
3926
|
h,
|
|
3928
3927
|
a.shape,
|
|
3929
3928
|
r,
|
|
@@ -3949,7 +3948,7 @@ function Od(o) {
|
|
|
3949
3948
|
return e.runWebGPUProgram(c, [s, a], s.dtype, p);
|
|
3950
3949
|
}
|
|
3951
3950
|
const Md = {
|
|
3952
|
-
kernelName:
|
|
3951
|
+
kernelName: Ks,
|
|
3953
3952
|
backendName: "webgpu",
|
|
3954
3953
|
kernelFunc: Od
|
|
3955
3954
|
};
|
|
@@ -3974,7 +3973,7 @@ function Ud(o) {
|
|
|
3974
3973
|
return e.disposeData(r.dataId), e.disposeData(u.dataId), d;
|
|
3975
3974
|
}
|
|
3976
3975
|
const Hd = {
|
|
3977
|
-
kernelName:
|
|
3976
|
+
kernelName: qs,
|
|
3978
3977
|
backendName: "webgpu",
|
|
3979
3978
|
kernelFunc: Ud
|
|
3980
3979
|
};
|
|
@@ -4019,7 +4018,7 @@ class Gd {
|
|
|
4019
4018
|
}
|
|
4020
4019
|
}
|
|
4021
4020
|
function Xd(o) {
|
|
4022
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d =
|
|
4021
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a } = t, { strides: r, pad: n, dilations: u } = i, d = Ue(s.shape, a.shape, r, n, "NHWC", u), h = [d.padInfo.top, d.padInfo.left], l = [
|
|
4023
4022
|
{ type: "int32", data: [d.filterHeight, d.filterWidth] },
|
|
4024
4023
|
{ type: "int32", data: [...h] },
|
|
4025
4024
|
{ type: "int32", data: [d.strideHeight, d.strideWidth] },
|
|
@@ -4028,7 +4027,7 @@ function Xd(o) {
|
|
|
4028
4027
|
return e.runWebGPUProgram(c, [s, a], s.dtype, l);
|
|
4029
4028
|
}
|
|
4030
4029
|
const Kd = {
|
|
4031
|
-
kernelName:
|
|
4030
|
+
kernelName: Ys,
|
|
4032
4031
|
backendName: "webgpu",
|
|
4033
4032
|
kernelFunc: Xd
|
|
4034
4033
|
};
|
|
@@ -4080,7 +4079,7 @@ class qd {
|
|
|
4080
4079
|
let flatIndexIn = d + uniforms.xShape[3] *
|
|
4081
4080
|
(xCMax + uniforms.xShape[2] * (xRMax + uniforms.xShape[1] * b));
|
|
4082
4081
|
let value = getDy(b, r, c, d);
|
|
4083
|
-
${
|
|
4082
|
+
${Q("&result[flatIndexIn]", "value", this.type)}
|
|
4084
4083
|
}
|
|
4085
4084
|
}
|
|
4086
4085
|
`;
|
|
@@ -4133,14 +4132,14 @@ class Yd {
|
|
|
4133
4132
|
|
|
4134
4133
|
let flatIndexIn = d + uniforms.wShape[2] * (wCMax + wRMax * uniforms.wShape[1]);
|
|
4135
4134
|
let value = getDy(b, r, c, d);
|
|
4136
|
-
${
|
|
4135
|
+
${Q("&result[flatIndexIn]", "value", this.type)}
|
|
4137
4136
|
}
|
|
4138
4137
|
}
|
|
4139
4138
|
`;
|
|
4140
4139
|
}
|
|
4141
4140
|
}
|
|
4142
4141
|
function jd(o) {
|
|
4143
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h =
|
|
4142
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h = Ue(s.shape, a.shape, n, u, "NHWC", d), l = a.dtype, c = new Yd(h, a.shape, l), p = [
|
|
4144
4143
|
{ type: "int32", data: [h.filterHeight, h.filterWidth] },
|
|
4145
4144
|
{ type: "int32", data: [h.padInfo.top, h.padInfo.left] },
|
|
4146
4145
|
{ type: "int32", data: [h.strideHeight, h.strideWidth] },
|
|
@@ -4150,12 +4149,12 @@ function jd(o) {
|
|
|
4150
4149
|
return e.runWebGPUProgram(c, [s, a, r], l, p, f);
|
|
4151
4150
|
}
|
|
4152
4151
|
const Qd = {
|
|
4153
|
-
kernelName:
|
|
4152
|
+
kernelName: js,
|
|
4154
4153
|
backendName: "webgpu",
|
|
4155
4154
|
kernelFunc: jd
|
|
4156
4155
|
};
|
|
4157
4156
|
function Zd(o) {
|
|
4158
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h =
|
|
4157
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, dy: r } = t, { strides: n, pad: u, dilations: d } = i, h = Ue(s.shape, a.shape, n, u, "NHWC", d), l = s.dtype, c = new qd(h, l), p = [
|
|
4159
4158
|
{ type: "int32", data: [h.filterHeight, h.filterWidth] },
|
|
4160
4159
|
{ type: "int32", data: [h.padInfo.top, h.padInfo.left] },
|
|
4161
4160
|
{ type: "int32", data: [h.strideHeight, h.strideWidth] },
|
|
@@ -4165,13 +4164,13 @@ function Zd(o) {
|
|
|
4165
4164
|
return e.runWebGPUProgram(c, [s, a, r], l, p, f);
|
|
4166
4165
|
}
|
|
4167
4166
|
const Jd = {
|
|
4168
|
-
kernelName:
|
|
4167
|
+
kernelName: Qs,
|
|
4169
4168
|
backendName: "webgpu",
|
|
4170
4169
|
kernelFunc: Zd
|
|
4171
4170
|
};
|
|
4172
4171
|
class el {
|
|
4173
4172
|
constructor(t, e, i) {
|
|
4174
|
-
this.variableNames = ["Image"], this.uniforms = "alpha: f32,", this.workgroupSize = [64, 1, 1], this.pixelsOpType =
|
|
4173
|
+
this.variableNames = ["Image"], this.uniforms = "alpha: f32,", this.workgroupSize = [64, 1, 1], this.pixelsOpType = pt.DRAW, this.size = !0, this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize), this.type = e, this.textureFormat = i, this.shaderKey = `draw_${e}_${i}`;
|
|
4175
4174
|
}
|
|
4176
4175
|
getUserCode() {
|
|
4177
4176
|
let t;
|
|
@@ -4227,7 +4226,7 @@ function tl(o) {
|
|
|
4227
4226
|
return e.disposeData(w.dataId), s;
|
|
4228
4227
|
}
|
|
4229
4228
|
const sl = {
|
|
4230
|
-
kernelName:
|
|
4229
|
+
kernelName: Zs,
|
|
4231
4230
|
backendName: "webgpu",
|
|
4232
4231
|
kernelFunc: tl
|
|
4233
4232
|
};
|
|
@@ -4236,7 +4235,7 @@ const Dt = V({
|
|
|
4236
4235
|
cpuKernelImpl: nn,
|
|
4237
4236
|
supportsComplex: !0
|
|
4238
4237
|
}), ol = {
|
|
4239
|
-
kernelName:
|
|
4238
|
+
kernelName: Js,
|
|
4240
4239
|
backendName: "webgpu",
|
|
4241
4240
|
kernelFunc: Dt
|
|
4242
4241
|
};
|
|
@@ -4245,21 +4244,21 @@ function Nt(o) {
|
|
|
4245
4244
|
return ie(s, a, r, "sum", e);
|
|
4246
4245
|
}
|
|
4247
4246
|
const il = {
|
|
4248
|
-
kernelName:
|
|
4247
|
+
kernelName: eo,
|
|
4249
4248
|
backendName: "webgpu",
|
|
4250
4249
|
kernelFunc: Nt
|
|
4251
4250
|
};
|
|
4252
4251
|
function al(o) {
|
|
4253
|
-
const { inputs: t, backend: e, attrs: i } = o, { equation: s } = i, a = t, { allDims: r, summedDims: n, idDims: u } =
|
|
4254
|
-
|
|
4255
|
-
const { path: d, steps: h } =
|
|
4252
|
+
const { inputs: t, backend: e, attrs: i } = o, { equation: s } = i, a = t, { allDims: r, summedDims: n, idDims: u } = da(s, a.length);
|
|
4253
|
+
la(r.length, u, a);
|
|
4254
|
+
const { path: d, steps: h } = ca(n, u), l = h.length;
|
|
4256
4255
|
let c = null, p = r.length;
|
|
4257
4256
|
const f = [];
|
|
4258
4257
|
for (let m = 0; m < l; ++m) {
|
|
4259
4258
|
for (const g of h[m]) {
|
|
4260
|
-
const { permutationIndices: x, expandDims: C } =
|
|
4259
|
+
const { permutationIndices: x, expandDims: C } = ha(p, u[g]);
|
|
4261
4260
|
let w;
|
|
4262
|
-
|
|
4261
|
+
pa(x) ? w = a[g] : (w = K({ inputs: { x: a[g] }, backend: e, attrs: { perm: x } }), f.push(w));
|
|
4263
4262
|
const k = w.shape.slice();
|
|
4264
4263
|
for (let I = 0; I < C.length; ++I)
|
|
4265
4264
|
k.splice(C[I], 0, 1);
|
|
@@ -4279,12 +4278,12 @@ function al(o) {
|
|
|
4279
4278
|
return c;
|
|
4280
4279
|
}
|
|
4281
4280
|
const rl = {
|
|
4282
|
-
kernelName:
|
|
4281
|
+
kernelName: to,
|
|
4283
4282
|
backendName: "webgpu",
|
|
4284
4283
|
kernelFunc: al
|
|
4285
4284
|
};
|
|
4286
4285
|
const nl = N({ opType: y.ELU }), ul = {
|
|
4287
|
-
kernelName:
|
|
4286
|
+
kernelName: so,
|
|
4288
4287
|
backendName: "webgpu",
|
|
4289
4288
|
kernelFunc: nl
|
|
4290
4289
|
};
|
|
@@ -4292,17 +4291,17 @@ const dl = (o) => {
|
|
|
4292
4291
|
const { inputs: t, backend: e } = o, { dy: i, y: s } = t, a = new Re(F.ELU_DER, i.shape, s.shape);
|
|
4293
4292
|
return e.runWebGPUProgram(a, [i, s], i.dtype);
|
|
4294
4293
|
}, ll = {
|
|
4295
|
-
kernelName:
|
|
4294
|
+
kernelName: oo,
|
|
4296
4295
|
backendName: "webgpu",
|
|
4297
4296
|
kernelFunc: dl
|
|
4298
4297
|
};
|
|
4299
4298
|
const cl = V({ opType: F.EQUAL, dtype: "bool", cpuKernelImpl: Gr }), hl = {
|
|
4300
|
-
kernelName:
|
|
4299
|
+
kernelName: io,
|
|
4301
4300
|
backendName: "webgpu",
|
|
4302
4301
|
kernelFunc: cl
|
|
4303
4302
|
};
|
|
4304
4303
|
const pl = N({ opType: y.ERF }), fl = {
|
|
4305
|
-
kernelName:
|
|
4304
|
+
kernelName: ao,
|
|
4306
4305
|
backendName: "webgpu",
|
|
4307
4306
|
kernelFunc: pl
|
|
4308
4307
|
};
|
|
@@ -4311,7 +4310,7 @@ const ml = N({
|
|
|
4311
4310
|
cpuKernelImpl: Xr,
|
|
4312
4311
|
dtype: "float32"
|
|
4313
4312
|
}), gl = {
|
|
4314
|
-
kernelName:
|
|
4313
|
+
kernelName: ro,
|
|
4315
4314
|
backendName: "webgpu",
|
|
4316
4315
|
kernelFunc: ml
|
|
4317
4316
|
};
|
|
@@ -4321,12 +4320,12 @@ function Te(o) {
|
|
|
4321
4320
|
return s < 0 && (L(-(r + 1) <= s, () => `Axis must be in the interval [${-(r + 1)}, ${r}]`), u = r + s + 1), n.splice(u, 0, 1), R({ inputs: { x: a }, backend: i, attrs: { shape: n } });
|
|
4322
4321
|
}
|
|
4323
4322
|
const xl = {
|
|
4324
|
-
kernelName:
|
|
4323
|
+
kernelName: no,
|
|
4325
4324
|
backendName: "webgpu",
|
|
4326
4325
|
kernelFunc: Te
|
|
4327
4326
|
};
|
|
4328
4327
|
const Cl = N({ opType: y.EXPM1, cpuKernelImpl: Kr }), wl = {
|
|
4329
|
-
kernelName:
|
|
4328
|
+
kernelName: uo,
|
|
4330
4329
|
backendName: "webgpu",
|
|
4331
4330
|
kernelFunc: Cl
|
|
4332
4331
|
};
|
|
@@ -4402,7 +4401,7 @@ function yl(o) {
|
|
|
4402
4401
|
return zt(i, !1, e);
|
|
4403
4402
|
}
|
|
4404
4403
|
const Sl = {
|
|
4405
|
-
kernelName:
|
|
4404
|
+
kernelName: lo,
|
|
4406
4405
|
backendName: "webgpu",
|
|
4407
4406
|
kernelFunc: yl
|
|
4408
4407
|
};
|
|
@@ -4424,7 +4423,7 @@ class bl {
|
|
|
4424
4423
|
}
|
|
4425
4424
|
}
|
|
4426
4425
|
const vl = {
|
|
4427
|
-
kernelName:
|
|
4426
|
+
kernelName: co,
|
|
4428
4427
|
backendName: "webgpu",
|
|
4429
4428
|
kernelFunc: ({ inputs: o, backend: t }) => {
|
|
4430
4429
|
const { image: e } = o, i = t, s = new bl(e.shape);
|
|
@@ -4432,7 +4431,7 @@ const vl = {
|
|
|
4432
4431
|
}
|
|
4433
4432
|
};
|
|
4434
4433
|
const kl = N({ opType: y.FLOOR, cpuKernelImpl: qr }), Il = {
|
|
4435
|
-
kernelName:
|
|
4434
|
+
kernelName: ho,
|
|
4436
4435
|
backendName: "webgpu",
|
|
4437
4436
|
kernelFunc: kl
|
|
4438
4437
|
};
|
|
@@ -4441,13 +4440,13 @@ const Rl = V({
|
|
|
4441
4440
|
cpuKernelImpl: Yr,
|
|
4442
4441
|
dtype: "int32"
|
|
4443
4442
|
}), Pl = {
|
|
4444
|
-
kernelName:
|
|
4443
|
+
kernelName: po,
|
|
4445
4444
|
backendName: "webgpu",
|
|
4446
4445
|
kernelFunc: Rl
|
|
4447
4446
|
};
|
|
4448
4447
|
class $l {
|
|
4449
4448
|
constructor(t, e, i = !1) {
|
|
4450
|
-
this.pixelsOpType =
|
|
4449
|
+
this.pixelsOpType = pt.FROM_PIXELS, this.outputShape = [0], this.variableNames = [], this.workgroupSize = [256, 1, 1], this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize, [e, 1, 1]), this.importVideo = i, this.shaderKey = `fromPixels_${this.importVideo}`;
|
|
4451
4450
|
}
|
|
4452
4451
|
getUserCode() {
|
|
4453
4452
|
const t = this.importVideo ? "textureLoad(src, vec2<i32>(coords.yx));" : "textureLoad(src, vec2<i32>(coords.yx), 0)";
|
|
@@ -4467,7 +4466,7 @@ class $l {
|
|
|
4467
4466
|
}
|
|
4468
4467
|
}
|
|
4469
4468
|
const Dl = {
|
|
4470
|
-
kernelName:
|
|
4469
|
+
kernelName: fo,
|
|
4471
4470
|
backendName: "webgpu",
|
|
4472
4471
|
kernelFunc: Nl
|
|
4473
4472
|
};
|
|
@@ -4494,7 +4493,7 @@ function Nl(o) {
|
|
|
4494
4493
|
const B = GPUTextureUsage.COPY_DST | GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING, H = e.textureManager.acquireTexture(c[1], c[0], "rgba8unorm", B);
|
|
4495
4494
|
e.queue.copyExternalImageToTexture({ source: s }, { texture: H }, [c[1], c[0]]), C = H;
|
|
4496
4495
|
}
|
|
4497
|
-
const w = D(c), k =
|
|
4496
|
+
const w = D(c), k = at(c), I = new $l(c, a, p), P = [
|
|
4498
4497
|
{ type: "uint32", data: [w] },
|
|
4499
4498
|
{ type: "uint32", data: [a] },
|
|
4500
4499
|
{ type: "uint32", data: [...k] }
|
|
@@ -4540,7 +4539,7 @@ class zl {
|
|
|
4540
4539
|
}
|
|
4541
4540
|
}
|
|
4542
4541
|
const Al = {
|
|
4543
|
-
kernelName:
|
|
4542
|
+
kernelName: mo,
|
|
4544
4543
|
backendName: "webgpu",
|
|
4545
4544
|
kernelFunc: ({ inputs: o, attrs: t, backend: e }) => {
|
|
4546
4545
|
const { x: i, scale: s, offset: a, mean: r, variance: n } = o, { varianceEpsilon: u } = t, d = e, h = [i, r, n];
|
|
@@ -4553,7 +4552,7 @@ const Al = {
|
|
|
4553
4552
|
}
|
|
4554
4553
|
};
|
|
4555
4554
|
function Fl(o) {
|
|
4556
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, bias: r, preluActivationWeights: n } = t, { strides: u, pad: d, dataFormat: h, dilations: l, dimRoundingMode: c, activation: p, leakyreluAlpha: f } = i, m =
|
|
4555
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, bias: r, preluActivationWeights: n } = t, { strides: u, pad: d, dataFormat: h, dilations: l, dimRoundingMode: c, activation: p, leakyreluAlpha: f } = i, m = Se(h), g = Z(s.shape, a.shape, u, l, d, c, !1, m);
|
|
4557
4556
|
return It({
|
|
4558
4557
|
x: s,
|
|
4559
4558
|
filter: a,
|
|
@@ -4566,15 +4565,15 @@ function Fl(o) {
|
|
|
4566
4565
|
});
|
|
4567
4566
|
}
|
|
4568
4567
|
const Wl = {
|
|
4569
|
-
kernelName:
|
|
4568
|
+
kernelName: go,
|
|
4570
4569
|
backendName: "webgpu",
|
|
4571
4570
|
kernelFunc: Fl
|
|
4572
4571
|
};
|
|
4573
4572
|
function Ll(o) {
|
|
4574
4573
|
const { inputs: t, backend: e, attrs: i } = o, { x: s, filter: a, bias: r, preluActivationWeights: n } = t, { strides: u, pad: d, dilations: h, dimRoundingMode: l, activation: c, leakyreluAlpha: p } = i;
|
|
4575
4574
|
let f = h;
|
|
4576
|
-
f == null && (f = [1, 1]), L(
|
|
4577
|
-
const m =
|
|
4575
|
+
f == null && (f = [1, 1]), L(mt(u, f), () => `Error in depthwiseConv2d: Either strides or dilations must be 1. Got strides ${u} and dilations '${f}'`);
|
|
4576
|
+
const m = Z(
|
|
4578
4577
|
s.shape,
|
|
4579
4578
|
a.shape,
|
|
4580
4579
|
u,
|
|
@@ -4596,7 +4595,7 @@ function Ll(o) {
|
|
|
4596
4595
|
})), c === "leakyrelu" && (w.push({ type: "float32", data: [p] }), k.uniforms += " alpha : f32,"), e.runWebGPUProgram(k, g, "float32", w);
|
|
4597
4596
|
}
|
|
4598
4597
|
const Vl = {
|
|
4599
|
-
kernelName:
|
|
4598
|
+
kernelName: xo,
|
|
4600
4599
|
backendName: "webgpu",
|
|
4601
4600
|
kernelFunc: Ll
|
|
4602
4601
|
};
|
|
@@ -4624,7 +4623,7 @@ class Bl {
|
|
|
4624
4623
|
}
|
|
4625
4624
|
}
|
|
4626
4625
|
function Tl(o) {
|
|
4627
|
-
const { inputs: t, backend: e } = o, { params: i, indices: s } = t, a = s.shape, r = a[a.length - 1], n = D(i.shape), [u, d, h, l] =
|
|
4626
|
+
const { inputs: t, backend: e } = o, { params: i, indices: s } = t, a = s.shape, r = a[a.length - 1], n = D(i.shape), [u, d, h, l] = fa(i, s), c = R({ inputs: { x: s }, backend: e, attrs: { shape: [d, r] } }), p = R({
|
|
4628
4627
|
inputs: { x: i },
|
|
4629
4628
|
backend: e,
|
|
4630
4629
|
attrs: { shape: [D(i.shape) / h, h] }
|
|
@@ -4637,7 +4636,7 @@ function Tl(o) {
|
|
|
4637
4636
|
return e.disposeData(c.dataId), e.disposeData(p.dataId), e.disposeData(g.dataId), x;
|
|
4638
4637
|
}
|
|
4639
4638
|
const _l = {
|
|
4640
|
-
kernelName:
|
|
4639
|
+
kernelName: Co,
|
|
4641
4640
|
backendName: "webgpu",
|
|
4642
4641
|
kernelFunc: Tl
|
|
4643
4642
|
};
|
|
@@ -4666,7 +4665,7 @@ function Ml(o) {
|
|
|
4666
4665
|
return e.join();
|
|
4667
4666
|
}
|
|
4668
4667
|
function At(o) {
|
|
4669
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s, indices: a } = t, { axis: r, batchDims: n } = i, u = te(r, s.shape)[0], d =
|
|
4668
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s, indices: a } = t, { axis: r, batchDims: n } = i, u = te(r, s.shape)[0], d = ma(s, a, u, n), h = D(a.shape), l = [], c = R({
|
|
4670
4669
|
inputs: { x: s },
|
|
4671
4670
|
backend: e,
|
|
4672
4671
|
attrs: {
|
|
@@ -4699,7 +4698,7 @@ function At(o) {
|
|
|
4699
4698
|
return l.forEach((C) => e.disposeData(C.dataId)), x;
|
|
4700
4699
|
}
|
|
4701
4700
|
const El = {
|
|
4702
|
-
kernelName:
|
|
4701
|
+
kernelName: wo,
|
|
4703
4702
|
backendName: "webgpu",
|
|
4704
4703
|
kernelFunc: At
|
|
4705
4704
|
};
|
|
@@ -4708,7 +4707,7 @@ const Ul = V({
|
|
|
4708
4707
|
cpuKernelImpl: Jr,
|
|
4709
4708
|
dtype: "bool"
|
|
4710
4709
|
}), Hl = {
|
|
4711
|
-
kernelName:
|
|
4710
|
+
kernelName: yo,
|
|
4712
4711
|
backendName: "webgpu",
|
|
4713
4712
|
kernelFunc: Ul
|
|
4714
4713
|
};
|
|
@@ -4717,7 +4716,7 @@ const Gl = V({
|
|
|
4717
4716
|
dtype: "bool",
|
|
4718
4717
|
cpuKernelImpl: Zr
|
|
4719
4718
|
}), Xl = {
|
|
4720
|
-
kernelName:
|
|
4719
|
+
kernelName: So,
|
|
4721
4720
|
backendName: "webgpu",
|
|
4722
4721
|
kernelFunc: Gl
|
|
4723
4722
|
};
|
|
@@ -4726,22 +4725,22 @@ function Kl(o) {
|
|
|
4726
4725
|
return zt(i, !0, e);
|
|
4727
4726
|
}
|
|
4728
4727
|
const ql = {
|
|
4729
|
-
kernelName:
|
|
4728
|
+
kernelName: bo,
|
|
4730
4729
|
backendName: "webgpu",
|
|
4731
4730
|
kernelFunc: Kl
|
|
4732
4731
|
};
|
|
4733
4732
|
const Yl = N({ opType: y.IS_FINITE, dtype: "bool" }), jl = {
|
|
4734
|
-
kernelName:
|
|
4733
|
+
kernelName: vo,
|
|
4735
4734
|
backendName: "webgpu",
|
|
4736
4735
|
kernelFunc: Yl
|
|
4737
4736
|
};
|
|
4738
4737
|
const Ql = N({ opType: y.IS_INF, dtype: "bool" }), Zl = {
|
|
4739
|
-
kernelName:
|
|
4738
|
+
kernelName: ko,
|
|
4740
4739
|
backendName: "webgpu",
|
|
4741
4740
|
kernelFunc: Ql
|
|
4742
4741
|
};
|
|
4743
4742
|
const Jl = N({ opType: y.IS_NAN, dtype: "bool" }), ec = {
|
|
4744
|
-
kernelName:
|
|
4743
|
+
kernelName: Io,
|
|
4745
4744
|
backendName: "webgpu",
|
|
4746
4745
|
kernelFunc: Jl
|
|
4747
4746
|
};
|
|
@@ -4750,12 +4749,12 @@ function tc(o) {
|
|
|
4750
4749
|
return e.runWebGPUProgram(n, [s], "float32", r);
|
|
4751
4750
|
}
|
|
4752
4751
|
const sc = {
|
|
4753
|
-
kernelName:
|
|
4752
|
+
kernelName: Ro,
|
|
4754
4753
|
backendName: "webgpu",
|
|
4755
4754
|
kernelFunc: tc
|
|
4756
4755
|
};
|
|
4757
4756
|
const oc = V({ opType: F.LESS, dtype: "bool", cpuKernelImpl: tn }), ic = {
|
|
4758
|
-
kernelName:
|
|
4757
|
+
kernelName: Po,
|
|
4759
4758
|
backendName: "webgpu",
|
|
4760
4759
|
kernelFunc: oc
|
|
4761
4760
|
};
|
|
@@ -4764,7 +4763,7 @@ const ac = V({
|
|
|
4764
4763
|
dtype: "bool",
|
|
4765
4764
|
cpuKernelImpl: en
|
|
4766
4765
|
}), rc = {
|
|
4767
|
-
kernelName:
|
|
4766
|
+
kernelName: $o,
|
|
4768
4767
|
backendName: "webgpu",
|
|
4769
4768
|
kernelFunc: ac
|
|
4770
4769
|
};
|
|
@@ -4787,32 +4786,32 @@ function uc(o) {
|
|
|
4787
4786
|
return t.runWebGPUProgram(n, [], "float32", u);
|
|
4788
4787
|
}
|
|
4789
4788
|
const dc = {
|
|
4790
|
-
kernelName:
|
|
4789
|
+
kernelName: Do,
|
|
4791
4790
|
backendName: "webgpu",
|
|
4792
4791
|
kernelFunc: uc
|
|
4793
4792
|
};
|
|
4794
4793
|
const lc = N({ opType: y.LOG, cpuKernelImpl: sn }), cc = {
|
|
4795
|
-
kernelName:
|
|
4794
|
+
kernelName: No,
|
|
4796
4795
|
backendName: "webgpu",
|
|
4797
4796
|
kernelFunc: lc
|
|
4798
4797
|
};
|
|
4799
4798
|
const hc = N({ opType: y.LOG1P }), pc = {
|
|
4800
|
-
kernelName:
|
|
4799
|
+
kernelName: zo,
|
|
4801
4800
|
backendName: "webgpu",
|
|
4802
4801
|
kernelFunc: hc
|
|
4803
4802
|
};
|
|
4804
4803
|
const fc = V({ opType: F.LOGICAL_AND, dtype: "bool" }), mc = {
|
|
4805
|
-
kernelName:
|
|
4804
|
+
kernelName: Ao,
|
|
4806
4805
|
backendName: "webgpu",
|
|
4807
4806
|
kernelFunc: fc
|
|
4808
4807
|
};
|
|
4809
4808
|
const gc = N({ opType: y.LOGICAL_NOT }), xc = {
|
|
4810
|
-
kernelName:
|
|
4809
|
+
kernelName: Fo,
|
|
4811
4810
|
backendName: "webgpu",
|
|
4812
4811
|
kernelFunc: gc
|
|
4813
4812
|
};
|
|
4814
4813
|
const Cc = V({ opType: F.LOGICAL_OR }), wc = {
|
|
4815
|
-
kernelName:
|
|
4814
|
+
kernelName: Wo,
|
|
4816
4815
|
backendName: "webgpu",
|
|
4817
4816
|
kernelFunc: Cc
|
|
4818
4817
|
};
|
|
@@ -4915,7 +4914,7 @@ function bc(o) {
|
|
|
4915
4914
|
return e.runWebGPUProgram(d, [s], s.dtype, h);
|
|
4916
4915
|
}
|
|
4917
4916
|
const vc = {
|
|
4918
|
-
kernelName:
|
|
4917
|
+
kernelName: Lo,
|
|
4919
4918
|
backendName: "webgpu",
|
|
4920
4919
|
kernelFunc: bc
|
|
4921
4920
|
};
|
|
@@ -4987,7 +4986,7 @@ function Ic(o) {
|
|
|
4987
4986
|
return e.runWebGPUProgram(l, [s, a, r], s.dtype, c);
|
|
4988
4987
|
}
|
|
4989
4988
|
const Rc = {
|
|
4990
|
-
kernelName:
|
|
4989
|
+
kernelName: Vo,
|
|
4991
4990
|
backendName: "webgpu",
|
|
4992
4991
|
kernelFunc: Ic
|
|
4993
4992
|
};
|
|
@@ -4995,16 +4994,16 @@ const Pc = V({
|
|
|
4995
4994
|
opType: F.MAX,
|
|
4996
4995
|
cpuKernelImpl: an
|
|
4997
4996
|
}), $c = {
|
|
4998
|
-
kernelName:
|
|
4997
|
+
kernelName: Bo,
|
|
4999
4998
|
backendName: "webgpu",
|
|
5000
4999
|
kernelFunc: Pc
|
|
5001
5000
|
};
|
|
5002
5001
|
function Dc(o) {
|
|
5003
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h =
|
|
5002
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { filterSize: a, strides: r, pad: n, dimRoundingMode: u } = i, h = ye(s.shape, a, r, 1, n, u);
|
|
5004
5003
|
return St(s, h, "max", e);
|
|
5005
5004
|
}
|
|
5006
5005
|
const Nc = {
|
|
5007
|
-
kernelName:
|
|
5006
|
+
kernelName: To,
|
|
5008
5007
|
backendName: "webgpu",
|
|
5009
5008
|
kernelFunc: Dc
|
|
5010
5009
|
};
|
|
@@ -5034,7 +5033,7 @@ function zc(o) {
|
|
|
5034
5033
|
return e.runWebGPUProgram(c, [s], s.dtype, p);
|
|
5035
5034
|
}
|
|
5036
5035
|
const Ac = {
|
|
5037
|
-
kernelName:
|
|
5036
|
+
kernelName: _o,
|
|
5038
5037
|
backendName: "webgpu",
|
|
5039
5038
|
kernelFunc: zc
|
|
5040
5039
|
};
|
|
@@ -5216,14 +5215,14 @@ function Lc(o) {
|
|
|
5216
5215
|
return e.disposeData(m.dataId), x;
|
|
5217
5216
|
}
|
|
5218
5217
|
const Vc = {
|
|
5219
|
-
kernelName:
|
|
5218
|
+
kernelName: Oo,
|
|
5220
5219
|
backendName: "webgpu",
|
|
5221
5220
|
kernelFunc: Lc
|
|
5222
5221
|
};
|
|
5223
5222
|
function Bc(o) {
|
|
5224
5223
|
const { inputs: t, backend: e, attrs: i } = o, { dy: s, input: a, output: r } = t, n = a;
|
|
5225
|
-
|
|
5226
|
-
const { filterSize: u, strides: d, pad: h, dimRoundingMode: l } = i, c =
|
|
5224
|
+
nt([a, r], "maxPoolGrad");
|
|
5225
|
+
const { filterSize: u, strides: d, pad: h, dimRoundingMode: l } = i, c = ye(n.shape, u, d, 1, h, l), p = new ge(c, "max", !0);
|
|
5227
5226
|
let f = [
|
|
5228
5227
|
{ type: "int32", data: [c.strideHeight, c.strideWidth] },
|
|
5229
5228
|
{ type: "int32", data: [c.padInfo.top, c.padInfo.left] },
|
|
@@ -5256,7 +5255,7 @@ function Bc(o) {
|
|
|
5256
5255
|
return e.disposeData(m.dataId), x;
|
|
5257
5256
|
}
|
|
5258
5257
|
const Tc = {
|
|
5259
|
-
kernelName:
|
|
5258
|
+
kernelName: Mo,
|
|
5260
5259
|
backendName: "webgpu",
|
|
5261
5260
|
kernelFunc: Bc
|
|
5262
5261
|
};
|
|
@@ -5264,8 +5263,8 @@ function _c(o) {
|
|
|
5264
5263
|
const { inputs: t, backend: e, attrs: i } = o, { filterSize: s, strides: a, pad: r, includeBatchInIndex: n } = i, { x: u } = t;
|
|
5265
5264
|
L(u.shape.length === 4, () => `Error in maxPool: input must be rank 4 but got rank ${u.shape.length}.`);
|
|
5266
5265
|
const d = [1, 1];
|
|
5267
|
-
L(
|
|
5268
|
-
const h =
|
|
5266
|
+
L(mt(a, d), () => `Error in maxPool: Either strides or dilations must be 1. Got strides ${a} and dilations '${d}'`);
|
|
5267
|
+
const h = ye(u.shape, s, a, d, r), l = [
|
|
5269
5268
|
{ type: "int32", data: [h.strideHeight, h.strideWidth] },
|
|
5270
5269
|
{ type: "int32", data: [h.padInfo.top, h.padInfo.left] },
|
|
5271
5270
|
{ type: "int32", data: [h.dilationHeight, h.dilationWidth] },
|
|
@@ -5282,7 +5281,7 @@ function _c(o) {
|
|
|
5282
5281
|
return [p, f];
|
|
5283
5282
|
}
|
|
5284
5283
|
const Oc = {
|
|
5285
|
-
kernelName:
|
|
5284
|
+
kernelName: Eo,
|
|
5286
5285
|
backendName: "webgpu",
|
|
5287
5286
|
kernelFunc: _c
|
|
5288
5287
|
};
|
|
@@ -5291,7 +5290,7 @@ function Mc(o) {
|
|
|
5291
5290
|
return ie(s, a, r, "min", e);
|
|
5292
5291
|
}
|
|
5293
5292
|
const Ec = {
|
|
5294
|
-
kernelName:
|
|
5293
|
+
kernelName: Uo,
|
|
5295
5294
|
backendName: "webgpu",
|
|
5296
5295
|
kernelFunc: Mc
|
|
5297
5296
|
};
|
|
@@ -5299,7 +5298,7 @@ const Uc = V({
|
|
|
5299
5298
|
opType: F.MIN,
|
|
5300
5299
|
cpuKernelImpl: rn
|
|
5301
5300
|
}), Hc = {
|
|
5302
|
-
kernelName:
|
|
5301
|
+
kernelName: Ho,
|
|
5303
5302
|
backendName: "webgpu",
|
|
5304
5303
|
kernelFunc: Uc
|
|
5305
5304
|
};
|
|
@@ -5335,7 +5334,7 @@ class Gc {
|
|
|
5335
5334
|
}
|
|
5336
5335
|
}
|
|
5337
5336
|
const Xc = {
|
|
5338
|
-
kernelName:
|
|
5337
|
+
kernelName: Go,
|
|
5339
5338
|
backendName: "webgpu",
|
|
5340
5339
|
kernelFunc: ({ inputs: o, attrs: t, backend: e }) => {
|
|
5341
5340
|
const { x: i } = o, { paddings: s, mode: a } = t, r = e, n = s.map((h) => ({ type: "int32", data: [h[0], h[1]] })), u = new Gc(i.shape, s, a);
|
|
@@ -5343,7 +5342,7 @@ const Xc = {
|
|
|
5343
5342
|
}
|
|
5344
5343
|
};
|
|
5345
5344
|
const Kc = V({ opType: F.MOD }), qc = {
|
|
5346
|
-
kernelName:
|
|
5345
|
+
kernelName: Xo,
|
|
5347
5346
|
backendName: "webgpu",
|
|
5348
5347
|
kernelFunc: Kc
|
|
5349
5348
|
};
|
|
@@ -5469,7 +5468,7 @@ function Wt(o) {
|
|
|
5469
5468
|
return e.disposeData(r.dataId), e.disposeData(u.dataId), d;
|
|
5470
5469
|
}
|
|
5471
5470
|
const Qc = {
|
|
5472
|
-
kernelName:
|
|
5471
|
+
kernelName: Ko,
|
|
5473
5472
|
backendName: "webgpu",
|
|
5474
5473
|
kernelFunc: Wt
|
|
5475
5474
|
};
|
|
@@ -5478,7 +5477,7 @@ function Zc(o) {
|
|
|
5478
5477
|
return n || e.disposeData(u.dataId), p;
|
|
5479
5478
|
}
|
|
5480
5479
|
const Jc = {
|
|
5481
|
-
kernelName:
|
|
5480
|
+
kernelName: qo,
|
|
5482
5481
|
backendName: "webgpu",
|
|
5483
5482
|
kernelFunc: Zc
|
|
5484
5483
|
};
|
|
@@ -5492,7 +5491,7 @@ function eh(o) {
|
|
|
5492
5491
|
return e.runWebGPUProgram(s, [i], i.dtype);
|
|
5493
5492
|
}
|
|
5494
5493
|
const th = {
|
|
5495
|
-
kernelName:
|
|
5494
|
+
kernelName: Yo,
|
|
5496
5495
|
backendName: "webgpu",
|
|
5497
5496
|
kernelFunc: eh
|
|
5498
5497
|
};
|
|
@@ -5502,7 +5501,7 @@ function sh(o) {
|
|
|
5502
5501
|
return e.makeTensorInfo([l.length], "int32", new Int32Array(l));
|
|
5503
5502
|
}
|
|
5504
5503
|
const oh = {
|
|
5505
|
-
kernelName:
|
|
5504
|
+
kernelName: jo,
|
|
5506
5505
|
backendName: "webgpu",
|
|
5507
5506
|
kernelFunc: sh
|
|
5508
5507
|
};
|
|
@@ -5515,7 +5514,7 @@ function ih(o) {
|
|
|
5515
5514
|
];
|
|
5516
5515
|
}
|
|
5517
5516
|
const ah = {
|
|
5518
|
-
kernelName:
|
|
5517
|
+
kernelName: Qo,
|
|
5519
5518
|
backendName: "webgpu",
|
|
5520
5519
|
kernelFunc: ih
|
|
5521
5520
|
};
|
|
@@ -5542,7 +5541,7 @@ function nh(o) {
|
|
|
5542
5541
|
return e.disposeData(p.dataId), m;
|
|
5543
5542
|
}
|
|
5544
5543
|
const uh = {
|
|
5545
|
-
kernelName:
|
|
5544
|
+
kernelName: Zo,
|
|
5546
5545
|
backendName: "webgpu",
|
|
5547
5546
|
kernelFunc: nh
|
|
5548
5547
|
};
|
|
@@ -5562,7 +5561,7 @@ function $e(o) {
|
|
|
5562
5561
|
});
|
|
5563
5562
|
}
|
|
5564
5563
|
const dh = {
|
|
5565
|
-
kernelName:
|
|
5564
|
+
kernelName: Jo,
|
|
5566
5565
|
backendName: "webgpu",
|
|
5567
5566
|
kernelFunc: $e
|
|
5568
5567
|
};
|
|
@@ -5577,7 +5576,7 @@ function Lt(o) {
|
|
|
5577
5576
|
return M({ attrs: { shape: i.shape, dtype: i.dtype, value: 1 }, backend: e });
|
|
5578
5577
|
}
|
|
5579
5578
|
const lh = {
|
|
5580
|
-
kernelName:
|
|
5579
|
+
kernelName: ei,
|
|
5581
5580
|
backendName: "webgpu",
|
|
5582
5581
|
kernelFunc: Lt
|
|
5583
5582
|
};
|
|
@@ -5587,7 +5586,7 @@ function ch(o) {
|
|
|
5587
5586
|
return Te({ inputs: { input: t[0] }, backend: e, attrs: { dim: s } });
|
|
5588
5587
|
const a = t[0].shape, r = t[0].dtype;
|
|
5589
5588
|
t.forEach((h) => {
|
|
5590
|
-
|
|
5589
|
+
si(a, h.shape, "All tensors passed to stack must have matching shapes"), L(r === h.dtype, () => "All tensors passed to stack must have matching dtypes");
|
|
5591
5590
|
});
|
|
5592
5591
|
const n = [], u = t.map((h) => {
|
|
5593
5592
|
const l = Te({ inputs: { input: h }, backend: e, attrs: { dim: s } });
|
|
@@ -5596,7 +5595,7 @@ function ch(o) {
|
|
|
5596
5595
|
return n.forEach((h) => e.disposeData(h.dataId)), d;
|
|
5597
5596
|
}
|
|
5598
5597
|
const hh = {
|
|
5599
|
-
kernelName:
|
|
5598
|
+
kernelName: ti,
|
|
5600
5599
|
backendName: "webgpu",
|
|
5601
5600
|
kernelFunc: ch
|
|
5602
5601
|
};
|
|
@@ -5652,14 +5651,14 @@ const fh = (o) => {
|
|
|
5652
5651
|
const u = new ph(s.shape, a);
|
|
5653
5652
|
return e.runWebGPUProgram(u, [s], s.dtype, n);
|
|
5654
5653
|
}, mh = {
|
|
5655
|
-
kernelName:
|
|
5654
|
+
kernelName: oi,
|
|
5656
5655
|
backendName: "webgpu",
|
|
5657
5656
|
kernelFunc: fh
|
|
5658
5657
|
};
|
|
5659
5658
|
const gh = V({
|
|
5660
5659
|
opType: F.POW
|
|
5661
5660
|
}), xh = {
|
|
5662
|
-
kernelName:
|
|
5661
|
+
kernelName: ii,
|
|
5663
5662
|
backendName: "webgpu",
|
|
5664
5663
|
kernelFunc: gh
|
|
5665
5664
|
};
|
|
@@ -5668,7 +5667,7 @@ function Ch(o) {
|
|
|
5668
5667
|
return e.runWebGPUProgram(a, [i, s], "float32");
|
|
5669
5668
|
}
|
|
5670
5669
|
const wh = {
|
|
5671
|
-
kernelName:
|
|
5670
|
+
kernelName: ai,
|
|
5672
5671
|
backendName: "webgpu",
|
|
5673
5672
|
kernelFunc: Ch
|
|
5674
5673
|
};
|
|
@@ -5677,7 +5676,7 @@ function yh(o) {
|
|
|
5677
5676
|
return ie(s, a, r, "prod", e);
|
|
5678
5677
|
}
|
|
5679
5678
|
const Sh = {
|
|
5680
|
-
kernelName:
|
|
5679
|
+
kernelName: ri,
|
|
5681
5680
|
backendName: "webgpu",
|
|
5682
5681
|
kernelFunc: yh
|
|
5683
5682
|
};
|
|
@@ -5685,27 +5684,27 @@ const bh = (o) => {
|
|
|
5685
5684
|
const { backend: t, attrs: e } = o, { start: i, stop: s, step: a, dtype: r } = e, n = cn(i, s, a, r);
|
|
5686
5685
|
return t.makeTensorInfo([n.length], r, n);
|
|
5687
5686
|
}, vh = {
|
|
5688
|
-
kernelName:
|
|
5687
|
+
kernelName: ni,
|
|
5689
5688
|
backendName: "webgpu",
|
|
5690
5689
|
kernelFunc: bh
|
|
5691
5690
|
};
|
|
5692
5691
|
const kh = V({ opType: F.DIV }), Ih = {
|
|
5693
|
-
kernelName:
|
|
5692
|
+
kernelName: ui,
|
|
5694
5693
|
backendName: "webgpu",
|
|
5695
5694
|
kernelFunc: kh
|
|
5696
5695
|
};
|
|
5697
5696
|
const Rh = N({ opType: y.RECIPROCAL }), Ph = {
|
|
5698
|
-
kernelName:
|
|
5697
|
+
kernelName: di,
|
|
5699
5698
|
backendName: "webgpu",
|
|
5700
5699
|
kernelFunc: Rh
|
|
5701
5700
|
};
|
|
5702
5701
|
const $h = N({ opType: y.RELU }), Dh = {
|
|
5703
|
-
kernelName:
|
|
5702
|
+
kernelName: li,
|
|
5704
5703
|
backendName: "webgpu",
|
|
5705
5704
|
kernelFunc: $h
|
|
5706
5705
|
};
|
|
5707
5706
|
const Nh = N({ opType: y.RELU6 }), zh = {
|
|
5708
|
-
kernelName:
|
|
5707
|
+
kernelName: ci,
|
|
5709
5708
|
backendName: "webgpu",
|
|
5710
5709
|
kernelFunc: Nh
|
|
5711
5710
|
};
|
|
@@ -5768,7 +5767,7 @@ function Fh(o) {
|
|
|
5768
5767
|
return e.runWebGPUProgram(f, [s], "float32", p);
|
|
5769
5768
|
}
|
|
5770
5769
|
const Wh = {
|
|
5771
|
-
kernelName:
|
|
5770
|
+
kernelName: hi,
|
|
5772
5771
|
backendName: "webgpu",
|
|
5773
5772
|
kernelFunc: Fh
|
|
5774
5773
|
};
|
|
@@ -5875,7 +5874,7 @@ function Vh(o) {
|
|
|
5875
5874
|
return e.runWebGPUProgram(w, [a], a.dtype, k);
|
|
5876
5875
|
}
|
|
5877
5876
|
const Bh = {
|
|
5878
|
-
kernelName:
|
|
5877
|
+
kernelName: pi,
|
|
5879
5878
|
backendName: "webgpu",
|
|
5880
5879
|
kernelFunc: Vh
|
|
5881
5880
|
};
|
|
@@ -5927,7 +5926,7 @@ function _h(o) {
|
|
|
5927
5926
|
return e.runWebGPUProgram(f, [s], s.dtype, p);
|
|
5928
5927
|
}
|
|
5929
5928
|
const Oh = {
|
|
5930
|
-
kernelName:
|
|
5929
|
+
kernelName: fi,
|
|
5931
5930
|
backendName: "webgpu",
|
|
5932
5931
|
kernelFunc: _h
|
|
5933
5932
|
};
|
|
@@ -6017,7 +6016,7 @@ function Eh(o) {
|
|
|
6017
6016
|
return e.runWebGPUProgram(w, [a], a.dtype, k);
|
|
6018
6017
|
}
|
|
6019
6018
|
const Uh = {
|
|
6020
|
-
kernelName:
|
|
6019
|
+
kernelName: mi,
|
|
6021
6020
|
backendName: "webgpu",
|
|
6022
6021
|
kernelFunc: Eh
|
|
6023
6022
|
};
|
|
@@ -6079,7 +6078,7 @@ function Gh(o) {
|
|
|
6079
6078
|
return e.disposeData(f.dataId), m;
|
|
6080
6079
|
}
|
|
6081
6080
|
const Xh = {
|
|
6082
|
-
kernelName:
|
|
6081
|
+
kernelName: gi,
|
|
6083
6082
|
backendName: "webgpu",
|
|
6084
6083
|
kernelFunc: Gh
|
|
6085
6084
|
};
|
|
@@ -6113,10 +6112,10 @@ class Kh {
|
|
|
6113
6112
|
}
|
|
6114
6113
|
}
|
|
6115
6114
|
const qh = {
|
|
6116
|
-
kernelName:
|
|
6115
|
+
kernelName: xi,
|
|
6117
6116
|
backendName: "webgpu",
|
|
6118
6117
|
kernelFunc: ({ inputs: o, attrs: t, backend: e }) => {
|
|
6119
|
-
const { image: i } = o, { radians: s, fillValue: a, center: r } = t, n = e, u = new Kh(i.shape, a), [d, h] =
|
|
6118
|
+
const { image: i } = o, { radians: s, fillValue: a, center: r } = t, n = e, u = new Kh(i.shape, a), [d, h] = ga(r, i.shape[1], i.shape[2]), l = [
|
|
6120
6119
|
{ type: "float32", data: [d] },
|
|
6121
6120
|
{ type: "float32", data: [h] },
|
|
6122
6121
|
{ type: "float32", data: [Math.sin(s)] },
|
|
@@ -6126,12 +6125,12 @@ const qh = {
|
|
|
6126
6125
|
}
|
|
6127
6126
|
};
|
|
6128
6127
|
const Yh = N({ opType: y.ROUND }), jh = {
|
|
6129
|
-
kernelName:
|
|
6128
|
+
kernelName: Ci,
|
|
6130
6129
|
backendName: "webgpu",
|
|
6131
6130
|
kernelFunc: Yh
|
|
6132
6131
|
};
|
|
6133
6132
|
const Qh = N({ opType: y.RSQRT, cpuKernelImpl: hn }), Zh = {
|
|
6134
|
-
kernelName:
|
|
6133
|
+
kernelName: wi,
|
|
6135
6134
|
backendName: "webgpu",
|
|
6136
6135
|
kernelFunc: Qh
|
|
6137
6136
|
};
|
|
@@ -6174,10 +6173,10 @@ class pe {
|
|
|
6174
6173
|
flattenedIndex = flattenedIndex + indexInside * ${i};
|
|
6175
6174
|
}
|
|
6176
6175
|
let updateValue =
|
|
6177
|
-
${
|
|
6176
|
+
${wa(this.type)}(${n});
|
|
6178
6177
|
let flatIndex = getOutputIndexFromCoords(${s});
|
|
6179
6178
|
|
|
6180
|
-
${this.sumDupeIndices ?
|
|
6179
|
+
${this.sumDupeIndices ? Q("&result[flatIndex]", "updateValue", this.type) : "atomicStore(&result[flatIndex], bitcast<i32>(updateValue));"}
|
|
6181
6180
|
}
|
|
6182
6181
|
}`;
|
|
6183
6182
|
}
|
|
@@ -6194,7 +6193,7 @@ function Jh(o) {
|
|
|
6194
6193
|
return e.disposeData(p.dataId), e.disposeData(f.dataId), e.disposeData(k.dataId), I;
|
|
6195
6194
|
}
|
|
6196
6195
|
const ep = {
|
|
6197
|
-
kernelName:
|
|
6196
|
+
kernelName: yi,
|
|
6198
6197
|
backendName: "webgpu",
|
|
6199
6198
|
kernelFunc: Jh
|
|
6200
6199
|
};
|
|
@@ -6233,7 +6232,7 @@ function sp(o) {
|
|
|
6233
6232
|
return e.runWebGPUProgram(n, [s, a], "int32", u);
|
|
6234
6233
|
}
|
|
6235
6234
|
const op = {
|
|
6236
|
-
kernelName:
|
|
6235
|
+
kernelName: Si,
|
|
6237
6236
|
backendName: "webgpu",
|
|
6238
6237
|
kernelFunc: sp
|
|
6239
6238
|
};
|
|
@@ -6273,37 +6272,37 @@ function ap(o) {
|
|
|
6273
6272
|
return e.runWebGPUProgram(r, [i, s, a], fe(s.dtype, a.dtype));
|
|
6274
6273
|
}
|
|
6275
6274
|
const rp = {
|
|
6276
|
-
kernelName:
|
|
6275
|
+
kernelName: bi,
|
|
6277
6276
|
backendName: "webgpu",
|
|
6278
6277
|
kernelFunc: ap
|
|
6279
6278
|
};
|
|
6280
6279
|
const np = N({ opType: y.SELU }), up = {
|
|
6281
|
-
kernelName:
|
|
6280
|
+
kernelName: vi,
|
|
6282
6281
|
backendName: "webgpu",
|
|
6283
6282
|
kernelFunc: np
|
|
6284
6283
|
};
|
|
6285
6284
|
const dp = N({ opType: y.SIGMOID }), lp = {
|
|
6286
|
-
kernelName:
|
|
6285
|
+
kernelName: ki,
|
|
6287
6286
|
backendName: "webgpu",
|
|
6288
6287
|
kernelFunc: dp
|
|
6289
6288
|
};
|
|
6290
6289
|
const cp = N({ opType: y.SIGN }), hp = {
|
|
6291
|
-
kernelName:
|
|
6290
|
+
kernelName: Ii,
|
|
6292
6291
|
backendName: "webgpu",
|
|
6293
6292
|
kernelFunc: cp
|
|
6294
6293
|
};
|
|
6295
6294
|
const pp = N({ opType: y.SIN }), fp = {
|
|
6296
|
-
kernelName:
|
|
6295
|
+
kernelName: Ri,
|
|
6297
6296
|
backendName: "webgpu",
|
|
6298
6297
|
kernelFunc: pp
|
|
6299
6298
|
};
|
|
6300
6299
|
const mp = N({ opType: y.SINH }), gp = {
|
|
6301
|
-
kernelName:
|
|
6300
|
+
kernelName: Pi,
|
|
6302
6301
|
backendName: "webgpu",
|
|
6303
6302
|
kernelFunc: mp
|
|
6304
6303
|
};
|
|
6305
6304
|
const xp = N({ opType: y.SOFTPLUS }), Cp = {
|
|
6306
|
-
kernelName:
|
|
6305
|
+
kernelName: $i,
|
|
6307
6306
|
backendName: "webgpu",
|
|
6308
6307
|
kernelFunc: xp
|
|
6309
6308
|
};
|
|
@@ -6320,7 +6319,7 @@ class wp {
|
|
|
6320
6319
|
getUserCode() {
|
|
6321
6320
|
const t = G(this.outputShape.length), e = xt(this.newDim);
|
|
6322
6321
|
return `
|
|
6323
|
-
${
|
|
6322
|
+
${ya(this.paddedXShape, "PaddedX")}
|
|
6324
6323
|
${S("index")} {
|
|
6325
6324
|
if(index < uniforms.size) {
|
|
6326
6325
|
let coords = getCoordsFromIndex(index);
|
|
@@ -6342,7 +6341,7 @@ const yp = (o) => {
|
|
|
6342
6341
|
const d = u.map(
|
|
6343
6342
|
(C, w) => C[0] + s.shape[w] + C[1]
|
|
6344
6343
|
/* afterPad */
|
|
6345
|
-
), h =
|
|
6344
|
+
), h = lt(d, a, n, !1), l = ct(h.length, a.length, !1), c = ht(d, a, n, !1), p = at(d), f = new wp(s.shape, d, u, h, l, p.length), m = [
|
|
6346
6345
|
{ type: "int32", data: h },
|
|
6347
6346
|
{ type: "int32", data: p }
|
|
6348
6347
|
];
|
|
@@ -6350,7 +6349,7 @@ const yp = (o) => {
|
|
|
6350
6349
|
const g = e.runWebGPUProgram(f, [s], s.dtype, m), x = R({ inputs: { x: g }, backend: e, attrs: { shape: c } });
|
|
6351
6350
|
return e.disposeData(g.dataId), x;
|
|
6352
6351
|
}, Sp = {
|
|
6353
|
-
kernelName:
|
|
6352
|
+
kernelName: Di,
|
|
6354
6353
|
backendName: "webgpu",
|
|
6355
6354
|
kernelFunc: yp
|
|
6356
6355
|
};
|
|
@@ -6369,7 +6368,7 @@ class bp {
|
|
|
6369
6368
|
|
|
6370
6369
|
let value = input[indexInInput * uniforms.segmentSize + indexInSegment];
|
|
6371
6370
|
let outIndex = segmentId * uniforms.segmentSize + indexInSegment;
|
|
6372
|
-
${
|
|
6371
|
+
${Q("&result[outIndex]", "value", this.type)}
|
|
6373
6372
|
}
|
|
6374
6373
|
}
|
|
6375
6374
|
`;
|
|
@@ -6384,7 +6383,7 @@ class vp {
|
|
|
6384
6383
|
${S("index")} {
|
|
6385
6384
|
if (index < uniforms.segmentIdsShape) {
|
|
6386
6385
|
let segmentId = segmentIds[index];
|
|
6387
|
-
${
|
|
6386
|
+
${Q("&result[segmentId]", "1", "int32")}
|
|
6388
6387
|
}
|
|
6389
6388
|
}
|
|
6390
6389
|
`;
|
|
@@ -6434,7 +6433,7 @@ function Ip(o) {
|
|
|
6434
6433
|
return Bt(i, s, a, !1, e);
|
|
6435
6434
|
}
|
|
6436
6435
|
const Rp = {
|
|
6437
|
-
kernelName:
|
|
6436
|
+
kernelName: Ni,
|
|
6438
6437
|
backendName: "webgpu",
|
|
6439
6438
|
kernelFunc: Ip
|
|
6440
6439
|
};
|
|
@@ -6443,7 +6442,7 @@ function Pp(o) {
|
|
|
6443
6442
|
return Bt(i, s, a, !0, e);
|
|
6444
6443
|
}
|
|
6445
6444
|
const $p = {
|
|
6446
|
-
kernelName:
|
|
6445
|
+
kernelName: zi,
|
|
6447
6446
|
backendName: "webgpu",
|
|
6448
6447
|
kernelFunc: Pp
|
|
6449
6448
|
};
|
|
@@ -6480,21 +6479,21 @@ function Np(o, t = "") {
|
|
|
6480
6479
|
function Ke(o) {
|
|
6481
6480
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { reps: a } = i;
|
|
6482
6481
|
if (e.shouldExecuteOnCPU([s]) || s.dtype === "string" || s.shape.length >= 5) {
|
|
6483
|
-
const u = e.readSync(s.dataId), d = s.dtype === "string" ? u.map((c) =>
|
|
6482
|
+
const u = e.readSync(s.dataId), d = s.dtype === "string" ? u.map((c) => rt(c)) : u, h = ke(s.shape, s.dtype, d), l = wn(h, a);
|
|
6484
6483
|
return e.makeTensorInfo(l.shape, l.dtype, l.values);
|
|
6485
6484
|
}
|
|
6486
6485
|
const r = new Dp(s.shape, a);
|
|
6487
6486
|
return e.runWebGPUProgram(r, [s], s.dtype);
|
|
6488
6487
|
}
|
|
6489
6488
|
const zp = {
|
|
6490
|
-
kernelName:
|
|
6489
|
+
kernelName: Ai,
|
|
6491
6490
|
backendName: "webgpu",
|
|
6492
6491
|
kernelFunc: Ke
|
|
6493
6492
|
};
|
|
6494
6493
|
function Ap(o) {
|
|
6495
6494
|
const { inputs: t, backend: e, attrs: i } = o, { sparseIndices: s, sparseValues: a, defaultValue: r } = t, { outputShape: n } = i, { sliceRank: u, numUpdates: d, sliceSize: h, strides: l, outputSize: c } = He(a, s, n), p = !1;
|
|
6496
6495
|
if (a.dtype === "string") {
|
|
6497
|
-
const A = e.bufferSync(s), z = e.bufferSync(a), B =
|
|
6496
|
+
const A = e.bufferSync(s), z = e.bufferSync(a), B = rt(e.readSync(r.dataId)[0]), T = pn(A, z, n, c, h, d, u, l, B, p);
|
|
6498
6497
|
return e.makeTensorInfo(n, T.dtype, T.values);
|
|
6499
6498
|
}
|
|
6500
6499
|
const f = [c / h, h], m = R({
|
|
@@ -6505,7 +6504,7 @@ function Ap(o) {
|
|
|
6505
6504
|
inputs: { x: a },
|
|
6506
6505
|
backend: e,
|
|
6507
6506
|
attrs: { shape: [d, h] }
|
|
6508
|
-
}) : U({ inputs: { x: a }, backend: e }), x = g.dtype, C = e.makeTensorInfo([], x,
|
|
6507
|
+
}) : U({ inputs: { x: a }, backend: e }), x = g.dtype, C = e.makeTensorInfo([], x, Wi(1, x)), w = R({
|
|
6509
6508
|
inputs: { x: r },
|
|
6510
6509
|
backend: e,
|
|
6511
6510
|
attrs: { shape: Array(f.length).fill(1) }
|
|
@@ -6537,12 +6536,12 @@ function Ap(o) {
|
|
|
6537
6536
|
return e.disposeData(m.dataId), e.disposeData(g.dataId), e.disposeData(w.dataId), e.disposeData(C.dataId), e.disposeData(k.dataId), $;
|
|
6538
6537
|
}
|
|
6539
6538
|
const Fp = {
|
|
6540
|
-
kernelName:
|
|
6539
|
+
kernelName: Fi,
|
|
6541
6540
|
backendName: "webgpu",
|
|
6542
6541
|
kernelFunc: Ap
|
|
6543
6542
|
};
|
|
6544
6543
|
function Wp(o) {
|
|
6545
|
-
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { numOrSizeSplits: a, axis: r } = i, n = te(r, s.shape)[0], u =
|
|
6544
|
+
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { numOrSizeSplits: a, axis: r } = i, n = te(r, s.shape)[0], u = xa(s, a, n), d = s.shape.length, h = new Array(d).fill(0), l = s.shape.slice();
|
|
6546
6545
|
return u.map((c) => {
|
|
6547
6546
|
const p = [...l];
|
|
6548
6547
|
p[n] = c;
|
|
@@ -6551,17 +6550,17 @@ function Wp(o) {
|
|
|
6551
6550
|
});
|
|
6552
6551
|
}
|
|
6553
6552
|
const Lp = {
|
|
6554
|
-
kernelName:
|
|
6553
|
+
kernelName: Li,
|
|
6555
6554
|
backendName: "webgpu",
|
|
6556
6555
|
kernelFunc: Wp
|
|
6557
6556
|
};
|
|
6558
6557
|
const Vp = N({ opType: y.SQRT }), Bp = {
|
|
6559
|
-
kernelName:
|
|
6558
|
+
kernelName: Vi,
|
|
6560
6559
|
backendName: "webgpu",
|
|
6561
6560
|
kernelFunc: Vp
|
|
6562
6561
|
};
|
|
6563
6562
|
const Tp = {
|
|
6564
|
-
kernelName:
|
|
6563
|
+
kernelName: Bi,
|
|
6565
6564
|
backendName: "webgpu",
|
|
6566
6565
|
kernelFunc: ({ inputs: o, backend: t }) => {
|
|
6567
6566
|
const { x: e } = o, i = t, s = new ue(e.shape, y.SQUARE);
|
|
@@ -6571,7 +6570,7 @@ const Tp = {
|
|
|
6571
6570
|
const _p = V({
|
|
6572
6571
|
opType: F.SQUARED_DIFFERENCE
|
|
6573
6572
|
}), Op = {
|
|
6574
|
-
kernelName:
|
|
6573
|
+
kernelName: Ti,
|
|
6575
6574
|
backendName: "webgpu",
|
|
6576
6575
|
kernelFunc: _p
|
|
6577
6576
|
};
|
|
@@ -6580,7 +6579,7 @@ function Mp({ inputs: o, attrs: t, backend: e }) {
|
|
|
6580
6579
|
return e.runWebGPUProgram(s, [i], i.dtype, a);
|
|
6581
6580
|
}
|
|
6582
6581
|
const Ep = {
|
|
6583
|
-
kernelName:
|
|
6582
|
+
kernelName: _i,
|
|
6584
6583
|
backendName: "webgpu",
|
|
6585
6584
|
kernelFunc: Mp
|
|
6586
6585
|
};
|
|
@@ -6628,7 +6627,7 @@ function Hp(o) {
|
|
|
6628
6627
|
return I;
|
|
6629
6628
|
}
|
|
6630
6629
|
const Gp = {
|
|
6631
|
-
kernelName:
|
|
6630
|
+
kernelName: Oi,
|
|
6632
6631
|
backendName: "webgpu",
|
|
6633
6632
|
kernelFunc: Hp
|
|
6634
6633
|
};
|
|
@@ -6640,22 +6639,22 @@ function Xp(o) {
|
|
|
6640
6639
|
];
|
|
6641
6640
|
}
|
|
6642
6641
|
const Kp = {
|
|
6643
|
-
kernelName:
|
|
6642
|
+
kernelName: Mi,
|
|
6644
6643
|
backendName: "webgpu",
|
|
6645
6644
|
kernelFunc: Xp
|
|
6646
6645
|
};
|
|
6647
6646
|
const qp = V({ opType: F.SUB, cpuKernelImpl: Cn, supportsComplex: !0 }), Yp = {
|
|
6648
|
-
kernelName:
|
|
6647
|
+
kernelName: Ei,
|
|
6649
6648
|
backendName: "webgpu",
|
|
6650
6649
|
kernelFunc: qp
|
|
6651
6650
|
};
|
|
6652
6651
|
const jp = N({ opType: y.TAN }), Qp = {
|
|
6653
|
-
kernelName:
|
|
6652
|
+
kernelName: Ui,
|
|
6654
6653
|
backendName: "webgpu",
|
|
6655
6654
|
kernelFunc: jp
|
|
6656
6655
|
};
|
|
6657
6656
|
const Zp = N({ opType: y.TANH }), Jp = {
|
|
6658
|
-
kernelName:
|
|
6657
|
+
kernelName: Hi,
|
|
6659
6658
|
backendName: "webgpu",
|
|
6660
6659
|
kernelFunc: Zp
|
|
6661
6660
|
};
|
|
@@ -6683,7 +6682,7 @@ function ef(o) {
|
|
|
6683
6682
|
return p.forEach(($) => e.disposeData($.dataId)), P;
|
|
6684
6683
|
}
|
|
6685
6684
|
const tf = {
|
|
6686
|
-
kernelName:
|
|
6685
|
+
kernelName: Gi,
|
|
6687
6686
|
backendName: "webgpu",
|
|
6688
6687
|
kernelFunc: ef
|
|
6689
6688
|
};
|
|
@@ -6896,7 +6895,7 @@ function af(o) {
|
|
|
6896
6895
|
return C = R({ inputs: { x: C }, attrs: { shape: w }, backend: e }), ne(e, k), [C, f];
|
|
6897
6896
|
}
|
|
6898
6897
|
const rf = {
|
|
6899
|
-
kernelName:
|
|
6898
|
+
kernelName: Xi,
|
|
6900
6899
|
backendName: "webgpu",
|
|
6901
6900
|
kernelFunc: af
|
|
6902
6901
|
};
|
|
@@ -7057,7 +7056,7 @@ function uf(o) {
|
|
|
7057
7056
|
return e.runWebGPUProgram(x, [s, a], "float32", k);
|
|
7058
7057
|
}
|
|
7059
7058
|
const df = {
|
|
7060
|
-
kernelName:
|
|
7059
|
+
kernelName: Ki,
|
|
7061
7060
|
backendName: "webgpu",
|
|
7062
7061
|
kernelFunc: uf
|
|
7063
7062
|
};
|
|
@@ -7080,7 +7079,7 @@ function lf(o) {
|
|
|
7080
7079
|
return l.forEach((m) => e.disposeData(m.dataId)), f;
|
|
7081
7080
|
}
|
|
7082
7081
|
const cf = {
|
|
7083
|
-
kernelName:
|
|
7082
|
+
kernelName: qi,
|
|
7084
7083
|
backendName: "webgpu",
|
|
7085
7084
|
kernelFunc: lf
|
|
7086
7085
|
};
|
|
@@ -7104,7 +7103,7 @@ class hf {
|
|
|
7104
7103
|
let flatIndex = b * uniforms.numSegments + segmentId % uniforms.numSegments;
|
|
7105
7104
|
let value = getX(b, inCol);
|
|
7106
7105
|
|
|
7107
|
-
${
|
|
7106
|
+
${Q("&result[flatIndex]", "value", this.type)}
|
|
7108
7107
|
}
|
|
7109
7108
|
}
|
|
7110
7109
|
}
|
|
@@ -7114,10 +7113,10 @@ class hf {
|
|
|
7114
7113
|
function pf(o) {
|
|
7115
7114
|
const { inputs: t, backend: e, attrs: i } = o, { x: s, segmentIds: a } = t, { numSegments: r } = i, n = s.shape.length, u = [];
|
|
7116
7115
|
let d = 0;
|
|
7117
|
-
const h =
|
|
7116
|
+
const h = Ce([d], n);
|
|
7118
7117
|
let l = s;
|
|
7119
|
-
h != null && (l = K({ inputs: { x: s }, backend: e, attrs: { perm: h } }), u.push(l), d =
|
|
7120
|
-
const c =
|
|
7118
|
+
h != null && (l = K({ inputs: { x: s }, backend: e, attrs: { perm: h } }), u.push(l), d = we(1, n)[0]);
|
|
7119
|
+
const c = Ca(l.shape, d, r), p = D([l.shape[d]]), f = R({ inputs: { x: l }, backend: e, attrs: { shape: [-1, p] } });
|
|
7121
7120
|
u.push(f);
|
|
7122
7121
|
const m = s.dtype, g = [f.shape[0], r], x = M({ backend: e, attrs: { shape: g, value: 0, dtype: m } }), C = new hf(f.shape, g, m), w = [
|
|
7123
7122
|
{ type: "int32", data: [r] },
|
|
@@ -7127,13 +7126,13 @@ function pf(o) {
|
|
|
7127
7126
|
let P = I;
|
|
7128
7127
|
if (h != null) {
|
|
7129
7128
|
u.push(I);
|
|
7130
|
-
const $ =
|
|
7129
|
+
const $ = ft(h);
|
|
7131
7130
|
P = K({ inputs: { x: P }, backend: e, attrs: { perm: $ } });
|
|
7132
7131
|
}
|
|
7133
7132
|
return u.forEach(($) => e.disposeData($.dataId)), P;
|
|
7134
7133
|
}
|
|
7135
7134
|
const ff = {
|
|
7136
|
-
kernelName:
|
|
7135
|
+
kernelName: Yi,
|
|
7137
7136
|
backendName: "webgpu",
|
|
7138
7137
|
kernelFunc: pf
|
|
7139
7138
|
};
|
|
@@ -7302,7 +7301,7 @@ const mf = [
|
|
|
7302
7301
|
dh
|
|
7303
7302
|
];
|
|
7304
7303
|
for (const o of mf)
|
|
7305
|
-
|
|
7304
|
+
ji(o);
|
|
7306
7305
|
export {
|
|
7307
7306
|
Mt as WebGPUBackend
|
|
7308
7307
|
};
|