@genai-fi/nanogpt 0.10.3 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +10 -5
- package/dist/Generator.js +1789 -1765
- package/dist/{RealDiv-KAPDe8zB.js → RealDiv-C8neBwFi.js} +15 -15
- package/dist/{Reshape-BYkmUnAv.js → Reshape-Bd4V_4X7.js} +1 -1
- package/dist/{Reshape-Zt6eb7yh.js → Reshape-Ck29jQSY.js} +5 -5
- package/dist/TeachableLLM.d.ts +5 -3
- package/dist/TeachableLLM.js +14 -14
- package/dist/Trainer.d.ts +3 -1
- package/dist/Trainer.js +11 -8
- package/dist/{axis_util-BaG7mf5A.js → axis_util-DGqbT-FX.js} +3 -3
- package/dist/backend.js +2 -2
- package/dist/{backend_util-RCe-rHaj.js → backend_util-DC3rBo_H.js} +18 -18
- package/dist/{backend_webgpu-DE3ACOLx.js → backend_webgpu-mbhNnlx9.js} +3 -3
- package/dist/{broadcast_to-B3eYlZm7.js → broadcast_to-D1Dmg2Oz.js} +2 -2
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +2 -2
- package/dist/checks/normRMS.js +4 -4
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +4 -4
- package/dist/checks/rope.js +2 -2
- package/dist/{clip_by_value-BnO7-a88.js → clip_by_value-fg2aKzUy.js} +5 -5
- package/dist/complex-Cyg-eQeZ.js +11 -0
- package/dist/concat-CSm2rMwe.js +17 -0
- package/dist/{concat_util-DpW8mL_l.js → concat_util-D0je5Ppu.js} +1 -1
- package/dist/{dataset-BcwmTGYc.js → dataset-CVIJu7Xa.js} +7 -7
- package/dist/{dropout-BcvN9JYi.js → dropout-DLhSMNTZ.js} +9 -9
- package/dist/expand_dims-ChkuOp6I.js +11 -0
- package/dist/{exports_initializers-Hta_rEnm.js → exports_initializers-1KWPiStI.js} +1 -1
- package/dist/{floor-D5QdR_le.js → floor-BRMPgeIs.js} +1 -1
- package/dist/{gather-D3JcZUaI.js → gather-BSULDalH.js} +1 -1
- package/dist/{gelu-CjNPL4OH.js → gelu-BK1k-n1i.js} +1 -1
- package/dist/{gpgpu_math-DAOmgtXR.js → gpgpu_math-BJSTk_mW.js} +25 -25
- package/dist/{index-BwexR4lA.js → index-BBVLAXZD.js} +89 -89
- package/dist/{index-DOvlwCh-.js → index-Duu1Lvvv.js} +53 -53
- package/dist/{kernel_funcs_utils-CCzYdUZg.js → kernel_funcs_utils-BtYrPoJu.js} +6 -6
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +9 -9
- package/dist/log_sum_exp-CVqLsVLl.js +39 -0
- package/dist/main.d.ts +10 -1
- package/dist/main.js +68 -58
- package/dist/{matMul16-BWRSOCWB.js → matMul16-xswmhSuF.js} +3 -3
- package/dist/{matMulGelu-CzfgT6Wq.js → matMulGelu-BpvgnYG8.js} +14 -14
- package/dist/mat_mul-Bn2BDpT4.js +11 -0
- package/dist/{mod-AnXEvvpo.js → mod-B4AUd1Np.js} +1 -1
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/model.js +9 -9
- package/dist/{ones-D2rT0xk2.js → ones-CBI1AQjb.js} +3 -3
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +9 -9
- package/dist/ops/cpu/adamMoments.js +5 -5
- package/dist/ops/cpu/appendCache.js +6 -6
- package/dist/ops/cpu/attentionMask.js +10 -10
- package/dist/ops/cpu/fusedSoftmax.js +5 -5
- package/dist/ops/cpu/gatherSub.js +9 -9
- package/dist/ops/cpu/gelu.js +5 -5
- package/dist/ops/cpu/matMul16.js +2 -2
- package/dist/ops/cpu/matMulGelu.js +3 -3
- package/dist/ops/cpu/matMulMul.js +5 -5
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +7 -7
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +11 -11
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/add16.js +4 -4
- package/dist/ops/grads/attentionMask.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMul16.js +3 -3
- package/dist/ops/grads/matMulGelu.js +6 -6
- package/dist/ops/grads/normRMS.js +4 -4
- package/dist/ops/grads/pack16.js +3 -3
- package/dist/ops/grads/qkv.js +10 -10
- package/dist/ops/grads/rope.js +2 -2
- package/dist/ops/grads/softmax16.js +1 -1
- package/dist/ops/grads/unpack16.js +2 -2
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +2 -2
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/reshape16.js +2 -2
- package/dist/ops/rope.js +2 -2
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +1 -1
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +6 -6
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMul16.js +8 -8
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +7 -7
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +7 -7
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +5 -5
- package/dist/ops/webgpu/add16.js +1 -1
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +2 -2
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/concat16.js +5 -5
- package/dist/ops/webgpu/gatherSub.js +5 -5
- package/dist/ops/webgpu/gelu.js +3 -3
- package/dist/ops/webgpu/matMul16.js +19 -19
- package/dist/ops/webgpu/matMul16_program.js +2 -2
- package/dist/ops/webgpu/mul16.js +4 -4
- package/dist/ops/webgpu/normRMS.js +6 -6
- package/dist/ops/webgpu/normRMSGrad.js +4 -4
- package/dist/ops/webgpu/pack16.js +3 -3
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +8 -8
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/slice16.js +4 -4
- package/dist/ops/webgpu/softmax16.js +4 -4
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +1 -1
- package/dist/ops/webgpu/sub16.js +4 -4
- package/dist/ops/webgpu/sum16.js +5 -5
- package/dist/ops/webgpu/transpose16.js +2 -2
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
- package/dist/ops/webgpu/unpack16.js +5 -5
- package/dist/ops/webgpu/utils/binary_op.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +4 -4
- package/dist/{ops-B5yanEdW.js → ops-C2_OXuZ4.js} +69 -69
- package/dist/{pack16-nQ6JaLo-.js → pack16-atD0eYRm.js} +9 -9
- package/dist/patches/webgpu_backend.js +6 -6
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +8 -8
- package/dist/{random_width-or-CEftb.js → random_width-BN4wGJaW.js} +33 -33
- package/dist/range-DKmP1-OQ.js +10 -0
- package/dist/relu-BsXmGzzu.js +9 -0
- package/dist/{reshape-ByE68wS9.js → reshape-BI0yzp1T.js} +1 -1
- package/dist/{resize_nearest_neighbor-B19mCEg2.js → resize_nearest_neighbor-BA_BX-ub.js} +26 -26
- package/dist/{rope-Ir4mTyD1.js → rope-DJ7Y7c-u.js} +1 -1
- package/dist/{scatter_nd_util-lvSiX8q4.js → scatter_nd_util-k9MUVUkn.js} +1 -1
- package/dist/{selu_util-kbhpTdYD.js → selu_util-DyW0X1WG.js} +5 -5
- package/dist/{shared-DT1TkE6w.js → shared-Q3BS6T03.js} +1 -1
- package/dist/{shared-dntlHIDQ.js → shared-nnSWpC3u.js} +86 -86
- package/dist/{slice-BfEGSH82.js → slice-wBNvzVyz.js} +1 -1
- package/dist/{slice_util-uTKwiEpW.js → slice_util-zN8KFC5I.js} +1 -1
- package/dist/{softmax-CA5jFsLR.js → softmax-DfuYyjMh.js} +1 -1
- package/dist/split-BYrLboMq.js +9 -0
- package/dist/squeeze-Bk8Brcct.js +10 -0
- package/dist/{stack-Cf4n9h0N.js → stack-CDWShFHF.js} +1 -1
- package/dist/{step-CINUs5QB.js → step-BS5JXRR6.js} +23 -23
- package/dist/{sum-DWAtNGez.js → sum-BPUfDB2X.js} +3 -3
- package/dist/tensor-CEt9Nm2s.js +8 -0
- package/dist/tensor1d-Cc_KCIDg.js +11 -0
- package/dist/{tensor2d-Bs9wZRc7.js → tensor2d-BN97fF71.js} +3 -3
- package/dist/{tensor4d-BARPdTaS.js → tensor4d-vuDDgdUI.js} +1 -1
- package/dist/{tfjs_backend-y1cvNhLA.js → tfjs_backend-806hyYve.js} +49 -49
- package/dist/{tile-mbfagpsB.js → tile-OWUvpIVt.js} +3 -3
- package/dist/tokeniser/BaseTokeniser.d.ts +25 -0
- package/dist/tokeniser/BaseTokeniser.js +94 -0
- package/dist/tokeniser/CharTokeniser.d.ts +10 -9
- package/dist/tokeniser/CharTokeniser.js +44 -30
- package/dist/tokeniser/bpe.d.ts +10 -9
- package/dist/tokeniser/bpe.js +67 -52
- package/dist/tokeniser/type.d.ts +14 -5
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.d.ts +3 -3
- package/dist/training/DatasetBuilder.js +34 -38
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.d.ts +4 -3
- package/dist/training/Trainer.js +22 -25
- package/dist/training/sparseCrossEntropy.js +3 -3
- package/dist/training/tasks/ConversationTask.d.ts +11 -0
- package/dist/training/tasks/ConversationTask.js +26 -0
- package/dist/training/tasks/PretrainingTask.d.ts +11 -0
- package/dist/training/tasks/PretrainingTask.js +34 -0
- package/dist/training/tasks/StartSentenceTask.d.ts +12 -0
- package/dist/training/tasks/StartSentenceTask.js +42 -0
- package/dist/training/tasks/Task.d.ts +8 -0
- package/dist/training/tasks/Task.js +41 -0
- package/dist/{transpose-ClWiBS_b.js → transpose-BUkQCJp9.js} +6 -6
- package/dist/{unsorted_segment_sum-BDDhB_E6.js → unsorted_segment_sum-BljxHhCY.js} +5 -5
- package/dist/utilities/dummy.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.js +1 -1
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.d.ts +1 -1
- package/dist/utilities/sentences.js +11 -11
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-WawDEaAb.js → variable-DPt_Iuog.js} +1 -1
- package/dist/{webgpu_program-DuOXPQol.js → webgpu_program-BpWRlghH.js} +3 -3
- package/dist/{webgpu_util-RxEF33Rj.js → webgpu_util-DMiKzzQM.js} +7 -7
- package/dist/{zeros-KnWaWf-X.js → zeros-5YROwwUH.js} +2 -2
- package/dist/{zeros_like-DvE73F4e.js → zeros_like-De4n1C3m.js} +71 -71
- package/package.json +1 -1
- package/dist/complex-DjxcVmoX.js +0 -11
- package/dist/concat-BV8bt5H-.js +0 -17
- package/dist/expand_dims-DT4tEPwA.js +0 -11
- package/dist/log_sum_exp-ngO0-4pK.js +0 -39
- package/dist/mat_mul-SjpJRLyL.js +0 -11
- package/dist/range-BklejeeW.js +0 -10
- package/dist/relu-CP0ZcxWO.js +0 -9
- package/dist/split-CVLc0w--.js +0 -9
- package/dist/squeeze-C7Z2srUo.js +0 -10
- package/dist/tensor-DJoc7gJU.js +0 -8
- package/dist/tensor1d-D11P_7Dp.js +0 -11
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
import { W as Mt } from "./backend_webgpu-
|
|
2
|
-
import {
|
|
3
|
-
import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-
|
|
4
|
-
import { g as
|
|
5
|
-
import { S as Ji, a as ea } from "./selu_util-
|
|
6
|
-
import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga, N as xa, O as Ca } from "./backend_util-
|
|
7
|
-
import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as wa, a as ya } from "./webgpu_program-
|
|
8
|
-
import { r as R, a as Sa } from "./Reshape-
|
|
9
|
-
import { s as ba } from "./shared-
|
|
10
|
-
import { c as Oe, a as Ce, b as we, d as Me, e as va, g as ft } from "./axis_util-
|
|
11
|
-
import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-
|
|
12
|
-
import { p as ka, a as Ia, b as Ra, d as Pa } from "./slice_util-
|
|
13
|
-
import { z as $a } from "./zeros-
|
|
14
|
-
import { c as me, a as Da } from "./concat_util-
|
|
1
|
+
import { W as Mt } from "./backend_webgpu-mbhNnlx9.js";
|
|
2
|
+
import { ad as Et, aa as X, x as L, de as Ut, df as Ht, bZ as Gt, U as D, _ as j, aX as Xt, ag as Ye, aQ as Kt, a7 as qt, ak as fe, bR as Yt, c9 as jt, ca as Qt, bX as Zt, cQ as Jt, as as es, ab as De, af as te, aS as ts, bo as ss, bp as os, bq as is, cb as as, cc as rs, cd as ns, ce as us, cf as ds, cg as ls, am as cs, b7 as hs, br as ps, cA as fs, cR as ms, cS as gs, D as xs, S as Cs, bt as ws, bf as ys, dg as Ss, b9 as bs, ar as vs, bU as ks, bV as Is, ae as Rs, b_ as Ps, C as $s, cU as Ds, ap as Ns, z as zs, bv as As, cF as Fs, bw as Ws, cB as Ls, cV as Vs, cC as Bs, bx as _s, by as Ts, bh as Os, bz as Ms, bA as Es, cD as Us, ch as Hs, bB as Gs, cH as Xs, cI as Ks, dh as qs, ci as Ys, cW as js, cX as Qs, di as Zs, c2 as Js, N as eo, bg as to, aI as so, cY as oo, bC as io, bD as ao, an as ro, A as no, b$ as uo, cr as lo, bi as co, F as ho, c0 as po, dj as fo, a8 as at, bu as mo, cG as go, dk as xo, aj as Co, G as wo, at as ke, b1 as yo, b2 as So, cs as bo, cj as vo, ck as ko, cl as Io, aJ as Ro, b3 as Po, b4 as $o, dl as Do, ao as No, b5 as zo, b6 as Ao, bF as Fo, cn as Wo, cm as Lo, c_ as Vo, c1 as Bo, bG as _o, cE as To, c$ as Oo, d0 as Mo, dm as Eo, a$ as Uo, b8 as Ho, co as Go, M as Xo, I as Ko, dn as qo, aq as Yo, bk as jo, bl as Qo, bH as Zo, d5 as Jo, bI as ei, P as ti, a6 as si, bJ as oi, d1 as ii, aK as ai, c3 as ri, Z as ni, aT as ui, cp as di, H as li, aL as ci, bd as hi, d2 as pi, be as fi, d3 as mi, bL as gi, bj as xi, ba as Ci, bM as wi, ai as yi, dp as Si, a_ as bi, bN as vi, aH as ki, cq as Ii, bO as Ri, bP as Pi, bE as $i, bK as Di, dq as Ni, dr as zi, T as Ai, ax as rt, ds as Fi, Q as Wi, J as Li, c5 as Vi, d4 as Bi, bb as _i, aM as Ti, ct as Oi, dt as Mi, c7 as Ei, cu as Ui, bs as Hi, du as Gi, cv as Xi, bn as Ki, bc as qi, bQ as Yi, f as ji } from "./index-Duu1Lvvv.js";
|
|
3
|
+
import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-DMiKzzQM.js";
|
|
4
|
+
import { g as Te, B as F } from "./binary_op_util-pKXltfxI.js";
|
|
5
|
+
import { S as Ji, a as ea } from "./selu_util-DyW0X1WG.js";
|
|
6
|
+
import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga, N as xa, O as Ca } from "./backend_util-DC3rBo_H.js";
|
|
7
|
+
import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as wa, a as ya } from "./webgpu_program-BpWRlghH.js";
|
|
8
|
+
import { r as R, a as Sa } from "./Reshape-Bd4V_4X7.js";
|
|
9
|
+
import { s as ba } from "./shared-nnSWpC3u.js";
|
|
10
|
+
import { c as Oe, a as Ce, b as we, d as Me, e as va, g as ft } from "./axis_util-DGqbT-FX.js";
|
|
11
|
+
import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-BS5JXRR6.js";
|
|
12
|
+
import { p as ka, a as Ia, b as Ra, d as Pa } from "./slice_util-zN8KFC5I.js";
|
|
13
|
+
import { z as $a } from "./zeros-5YROwwUH.js";
|
|
14
|
+
import { c as me, a as Da } from "./concat_util-D0je5Ppu.js";
|
|
15
15
|
import { n as Na, a as za } from "./non_max_suppression_impl-B2W7YjZB.js";
|
|
16
|
-
import { c as He } from "./scatter_nd_util-
|
|
16
|
+
import { c as He } from "./scatter_nd_util-k9MUVUkn.js";
|
|
17
17
|
Qi() && Et(
|
|
18
18
|
"webgpu",
|
|
19
19
|
async () => {
|
|
@@ -60,7 +60,7 @@ const Aa = "return abs(a);", Fa = `
|
|
|
60
60
|
return uniforms.NAN;
|
|
61
61
|
}
|
|
62
62
|
return atan(a);
|
|
63
|
-
`,
|
|
63
|
+
`, _a = `
|
|
64
64
|
if (abs(a) > 1.) {
|
|
65
65
|
return uniforms.NAN;
|
|
66
66
|
}
|
|
@@ -71,7 +71,7 @@ const Aa = "return abs(a);", Fa = `
|
|
|
71
71
|
return -uniforms.INFINITY;
|
|
72
72
|
}
|
|
73
73
|
return atanh(a);
|
|
74
|
-
`,
|
|
74
|
+
`, Ta = "return ceil(a);", Oa = "return cos(a);", Ma = `
|
|
75
75
|
let e2x = exp(-a);
|
|
76
76
|
return (e2x + 1.0 / e2x) / 2.0;
|
|
77
77
|
`, Ea = "return exp(a) - 1.0;", Ua = "if (a >= 0.0) { return a; } return (exp(a) - 1.0);", Ha = `
|
|
@@ -162,13 +162,13 @@ function ee(o, t) {
|
|
|
162
162
|
case y.ATAN:
|
|
163
163
|
return Ba;
|
|
164
164
|
case y.ATANH:
|
|
165
|
-
return
|
|
165
|
+
return _a;
|
|
166
166
|
case y.COS:
|
|
167
167
|
return Oa;
|
|
168
168
|
case y.COSH:
|
|
169
169
|
return Ma;
|
|
170
170
|
case y.CEIL:
|
|
171
|
-
return
|
|
171
|
+
return Ta;
|
|
172
172
|
case y.ELU:
|
|
173
173
|
return t ? Ha : Ua;
|
|
174
174
|
case y.ERF:
|
|
@@ -248,7 +248,7 @@ function q(o, t = !1, e = !1, i = 3) {
|
|
|
248
248
|
else if (o === "relu6")
|
|
249
249
|
s = ee(y.RELU6, e);
|
|
250
250
|
else if (o === "prelu")
|
|
251
|
-
s =
|
|
251
|
+
s = Te(F.PRELU, e);
|
|
252
252
|
else if (o === "sigmoid")
|
|
253
253
|
s = ee(y.SIGMOID, e);
|
|
254
254
|
else if (o === "leakyrelu")
|
|
@@ -831,12 +831,12 @@ const Vr = {
|
|
|
831
831
|
function Fe({ a: o, b: t, transposeA: e, transposeB: i, backend: s, bias: a = null, preluActivationWeights: r = null, leakyreluAlpha: n = 0, activation: u = null }) {
|
|
832
832
|
const d = o.shape.length, h = t.shape.length, l = e ? o.shape[d - 2] : o.shape[d - 1], c = i ? t.shape[h - 1] : t.shape[h - 2], p = e ? o.shape[d - 1] : o.shape[d - 2], f = i ? t.shape[h - 2] : t.shape[h - 1], m = o.shape.slice(0, -2), g = t.shape.slice(0, -2), x = D(m), C = D(g), k = j(o.shape.slice(0, -2), t.shape.slice(0, -2)).concat([p, f]);
|
|
833
833
|
L(l === c, () => `Error in matMul: inner shapes (${l}) and (${c}) of Tensors with shapes ${o.shape} and ${t.shape} and transposeA=${e} and transposeB=${i} must match.`);
|
|
834
|
-
const I = e ? [x, l, p] : [x, p, l], P = i ? [C, f, c] : [C, c, f], $ = R({ inputs: { x: o }, backend: s, attrs: { shape: I } }), A = R({ inputs: { x: t }, backend: s, attrs: { shape: P } }), z = [$, A], B = Math.max(x, C),
|
|
834
|
+
const I = e ? [x, l, p] : [x, p, l], P = i ? [C, f, c] : [C, c, f], $ = R({ inputs: { x: o }, backend: s, attrs: { shape: I } }), A = R({ inputs: { x: t }, backend: s, attrs: { shape: P } }), z = [$, A], B = Math.max(x, C), _ = [$, A], H = [
|
|
835
835
|
{ type: "int32", data: [p] },
|
|
836
836
|
{ type: "int32", data: [f] },
|
|
837
837
|
{ type: "int32", data: [l] }
|
|
838
838
|
];
|
|
839
|
-
let
|
|
839
|
+
let T, E;
|
|
840
840
|
const J = [B, p, f];
|
|
841
841
|
let O = X().get("WEBGPU_MATMUL_PROGRAM_TYPE");
|
|
842
842
|
if (O < 0) {
|
|
@@ -845,41 +845,41 @@ function Fe({ a: o, b: t, transposeA: e, transposeB: i, backend: s, bias: a = nu
|
|
|
845
845
|
}
|
|
846
846
|
switch (O) {
|
|
847
847
|
case Y.MatMulReduceProgram:
|
|
848
|
-
|
|
848
|
+
T = new Nr(J, e, i, a, u, r);
|
|
849
849
|
break;
|
|
850
850
|
case Y.MatMulSplitKProgram: {
|
|
851
|
-
if (E = M({ backend: s, attrs: { shape: J, value: 0, dtype: o.dtype } }),
|
|
852
|
-
E = s.runWebGPUProgram(
|
|
851
|
+
if (E = M({ backend: s, attrs: { shape: J, value: 0, dtype: o.dtype } }), T = new Fr(J, c, e, i), a || u) {
|
|
852
|
+
E = s.runWebGPUProgram(T, _, o.dtype, H, E);
|
|
853
853
|
const le = new Wr(E.shape, a, u, r);
|
|
854
854
|
let ce = null;
|
|
855
855
|
const ve = [E];
|
|
856
856
|
a && ve.push(a), r && ve.push(r), u === "leakyrelu" && (ce = [{ type: "float32", data: [n] }], le.uniforms += " alpha : f32,");
|
|
857
857
|
const qe = s.runWebGPUProgram(le, ve, E.dtype, ce);
|
|
858
858
|
z.push(E);
|
|
859
|
-
const
|
|
859
|
+
const Tt = R({ inputs: { x: qe }, backend: s, attrs: { shape: k } });
|
|
860
860
|
z.push(qe);
|
|
861
861
|
for (const Ot of z)
|
|
862
862
|
s.disposeData(Ot.dataId);
|
|
863
|
-
return
|
|
863
|
+
return Tt;
|
|
864
864
|
}
|
|
865
865
|
break;
|
|
866
866
|
}
|
|
867
867
|
case Y.MatMulSmallOutputSizeProgram:
|
|
868
|
-
|
|
868
|
+
T = new Ar(I, P, J, e, i, a, u, r);
|
|
869
869
|
break;
|
|
870
870
|
case Y.MatMulPackedProgram:
|
|
871
871
|
const ae = s.adapterInfo.isIntel();
|
|
872
|
-
|
|
872
|
+
T = new $r(I, J, e, i, a, u, r, ae);
|
|
873
873
|
break;
|
|
874
874
|
default:
|
|
875
875
|
throw new Error(`Unsupported MatMulProgramType ${O}.`);
|
|
876
876
|
}
|
|
877
|
-
a &&
|
|
878
|
-
const
|
|
877
|
+
a && _.push(a), r && _.push(r), u === "leakyrelu" && (H.push({ type: "float32", data: [n] }), T.uniforms += " alpha : f32,"), E = s.runWebGPUProgram(T, _, o.dtype, H, E);
|
|
878
|
+
const _t = R({ inputs: { x: E }, backend: s, attrs: { shape: k } });
|
|
879
879
|
z.push(E);
|
|
880
880
|
for (const ae of z)
|
|
881
881
|
s.disposeData(ae.dataId);
|
|
882
|
-
return
|
|
882
|
+
return _t;
|
|
883
883
|
}
|
|
884
884
|
function Br(o) {
|
|
885
885
|
const { inputs: t, backend: e, attrs: i } = o, { a: s, b: a, bias: r, preluActivationWeights: n } = t, { transposeA: u, transposeB: d, activation: h, leakyreluAlpha: l } = i;
|
|
@@ -895,7 +895,7 @@ function Br(o) {
|
|
|
895
895
|
activation: h
|
|
896
896
|
});
|
|
897
897
|
}
|
|
898
|
-
const
|
|
898
|
+
const _r = {
|
|
899
899
|
kernelName: Xt,
|
|
900
900
|
backendName: "webgpu",
|
|
901
901
|
kernelFunc: Br
|
|
@@ -908,7 +908,7 @@ class Ze {
|
|
|
908
908
|
return `
|
|
909
909
|
fn binaryOpComplex(
|
|
910
910
|
areal : f32, aimag : f32, breal : f32, bimag : f32) -> f32 {
|
|
911
|
-
${
|
|
911
|
+
${Te(this.op, !1)}
|
|
912
912
|
}
|
|
913
913
|
|
|
914
914
|
${S("index")} {
|
|
@@ -937,7 +937,7 @@ class Re {
|
|
|
937
937
|
let t;
|
|
938
938
|
const e = this.outputComponent === 4 ? "vec4<f32>" : "f32", i = `
|
|
939
939
|
fn binaryOperation(a : ${e}, b : ${e}) -> ${e} {
|
|
940
|
-
${
|
|
940
|
+
${Te(this.op, this.outputComponent === 4)}
|
|
941
941
|
};
|
|
942
942
|
`;
|
|
943
943
|
if (this.type === "shared") {
|
|
@@ -981,7 +981,7 @@ function U(o) {
|
|
|
981
981
|
const { inputs: t } = o, { x: e } = t;
|
|
982
982
|
return o.backend.incRef(e.dataId), { dataId: e.dataId, shape: e.shape, dtype: e.dtype };
|
|
983
983
|
}
|
|
984
|
-
const
|
|
984
|
+
const Tr = {
|
|
985
985
|
kernelName: Kt,
|
|
986
986
|
backendName: "webgpu",
|
|
987
987
|
kernelFunc: U
|
|
@@ -1337,14 +1337,14 @@ function ie(o, t, e, i, s) {
|
|
|
1337
1337
|
}
|
|
1338
1338
|
return r.forEach((m) => s.disposeData(m.dataId)), f;
|
|
1339
1339
|
}
|
|
1340
|
-
function
|
|
1340
|
+
function _n(o) {
|
|
1341
1341
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { keepDims: a, axis: r } = i;
|
|
1342
1342
|
return ie(s, r, a, "all", e);
|
|
1343
1343
|
}
|
|
1344
|
-
const
|
|
1344
|
+
const Tn = {
|
|
1345
1345
|
kernelName: ss,
|
|
1346
1346
|
backendName: "webgpu",
|
|
1347
|
-
kernelFunc:
|
|
1347
|
+
kernelFunc: _n
|
|
1348
1348
|
};
|
|
1349
1349
|
function On(o) {
|
|
1350
1350
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { keepDims: a, axis: r } = i;
|
|
@@ -2161,12 +2161,12 @@ function Bu(o) {
|
|
|
2161
2161
|
];
|
|
2162
2162
|
return D(s.shape) % 4 === 0 ? n = new Lu(s.shape) : n = new Vu(s.shape), e.runWebGPUProgram(n, [s], s.dtype, u);
|
|
2163
2163
|
}
|
|
2164
|
-
const
|
|
2164
|
+
const _u = {
|
|
2165
2165
|
kernelName: $s,
|
|
2166
2166
|
backendName: "webgpu",
|
|
2167
2167
|
kernelFunc: Bu
|
|
2168
2168
|
};
|
|
2169
|
-
class
|
|
2169
|
+
class Tu {
|
|
2170
2170
|
constructor(t) {
|
|
2171
2171
|
this.outputShape = [], this.variableNames = ["real", "imag"], this.workgroupSize = [64, 1, 1], this.size = !0, this.outputShape = t, this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize), this.shaderKey = "complexAbs";
|
|
2172
2172
|
}
|
|
@@ -2194,7 +2194,7 @@ function Je(o, t) {
|
|
|
2194
2194
|
};
|
|
2195
2195
|
}
|
|
2196
2196
|
function Ou(o) {
|
|
2197
|
-
const { inputs: t, backend: e } = o, { x: i } = t, s = e.tensorMap.get(i.dataId), a = new
|
|
2197
|
+
const { inputs: t, backend: e } = o, { x: i } = t, s = e.tensorMap.get(i.dataId), a = new Tu(i.shape), r = [
|
|
2198
2198
|
Je(i, s.complexTensorInfos.real),
|
|
2199
2199
|
Je(i, s.complexTensorInfos.imag)
|
|
2200
2200
|
];
|
|
@@ -2593,8 +2593,8 @@ function Qu({ x: o, filter: t, convInfo: e, backend: i, bias: s = null, preluAct
|
|
|
2593
2593
|
{ type: "int32", data: [h] }
|
|
2594
2594
|
], z = i.runWebGPUProgram($, [o], o.dtype, A), B = [];
|
|
2595
2595
|
B.push(z);
|
|
2596
|
-
const
|
|
2597
|
-
if (B.push(
|
|
2596
|
+
const _ = R({ inputs: { x: t }, backend: i, attrs: { shape: [1, k, -1] } });
|
|
2597
|
+
if (B.push(_), a != null) {
|
|
2598
2598
|
const O = Pe(a.shape, w);
|
|
2599
2599
|
O != null && (a = R({
|
|
2600
2600
|
inputs: { x: a },
|
|
@@ -2607,8 +2607,8 @@ function Qu({ x: o, filter: t, convInfo: e, backend: i, bias: s = null, preluAct
|
|
|
2607
2607
|
O != null && (s = R({ inputs: { x: s }, backend: i, attrs: { shape: O } }), B.push(s));
|
|
2608
2608
|
}
|
|
2609
2609
|
const E = Fe({
|
|
2610
|
-
a: w ? z :
|
|
2611
|
-
b: w ?
|
|
2610
|
+
a: w ? z : _,
|
|
2611
|
+
b: w ? _ : z,
|
|
2612
2612
|
transposeA: !w,
|
|
2613
2613
|
transposeB: !1,
|
|
2614
2614
|
backend: i,
|
|
@@ -3305,12 +3305,12 @@ const gd = {
|
|
|
3305
3305
|
kernelFunc: md
|
|
3306
3306
|
};
|
|
3307
3307
|
const xd = N({ opType: y.COS }), Cd = {
|
|
3308
|
-
kernelName:
|
|
3308
|
+
kernelName: _s,
|
|
3309
3309
|
backendName: "webgpu",
|
|
3310
3310
|
kernelFunc: xd
|
|
3311
3311
|
};
|
|
3312
3312
|
const wd = N({ opType: y.COSH }), yd = {
|
|
3313
|
-
kernelName:
|
|
3313
|
+
kernelName: Ts,
|
|
3314
3314
|
backendName: "webgpu",
|
|
3315
3315
|
kernelFunc: wd
|
|
3316
3316
|
};
|
|
@@ -3893,7 +3893,7 @@ class Bd {
|
|
|
3893
3893
|
`;
|
|
3894
3894
|
}
|
|
3895
3895
|
}
|
|
3896
|
-
function
|
|
3896
|
+
function _d(o) {
|
|
3897
3897
|
const { inputs: t, backend: e, attrs: i } = o, { x: s, dy: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, filterShape: h } = i, l = Z(
|
|
3898
3898
|
s.shape,
|
|
3899
3899
|
h,
|
|
@@ -3916,10 +3916,10 @@ function Td(o) {
|
|
|
3916
3916
|
];
|
|
3917
3917
|
return e.runWebGPUProgram(c, [s, a], "float32", p);
|
|
3918
3918
|
}
|
|
3919
|
-
const
|
|
3919
|
+
const Td = {
|
|
3920
3920
|
kernelName: Xs,
|
|
3921
3921
|
backendName: "webgpu",
|
|
3922
|
-
kernelFunc:
|
|
3922
|
+
kernelFunc: _d
|
|
3923
3923
|
};
|
|
3924
3924
|
function Od(o) {
|
|
3925
3925
|
const { inputs: t, backend: e, attrs: i } = o, { dy: s, filter: a } = t, { strides: r, dilations: n, pad: u, dimRoundingMode: d, inputShape: h } = i, l = Z(
|
|
@@ -4314,7 +4314,7 @@ const ml = N({
|
|
|
4314
4314
|
backendName: "webgpu",
|
|
4315
4315
|
kernelFunc: ml
|
|
4316
4316
|
};
|
|
4317
|
-
function
|
|
4317
|
+
function _e(o) {
|
|
4318
4318
|
const { inputs: t, attrs: e, backend: i } = o, { dim: s } = e, { input: a } = t, r = a.shape.length, n = a.shape.slice();
|
|
4319
4319
|
let u = s;
|
|
4320
4320
|
return s < 0 && (L(-(r + 1) <= s, () => `Axis must be in the interval [${-(r + 1)}, ${r}]`), u = r + s + 1), n.splice(u, 0, 1), R({ inputs: { x: a }, backend: i, attrs: { shape: n } });
|
|
@@ -4322,7 +4322,7 @@ function Te(o) {
|
|
|
4322
4322
|
const xl = {
|
|
4323
4323
|
kernelName: no,
|
|
4324
4324
|
backendName: "webgpu",
|
|
4325
|
-
kernelFunc:
|
|
4325
|
+
kernelFunc: _e
|
|
4326
4326
|
};
|
|
4327
4327
|
const Cl = N({ opType: y.EXPM1, cpuKernelImpl: Kr }), wl = {
|
|
4328
4328
|
kernelName: uo,
|
|
@@ -4487,8 +4487,8 @@ function Nl(o) {
|
|
|
4487
4487
|
C = e.device.importExternalTexture({ source: s });
|
|
4488
4488
|
else {
|
|
4489
4489
|
if (f) {
|
|
4490
|
-
const
|
|
4491
|
-
(re == null ||
|
|
4490
|
+
const T = X().getBool("CANVAS2D_WILL_READ_FREQUENTLY_FOR_GPU");
|
|
4491
|
+
(re == null || T !== Le) && (Le = T, re = document.createElement("canvas").getContext("2d", { willReadFrequently: Le })), re.canvas.width = h, re.canvas.height = l, re.drawImage(s, 0, 0, h, l), s = re.canvas;
|
|
4492
4492
|
}
|
|
4493
4493
|
const B = GPUTextureUsage.COPY_DST | GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING, H = e.textureManager.acquireTexture(c[1], c[0], "rgba8unorm", B);
|
|
4494
4494
|
e.queue.copyExternalImageToTexture({ source: s }, { texture: H }, [c[1], c[0]]), C = H;
|
|
@@ -4622,7 +4622,7 @@ class Bl {
|
|
|
4622
4622
|
`;
|
|
4623
4623
|
}
|
|
4624
4624
|
}
|
|
4625
|
-
function
|
|
4625
|
+
function _l(o) {
|
|
4626
4626
|
const { inputs: t, backend: e } = o, { params: i, indices: s } = t, a = s.shape, r = a[a.length - 1], n = D(i.shape), [u, d, h, l] = fa(i, s), c = R({ inputs: { x: s }, backend: e, attrs: { shape: [d, r] } }), p = R({
|
|
4627
4627
|
inputs: { x: i },
|
|
4628
4628
|
backend: e,
|
|
@@ -4635,10 +4635,10 @@ function Tl(o) {
|
|
|
4635
4635
|
const f = new Bl(r, [d, h]), m = [{ type: "int32", data: [r] }, { type: "int32", data: l }], g = e.runWebGPUProgram(f, [p, c], p.dtype, m), x = R({ inputs: { x: g }, backend: e, attrs: { shape: u } });
|
|
4636
4636
|
return e.disposeData(c.dataId), e.disposeData(p.dataId), e.disposeData(g.dataId), x;
|
|
4637
4637
|
}
|
|
4638
|
-
const
|
|
4638
|
+
const Tl = {
|
|
4639
4639
|
kernelName: Co,
|
|
4640
4640
|
backendName: "webgpu",
|
|
4641
|
-
kernelFunc:
|
|
4641
|
+
kernelFunc: _l
|
|
4642
4642
|
};
|
|
4643
4643
|
class Ol {
|
|
4644
4644
|
constructor(t, e) {
|
|
@@ -5003,7 +5003,7 @@ function Dc(o) {
|
|
|
5003
5003
|
return St(s, h, "max", e);
|
|
5004
5004
|
}
|
|
5005
5005
|
const Nc = {
|
|
5006
|
-
kernelName:
|
|
5006
|
+
kernelName: _o,
|
|
5007
5007
|
backendName: "webgpu",
|
|
5008
5008
|
kernelFunc: Dc
|
|
5009
5009
|
};
|
|
@@ -5033,7 +5033,7 @@ function zc(o) {
|
|
|
5033
5033
|
return e.runWebGPUProgram(c, [s], s.dtype, p);
|
|
5034
5034
|
}
|
|
5035
5035
|
const Ac = {
|
|
5036
|
-
kernelName:
|
|
5036
|
+
kernelName: To,
|
|
5037
5037
|
backendName: "webgpu",
|
|
5038
5038
|
kernelFunc: zc
|
|
5039
5039
|
};
|
|
@@ -5254,12 +5254,12 @@ function Bc(o) {
|
|
|
5254
5254
|
const x = e.runWebGPUProgram(g, [s, m], n.dtype, f);
|
|
5255
5255
|
return e.disposeData(m.dataId), x;
|
|
5256
5256
|
}
|
|
5257
|
-
const
|
|
5257
|
+
const _c = {
|
|
5258
5258
|
kernelName: Mo,
|
|
5259
5259
|
backendName: "webgpu",
|
|
5260
5260
|
kernelFunc: Bc
|
|
5261
5261
|
};
|
|
5262
|
-
function
|
|
5262
|
+
function Tc(o) {
|
|
5263
5263
|
const { inputs: t, backend: e, attrs: i } = o, { filterSize: s, strides: a, pad: r, includeBatchInIndex: n } = i, { x: u } = t;
|
|
5264
5264
|
L(u.shape.length === 4, () => `Error in maxPool: input must be rank 4 but got rank ${u.shape.length}.`);
|
|
5265
5265
|
const d = [1, 1];
|
|
@@ -5283,7 +5283,7 @@ function _c(o) {
|
|
|
5283
5283
|
const Oc = {
|
|
5284
5284
|
kernelName: Eo,
|
|
5285
5285
|
backendName: "webgpu",
|
|
5286
|
-
kernelFunc:
|
|
5286
|
+
kernelFunc: Tc
|
|
5287
5287
|
};
|
|
5288
5288
|
function Mc(o) {
|
|
5289
5289
|
const { inputs: t, backend: e, attrs: i } = o, { x: s } = t, { axis: a, keepDims: r } = i;
|
|
@@ -5583,13 +5583,13 @@ const lh = {
|
|
|
5583
5583
|
function ch(o) {
|
|
5584
5584
|
const { inputs: t, backend: e, attrs: i } = o, { axis: s } = i;
|
|
5585
5585
|
if (t.length === 1)
|
|
5586
|
-
return
|
|
5586
|
+
return _e({ inputs: { input: t[0] }, backend: e, attrs: { dim: s } });
|
|
5587
5587
|
const a = t[0].shape, r = t[0].dtype;
|
|
5588
5588
|
t.forEach((h) => {
|
|
5589
5589
|
si(a, h.shape, "All tensors passed to stack must have matching shapes"), L(r === h.dtype, () => "All tensors passed to stack must have matching dtypes");
|
|
5590
5590
|
});
|
|
5591
5591
|
const n = [], u = t.map((h) => {
|
|
5592
|
-
const l =
|
|
5592
|
+
const l = _e({ inputs: { input: h }, backend: e, attrs: { dim: s } });
|
|
5593
5593
|
return n.push(l), l;
|
|
5594
5594
|
}), d = kt({ inputs: u, backend: e, attrs: { axis: s } });
|
|
5595
5595
|
return n.forEach((h) => e.disposeData(h.dataId)), d;
|
|
@@ -5878,7 +5878,7 @@ const Bh = {
|
|
|
5878
5878
|
backendName: "webgpu",
|
|
5879
5879
|
kernelFunc: Vh
|
|
5880
5880
|
};
|
|
5881
|
-
class
|
|
5881
|
+
class _h {
|
|
5882
5882
|
constructor(t, e, i, s) {
|
|
5883
5883
|
this.variableNames = ["x"], this.uniforms = "adjustHeightWidth : vec2<f32>, roundBase : f32,", this.workgroupSize = [64, 1, 1], this.size = !0, this.outputShape = [t[0], e, i, t[3]], this.dispatchLayout = v(this.outputShape), this.dispatch = b(this.dispatchLayout, this.outputShape, this.workgroupSize), this.halfPixelCenters = s, this.shaderKey = `resizeNearest_${s}`;
|
|
5884
5884
|
}
|
|
@@ -5918,17 +5918,17 @@ class Th {
|
|
|
5918
5918
|
`;
|
|
5919
5919
|
}
|
|
5920
5920
|
}
|
|
5921
|
-
function
|
|
5921
|
+
function Th(o) {
|
|
5922
5922
|
const { inputs: t, backend: e, attrs: i } = o, { images: s } = t, { alignCorners: a, halfPixelCenters: r, size: n } = i, [u, d] = n, h = a && u > 1 ? 1 : 0, l = a && d > 1 ? 1 : 0, p = [
|
|
5923
5923
|
{ type: "float32", data: [h, l] },
|
|
5924
5924
|
{ type: "float32", data: [a ? 0.5 : 0] }
|
|
5925
|
-
], f = new
|
|
5925
|
+
], f = new _h(s.shape, u, d, r);
|
|
5926
5926
|
return e.runWebGPUProgram(f, [s], s.dtype, p);
|
|
5927
5927
|
}
|
|
5928
5928
|
const Oh = {
|
|
5929
5929
|
kernelName: fi,
|
|
5930
5930
|
backendName: "webgpu",
|
|
5931
|
-
kernelFunc:
|
|
5931
|
+
kernelFunc: Th
|
|
5932
5932
|
};
|
|
5933
5933
|
class Mh {
|
|
5934
5934
|
constructor(t, e) {
|
|
@@ -6493,8 +6493,8 @@ const zp = {
|
|
|
6493
6493
|
function Ap(o) {
|
|
6494
6494
|
const { inputs: t, backend: e, attrs: i } = o, { sparseIndices: s, sparseValues: a, defaultValue: r } = t, { outputShape: n } = i, { sliceRank: u, numUpdates: d, sliceSize: h, strides: l, outputSize: c } = He(a, s, n), p = !1;
|
|
6495
6495
|
if (a.dtype === "string") {
|
|
6496
|
-
const A = e.bufferSync(s), z = e.bufferSync(a), B = rt(e.readSync(r.dataId)[0]),
|
|
6497
|
-
return e.makeTensorInfo(n,
|
|
6496
|
+
const A = e.bufferSync(s), z = e.bufferSync(a), B = rt(e.readSync(r.dataId)[0]), _ = pn(A, z, n, c, h, d, u, l, B, p);
|
|
6497
|
+
return e.makeTensorInfo(n, _.dtype, _.values);
|
|
6498
6498
|
}
|
|
6499
6499
|
const f = [c / h, h], m = R({
|
|
6500
6500
|
inputs: { x: s },
|
|
@@ -6559,7 +6559,7 @@ const Vp = N({ opType: y.SQRT }), Bp = {
|
|
|
6559
6559
|
backendName: "webgpu",
|
|
6560
6560
|
kernelFunc: Vp
|
|
6561
6561
|
};
|
|
6562
|
-
const
|
|
6562
|
+
const _p = {
|
|
6563
6563
|
kernelName: Bi,
|
|
6564
6564
|
backendName: "webgpu",
|
|
6565
6565
|
kernelFunc: ({ inputs: o, backend: t }) => {
|
|
@@ -6567,19 +6567,19 @@ const Tp = {
|
|
|
6567
6567
|
return i.runWebGPUProgram(s, [e], e.dtype);
|
|
6568
6568
|
}
|
|
6569
6569
|
};
|
|
6570
|
-
const
|
|
6570
|
+
const Tp = V({
|
|
6571
6571
|
opType: F.SQUARED_DIFFERENCE
|
|
6572
6572
|
}), Op = {
|
|
6573
|
-
kernelName:
|
|
6573
|
+
kernelName: _i,
|
|
6574
6574
|
backendName: "webgpu",
|
|
6575
|
-
kernelFunc:
|
|
6575
|
+
kernelFunc: Tp
|
|
6576
6576
|
};
|
|
6577
6577
|
function Mp({ inputs: o, attrs: t, backend: e }) {
|
|
6578
6578
|
const { x: i } = o, s = new ue(i.shape, y.STEP, "stepAlpha : f32,"), a = [{ type: "float32", data: [t.alpha] }];
|
|
6579
6579
|
return e.runWebGPUProgram(s, [i], i.dtype, a);
|
|
6580
6580
|
}
|
|
6581
6581
|
const Ep = {
|
|
6582
|
-
kernelName:
|
|
6582
|
+
kernelName: Ti,
|
|
6583
6583
|
backendName: "webgpu",
|
|
6584
6584
|
kernelFunc: Mp
|
|
6585
6585
|
};
|
|
@@ -6860,14 +6860,14 @@ function af(o) {
|
|
|
6860
6860
|
const h = D(n) / u, l = R({ inputs: { x: s }, attrs: { shape: [h, u] }, backend: e }), c = it(a), p = it(u);
|
|
6861
6861
|
let f = null;
|
|
6862
6862
|
const m = () => f === null ? [l, l] : [l, f], g = (I, P, $) => {
|
|
6863
|
-
const A = m(), z = new sf($),
|
|
6863
|
+
const A = m(), z = new sf($), _ = [
|
|
6864
6864
|
{ type: "int32", data: [u] },
|
|
6865
6865
|
{ type: "int32", data: [f === null ? 1 : 0] },
|
|
6866
6866
|
{ type: "float32", data: [Number.NEGATIVE_INFINITY] },
|
|
6867
6867
|
{ type: "int32", data: [I] },
|
|
6868
6868
|
{ type: "int32", data: [P] }
|
|
6869
6869
|
], H = f;
|
|
6870
|
-
f = e.runWebGPUProgram(z, A, "int32",
|
|
6870
|
+
f = e.runWebGPUProgram(z, A, "int32", _), ne(e, H);
|
|
6871
6871
|
};
|
|
6872
6872
|
for (let I = 1; I < c; I *= 2) {
|
|
6873
6873
|
const P = I * 2;
|
|
@@ -6881,9 +6881,9 @@ function af(o) {
|
|
|
6881
6881
|
{ type: "int32", data: [c] }
|
|
6882
6882
|
], B = f;
|
|
6883
6883
|
f = e.runWebGPUProgram($, P, "int32", z), ne(e, B);
|
|
6884
|
-
const
|
|
6885
|
-
for (let
|
|
6886
|
-
g(H,
|
|
6884
|
+
const _ = c / 2, H = _ * 2;
|
|
6885
|
+
for (let T = _; T >= 1; T /= 2)
|
|
6886
|
+
g(H, T, f.shape);
|
|
6887
6887
|
}
|
|
6888
6888
|
let x = f;
|
|
6889
6889
|
f = de({ inputs: { x: f }, backend: e, attrs: { begin: 0, size: [h, a] } }), ne(e, x);
|
|
@@ -7137,13 +7137,13 @@ const ff = {
|
|
|
7137
7137
|
kernelFunc: pf
|
|
7138
7138
|
};
|
|
7139
7139
|
const mf = [
|
|
7140
|
-
|
|
7140
|
+
_r,
|
|
7141
7141
|
vn,
|
|
7142
7142
|
In,
|
|
7143
7143
|
Pn,
|
|
7144
7144
|
Dn,
|
|
7145
7145
|
An,
|
|
7146
|
-
|
|
7146
|
+
Tn,
|
|
7147
7147
|
Mn,
|
|
7148
7148
|
Un,
|
|
7149
7149
|
Gn,
|
|
@@ -7162,7 +7162,7 @@ const mf = [
|
|
|
7162
7162
|
$u,
|
|
7163
7163
|
Au,
|
|
7164
7164
|
Wu,
|
|
7165
|
-
|
|
7165
|
+
_u,
|
|
7166
7166
|
Or,
|
|
7167
7167
|
Mu,
|
|
7168
7168
|
Gu,
|
|
@@ -7179,7 +7179,7 @@ const mf = [
|
|
|
7179
7179
|
Pd,
|
|
7180
7180
|
Dd,
|
|
7181
7181
|
Ad,
|
|
7182
|
-
|
|
7182
|
+
Td,
|
|
7183
7183
|
Md,
|
|
7184
7184
|
Ld,
|
|
7185
7185
|
Hd,
|
|
@@ -7204,11 +7204,11 @@ const mf = [
|
|
|
7204
7204
|
Al,
|
|
7205
7205
|
Wl,
|
|
7206
7206
|
Vl,
|
|
7207
|
-
|
|
7207
|
+
Tl,
|
|
7208
7208
|
El,
|
|
7209
7209
|
Hl,
|
|
7210
7210
|
Xl,
|
|
7211
|
-
|
|
7211
|
+
Tr,
|
|
7212
7212
|
ql,
|
|
7213
7213
|
Uu,
|
|
7214
7214
|
jl,
|
|
@@ -7228,7 +7228,7 @@ const mf = [
|
|
|
7228
7228
|
ou,
|
|
7229
7229
|
$c,
|
|
7230
7230
|
Nc,
|
|
7231
|
-
|
|
7231
|
+
_c,
|
|
7232
7232
|
Ac,
|
|
7233
7233
|
Vc,
|
|
7234
7234
|
Oc,
|
|
@@ -7285,7 +7285,7 @@ const mf = [
|
|
|
7285
7285
|
Fp,
|
|
7286
7286
|
Lp,
|
|
7287
7287
|
Bp,
|
|
7288
|
-
|
|
7288
|
+
_p,
|
|
7289
7289
|
Op,
|
|
7290
7290
|
Yp,
|
|
7291
7291
|
il,
|