@genai-fi/nanogpt 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +10 -5
- package/dist/Generator.js +11760 -146
- package/dist/{RealDiv-zz7FpkKX.js → RealDiv-Ds-jvL09.js} +28 -30
- package/dist/Reshape-Cd6e-Otn.js +14 -0
- package/dist/{Reshape-CHdUjC72.js → Reshape-Ct266DEk.js} +21 -23
- package/dist/TeachableLLM.d.ts +4 -3
- package/dist/TeachableLLM.js +15 -16
- package/dist/Trainer.d.ts +2 -2
- package/dist/Trainer.js +6 -6
- package/dist/{axis_util-BsIr9ZNu.js → axis_util-DofAuy0p.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-B1XRLuq9.js → backend_util-C7NWHpv7.js} +72 -73
- package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-B0Vls736.js} +52 -54
- package/dist/broadcast_to-DDaNMbX7.js +28 -0
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +7 -11
- package/dist/checks/normRMS.js +9 -9
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +11 -12
- package/dist/checks/rope.js +2 -2
- package/dist/clip_by_value-Dn5tzexi.js +12 -0
- package/dist/complex-DClmWqJt.js +11 -0
- package/dist/concat-C6X3AAlQ.js +17 -0
- package/dist/{concat_util-iBYIyuQe.js → concat_util-CHsJFZJJ.js} +1 -1
- package/dist/{dataset-D2P7rHAw.js → dataset-DcjWqUVQ.js} +135 -137
- package/dist/dropout-OxuaJz6z.js +92 -0
- package/dist/expand_dims-BzfJK2uc.js +11 -0
- package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-eS9QJ6ut.js} +1 -1
- package/dist/floor-DIb-lN_u.js +9 -0
- package/dist/gather-BcO5UQNJ.js +9 -0
- package/dist/{gelu-Bmhopi0J.js → gelu-DqTbCx5x.js} +10 -11
- package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-CJcbnKPC.js} +841 -1015
- package/dist/index-D0RBWjq8.js +3520 -0
- package/dist/{index-DRyE072i.js → index-Dj5TkmPY.js} +330 -331
- package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CSaumNDs.js} +132 -134
- package/dist/layers/BaseLayer.js +15 -16
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +7 -7
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +9 -9
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +21 -22
- package/dist/log_sum_exp-VLZgbFAH.js +39 -0
- package/dist/main.d.ts +1 -1
- package/dist/main.js +49 -50
- package/dist/{matMul16-fEAJ4smh.js → matMul16-cDxwemKj.js} +14 -15
- package/dist/matMulGelu-B2s_80-H.js +163 -0
- package/dist/mat_mul-DxpNTCRz.js +11 -0
- package/dist/mod-PrOKlFxH.js +11 -0
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/model.js +13 -14
- package/dist/ones-BX_wEgzB.js +14 -0
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +12 -13
- package/dist/ops/cpu/adamMoments.js +6 -7
- package/dist/ops/cpu/appendCache.js +7 -8
- package/dist/ops/cpu/attentionMask.js +11 -11
- package/dist/ops/cpu/fusedSoftmax.js +10 -11
- package/dist/ops/cpu/gatherSub.js +10 -11
- package/dist/ops/cpu/gelu.js +14 -15
- package/dist/ops/cpu/matMul16.js +6 -7
- package/dist/ops/cpu/matMulGelu.js +5 -6
- package/dist/ops/cpu/matMulMul.js +3 -4
- package/dist/ops/cpu/mulDropout.js +3 -4
- package/dist/ops/cpu/normRMS.js +11 -12
- package/dist/ops/cpu/qkv.js +8 -9
- package/dist/ops/cpu/rope.js +9 -10
- package/dist/ops/cpu/scatterSub.js +14 -16
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/add16.js +10 -11
- package/dist/ops/grads/attentionMask.js +5 -6
- package/dist/ops/grads/gelu.js +3 -4
- package/dist/ops/grads/matMul16.js +4 -5
- package/dist/ops/grads/matMulGelu.js +8 -9
- package/dist/ops/grads/normRMS.js +9 -10
- package/dist/ops/grads/pack16.js +4 -5
- package/dist/ops/grads/qkv.js +17 -19
- package/dist/ops/grads/rope.js +3 -5
- package/dist/ops/grads/softmax16.js +3 -4
- package/dist/ops/grads/unpack16.js +3 -4
- package/dist/ops/grads/utils.d.ts +1 -0
- package/dist/ops/grads/utils.js +8 -4
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +3 -4
- package/dist/ops/qkv.js +4 -8
- package/dist/ops/reshape16.js +16 -18
- package/dist/ops/rope.d.ts +1 -1
- package/dist/ops/rope.js +3 -8
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +5 -8
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +23 -24
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -3
- package/dist/ops/webgl/adamMoments.js +1 -2
- package/dist/ops/webgl/appendCache.js +1 -2
- package/dist/ops/webgl/attentionMask.js +5 -6
- package/dist/ops/webgl/fusedSoftmax.js +6 -8
- package/dist/ops/webgl/gatherSub.js +6 -7
- package/dist/ops/webgl/gelu.js +2 -3
- package/dist/ops/webgl/log.js +11 -12
- package/dist/ops/webgl/matMul16.js +15 -16
- package/dist/ops/webgl/matMulGelu.js +7 -111
- package/dist/ops/webgl/matMulMul.js +14 -15
- package/dist/ops/webgl/mulDropout.js +8 -9
- package/dist/ops/webgl/normRMS.js +7 -8
- package/dist/ops/webgl/qkv.js +5 -6
- package/dist/ops/webgl/rope.js +7 -8
- package/dist/ops/webgl/scatterSub.js +5 -6
- package/dist/ops/webgpu/adamAdjust.js +10 -12
- package/dist/ops/webgpu/adamMoments.js +8 -10
- package/dist/ops/webgpu/add16.js +8 -9
- package/dist/ops/webgpu/appendCache.js +23 -25
- package/dist/ops/webgpu/attentionMask.js +10 -12
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/concat16.js +12 -14
- package/dist/ops/webgpu/gatherSub.js +9 -11
- package/dist/ops/webgpu/gelu.js +28 -29
- package/dist/ops/webgpu/matMul16.js +26 -28
- package/dist/ops/webgpu/matMul16_program.js +4 -5
- package/dist/ops/webgpu/mul16.js +7 -8
- package/dist/ops/webgpu/normRMS.js +17 -19
- package/dist/ops/webgpu/normRMSGrad.js +21 -28
- package/dist/ops/webgpu/pack16.js +12 -13
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +13 -15
- package/dist/ops/webgpu/rope.js +25 -27
- package/dist/ops/webgpu/scatterSub.js +7 -9
- package/dist/ops/webgpu/slice16.js +21 -23
- package/dist/ops/webgpu/softmax16.js +17 -19
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +7 -8
- package/dist/ops/webgpu/sub16.js +8 -9
- package/dist/ops/webgpu/sum16.js +19 -21
- package/dist/ops/webgpu/transpose16.js +19 -20
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
- package/dist/ops/webgpu/unpack16.js +3 -4
- package/dist/ops/webgpu/utils/binary_op.js +7 -8
- package/dist/ops/webgpu/utils/reductions.js +14 -22
- package/dist/ops-FJapAPfm.js +476 -0
- package/dist/pack16-k4jq6aMX.js +39 -0
- package/dist/patches/webgpu_backend.js +19 -20
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +15 -16
- package/dist/{random_width-BVV9HveY.js → random_width-UGQn4OWb.js} +2506 -2761
- package/dist/range-CuGvVN2c.js +10 -0
- package/dist/relu-Cf80uA2p.js +9 -0
- package/dist/reshape-CkjKPPqB.js +9 -0
- package/dist/resize_nearest_neighbor-DB8k9KN_.js +175 -0
- package/dist/rope-BmZmp9uP.js +24 -0
- package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-BY22Cc-C.js} +1 -1
- package/dist/selu_util-BuLbmbrl.js +44 -0
- package/dist/{shared-CHhxz-O5.js → shared-B7USJZgw.js} +1 -1
- package/dist/{shared-D2NP_CpY.js → shared-BQboIImQ.js} +379 -381
- package/dist/slice-Aqy7KbJh.js +12 -0
- package/dist/{slice_util-DyjSAD0u.js → slice_util-D8CQRenR.js} +7 -7
- package/dist/{softmax-C9JQEtnO.js → softmax-faLoUZVT.js} +4 -5
- package/dist/split-BNz5jcGc.js +9 -0
- package/dist/squeeze--YMgaAAf.js +10 -0
- package/dist/stack-WJK22CFn.js +11 -0
- package/dist/step-dXR33iOg.js +261 -0
- package/dist/sum-BdplSvq_.js +11 -0
- package/dist/{tensor-0r5yOo2R.js → tensor-BQqrDvpx.js} +1 -1
- package/dist/tensor1d-LxP9asMm.js +11 -0
- package/dist/{tensor2d-CSB4KOb0.js → tensor2d-BN1sSfQO.js} +6 -7
- package/dist/{tensor4d-D7bLqGqz.js → tensor4d-DVwr7pLF.js} +6 -7
- package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-Vi4JfLzT.js} +256 -265
- package/dist/tile-CvN_LyVr.js +11 -0
- package/dist/tokeniser/BaseTokeniser.d.ts +27 -0
- package/dist/tokeniser/BaseTokeniser.js +94 -0
- package/dist/tokeniser/CharTokeniser.d.ts +4 -3
- package/dist/tokeniser/CharTokeniser.js +46 -32
- package/dist/tokeniser/bpe.d.ts +4 -3
- package/dist/tokeniser/bpe.js +60 -45
- package/dist/tokeniser/type.d.ts +11 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.d.ts +2 -2
- package/dist/training/DatasetBuilder.js +32 -36
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.d.ts +3 -3
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +5 -5
- package/dist/transpose-JawVKyZy.js +36 -0
- package/dist/unsorted_segment_sum-LAbmE9G4.js +277 -0
- package/dist/utilities/dummy.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.d.ts +1 -4
- package/dist/utilities/packed.js +10 -745
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DzfrwYuP.js → variable-DQ9yYgEU.js} +1 -1
- package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-CAE4RICo.js} +177 -171
- package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-BdovYhXr.js} +34 -35
- package/dist/zeros-DeiE2zTa.js +13 -0
- package/dist/zeros_like-BAz3iKru.js +721 -0
- package/package.json +4 -2
- package/dist/Reshape-CDVLyVfz.js +0 -16
- package/dist/broadcast_to-B0ChcDaz.js +0 -30
- package/dist/complex-BBiRlsVq.js +0 -13
- package/dist/concat-DmBLPVGC.js +0 -19
- package/dist/dropout-B1x1kYMa.js +0 -99
- package/dist/expand_dims-ouvfxQ1n.js +0 -13
- package/dist/gather-CH9sdacz.js +0 -10
- package/dist/index-D6Q1lPZO.js +0 -2157
- package/dist/log_sum_exp-D3ftBNY5.js +0 -41
- package/dist/mat_mul-C59XWcJd.js +0 -12
- package/dist/mod-DESSvHIU.js +0 -12
- package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
- package/dist/ones-jU9jlQvM.js +0 -15
- package/dist/ops-BFDtP6th.js +0 -645
- package/dist/pack16-CmVZs6af.js +0 -41
- package/dist/patches/PackedTensor.d.ts +0 -12
- package/dist/patches/PackedTensor.js +0 -11
- package/dist/patches/engine.d.ts +0 -261
- package/dist/patches/engine.js +0 -12
- package/dist/patches/tape.d.ts +0 -12
- package/dist/patches/tape.js +0 -5
- package/dist/range-ZZZD60Fx.js +0 -11
- package/dist/reciprocal-CrYlsAGD.js +0 -10
- package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
- package/dist/relu-BYDneVPn.js +0 -10
- package/dist/reshape-CaPQzFvz.js +0 -10
- package/dist/rope-s4W2XO9B.js +0 -32
- package/dist/selu_util-BGPXmd4B.js +0 -303
- package/dist/sin-Djs4aQiu.js +0 -16
- package/dist/slice-DvovR5wq.js +0 -13
- package/dist/split-DBck65sX.js +0 -10
- package/dist/squeeze-C00Ipm_7.js +0 -11
- package/dist/stack-ChnHwRpX.js +0 -13
- package/dist/sum-ywRJj3Zr.js +0 -12
- package/dist/tensor-CzmOBsdf.js +0 -909
- package/dist/tensor1d-BlUT89BP.js +0 -12
- package/dist/tensor_util-DfwaWayG.js +0 -523
- package/dist/tile-CR074jmp.js +0 -13
- package/dist/transpose-DH4gmHvu.js +0 -38
- package/dist/zeros-DBFVbpv5.js +0 -14
package/dist/{backend_util-B1XRLuq9.js → backend_util-C7NWHpv7.js}
CHANGED
@@ -1,11 +1,10 @@
-import { …
-import { d as …
-import { …
-import { …
-import { S as …
-import { s as …
-import { …
-import { c as ae, v as ie, a as ue } from "./scatter_nd_util-C7zXRT_h.js";
+import { V as m, a9 as w, aU as I, y as d, ax as A, aB as _, $ as y, ad as M, a0 as T, aV as b, ak as D, aW as x } from "./index-D0RBWjq8.js";
+import { d as L, f as W, h as v, c as F, e as N, a as C, b as P, g as z } from "./axis_util-DofAuy0p.js";
+import { a as B, c as U } from "./concat_util-CHsJFZJJ.js";
+import { c as V, b as G, d as H, f as j, g as q, h as Z, i as k, j as J, k as K, m as X, t as Y } from "./step-dXR33iOg.js";
+import { S as Q, a as ee, b as te, g as se, c as ne, s as re } from "./selu_util-BuLbmbrl.js";
+import { s as oe } from "./slice_util-D8CQRenR.js";
+import { c as ae, v as ie, a as ue } from "./scatter_nd_util-BY22Cc-C.js";
 import { a as le, c as pe, b as ce, e as he, d as fe, g as ge, m as de, s as me } from "./complex_util-Yc1A_gV1.js";
 function Ee(e, t) {
 const r = e.shape.length, s = t.shape.length;
@@ -147,7 +146,7 @@ function Te(e, t, r) {
 s.push(e[n + 1] - t[n][0] - t[n][1]);
 return s;
 }
-const be = 0.3275911, De = 0.254829592, …
+const be = 0.3275911, De = 0.254829592, xe = -0.284496736, Le = 1.421413741, We = -1.453152027, ve = 1.061405429;
 const E = "->", Fe = /->/g, S = ",", R = "...";
 function Ne(e, t) {
 e = e.replace(/\s/g, "");
@@ -213,22 +212,22 @@ function ze(e, t) {
 s.push([]);
 const o = [];
 for (let a = 0; a < r.length; ++a) {
-const u = r[a], p = …
+const u = r[a], p = Ue(t, u);
 for (const c of p)
 o.indexOf(c) === -1 && (s[a].push(c), o.push(c));
 }
 return { path: r, steps: s };
 }
-function …
+function Be(e) {
 return e.every((t, r) => t === r);
 }
-function …
+function Ue(e, t) {
 const r = [];
 for (let s = 0; s < e.length; ++s)
 (e[s].length === 0 || e[s].indexOf(t) !== -1 || t === -1) && r.push(s);
 return r;
 }
-function …
+function Ve(e, t, r = 0) {
 let s = [];
 if (typeof t == "number")
 d(e.shape[r] % t === 0, () => "Number of splits must evenly divide the axis."), s = new Array(t).fill(e.shape[r] / t);
@@ -244,17 +243,17 @@ function Ue(e, t, r = 0) {
 }
 return s;
 }
-function …
+function Ge(e) {
 return `Received SparseTensor with denseShape[0] = 0 but
 indices.shape[0] = ${e}`;
 }
-function …
+function He(e, t) {
 return `indices(${e}, 0) is invalid: ${t} < 0`;
 }
-function …
+function je(e, t, r) {
 return `indices(${e}, 0) is invalid: ${t} >= ${r}`;
 }
-function …
+function qe(e, t) {
 return `only one output dimension may be -1, not both ${e} and ${t}`;
 }
 function Ze(e, t) {
@@ -263,12 +262,12 @@ function Ze(e, t) {
 function ke() {
 return "reshape cannot infer the missing input size for an empty tensor unless all specified input sizes are non-zero";
 }
-function …
+function Je(e, t) {
 const r = m(e), s = m(t);
 return `Input to reshape is a SparseTensor with ${r}
 dense values, but the requested shape requires a multiple of ${s}. inputShape=${e} outputShape= ${t}`;
 }
-function …
+function Ke(e, t) {
 const r = m(e), s = m(t);
 return `Input to reshape is a tensor with ${r} dense values, but the requested shape has ${s}. inputShape=${e} outputShape=${t}`;
 }
@@ -336,11 +335,11 @@ function ot(e) {
 function at(e) {
 return e.map((t) => _(t));
 }
-const …
+const dt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
 __proto__: null,
 ERF_A1: De,
-ERF_A2: …
-ERF_A3: …
+ERF_A2: xe,
+ERF_A3: Le,
 ERF_A4: We,
 ERF_A5: ve,
 ERF_P: be,
@@ -348,84 +347,84 @@ const mt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
 get RowPartitionType() {
 return f;
 },
-SELU_SCALE: …
-SELU_SCALEALPHA: …
-applyActivation: …
-assertAndGetBroadcastShape: …
-assertAxesAreInnerMostDims: …
-assertParamsConsistent: …
+SELU_SCALE: Q,
+SELU_SCALEALPHA: ee,
+applyActivation: te,
+assertAndGetBroadcastShape: y,
+assertAxesAreInnerMostDims: L,
+assertParamsConsistent: B,
 assignToTypedArray: le,
-axesAreInnerMostDims: …
+axesAreInnerMostDims: W,
 calculateShapes: ae,
 checkEinsumDimSizes: Pe,
-checkPadOnDimRoundingMode: …
-combineLocations: …
+checkPadOnDimRoundingMode: V,
+combineLocations: v,
 combineRaggedTensorToTensorShapes: Ie,
 complexWithEvenIndex: pe,
 complexWithOddIndex: ce,
-computeConv2DInfo: …
-computeConv3DInfo: …
-computeDefaultPad: …
-computeDilation2DInfo: …
+computeConv2DInfo: G,
+computeConv3DInfo: H,
+computeDefaultPad: j,
+computeDilation2DInfo: q,
 computeOptimalWindowSize: Oe,
-computeOutAndReduceShapes: …
-computeOutShape: …
+computeOutAndReduceShapes: F,
+computeOutShape: U,
 computePool2DInfo: Z,
 computePool3DInfo: k,
-convertConv2DDataFormat: …
+convertConv2DDataFormat: J,
 decodeEinsumEquation: Ne,
-eitherStridesOrDilationsAreOne: …
-expandShapeToKeepDim: …
+eitherStridesOrDilationsAreOne: K,
+expandShapeToKeepDim: N,
 exponent: he,
 exponents: fe,
 fromStringArrayToUint8: at,
 fromUint8ToStringArray: ot,
-getAxesPermutation: …
-getBroadcastDims: …
+getAxesPermutation: C,
+getBroadcastDims: M,
 getComplexWithIndex: ge,
 getEinsumComputePath: ze,
 getEinsumPermutation: Ce,
-getFusedBiasGradient: …
-getFusedDyActivation: …
+getFusedBiasGradient: se,
+getFusedDyActivation: ne,
 getImageCenter: we,
-getInnerMostAxes: …
+getInnerMostAxes: P,
 getPermuted: _e,
 getRaggedRank: Se,
-getReductionAxes: …
+getReductionAxes: T,
 getReshaped: Ae,
 getReshapedPermuted: ye,
 getRowPartitionTypesHelper: $e,
 getSliceBeginCoords: Me,
 getSliceSize: Te,
-getSparseFillEmptyRowsIndicesDenseShapeMismatch: …
-getSparseFillEmptyRowsNegativeIndexErrorMessage: …
-getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: …
+getSparseFillEmptyRowsIndicesDenseShapeMismatch: Ge,
+getSparseFillEmptyRowsNegativeIndexErrorMessage: He,
+getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: je,
 getSparseReshapeEmptyTensorZeroOutputDimErrorMessage: ke,
-getSparseReshapeInputOutputMismatchErrorMessage: …
-getSparseReshapeInputOutputMultipleErrorMessage: …
-getSparseReshapeMultipleNegativeOneOutputDimErrorMessage: …
+getSparseReshapeInputOutputMismatchErrorMessage: Ke,
+getSparseReshapeInputOutputMultipleErrorMessage: Je,
+getSparseReshapeMultipleNegativeOneOutputDimErrorMessage: qe,
 getSparseReshapeNegativeOutputDimErrorMessage: Ze,
 getSparseSegmentReductionIndicesOutOfRangeErrorMessage: et,
 getSparseSegmentReductionNegativeSegmentIdsErrorMessage: Xe,
 getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage: Ye,
 getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage: Qe,
-getUndoAxesPermutation: …
-isIdentityPermutation: …
-log: …
+getUndoAxesPermutation: z,
+isIdentityPermutation: Be,
+log: b,
 mergeRealAndImagArrays: de,
 prepareAndValidate: Ee,
-prepareSplitSize: …
+prepareSplitSize: Ve,
 segment_util: rt,
-shouldFuse: …
-slice_util: …
+shouldFuse: re,
+slice_util: oe,
 splitRealAndImagArrays: me,
-stridesOrDilationsArePositive: …
-tupleValuesAreOne: …
-upcastType: …
+stridesOrDilationsArePositive: X,
+tupleValuesAreOne: Y,
+upcastType: D,
 validateDefaultValueShape: Re,
 validateInput: ie,
 validateUpdateShape: ue,
-warn: …
+warn: x
 }, Symbol.toStringTag, { value: "Module" }));
 export {
 _e as A,
@@ -437,37 +436,37 @@ export {
 Pe as G,
 ze as H,
 Ce as I,
-…
+Be as J,
 Ee as K,
 nt as L,
 we as M,
-…
+Ve as N,
 st as O,
 tt as P,
 f as R,
 Se as a,
-…
+dt as b,
 Oe as c,
 Ie as d,
 at as e,
 ot as f,
 $e as g,
-…
-…
-…
-…
+Ge as h,
+He as i,
+je as j,
+qe as k,
 Ze as l,
 ke as m,
-…
-…
+Je as n,
+Ke as o,
 Xe as p,
 Ye as q,
 Qe as r,
 et as s,
 De as t,
-…
+xe as u,
 Re as v,
-…
+Le as w,
 We as x,
 ve as y,
 Ae as z
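The only substantive change in this chunk is the re-pointed import graph and the renamed minified identifiers; the restored constant line (`be`, `De`, `xe`, `Le`, `We`, `ve`) is the standard Abramowitz–Stegun erf table that the module re-exports as `ERF_P` and `ERF_A1`–`ERF_A5`. As a hedged reference for what those constants compute (an illustrative TypeScript sketch, not code from the bundle):

```ts
// Abramowitz & Stegun 7.1.26 polynomial approximation of erf(x), using the same
// constants the chunk exports as ERF_P and ERF_A1..ERF_A5.
const ERF_P = 0.3275911;
const ERF_A = [0.254829592, -0.284496736, 1.421413741, -1.453152027, 1.061405429];

function erfApprox(x: number): number {
  const sign = x < 0 ? -1 : 1; // erf is odd, so evaluate on |x|
  const ax = Math.abs(x);
  const t = 1 / (1 + ERF_P * ax);
  // a1*t + a2*t^2 + ... + a5*t^5, evaluated with Horner's rule
  const poly = ERF_A.reduceRight((acc, a) => acc * t + a, 0) * t;
  return sign * (1 - poly * Math.exp(-ax * ax));
}

// Exact (erf-form) GELU for reference; whether the package's fused gelu kernels
// use this or the tanh approximation is not visible in this diff.
const gelu = (x: number) => 0.5 * x * (1 + erfApprox(x / Math.SQRT2));
```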
package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-B0Vls736.js}
CHANGED
@@ -1,23 +1,21 @@
-import { e as D, …
-import { …
-import { …
-import { …
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-l.registerFlag("WEBGPU_PRINT_SHADER", () => "");
-l.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
+import { ab as g, au as $, av as K, e as D, y as _, aw as O, V as x, ax as Z, at as W, ay as F, az as j, aA as X, aB as J, ae as ee, a9 as k } from "./index-D0RBWjq8.js";
+import { m as te, f as se, P as re } from "./webgpu_program-CAE4RICo.js";
+import { i as ne, G as q } from "./webgpu_util-BdovYhXr.js";
+import { m as N } from "./complex_util-Yc1A_gV1.js";
+const d = g();
+d.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
+d.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
+d.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
+d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
+d.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
+d.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
+d.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
+d.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
+d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
+d.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
+d.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
+d.registerFlag("WEBGPU_PRINT_SHADER", () => "");
+d.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
 class ae {
 constructor(e) {
 e && (this.vendor = e.vendor, this.architecture = e.architecture, this.intelGPUGeneration = this.getIntelGPUGeneration());
@@ -70,8 +68,8 @@ class ie {
 }), this.freeBuffers = /* @__PURE__ */ new Map(), this.usedBuffers = /* @__PURE__ */ new Map(), this.numUsedBuffers = 0, this.numFreeBuffers = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
 }
 }
-function z(…
-return `${…
+function z(l, e) {
+return `${l}_${e}`;
 }
 class oe {
 constructor(e) {
@@ -122,30 +120,30 @@ class oe {
 }), this.freeTextures = /* @__PURE__ */ new Map(), this.usedTextures = /* @__PURE__ */ new Map(), this.numUsedTextures = 0, this.numFreeTextures = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
 }
 }
-function L(…
-return `${…
+function L(l, e, t, s) {
+return `${l}_${e}_${t}_${s}`;
 }
-function Q(…
-if (…
+function Q(l) {
+if (l === "rgba8unorm")
 return 16;
-throw new Error(`${…
+throw new Error(`${l} is not supported!`);
 }
-const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (…
-const t = …
+const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (l, e) => {
+const t = l.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
 if (n.every((a) => a <= t))
 return n;
 _(n[0] > t && s.y === void 0 && s.z === void 0, () => "Dispatch size exceeds WebGPU limits in Y or Z dimension.");
 let r = Math.ceil(Math.sqrt(n[0]));
 return r > t ? (r = Math.ceil(Math.cbrt(n[0])), _(r <= t, () => "Total dispatch size exceeds WebGPU maximum."), [r, r, r]) : [r, r, 1];
 };
-class R extends …
+class R extends $ {
 nextDataId() {
 return R.nextDataId++;
 }
 constructor(e, t) {
-if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !…
+if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !ne())
 throw new Error("WebGPU is not supported on this device");
-this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new …
+this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new K(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
 device: e,
 format: "bgra8unorm"
 }), document.body.appendChild(this.dummyCanvas));
@@ -250,7 +248,7 @@ class R extends re {
 if (s != null || t.dtype === "string")
 return s;
 if (t.dtype === "complex64") {
-const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(…
+const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(N(E, B).buffer, "float32");
 return this.convertAndCacheOnCPU(e, y), y;
 }
 this.hasReadSyncWarned || (this.hasReadSyncWarned = !0, console.warn("The performance of synchronously reading data from GPU to CPU is poor on the webgpu backend, please use asynchronous APIs instead."));
@@ -266,7 +264,7 @@ class R extends re {
 alphaMode: r[B]
 }), y.getCurrentTexture();
 }).map((E, B) => {
-const y = f * 4, …
+const y = f * 4, G = (P, S, v) => {
 this.ensureCommandEncoderReady(), this.commandEncoder.copyBufferToTexture({
 buffer: a,
 bytesPerRow: y,
@@ -281,20 +279,20 @@ class R extends re {
 willReadFrequently: !0
 });
 I.clearRect(0, 0, P, S), I.drawImage(h[B], 0, 0);
-const …
+const b = I.getImageData(0, 0, P, S).data, H = r[B], M = new Uint8ClampedArray(o, v, P * S * 4);
 for (let p = 0; p < M.length; p += 4)
 if (H === "premultiplied")
-M[p + 3] = …
+M[p + 3] = b[p + 3];
 else {
-const V = …
-M[p] = …
+const V = b[p];
+M[p] = b[p + 2], M[p + 1] = b[p + 1], M[p + 2] = V;
 }
 }, Y = Math.floor(u / (f * c));
 let T = f, U = c, C = 0;
 for (let P = 0; P < Y; P++)
-…
+G(T, U, C), C += f * c * 4;
 const A = u % (f * c);
-U = Math.floor(A / f), U > 0 && (…
+U = Math.floor(A / f), U > 0 && (G(T, U, C), C += U * (f * 4)), T = A % f, T > 0 && G(T, 1, C);
 });
 const w = O(o, t.dtype);
 return this.convertAndCacheOnCPU(e, w), w;
@@ -311,7 +309,7 @@ class R extends re {
 this.read(t.complexTensorInfos.real.dataId),
 this.read(t.complexTensorInfos.imag.dataId)
 ]), a = r[0], i = r[1];
-n = …
+n = N(a, i);
 } else {
 const r = await this.getBufferData(t.resource);
 n = O(r, t.dtype);
@@ -339,7 +337,7 @@ class R extends re {
 refCount: 1,
 external: e.zeroCopy
 });
-const a = this.tensorMap.get(r), i = …
+const a = this.tensorMap.get(r), i = q(a.dtype) * x(a.shape);
 if (e.buffer.size < i)
 throw new Error(`GPUBuffer size(${e.buffer.size}) is smaller than tensor size(${i})!`);
 if ((e.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC))
@@ -365,7 +363,7 @@ class R extends re {
 const t = this.readSync(e.dataId);
 if (e.dtype === "string")
 try {
-const s = t.map((n) => …
+const s = t.map((n) => Z(n));
 return W(e.shape, e.dtype, s);
 } catch {
 throw new Error("Failed to decode encoded string bytes into utf-8");
@@ -385,10 +383,10 @@ class R extends re {
 kernelMs: null,
 wallMs: null
 }, u = await Promise.all(r);
-return i.kernelMs = …
+return i.kernelMs = j(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
 }
 makeTensorInfo(e, t, s) {
-return t === "string" && s != null && s.length > 0 && …
+return t === "string" && s != null && s.length > 0 && X(s[0]) && (s = s.map((r) => J(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
 }
 tensorToBinding(e) {
 if (!e)
@@ -400,16 +398,16 @@ class R extends re {
 const t = this.tensorMap.get(e);
 if (t.resource != null)
 return;
-const s = …
+const s = q(t.dtype) * x(t.shape);
 let n;
 const r = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
 if (t.values) {
 if (n = this.bufferManager.acquireBuffer(s, r, !0), n.mapState === "unmapped") {
 const a = this.bufferManager.acquireBuffer(s, GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, !0, !1), i = a.getMappedRange();
-t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
+t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
 } else {
 const a = n.getMappedRange();
-t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
+t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
 }
 t.values = null;
 } else
@@ -457,7 +455,7 @@ class R extends re {
 }
 runWebGPUProgram(e, t, s, n, r) {
 if (r || (r = this.makeTensorInfo(e.outputShape, s)), x(r.shape) === 0)
-return this.tensorMap.get(r.dataId).values = …
+return this.tensorMap.get(r.dataId).values = ee(r.dtype, 0), r;
 this.uploadToGPU(r.dataId), e.dispatch = fe(this.device, e);
 const a = t.map((u, o) => {
 if (u.dtype === "complex64")
@@ -470,9 +468,9 @@ class R extends re {
 name: e.variableNames[o]
 };
 });
-e.shaderKey = …
+e.shaderKey = te(e, a, r);
 const i = g().getBool("WEBGPU_ENGINE_COMPILE_ONLY");
-return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = …
+return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = se(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
 }
 recordAndSubmit(e, t, s, n) {
 if (e.pipeline instanceof Promise)
@@ -484,11 +482,11 @@ class R extends re {
 const h = "int32";
 a.map((m) => {
 r.push({ type: h, data: m });
-const w = …
+const w = k(m);
 r.push({ type: h, data: w });
 });
 } else {
-const h = …
+const h = k(t.shape);
 r.push({ type: i, data: h });
 }
 if (e.size) {
@@ -520,7 +518,7 @@ class R extends re {
 querySet: this.querySet,
 beginningOfPassWriteIndex: 0,
 endOfPassWriteIndex: 1
-}, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === …
+}, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === re.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
 }
 async getQueryTime() {
 if (!this.supportTimestampQuery)
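The rewritten preamble registers the WebGPU backend's tuning flags (deferred submit batch size, CPU forward/handoff thresholds, matmul program selection, and so on) on the shared environment object, and the staging-buffer upload paths now treat the new `packedF16` dtype like `int32`/`bool` when copying values into mapped GPU buffers. Assuming the bundled environment behaves like upstream @tensorflow/tfjs (the `env()`, `set()` and getter calls below are the upstream API, not anything exported by @genai-fi/nanogpt), a flag can be overridden before the backend initialises, roughly:

```ts
import * as tf from "@tensorflow/tfjs-core"; // assumption: upstream tfjs, not the bundled copy

// Flag values are cached once evaluated, so override them before the WebGPU backend is created.
tf.env().set("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", 30); // default in this diff is 15
tf.env().set("WEBGPU_CPU_FORWARD", false);             // default in this diff is true

console.log(tf.env().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD")); // 1000 by default
```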
package/dist/broadcast_to-DDaNMbX7.js
ADDED
@@ -0,0 +1,28 @@
+import { q as h, u as f, w as p, x as g, E as u, T } from "./index-D0RBWjq8.js";
+import { r as b } from "./reshape-CkjKPPqB.js";
+function m(e, r) {
+let n = f(e, "broadcastTo", "x");
+const a = n.shape;
+if (p(r), r.length < n.rank)
+throw new Error(`broadcastTo(): shape.length=${r.length} < input.rank=${n.rank}.`);
+if (r.length > n.rank) {
+const t = n.shape.slice();
+for (; t.length < r.length; )
+t.unshift(1);
+n = b(n, t);
+}
+const s = n.shape, o = Array.from(r);
+for (let t = r.length - 1; t >= 0; t--)
+if (s[t] === r[t])
+o[t] = 1;
+else if (n.shape[t] !== 1)
+throw new Error(`broadcastTo(): [${a}] cannot be broadcast to [${r}].`);
+if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
+return g(n);
+const i = { x: n }, c = { reps: o };
+return u.runKernel(T, i, c);
+}
+const E = /* @__PURE__ */ h({ broadcastTo_: m });
+export {
+E as b
+};
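The new chunk is a bundled `broadcastTo`-style op: it left-pads the input shape with 1s, checks that each remaining axis either already matches the target or has size 1, and tiles only the axes that actually grow. A minimal standalone sketch of that shape rule (illustrative TypeScript, not the bundle's code):

```ts
// Returns the per-axis tile counts a broadcastTo would use, or throws if the
// shapes are incompatible -- the same rule the chunk above enforces.
function broadcastReps(inputShape: number[], targetShape: number[]): number[] {
  if (targetShape.length < inputShape.length) {
    throw new Error(`shape.length=${targetShape.length} < input.rank=${inputShape.length}`);
  }
  // Left-pad the input shape with 1s until the ranks match.
  const padded = [...inputShape];
  while (padded.length < targetShape.length) padded.unshift(1);

  return targetShape.map((dim, i) => {
    if (padded[i] === dim) return 1; // axis already the right size
    if (padded[i] !== 1) {
      throw new Error(`[${inputShape}] cannot be broadcast to [${targetShape}]`);
    }
    return dim; // a size-1 axis broadcasts by tiling
  });
}

// broadcastReps([3, 1], [2, 3, 4]) -> [2, 1, 4]
```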
package/dist/checks/attentionMask.js
CHANGED
@@ -1,6 +1,6 @@
-import { s as i, e } from "../index-…
-import { t } from "../tensor4d-…
-import { t as a } from "../tensor2d-…
+import { s as i, e } from "../index-D0RBWjq8.js";
+import { t } from "../tensor4d-DVwr7pLF.js";
+import { t as a } from "../tensor2d-BN1sSfQO.js";
 async function k(n) {
 await i(n);
 const s = t(
package/dist/checks/gelu.js
CHANGED
@@ -1,5 +1,5 @@
-import { s as e, e as o } from "../index-…
-import { t as s } from "../tensor2d-…
+import { s as e, e as o } from "../index-D0RBWjq8.js";
+import { t as s } from "../tensor2d-BN1sSfQO.js";
 async function m(t) {
 await e(t);
 const r = s(
package/dist/checks/matMulGelu.js
CHANGED
@@ -1,11 +1,7 @@
-import { s as …
-import "../…
-…
-…
-import "../dataset-D2P7rHAw.js";
-import { t as e } from "../tensor2d-CSB4KOb0.js";
-async function f(t) {
-await n(t);
+import { s as o, e as s } from "../index-D0RBWjq8.js";
+import { t as e } from "../tensor2d-BN1sSfQO.js";
+async function i(t) {
+await o(t);
 const r = e(
 [
 [0.1, 0.2, 9, 10, 11],
@@ -15,7 +11,7 @@ async function f(t) {
 [0.3, 0.4, -9, -10, -11]
 ],
 [5, 5]
-), …
+), n = e(
 [
 [0.5, 0.6, 7e4, -8e3, 0],
 [0.7, 0.8, -7e4, 8e4, 0],
@@ -25,8 +21,8 @@ async function f(t) {
 ],
 [5, 5]
 );
-return await s().runKernel("MatMulGelu", { x: …
+return await s().runKernel("MatMulGelu", { x: n, kernel: r }).array();
 }
 export {
-…
+i as execute
 };
package/dist/checks/normRMS.js
CHANGED
@@ -1,14 +1,14 @@
-import { s as …
-import { a as h } from "../ops-…
-import { t as p } from "../tensor1d-…
-import { t as …
+import { s as u, a1 as A, e as y } from "../index-D0RBWjq8.js";
+import { a as h } from "../ops-FJapAPfm.js";
+import { t as p } from "../tensor1d-LxP9asMm.js";
+import { t as r } from "../tensor-BQqrDvpx.js";
 const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function k(t) {
-await …
-const o = p(x, "float32"), n = …
-const …
-return h.meanSquaredError(…
-}, { value: m, grads: …
+await u(t);
+const o = p(x, "float32"), n = r(w, [16, 128, 192], "float32"), s = r(M, [16, 128, 192], "float32"), e = (d, g) => {
+const i = y().runKernel("RMSNorm", { x: d, gamma: g });
+return h.meanSquaredError(i, s);
+}, { value: m, grads: a } = A(e)([n, o]), c = await m.array(), f = await a[0].array(), l = await a[1].array();
 return [c, f, l];
 }
 export {
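The check runs the `RMSNorm` kernel on a [16, 128, 192] input with a 192-element gamma, takes the mean-squared error against a random target and returns the loss together with the gradients with respect to the input and gamma. For orientation, RMS normalisation over the last axis is conventionally computed as below (the epsilon value here is an assumption, not taken from this diff):

```ts
// Reference RMSNorm over the last axis: y = x / sqrt(mean(x^2) + eps) * gamma.
function rmsNormRef(x: number[], gamma: number[], eps = 1e-6): number[] {
  const meanSq = x.reduce((acc, v) => acc + v * v, 0) / x.length;
  const inv = 1 / Math.sqrt(meanSq + eps);
  return x.map((v, i) => v * inv * gamma[i]);
}
```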
package/dist/checks/normRMSGrad.js
CHANGED
@@ -1,6 +1,6 @@
-import { s as c, e as d } from "../index-…
-import { t as f } from "../tensor1d-…
-import { t as r } from "../tensor-…
+import { s as c, e as d } from "../index-D0RBWjq8.js";
+import { t as f } from "../tensor1d-LxP9asMm.js";
+import { t as r } from "../tensor-BQqrDvpx.js";
 const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function x(t) {
 await c(t);