@genai-fi/nanogpt 0.10.2 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +11761 -171
- package/dist/{RealDiv-zz7FpkKX.js → RealDiv-KAPDe8zB.js} +23 -25
- package/dist/Reshape-BYkmUnAv.js +14 -0
- package/dist/{Reshape-CHdUjC72.js → Reshape-Zt6eb7yh.js} +18 -20
- package/dist/TeachableLLM.js +10 -11
- package/dist/{axis_util-BsIr9ZNu.js → axis_util-BaG7mf5A.js} +3 -3
- package/dist/backend.js +2 -2
- package/dist/{backend_util-B1XRLuq9.js → backend_util-RCe-rHaj.js} +72 -73
- package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-DE3ACOLx.js} +45 -47
- package/dist/broadcast_to-B3eYlZm7.js +28 -0
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +7 -11
- package/dist/checks/normRMS.js +9 -9
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +12 -13
- package/dist/checks/rope.js +2 -2
- package/dist/clip_by_value-BnO7-a88.js +12 -0
- package/dist/complex-DjxcVmoX.js +11 -0
- package/dist/concat-BV8bt5H-.js +17 -0
- package/dist/{concat_util-iBYIyuQe.js → concat_util-DpW8mL_l.js} +1 -1
- package/dist/{dataset-D2P7rHAw.js → dataset-BcwmTGYc.js} +137 -139
- package/dist/dropout-BcvN9JYi.js +92 -0
- package/dist/expand_dims-DT4tEPwA.js +11 -0
- package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-Hta_rEnm.js} +1 -1
- package/dist/floor-D5QdR_le.js +9 -0
- package/dist/gather-D3JcZUaI.js +9 -0
- package/dist/{gelu-Bmhopi0J.js → gelu-CjNPL4OH.js} +10 -11
- package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-DAOmgtXR.js} +841 -1015
- package/dist/{index-DRyE072i.js → index-BwexR4lA.js} +262 -263
- package/dist/index-DOvlwCh-.js +3520 -0
- package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CCzYdUZg.js} +130 -132
- package/dist/layers/BaseLayer.js +15 -16
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +7 -7
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +9 -9
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +13 -14
- package/dist/log_sum_exp-ngO0-4pK.js +39 -0
- package/dist/main.js +49 -50
- package/dist/{matMul16-fEAJ4smh.js → matMul16-BWRSOCWB.js} +14 -15
- package/dist/matMulGelu-CzfgT6Wq.js +163 -0
- package/dist/mat_mul-SjpJRLyL.js +11 -0
- package/dist/mod-AnXEvvpo.js +11 -0
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/model.js +13 -14
- package/dist/ones-D2rT0xk2.js +14 -0
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +13 -14
- package/dist/ops/cpu/adamMoments.js +6 -7
- package/dist/ops/cpu/appendCache.js +7 -8
- package/dist/ops/cpu/attentionMask.js +7 -7
- package/dist/ops/cpu/fusedSoftmax.js +10 -11
- package/dist/ops/cpu/gatherSub.js +9 -10
- package/dist/ops/cpu/gelu.js +9 -10
- package/dist/ops/cpu/matMul16.js +6 -7
- package/dist/ops/cpu/matMulGelu.js +5 -6
- package/dist/ops/cpu/matMulMul.js +3 -4
- package/dist/ops/cpu/mulDropout.js +3 -4
- package/dist/ops/cpu/normRMS.js +10 -11
- package/dist/ops/cpu/qkv.js +8 -9
- package/dist/ops/cpu/rope.js +5 -6
- package/dist/ops/cpu/scatterSub.js +17 -19
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/add16.js +11 -12
- package/dist/ops/grads/attentionMask.js +5 -6
- package/dist/ops/grads/gelu.js +3 -4
- package/dist/ops/grads/matMul16.js +4 -5
- package/dist/ops/grads/matMulGelu.js +9 -10
- package/dist/ops/grads/normRMS.js +7 -8
- package/dist/ops/grads/pack16.js +4 -5
- package/dist/ops/grads/qkv.js +17 -19
- package/dist/ops/grads/rope.js +3 -5
- package/dist/ops/grads/softmax16.js +3 -4
- package/dist/ops/grads/unpack16.js +3 -4
- package/dist/ops/grads/utils.d.ts +1 -0
- package/dist/ops/grads/utils.js +8 -4
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +3 -4
- package/dist/ops/qkv.js +4 -8
- package/dist/ops/reshape16.js +14 -16
- package/dist/ops/rope.d.ts +1 -1
- package/dist/ops/rope.js +3 -8
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +5 -8
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +23 -24
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -3
- package/dist/ops/webgl/adamMoments.js +1 -2
- package/dist/ops/webgl/appendCache.js +1 -2
- package/dist/ops/webgl/attentionMask.js +4 -5
- package/dist/ops/webgl/fusedSoftmax.js +4 -6
- package/dist/ops/webgl/gatherSub.js +6 -7
- package/dist/ops/webgl/gelu.js +2 -3
- package/dist/ops/webgl/log.js +11 -12
- package/dist/ops/webgl/matMul16.js +10 -11
- package/dist/ops/webgl/matMulGelu.js +7 -111
- package/dist/ops/webgl/matMulMul.js +9 -10
- package/dist/ops/webgl/mulDropout.js +8 -9
- package/dist/ops/webgl/normRMS.js +2 -3
- package/dist/ops/webgl/qkv.js +5 -6
- package/dist/ops/webgl/rope.js +7 -8
- package/dist/ops/webgl/scatterSub.js +5 -6
- package/dist/ops/webgpu/adamAdjust.js +10 -12
- package/dist/ops/webgpu/adamMoments.js +8 -10
- package/dist/ops/webgpu/add16.js +8 -9
- package/dist/ops/webgpu/appendCache.js +23 -25
- package/dist/ops/webgpu/attentionMask.js +8 -10
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/concat16.js +12 -14
- package/dist/ops/webgpu/gatherSub.js +11 -13
- package/dist/ops/webgpu/gelu.js +28 -29
- package/dist/ops/webgpu/matMul16.js +26 -28
- package/dist/ops/webgpu/matMul16_program.js +4 -5
- package/dist/ops/webgpu/mul16.js +9 -10
- package/dist/ops/webgpu/normRMS.js +15 -17
- package/dist/ops/webgpu/normRMSGrad.js +21 -28
- package/dist/ops/webgpu/pack16.js +12 -13
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +16 -18
- package/dist/ops/webgpu/rope.js +25 -27
- package/dist/ops/webgpu/scatterSub.js +7 -9
- package/dist/ops/webgpu/slice16.js +21 -23
- package/dist/ops/webgpu/softmax16.js +17 -19
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +7 -8
- package/dist/ops/webgpu/sub16.js +7 -8
- package/dist/ops/webgpu/sum16.js +18 -20
- package/dist/ops/webgpu/transpose16.js +19 -20
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
- package/dist/ops/webgpu/unpack16.js +3 -4
- package/dist/ops/webgpu/utils/binary_op.js +7 -8
- package/dist/ops/webgpu/utils/reductions.js +14 -22
- package/dist/ops-B5yanEdW.js +476 -0
- package/dist/pack16-nQ6JaLo-.js +39 -0
- package/dist/patches/webgpu_backend.js +19 -20
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +21 -22
- package/dist/{random_width-BVV9HveY.js → random_width-or-CEftb.js} +2506 -2761
- package/dist/range-BklejeeW.js +10 -0
- package/dist/relu-CP0ZcxWO.js +9 -0
- package/dist/reshape-ByE68wS9.js +9 -0
- package/dist/resize_nearest_neighbor-B19mCEg2.js +175 -0
- package/dist/rope-Ir4mTyD1.js +24 -0
- package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-lvSiX8q4.js} +1 -1
- package/dist/selu_util-kbhpTdYD.js +44 -0
- package/dist/{shared-CHhxz-O5.js → shared-DT1TkE6w.js} +1 -1
- package/dist/{shared-D2NP_CpY.js → shared-dntlHIDQ.js} +343 -345
- package/dist/slice-BfEGSH82.js +12 -0
- package/dist/{slice_util-DyjSAD0u.js → slice_util-uTKwiEpW.js} +1 -1
- package/dist/{softmax-C9JQEtnO.js → softmax-CA5jFsLR.js} +4 -5
- package/dist/split-CVLc0w--.js +9 -0
- package/dist/squeeze-C7Z2srUo.js +10 -0
- package/dist/stack-Cf4n9h0N.js +11 -0
- package/dist/step-CINUs5QB.js +261 -0
- package/dist/sum-DWAtNGez.js +11 -0
- package/dist/tensor-DJoc7gJU.js +8 -0
- package/dist/tensor1d-D11P_7Dp.js +11 -0
- package/dist/{tensor2d-CSB4KOb0.js → tensor2d-Bs9wZRc7.js} +6 -7
- package/dist/{tensor4d-D7bLqGqz.js → tensor4d-BARPdTaS.js} +6 -7
- package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-y1cvNhLA.js} +255 -264
- package/dist/tile-mbfagpsB.js +11 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +5 -5
- package/dist/transpose-ClWiBS_b.js +36 -0
- package/dist/unsorted_segment_sum-BDDhB_E6.js +277 -0
- package/dist/utilities/dummy.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.d.ts +1 -4
- package/dist/utilities/packed.js +10 -745
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DzfrwYuP.js → variable-WawDEaAb.js} +1 -1
- package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-DuOXPQol.js} +178 -172
- package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-RxEF33Rj.js} +34 -35
- package/dist/zeros-KnWaWf-X.js +13 -0
- package/dist/zeros_like-DvE73F4e.js +721 -0
- package/package.json +4 -2
- package/dist/Reshape-CDVLyVfz.js +0 -16
- package/dist/broadcast_to-B0ChcDaz.js +0 -30
- package/dist/complex-BBiRlsVq.js +0 -13
- package/dist/concat-DmBLPVGC.js +0 -19
- package/dist/dropout-B1x1kYMa.js +0 -99
- package/dist/expand_dims-ouvfxQ1n.js +0 -13
- package/dist/gather-CH9sdacz.js +0 -10
- package/dist/index-D6Q1lPZO.js +0 -2157
- package/dist/log_sum_exp-D3ftBNY5.js +0 -41
- package/dist/mat_mul-C59XWcJd.js +0 -12
- package/dist/mod-DESSvHIU.js +0 -12
- package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
- package/dist/ones-jU9jlQvM.js +0 -15
- package/dist/ops-BFDtP6th.js +0 -645
- package/dist/pack16-CmVZs6af.js +0 -41
- package/dist/patches/PackedTensor.d.ts +0 -12
- package/dist/patches/PackedTensor.js +0 -11
- package/dist/patches/engine.d.ts +0 -261
- package/dist/patches/engine.js +0 -12
- package/dist/patches/tape.d.ts +0 -12
- package/dist/patches/tape.js +0 -5
- package/dist/range-ZZZD60Fx.js +0 -11
- package/dist/reciprocal-CrYlsAGD.js +0 -10
- package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
- package/dist/relu-BYDneVPn.js +0 -10
- package/dist/reshape-CaPQzFvz.js +0 -10
- package/dist/rope-s4W2XO9B.js +0 -32
- package/dist/selu_util-BGPXmd4B.js +0 -303
- package/dist/sin-Djs4aQiu.js +0 -16
- package/dist/slice-DvovR5wq.js +0 -13
- package/dist/split-DBck65sX.js +0 -10
- package/dist/squeeze-C00Ipm_7.js +0 -11
- package/dist/stack-ChnHwRpX.js +0 -13
- package/dist/sum-ywRJj3Zr.js +0 -12
- package/dist/tensor-0r5yOo2R.js +0 -8
- package/dist/tensor-CzmOBsdf.js +0 -909
- package/dist/tensor1d-BlUT89BP.js +0 -12
- package/dist/tensor_util-DfwaWayG.js +0 -523
- package/dist/tile-CR074jmp.js +0 -13
- package/dist/transpose-DH4gmHvu.js +0 -38
- package/dist/zeros-DBFVbpv5.js +0 -14
|
@@ -1,23 +1,21 @@
|
|
|
1
|
-
import { e as D,
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
l.registerFlag("WEBGPU_PRINT_SHADER", () => "");
|
|
20
|
-
l.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
|
|
1
|
+
import { j as g, au as $, av as K, e as D, l as _, aw as O, h as x, ax as Z, at as W, ay as F, az as j, aA as X, aB as J, i as ee, ad as k } from "./index-DOvlwCh-.js";
|
|
2
|
+
import { m as te, f as se, P as re } from "./webgpu_program-DuOXPQol.js";
|
|
3
|
+
import { i as ne, G as q } from "./webgpu_util-RxEF33Rj.js";
|
|
4
|
+
import { m as N } from "./complex_util-Yc1A_gV1.js";
|
|
5
|
+
const d = g();
|
|
6
|
+
d.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
|
|
7
|
+
d.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
|
|
8
|
+
d.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
|
|
9
|
+
d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
|
|
10
|
+
d.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
|
|
11
|
+
d.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
|
|
12
|
+
d.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
|
|
13
|
+
d.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
|
|
14
|
+
d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
|
|
15
|
+
d.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
|
|
16
|
+
d.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
|
|
17
|
+
d.registerFlag("WEBGPU_PRINT_SHADER", () => "");
|
|
18
|
+
d.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
|
|
21
19
|
class ae {
|
|
22
20
|
constructor(e) {
|
|
23
21
|
e && (this.vendor = e.vendor, this.architecture = e.architecture, this.intelGPUGeneration = this.getIntelGPUGeneration());
|
|
@@ -70,8 +68,8 @@ class ie {
|
|
|
70
68
|
}), this.freeBuffers = /* @__PURE__ */ new Map(), this.usedBuffers = /* @__PURE__ */ new Map(), this.numUsedBuffers = 0, this.numFreeBuffers = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
|
|
71
69
|
}
|
|
72
70
|
}
|
|
73
|
-
function z(
|
|
74
|
-
return `${
|
|
71
|
+
function z(l, e) {
|
|
72
|
+
return `${l}_${e}`;
|
|
75
73
|
}
|
|
76
74
|
class oe {
|
|
77
75
|
constructor(e) {
|
|
@@ -122,30 +120,30 @@ class oe {
|
|
|
122
120
|
}), this.freeTextures = /* @__PURE__ */ new Map(), this.usedTextures = /* @__PURE__ */ new Map(), this.numUsedTextures = 0, this.numFreeTextures = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
|
|
123
121
|
}
|
|
124
122
|
}
|
|
125
|
-
function L(
|
|
126
|
-
return `${
|
|
123
|
+
function L(l, e, t, s) {
|
|
124
|
+
return `${l}_${e}_${t}_${s}`;
|
|
127
125
|
}
|
|
128
|
-
function Q(
|
|
129
|
-
if (
|
|
126
|
+
function Q(l) {
|
|
127
|
+
if (l === "rgba8unorm")
|
|
130
128
|
return 16;
|
|
131
|
-
throw new Error(`${
|
|
129
|
+
throw new Error(`${l} is not supported!`);
|
|
132
130
|
}
|
|
133
|
-
const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (
|
|
134
|
-
const t =
|
|
131
|
+
const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (l, e) => {
|
|
132
|
+
const t = l.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
|
|
135
133
|
if (n.every((a) => a <= t))
|
|
136
134
|
return n;
|
|
137
135
|
_(n[0] > t && s.y === void 0 && s.z === void 0, () => "Dispatch size exceeds WebGPU limits in Y or Z dimension.");
|
|
138
136
|
let r = Math.ceil(Math.sqrt(n[0]));
|
|
139
137
|
return r > t ? (r = Math.ceil(Math.cbrt(n[0])), _(r <= t, () => "Total dispatch size exceeds WebGPU maximum."), [r, r, r]) : [r, r, 1];
|
|
140
138
|
};
|
|
141
|
-
class R extends
|
|
139
|
+
class R extends $ {
|
|
142
140
|
nextDataId() {
|
|
143
141
|
return R.nextDataId++;
|
|
144
142
|
}
|
|
145
143
|
constructor(e, t) {
|
|
146
|
-
if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !
|
|
144
|
+
if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !ne())
|
|
147
145
|
throw new Error("WebGPU is not supported on this device");
|
|
148
|
-
this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new
|
|
146
|
+
this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new K(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
|
|
149
147
|
device: e,
|
|
150
148
|
format: "bgra8unorm"
|
|
151
149
|
}), document.body.appendChild(this.dummyCanvas));
|
|
@@ -250,7 +248,7 @@ class R extends re {
|
|
|
250
248
|
if (s != null || t.dtype === "string")
|
|
251
249
|
return s;
|
|
252
250
|
if (t.dtype === "complex64") {
|
|
253
|
-
const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(
|
|
251
|
+
const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(N(E, B).buffer, "float32");
|
|
254
252
|
return this.convertAndCacheOnCPU(e, y), y;
|
|
255
253
|
}
|
|
256
254
|
this.hasReadSyncWarned || (this.hasReadSyncWarned = !0, console.warn("The performance of synchronously reading data from GPU to CPU is poor on the webgpu backend, please use asynchronous APIs instead."));
|
|
@@ -311,7 +309,7 @@ class R extends re {
|
|
|
311
309
|
this.read(t.complexTensorInfos.real.dataId),
|
|
312
310
|
this.read(t.complexTensorInfos.imag.dataId)
|
|
313
311
|
]), a = r[0], i = r[1];
|
|
314
|
-
n =
|
|
312
|
+
n = N(a, i);
|
|
315
313
|
} else {
|
|
316
314
|
const r = await this.getBufferData(t.resource);
|
|
317
315
|
n = O(r, t.dtype);
|
|
@@ -339,7 +337,7 @@ class R extends re {
|
|
|
339
337
|
refCount: 1,
|
|
340
338
|
external: e.zeroCopy
|
|
341
339
|
});
|
|
342
|
-
const a = this.tensorMap.get(r), i =
|
|
340
|
+
const a = this.tensorMap.get(r), i = q(a.dtype) * x(a.shape);
|
|
343
341
|
if (e.buffer.size < i)
|
|
344
342
|
throw new Error(`GPUBuffer size(${e.buffer.size}) is smaller than tensor size(${i})!`);
|
|
345
343
|
if ((e.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC))
|
|
@@ -365,7 +363,7 @@ class R extends re {
|
|
|
365
363
|
const t = this.readSync(e.dataId);
|
|
366
364
|
if (e.dtype === "string")
|
|
367
365
|
try {
|
|
368
|
-
const s = t.map((n) =>
|
|
366
|
+
const s = t.map((n) => Z(n));
|
|
369
367
|
return W(e.shape, e.dtype, s);
|
|
370
368
|
} catch {
|
|
371
369
|
throw new Error("Failed to decode encoded string bytes into utf-8");
|
|
@@ -385,10 +383,10 @@ class R extends re {
|
|
|
385
383
|
kernelMs: null,
|
|
386
384
|
wallMs: null
|
|
387
385
|
}, u = await Promise.all(r);
|
|
388
|
-
return i.kernelMs =
|
|
386
|
+
return i.kernelMs = j(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
|
|
389
387
|
}
|
|
390
388
|
makeTensorInfo(e, t, s) {
|
|
391
|
-
return t === "string" && s != null && s.length > 0 &&
|
|
389
|
+
return t === "string" && s != null && s.length > 0 && X(s[0]) && (s = s.map((r) => J(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
|
|
392
390
|
}
|
|
393
391
|
tensorToBinding(e) {
|
|
394
392
|
if (!e)
|
|
@@ -400,16 +398,16 @@ class R extends re {
|
|
|
400
398
|
const t = this.tensorMap.get(e);
|
|
401
399
|
if (t.resource != null)
|
|
402
400
|
return;
|
|
403
|
-
const s =
|
|
401
|
+
const s = q(t.dtype) * x(t.shape);
|
|
404
402
|
let n;
|
|
405
403
|
const r = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
|
|
406
404
|
if (t.values) {
|
|
407
405
|
if (n = this.bufferManager.acquireBuffer(s, r, !0), n.mapState === "unmapped") {
|
|
408
406
|
const a = this.bufferManager.acquireBuffer(s, GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, !0, !1), i = a.getMappedRange();
|
|
409
|
-
t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
|
|
407
|
+
t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
|
|
410
408
|
} else {
|
|
411
409
|
const a = n.getMappedRange();
|
|
412
|
-
t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
|
|
410
|
+
t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
|
|
413
411
|
}
|
|
414
412
|
t.values = null;
|
|
415
413
|
} else
|
|
@@ -457,7 +455,7 @@ class R extends re {
|
|
|
457
455
|
}
|
|
458
456
|
runWebGPUProgram(e, t, s, n, r) {
|
|
459
457
|
if (r || (r = this.makeTensorInfo(e.outputShape, s)), x(r.shape) === 0)
|
|
460
|
-
return this.tensorMap.get(r.dataId).values =
|
|
458
|
+
return this.tensorMap.get(r.dataId).values = ee(r.dtype, 0), r;
|
|
461
459
|
this.uploadToGPU(r.dataId), e.dispatch = fe(this.device, e);
|
|
462
460
|
const a = t.map((u, o) => {
|
|
463
461
|
if (u.dtype === "complex64")
|
|
@@ -470,9 +468,9 @@ class R extends re {
|
|
|
470
468
|
name: e.variableNames[o]
|
|
471
469
|
};
|
|
472
470
|
});
|
|
473
|
-
e.shaderKey =
|
|
471
|
+
e.shaderKey = te(e, a, r);
|
|
474
472
|
const i = g().getBool("WEBGPU_ENGINE_COMPILE_ONLY");
|
|
475
|
-
return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] =
|
|
473
|
+
return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = se(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
|
|
476
474
|
}
|
|
477
475
|
recordAndSubmit(e, t, s, n) {
|
|
478
476
|
if (e.pipeline instanceof Promise)
|
|
@@ -484,11 +482,11 @@ class R extends re {
|
|
|
484
482
|
const h = "int32";
|
|
485
483
|
a.map((m) => {
|
|
486
484
|
r.push({ type: h, data: m });
|
|
487
|
-
const w =
|
|
485
|
+
const w = k(m);
|
|
488
486
|
r.push({ type: h, data: w });
|
|
489
487
|
});
|
|
490
488
|
} else {
|
|
491
|
-
const h =
|
|
489
|
+
const h = k(t.shape);
|
|
492
490
|
r.push({ type: i, data: h });
|
|
493
491
|
}
|
|
494
492
|
if (e.size) {
|
|
@@ -520,7 +518,7 @@ class R extends re {
|
|
|
520
518
|
querySet: this.querySet,
|
|
521
519
|
beginningOfPassWriteIndex: 0,
|
|
522
520
|
endOfPassWriteIndex: 1
|
|
523
|
-
}, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType ===
|
|
521
|
+
}, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === re.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
|
|
524
522
|
}
|
|
525
523
|
async getQueryTime() {
|
|
526
524
|
if (!this.supportTimestampQuery)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { A as h, B as f, C as p, D as g, E as u, T } from "./index-DOvlwCh-.js";
|
|
2
|
+
import { r as b } from "./reshape-ByE68wS9.js";
|
|
3
|
+
function m(e, r) {
|
|
4
|
+
let n = f(e, "broadcastTo", "x");
|
|
5
|
+
const a = n.shape;
|
|
6
|
+
if (p(r), r.length < n.rank)
|
|
7
|
+
throw new Error(`broadcastTo(): shape.length=${r.length} < input.rank=${n.rank}.`);
|
|
8
|
+
if (r.length > n.rank) {
|
|
9
|
+
const t = n.shape.slice();
|
|
10
|
+
for (; t.length < r.length; )
|
|
11
|
+
t.unshift(1);
|
|
12
|
+
n = b(n, t);
|
|
13
|
+
}
|
|
14
|
+
const s = n.shape, o = Array.from(r);
|
|
15
|
+
for (let t = r.length - 1; t >= 0; t--)
|
|
16
|
+
if (s[t] === r[t])
|
|
17
|
+
o[t] = 1;
|
|
18
|
+
else if (n.shape[t] !== 1)
|
|
19
|
+
throw new Error(`broadcastTo(): [${a}] cannot be broadcast to [${r}].`);
|
|
20
|
+
if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
|
|
21
|
+
return g(n);
|
|
22
|
+
const i = { x: n }, c = { reps: o };
|
|
23
|
+
return u.runKernel(T, i, c);
|
|
24
|
+
}
|
|
25
|
+
const E = /* @__PURE__ */ h({ broadcastTo_: m });
|
|
26
|
+
export {
|
|
27
|
+
E as b
|
|
28
|
+
};
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { s as i, e } from "../index-
|
|
2
|
-
import { t } from "../tensor4d-
|
|
3
|
-
import { t as a } from "../tensor2d-
|
|
1
|
+
import { s as i, e } from "../index-DOvlwCh-.js";
|
|
2
|
+
import { t } from "../tensor4d-BARPdTaS.js";
|
|
3
|
+
import { t as a } from "../tensor2d-Bs9wZRc7.js";
|
|
4
4
|
async function k(n) {
|
|
5
5
|
await i(n);
|
|
6
6
|
const s = t(
|
package/dist/checks/gelu.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { s as e, e as o } from "../index-
|
|
2
|
-
import { t as s } from "../tensor2d-
|
|
1
|
+
import { s as e, e as o } from "../index-DOvlwCh-.js";
|
|
2
|
+
import { t as s } from "../tensor2d-Bs9wZRc7.js";
|
|
3
3
|
async function m(t) {
|
|
4
4
|
await e(t);
|
|
5
5
|
const r = s(
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
import { s as
|
|
2
|
-
import "../
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
import "../dataset-D2P7rHAw.js";
|
|
6
|
-
import { t as e } from "../tensor2d-CSB4KOb0.js";
|
|
7
|
-
async function f(t) {
|
|
8
|
-
await n(t);
|
|
1
|
+
import { s as o, e as s } from "../index-DOvlwCh-.js";
|
|
2
|
+
import { t as e } from "../tensor2d-Bs9wZRc7.js";
|
|
3
|
+
async function i(t) {
|
|
4
|
+
await o(t);
|
|
9
5
|
const r = e(
|
|
10
6
|
[
|
|
11
7
|
[0.1, 0.2, 9, 10, 11],
|
|
@@ -15,7 +11,7 @@ async function f(t) {
|
|
|
15
11
|
[0.3, 0.4, -9, -10, -11]
|
|
16
12
|
],
|
|
17
13
|
[5, 5]
|
|
18
|
-
),
|
|
14
|
+
), n = e(
|
|
19
15
|
[
|
|
20
16
|
[0.5, 0.6, 7e4, -8e3, 0],
|
|
21
17
|
[0.7, 0.8, -7e4, 8e4, 0],
|
|
@@ -25,8 +21,8 @@ async function f(t) {
|
|
|
25
21
|
],
|
|
26
22
|
[5, 5]
|
|
27
23
|
);
|
|
28
|
-
return await s().runKernel("MatMulGelu", { x:
|
|
24
|
+
return await s().runKernel("MatMulGelu", { x: n, kernel: r }).array();
|
|
29
25
|
}
|
|
30
26
|
export {
|
|
31
|
-
|
|
27
|
+
i as execute
|
|
32
28
|
};
|
package/dist/checks/normRMS.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import { s as
|
|
2
|
-
import { a as h } from "../ops-
|
|
3
|
-
import { t as p } from "../tensor1d-
|
|
4
|
-
import { t as
|
|
1
|
+
import { s as u, a5 as A, e as y } from "../index-DOvlwCh-.js";
|
|
2
|
+
import { a as h } from "../ops-B5yanEdW.js";
|
|
3
|
+
import { t as p } from "../tensor1d-D11P_7Dp.js";
|
|
4
|
+
import { t as r } from "../tensor-DJoc7gJU.js";
|
|
5
5
|
const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
|
|
6
6
|
async function k(t) {
|
|
7
|
-
await
|
|
8
|
-
const o = p(x, "float32"), n =
|
|
9
|
-
const
|
|
10
|
-
return h.meanSquaredError(
|
|
11
|
-
}, { value: m, grads:
|
|
7
|
+
await u(t);
|
|
8
|
+
const o = p(x, "float32"), n = r(w, [16, 128, 192], "float32"), s = r(M, [16, 128, 192], "float32"), e = (d, g) => {
|
|
9
|
+
const i = y().runKernel("RMSNorm", { x: d, gamma: g });
|
|
10
|
+
return h.meanSquaredError(i, s);
|
|
11
|
+
}, { value: m, grads: a } = A(e)([n, o]), c = await m.array(), f = await a[0].array(), l = await a[1].array();
|
|
12
12
|
return [c, f, l];
|
|
13
13
|
}
|
|
14
14
|
export {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { s as c, e as d } from "../index-
|
|
2
|
-
import { t as f } from "../tensor1d-
|
|
3
|
-
import { t as r } from "../tensor-
|
|
1
|
+
import { s as c, e as d } from "../index-DOvlwCh-.js";
|
|
2
|
+
import { t as f } from "../tensor1d-D11P_7Dp.js";
|
|
3
|
+
import { t as r } from "../tensor-DJoc7gJU.js";
|
|
4
4
|
const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
|
|
5
5
|
async function x(t) {
|
|
6
6
|
await c(t);
|
package/dist/checks/qkv.js
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
if (m(t), e != null && e.length !== 3)
|
|
1
|
+
import { $ as i, a0 as u, a1 as c, s as l, e as h } from "../index-DOvlwCh-.js";
|
|
2
|
+
import { t as f } from "../tensor2d-Bs9wZRc7.js";
|
|
3
|
+
function m(t, e, n) {
|
|
4
|
+
if (i(t), e != null && e.length !== 3)
|
|
6
5
|
throw new Error("tensor3d() requires shape to have three numbers");
|
|
7
|
-
const r =
|
|
6
|
+
const r = u(t, n);
|
|
8
7
|
if (r.length !== 3 && r.length !== 1)
|
|
9
8
|
throw new Error("tensor3d() requires values to be number[][][] or flat/TypedArray");
|
|
10
9
|
if (r.length === 1 && e == null)
|
|
11
10
|
throw new Error("tensor3d() requires shape to be provided when `values` are a flat array");
|
|
12
|
-
return
|
|
11
|
+
return c(t, e, r, n);
|
|
13
12
|
}
|
|
14
|
-
async function
|
|
15
|
-
await
|
|
16
|
-
const e =
|
|
13
|
+
async function y(t) {
|
|
14
|
+
await l(t);
|
|
15
|
+
const e = m(
|
|
17
16
|
[
|
|
18
17
|
[
|
|
19
18
|
[0.1, 0.2],
|
|
@@ -27,9 +26,9 @@ async function p(t) {
|
|
|
27
26
|
[0.7, 0.8, 1.1, 1.2, 1.5, 1.6]
|
|
28
27
|
],
|
|
29
28
|
[2, 6]
|
|
30
|
-
), r =
|
|
31
|
-
return [
|
|
29
|
+
), r = h().runKernel("QKV", { x: e, kernel: n }, { heads: 1 }), a = await r[0].array(), o = await r[1].array(), s = await r[2].array();
|
|
30
|
+
return [a, o, s];
|
|
32
31
|
}
|
|
33
32
|
export {
|
|
34
|
-
|
|
33
|
+
y as execute
|
|
35
34
|
};
|
package/dist/checks/rope.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import t from "../layers/RoPECache.js";
|
|
2
|
-
import { s as c, e as i } from "../index-
|
|
3
|
-
import { t as p } from "../tensor4d-
|
|
2
|
+
import { s as c, e as i } from "../index-DOvlwCh-.js";
|
|
3
|
+
import { t as p } from "../tensor4d-BARPdTaS.js";
|
|
4
4
|
async function y(a) {
|
|
5
5
|
await c(a);
|
|
6
6
|
const o = p(
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { A as a, B as m, l as y, q as B, E as c, F as f } from "./index-DOvlwCh-.js";
|
|
2
|
+
function p(o, s, t) {
|
|
3
|
+
const r = m(o, "x", "clipByValue");
|
|
4
|
+
if (y(s <= t, () => `Error in clip: min (${s}) must be less than or equal to max (${t}).`), s === t)
|
|
5
|
+
return B(r.shape, s, r.dtype);
|
|
6
|
+
const n = { x: r }, e = { clipValueMin: s, clipValueMax: t };
|
|
7
|
+
return c.runKernel(f, n, e);
|
|
8
|
+
}
|
|
9
|
+
const E = /* @__PURE__ */ a({ clipByValue_: p });
|
|
10
|
+
export {
|
|
11
|
+
E as c
|
|
12
|
+
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { A as t, B as s, ab as n, E as m, ac as r } from "./index-DOvlwCh-.js";
|
|
2
|
+
function l(o, c) {
|
|
3
|
+
const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
|
|
4
|
+
n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
|
|
5
|
+
const p = { real: a, imag: e };
|
|
6
|
+
return m.runKernel(r, p);
|
|
7
|
+
}
|
|
8
|
+
const i = /* @__PURE__ */ t({ complex_: l });
|
|
9
|
+
export {
|
|
10
|
+
i as c
|
|
11
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { A as s, l as a, G as p, D as i, E as l, H as f } from "./index-DOvlwCh-.js";
|
|
2
|
+
function h(n, e = 0) {
|
|
3
|
+
a(n.length >= 1, () => "Pass at least one tensor to concat");
|
|
4
|
+
const t = p(n, "tensors", "concat", "string_or_numeric");
|
|
5
|
+
if (t[0].dtype === "complex64" && t.forEach((o) => {
|
|
6
|
+
if (o.dtype !== "complex64")
|
|
7
|
+
throw new Error(`Cannot concatenate complex64 tensors with a tensor
|
|
8
|
+
with dtype ${o.dtype}. `);
|
|
9
|
+
}), t.length === 1)
|
|
10
|
+
return i(t[0]);
|
|
11
|
+
const r = t, c = { axis: e };
|
|
12
|
+
return l.runKernel(f, r, c);
|
|
13
|
+
}
|
|
14
|
+
const u = /* @__PURE__ */ s({ concat_: h });
|
|
15
|
+
export {
|
|
16
|
+
u as c
|
|
17
|
+
};
|