npm - @genai-fi/nanogpt - Versions diffs - 0.10.2 → 0.10.3 - Mend

@genai-fi/nanogpt 0.10.2 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (249) hide show

package/dist/Generator.js +11761 -171
package/dist/{RealDiv-zz7FpkKX.js → RealDiv-KAPDe8zB.js} +23 -25
package/dist/Reshape-BYkmUnAv.js +14 -0
package/dist/{Reshape-CHdUjC72.js → Reshape-Zt6eb7yh.js} +18 -20
package/dist/TeachableLLM.js +10 -11
package/dist/{axis_util-BsIr9ZNu.js → axis_util-BaG7mf5A.js} +3 -3
package/dist/backend.js +2 -2
package/dist/{backend_util-B1XRLuq9.js → backend_util-RCe-rHaj.js} +72 -73
package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-DE3ACOLx.js} +45 -47
package/dist/broadcast_to-B3eYlZm7.js +28 -0
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +3 -3
package/dist/checks/gelu.js +2 -2
package/dist/checks/matMulGelu.js +7 -11
package/dist/checks/normRMS.js +9 -9
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/packUnpack.js +2 -2
package/dist/checks/qkv.js +12 -13
package/dist/checks/rope.js +2 -2
package/dist/clip_by_value-BnO7-a88.js +12 -0
package/dist/complex-DjxcVmoX.js +11 -0
package/dist/concat-BV8bt5H-.js +17 -0
package/dist/{concat_util-iBYIyuQe.js → concat_util-DpW8mL_l.js} +1 -1
package/dist/{dataset-D2P7rHAw.js → dataset-BcwmTGYc.js} +137 -139
package/dist/dropout-BcvN9JYi.js +92 -0
package/dist/expand_dims-DT4tEPwA.js +11 -0
package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-Hta_rEnm.js} +1 -1
package/dist/floor-D5QdR_le.js +9 -0
package/dist/gather-D3JcZUaI.js +9 -0
package/dist/{gelu-Bmhopi0J.js → gelu-CjNPL4OH.js} +10 -11
package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-DAOmgtXR.js} +841 -1015
package/dist/{index-DRyE072i.js → index-BwexR4lA.js} +262 -263
package/dist/index-DOvlwCh-.js +3520 -0
package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CCzYdUZg.js} +130 -132
package/dist/layers/BaseLayer.js +15 -16
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +4 -4
package/dist/layers/PositionEmbedding.js +7 -7
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +9 -9
package/dist/layers/TiedEmbedding.js +6 -6
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +13 -14
package/dist/log_sum_exp-ngO0-4pK.js +39 -0
package/dist/main.js +49 -50
package/dist/{matMul16-fEAJ4smh.js → matMul16-BWRSOCWB.js} +14 -15
package/dist/matMulGelu-CzfgT6Wq.js +163 -0
package/dist/mat_mul-SjpJRLyL.js +11 -0
package/dist/mod-AnXEvvpo.js +11 -0
package/dist/models/NanoGPTV1.js +2 -2
package/dist/models/model.js +13 -14
package/dist/ones-D2rT0xk2.js +14 -0
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/add16.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/concat16.js +2 -2
package/dist/ops/cpu/adamAdjust.js +13 -14
package/dist/ops/cpu/adamMoments.js +6 -7
package/dist/ops/cpu/appendCache.js +7 -8
package/dist/ops/cpu/attentionMask.js +7 -7
package/dist/ops/cpu/fusedSoftmax.js +10 -11
package/dist/ops/cpu/gatherSub.js +9 -10
package/dist/ops/cpu/gelu.js +9 -10
package/dist/ops/cpu/matMul16.js +6 -7
package/dist/ops/cpu/matMulGelu.js +5 -6
package/dist/ops/cpu/matMulMul.js +3 -4
package/dist/ops/cpu/mulDropout.js +3 -4
package/dist/ops/cpu/normRMS.js +10 -11
package/dist/ops/cpu/qkv.js +8 -9
package/dist/ops/cpu/rope.js +5 -6
package/dist/ops/cpu/scatterSub.js +17 -19
package/dist/ops/dot16.js +2 -2
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/add16.js +11 -12
package/dist/ops/grads/attentionMask.js +5 -6
package/dist/ops/grads/gelu.js +3 -4
package/dist/ops/grads/matMul16.js +4 -5
package/dist/ops/grads/matMulGelu.js +9 -10
package/dist/ops/grads/normRMS.js +7 -8
package/dist/ops/grads/pack16.js +4 -5
package/dist/ops/grads/qkv.js +17 -19
package/dist/ops/grads/rope.js +3 -5
package/dist/ops/grads/softmax16.js +3 -4
package/dist/ops/grads/unpack16.js +3 -4
package/dist/ops/grads/utils.d.ts +1 -0
package/dist/ops/grads/utils.js +8 -4
package/dist/ops/matMul16.js +3 -3
package/dist/ops/matMulGelu.js +2 -2
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mul16.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/pack16.js +3 -4
package/dist/ops/qkv.js +4 -8
package/dist/ops/reshape16.js +14 -16
package/dist/ops/rope.d.ts +1 -1
package/dist/ops/rope.js +3 -8
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/slice16.js +2 -2
package/dist/ops/softmax16.js +5 -8
package/dist/ops/sub16.js +1 -1
package/dist/ops/sum16.js +2 -2
package/dist/ops/transpose16.js +23 -24
package/dist/ops/unpack16.js +2 -2
package/dist/ops/webgl/adamAdjust.js +2 -3
package/dist/ops/webgl/adamMoments.js +1 -2
package/dist/ops/webgl/appendCache.js +1 -2
package/dist/ops/webgl/attentionMask.js +4 -5
package/dist/ops/webgl/fusedSoftmax.js +4 -6
package/dist/ops/webgl/gatherSub.js +6 -7
package/dist/ops/webgl/gelu.js +2 -3
package/dist/ops/webgl/log.js +11 -12
package/dist/ops/webgl/matMul16.js +10 -11
package/dist/ops/webgl/matMulGelu.js +7 -111
package/dist/ops/webgl/matMulMul.js +9 -10
package/dist/ops/webgl/mulDropout.js +8 -9
package/dist/ops/webgl/normRMS.js +2 -3
package/dist/ops/webgl/qkv.js +5 -6
package/dist/ops/webgl/rope.js +7 -8
package/dist/ops/webgl/scatterSub.js +5 -6
package/dist/ops/webgpu/adamAdjust.js +10 -12
package/dist/ops/webgpu/adamMoments.js +8 -10
package/dist/ops/webgpu/add16.js +8 -9
package/dist/ops/webgpu/appendCache.js +23 -25
package/dist/ops/webgpu/attentionMask.js +8 -10
package/dist/ops/webgpu/attentionMask32_program.js +2 -2
package/dist/ops/webgpu/concat16.js +12 -14
package/dist/ops/webgpu/gatherSub.js +11 -13
package/dist/ops/webgpu/gelu.js +28 -29
package/dist/ops/webgpu/matMul16.js +26 -28
package/dist/ops/webgpu/matMul16_program.js +4 -5
package/dist/ops/webgpu/mul16.js +9 -10
package/dist/ops/webgpu/normRMS.js +15 -17
package/dist/ops/webgpu/normRMSGrad.js +21 -28
package/dist/ops/webgpu/pack16.js +12 -13
package/dist/ops/webgpu/pack16_program.js +2 -2
package/dist/ops/webgpu/qkv.js +16 -18
package/dist/ops/webgpu/rope.js +25 -27
package/dist/ops/webgpu/scatterSub.js +7 -9
package/dist/ops/webgpu/slice16.js +21 -23
package/dist/ops/webgpu/softmax16.js +17 -19
package/dist/ops/webgpu/softmax16_program.js +2 -2
package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
package/dist/ops/webgpu/softmax16grad.js +7 -8
package/dist/ops/webgpu/sub16.js +7 -8
package/dist/ops/webgpu/sum16.js +18 -20
package/dist/ops/webgpu/transpose16.js +19 -20
package/dist/ops/webgpu/transpose16_program.js +2 -2
package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
package/dist/ops/webgpu/unpack16.js +3 -4
package/dist/ops/webgpu/utils/binary_op.js +7 -8
package/dist/ops/webgpu/utils/reductions.js +14 -22
package/dist/ops-B5yanEdW.js +476 -0
package/dist/pack16-nQ6JaLo-.js +39 -0
package/dist/patches/webgpu_backend.js +19 -20
package/dist/patches/webgpu_base.js +1 -1
package/dist/patches/webgpu_program.js +21 -22
package/dist/{random_width-BVV9HveY.js → random_width-or-CEftb.js} +2506 -2761
package/dist/range-BklejeeW.js +10 -0
package/dist/relu-CP0ZcxWO.js +9 -0
package/dist/reshape-ByE68wS9.js +9 -0
package/dist/resize_nearest_neighbor-B19mCEg2.js +175 -0
package/dist/rope-Ir4mTyD1.js +24 -0
package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-lvSiX8q4.js} +1 -1
package/dist/selu_util-kbhpTdYD.js +44 -0
package/dist/{shared-CHhxz-O5.js → shared-DT1TkE6w.js} +1 -1
package/dist/{shared-D2NP_CpY.js → shared-dntlHIDQ.js} +343 -345
package/dist/slice-BfEGSH82.js +12 -0
package/dist/{slice_util-DyjSAD0u.js → slice_util-uTKwiEpW.js} +1 -1
package/dist/{softmax-C9JQEtnO.js → softmax-CA5jFsLR.js} +4 -5
package/dist/split-CVLc0w--.js +9 -0
package/dist/squeeze-C7Z2srUo.js +10 -0
package/dist/stack-Cf4n9h0N.js +11 -0
package/dist/step-CINUs5QB.js +261 -0
package/dist/sum-DWAtNGez.js +11 -0
package/dist/tensor-DJoc7gJU.js +8 -0
package/dist/tensor1d-D11P_7Dp.js +11 -0
package/dist/{tensor2d-CSB4KOb0.js → tensor2d-Bs9wZRc7.js} +6 -7
package/dist/{tensor4d-D7bLqGqz.js → tensor4d-BARPdTaS.js} +6 -7
package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-y1cvNhLA.js} +255 -264
package/dist/tile-mbfagpsB.js +11 -0
package/dist/training/Adam.js +2 -2
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +2 -2
package/dist/training/sparseCrossEntropy.js +5 -5
package/dist/transpose-ClWiBS_b.js +36 -0
package/dist/unsorted_segment_sum-BDDhB_E6.js +277 -0
package/dist/utilities/dummy.js +3 -3
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/packed.d.ts +1 -4
package/dist/utilities/packed.js +10 -745
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/sentences.js +5 -5
package/dist/utilities/weights.js +2 -2
package/dist/{variable-DzfrwYuP.js → variable-WawDEaAb.js} +1 -1
package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-DuOXPQol.js} +178 -172
package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-RxEF33Rj.js} +34 -35
package/dist/zeros-KnWaWf-X.js +13 -0
package/dist/zeros_like-DvE73F4e.js +721 -0
package/package.json +4 -2
package/dist/Reshape-CDVLyVfz.js +0 -16
package/dist/broadcast_to-B0ChcDaz.js +0 -30
package/dist/complex-BBiRlsVq.js +0 -13
package/dist/concat-DmBLPVGC.js +0 -19
package/dist/dropout-B1x1kYMa.js +0 -99
package/dist/expand_dims-ouvfxQ1n.js +0 -13
package/dist/gather-CH9sdacz.js +0 -10
package/dist/index-D6Q1lPZO.js +0 -2157
package/dist/log_sum_exp-D3ftBNY5.js +0 -41
package/dist/mat_mul-C59XWcJd.js +0 -12
package/dist/mod-DESSvHIU.js +0 -12
package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
package/dist/ones-jU9jlQvM.js +0 -15
package/dist/ops-BFDtP6th.js +0 -645
package/dist/pack16-CmVZs6af.js +0 -41
package/dist/patches/PackedTensor.d.ts +0 -12
package/dist/patches/PackedTensor.js +0 -11
package/dist/patches/engine.d.ts +0 -261
package/dist/patches/engine.js +0 -12
package/dist/patches/tape.d.ts +0 -12
package/dist/patches/tape.js +0 -5
package/dist/range-ZZZD60Fx.js +0 -11
package/dist/reciprocal-CrYlsAGD.js +0 -10
package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
package/dist/relu-BYDneVPn.js +0 -10
package/dist/reshape-CaPQzFvz.js +0 -10
package/dist/rope-s4W2XO9B.js +0 -32
package/dist/selu_util-BGPXmd4B.js +0 -303
package/dist/sin-Djs4aQiu.js +0 -16
package/dist/slice-DvovR5wq.js +0 -13
package/dist/split-DBck65sX.js +0 -10
package/dist/squeeze-C00Ipm_7.js +0 -11
package/dist/stack-ChnHwRpX.js +0 -13
package/dist/sum-ywRJj3Zr.js +0 -12
package/dist/tensor-0r5yOo2R.js +0 -8
package/dist/tensor-CzmOBsdf.js +0 -909
package/dist/tensor1d-BlUT89BP.js +0 -12
package/dist/tensor_util-DfwaWayG.js +0 -523
package/dist/tile-CR074jmp.js +0 -13
package/dist/transpose-DH4gmHvu.js +0 -38
package/dist/zeros-DBFVbpv5.js +0 -14

package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-DE3ACOLx.js} RENAMED Viewed

@@ -1,23 +1,21 @@
-import { e as D, J as W } from "./index-D6Q1lPZO.js";
-import { e as g, a as _, y as O, s as x, z as $, A as F, B as K, o as Z, q as j, g as X, i as q } from "./tensor-CzmOBsdf.js";
-import { m as J, f as ee, P as te } from "./webgpu_program-DzaQiqel.js";
-import { i as se, G as N } from "./webgpu_util-0_ubCEHJ.js";
-import { K as re, J as ne } from "./tensor_util-DfwaWayG.js";
-import { m as k } from "./complex_util-Yc1A_gV1.js";
-const l = g();
-l.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
-l.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
-l.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
-l.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
-l.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
-l.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
-l.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
-l.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
-l.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
-l.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
-l.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
-l.registerFlag("WEBGPU_PRINT_SHADER", () => "");
-l.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
+import { j as g, au as $, av as K, e as D, l as _, aw as O, h as x, ax as Z, at as W, ay as F, az as j, aA as X, aB as J, i as ee, ad as k } from "./index-DOvlwCh-.js";
+import { m as te, f as se, P as re } from "./webgpu_program-DuOXPQol.js";
+import { i as ne, G as q } from "./webgpu_util-RxEF33Rj.js";
+import { m as N } from "./complex_util-Yc1A_gV1.js";
+const d = g();
+d.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
+d.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
+d.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
+d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
+d.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
+d.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
+d.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
+d.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
+d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
+d.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
+d.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
+d.registerFlag("WEBGPU_PRINT_SHADER", () => "");
+d.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
 class ae {
   constructor(e) {
     e && (this.vendor = e.vendor, this.architecture = e.architecture, this.intelGPUGeneration = this.getIntelGPUGeneration());
@@ -70,8 +68,8 @@ class ie {
     }), this.freeBuffers = /* @__PURE__ */ new Map(), this.usedBuffers = /* @__PURE__ */ new Map(), this.numUsedBuffers = 0, this.numFreeBuffers = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
   }
 }
-function z(d, e) {
-  return `${d}_${e}`;
+function z(l, e) {
+  return `${l}_${e}`;
 }
 class oe {
   constructor(e) {
@@ -122,30 +120,30 @@ class oe {
     }), this.freeTextures = /* @__PURE__ */ new Map(), this.usedTextures = /* @__PURE__ */ new Map(), this.numUsedTextures = 0, this.numFreeTextures = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
   }
 }
-function L(d, e, t, s) {
-  return `${d}_${e}_${t}_${s}`;
+function L(l, e, t, s) {
+  return `${l}_${e}_${t}_${s}`;
 }
-function Q(d) {
-  if (d === "rgba8unorm")
+function Q(l) {
+  if (l === "rgba8unorm")
     return 16;
-  throw new Error(`${d} is not supported!`);
+  throw new Error(`${l} is not supported!`);
 }
-const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (d, e) => {
-  const t = d.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
+const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (l, e) => {
+  const t = l.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
   if (n.every((a) => a <= t))
     return n;
   _(n[0] > t && s.y === void 0 && s.z === void 0, () => "Dispatch size exceeds WebGPU limits in Y or Z dimension.");
   let r = Math.ceil(Math.sqrt(n[0]));
   return r > t ? (r = Math.ceil(Math.cbrt(n[0])), _(r <= t, () => "Total dispatch size exceeds WebGPU maximum."), [r, r, r]) : [r, r, 1];
 };
-class R extends re {
+class R extends $ {
   nextDataId() {
     return R.nextDataId++;
   }
   constructor(e, t) {
-    if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !se())
+    if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !ne())
       throw new Error("WebGPU is not supported on this device");
-    this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new ne(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
+    this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new K(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
       device: e,
       format: "bgra8unorm"
     }), document.body.appendChild(this.dummyCanvas));
@@ -250,7 +248,7 @@ class R extends re {
     if (s != null || t.dtype === "string")
       return s;
     if (t.dtype === "complex64") {
-      const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(k(E, B).buffer, "float32");
+      const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(N(E, B).buffer, "float32");
       return this.convertAndCacheOnCPU(e, y), y;
     }
     this.hasReadSyncWarned || (this.hasReadSyncWarned = !0, console.warn("The performance of synchronously reading data from GPU to CPU is poor on the webgpu backend, please use asynchronous APIs instead."));
@@ -311,7 +309,7 @@ class R extends re {
         this.read(t.complexTensorInfos.real.dataId),
         this.read(t.complexTensorInfos.imag.dataId)
       ]), a = r[0], i = r[1];
-      n = k(a, i);
+      n = N(a, i);
     } else {
       const r = await this.getBufferData(t.resource);
       n = O(r, t.dtype);
@@ -339,7 +337,7 @@ class R extends re {
       refCount: 1,
       external: e.zeroCopy
     });
-    const a = this.tensorMap.get(r), i = N(a.dtype) * x(a.shape);
+    const a = this.tensorMap.get(r), i = q(a.dtype) * x(a.shape);
     if (e.buffer.size < i)
       throw new Error(`GPUBuffer size(${e.buffer.size}) is smaller than tensor size(${i})!`);
     if ((e.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC))
@@ -365,7 +363,7 @@ class R extends re {
     const t = this.readSync(e.dataId);
     if (e.dtype === "string")
       try {
-        const s = t.map((n) => $(n));
+        const s = t.map((n) => Z(n));
         return W(e.shape, e.dtype, s);
       } catch {
         throw new Error("Failed to decode encoded string bytes into utf-8");
@@ -385,10 +383,10 @@ class R extends re {
       kernelMs: null,
       wallMs: null
     }, u = await Promise.all(r);
-    return i.kernelMs = K(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
+    return i.kernelMs = j(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
   }
   makeTensorInfo(e, t, s) {
-    return t === "string" && s != null && s.length > 0 && Z(s[0]) && (s = s.map((r) => j(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
+    return t === "string" && s != null && s.length > 0 && X(s[0]) && (s = s.map((r) => J(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
   }
   tensorToBinding(e) {
     if (!e)
@@ -400,16 +398,16 @@ class R extends re {
     const t = this.tensorMap.get(e);
     if (t.resource != null)
       return;
-    const s = N(t.dtype) * x(t.shape);
+    const s = q(t.dtype) * x(t.shape);
     let n;
     const r = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
     if (t.values) {
       if (n = this.bufferManager.acquireBuffer(s, r, !0), n.mapState === "unmapped") {
         const a = this.bufferManager.acquireBuffer(s, GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, !0, !1), i = a.getMappedRange();
-        t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
+        t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
       } else {
         const a = n.getMappedRange();
-        t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
+        t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
       }
       t.values = null;
     } else
@@ -457,7 +455,7 @@ class R extends re {
   }
   runWebGPUProgram(e, t, s, n, r) {
     if (r || (r = this.makeTensorInfo(e.outputShape, s)), x(r.shape) === 0)
-      return this.tensorMap.get(r.dataId).values = X(r.dtype, 0), r;
+      return this.tensorMap.get(r.dataId).values = ee(r.dtype, 0), r;
     this.uploadToGPU(r.dataId), e.dispatch = fe(this.device, e);
     const a = t.map((u, o) => {
       if (u.dtype === "complex64")
@@ -470,9 +468,9 @@ class R extends re {
         name: e.variableNames[o]
       };
     });
-    e.shaderKey = J(e, a, r);
+    e.shaderKey = te(e, a, r);
     const i = g().getBool("WEBGPU_ENGINE_COMPILE_ONLY");
-    return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = ee(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
+    return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = se(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
   }
   recordAndSubmit(e, t, s, n) {
     if (e.pipeline instanceof Promise)
@@ -484,11 +482,11 @@ class R extends re {
       const h = "int32";
       a.map((m) => {
         r.push({ type: h, data: m });
-        const w = q(m);
+        const w = k(m);
         r.push({ type: h, data: w });
       });
     } else {
-      const h = q(t.shape);
+      const h = k(t.shape);
       r.push({ type: i, data: h });
     }
     if (e.size) {
@@ -520,7 +518,7 @@ class R extends re {
       querySet: this.querySet,
       beginningOfPassWriteIndex: 0,
       endOfPassWriteIndex: 1
-    }, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === te.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
+    }, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === re.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
   }
   async getQueryTime() {
     if (!this.supportTimestampQuery)

package/dist/broadcast_to-B3eYlZm7.js ADDED Viewed

@@ -0,0 +1,28 @@
+import { A as h, B as f, C as p, D as g, E as u, T } from "./index-DOvlwCh-.js";
+import { r as b } from "./reshape-ByE68wS9.js";
+function m(e, r) {
+  let n = f(e, "broadcastTo", "x");
+  const a = n.shape;
+  if (p(r), r.length < n.rank)
+    throw new Error(`broadcastTo(): shape.length=${r.length} < input.rank=${n.rank}.`);
+  if (r.length > n.rank) {
+    const t = n.shape.slice();
+    for (; t.length < r.length; )
+      t.unshift(1);
+    n = b(n, t);
+  }
+  const s = n.shape, o = Array.from(r);
+  for (let t = r.length - 1; t >= 0; t--)
+    if (s[t] === r[t])
+      o[t] = 1;
+    else if (n.shape[t] !== 1)
+      throw new Error(`broadcastTo(): [${a}] cannot be broadcast to [${r}].`);
+  if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
+    return g(n);
+  const i = { x: n }, c = { reps: o };
+  return u.runKernel(T, i, c);
+}
+const E = /* @__PURE__ */ h({ broadcastTo_: m });
+export {
+  E as b
+};

package/dist/checks/appendCache.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s, e as a } from "../index-D6Q1lPZO.js";
-import { t } from "../tensor4d-D7bLqGqz.js";
+import { s, e as a } from "../index-DOvlwCh-.js";
+import { t } from "../tensor4d-BARPdTaS.js";
 async function u(e) {
   await s(e);
   const n = t(

package/dist/checks/attentionMask.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { s as i, e } from "../index-D6Q1lPZO.js";
-import { t } from "../tensor4d-D7bLqGqz.js";
-import { t as a } from "../tensor2d-CSB4KOb0.js";
+import { s as i, e } from "../index-DOvlwCh-.js";
+import { t } from "../tensor4d-BARPdTaS.js";
+import { t as a } from "../tensor2d-Bs9wZRc7.js";
 async function k(n) {
   await i(n);
   const s = t(

package/dist/checks/gelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s as e, e as o } from "../index-D6Q1lPZO.js";
-import { t as s } from "../tensor2d-CSB4KOb0.js";
+import { s as e, e as o } from "../index-DOvlwCh-.js";
+import { t as s } from "../tensor2d-Bs9wZRc7.js";
 async function m(t) {
   await e(t);
   const r = s(

package/dist/checks/matMulGelu.js CHANGED Viewed

@@ -1,11 +1,7 @@
-import { s as n, e as s } from "../index-D6Q1lPZO.js";
-import "../random_width-BVV9HveY.js";
-import "../register_all_kernels-nvj2k7OC.js";
-import "../index-Cp39cXWe.js";
-import "../dataset-D2P7rHAw.js";
-import { t as e } from "../tensor2d-CSB4KOb0.js";
-async function f(t) {
-  await n(t);
+import { s as o, e as s } from "../index-DOvlwCh-.js";
+import { t as e } from "../tensor2d-Bs9wZRc7.js";
+async function i(t) {
+  await o(t);
   const r = e(
     [
       [0.1, 0.2, 9, 10, 11],
@@ -15,7 +11,7 @@ async function f(t) {
       [0.3, 0.4, -9, -10, -11]
     ],
     [5, 5]
-  ), o = e(
+  ), n = e(
     [
       [0.5, 0.6, 7e4, -8e3, 0],
       [0.7, 0.8, -7e4, 8e4, 0],
@@ -25,8 +21,8 @@ async function f(t) {
     ],
     [5, 5]
   );
-  return await s().runKernel("MatMulGelu", { x: o, kernel: r }).array();
+  return await s().runKernel("MatMulGelu", { x: n, kernel: r }).array();
 }
 export {
-  f as execute
+  i as execute
 };

package/dist/checks/normRMS.js CHANGED Viewed

@@ -1,14 +1,14 @@
-import { s as i, u as A, e as y } from "../index-D6Q1lPZO.js";
-import { a as h } from "../ops-BFDtP6th.js";
-import { t as p } from "../tensor1d-BlUT89BP.js";
-import { t as a } from "../tensor-0r5yOo2R.js";
+import { s as u, a5 as A, e as y } from "../index-DOvlwCh-.js";
+import { a as h } from "../ops-B5yanEdW.js";
+import { t as p } from "../tensor1d-D11P_7Dp.js";
+import { t as r } from "../tensor-DJoc7gJU.js";
 const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function k(t) {
-  await i(t);
-  const o = p(x, "float32"), n = a(w, [16, 128, 192], "float32"), s = a(M, [16, 128, 192], "float32"), e = (d, g) => {
-    const u = y().runKernel("RMSNorm", { x: d, gamma: g });
-    return h.meanSquaredError(u, s);
-  }, { value: m, grads: r } = A(e)([n, o]), c = await m.array(), f = await r[0].array(), l = await r[1].array();
+  await u(t);
+  const o = p(x, "float32"), n = r(w, [16, 128, 192], "float32"), s = r(M, [16, 128, 192], "float32"), e = (d, g) => {
+    const i = y().runKernel("RMSNorm", { x: d, gamma: g });
+    return h.meanSquaredError(i, s);
+  }, { value: m, grads: a } = A(e)([n, o]), c = await m.array(), f = await a[0].array(), l = await a[1].array();
   return [c, f, l];
 }
 export {

package/dist/checks/normRMSGrad.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { s as c, e as d } from "../index-D6Q1lPZO.js";
-import { t as f } from "../tensor1d-BlUT89BP.js";
-import { t as r } from "../tensor-0r5yOo2R.js";
+import { s as c, e as d } from "../index-DOvlwCh-.js";
+import { t as f } from "../tensor1d-D11P_7Dp.js";
+import { t as r } from "../tensor-DJoc7gJU.js";
 const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
 async function x(t) {
   await c(t);

package/dist/checks/packUnpack.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { s as a, e } from "../index-D6Q1lPZO.js";
-import { t as c } from "../tensor2d-CSB4KOb0.js";
+import { s as a, e } from "../index-DOvlwCh-.js";
+import { t as c } from "../tensor2d-Bs9wZRc7.js";
 async function i(n) {
   await a(n);
   const r = c(

package/dist/checks/qkv.js CHANGED Viewed

@@ -1,19 +1,18 @@
-import { x as i, y as u, s as c, e as l } from "../index-D6Q1lPZO.js";
-import { c as m } from "../tensor-CzmOBsdf.js";
-import { t as f } from "../tensor2d-CSB4KOb0.js";
-function h(t, e, n) {
-  if (m(t), e != null && e.length !== 3)
+import { $ as i, a0 as u, a1 as c, s as l, e as h } from "../index-DOvlwCh-.js";
+import { t as f } from "../tensor2d-Bs9wZRc7.js";
+function m(t, e, n) {
+  if (i(t), e != null && e.length !== 3)
     throw new Error("tensor3d() requires shape to have three numbers");
-  const r = i(t, n);
+  const r = u(t, n);
   if (r.length !== 3 && r.length !== 1)
     throw new Error("tensor3d() requires values to be number[][][] or flat/TypedArray");
   if (r.length === 1 && e == null)
     throw new Error("tensor3d() requires shape to be provided when `values` are a flat array");
-  return u(t, e, r, n);
+  return c(t, e, r, n);
 }
-async function p(t) {
-  await c(t);
-  const e = h(
+async function y(t) {
+  await l(t);
+  const e = m(
     [
       [
         [0.1, 0.2],
@@ -27,9 +26,9 @@ async function p(t) {
       [0.7, 0.8, 1.1, 1.2, 1.5, 1.6]
     ],
     [2, 6]
-  ), r = l().runKernel("QKV", { x: e, kernel: n }, { heads: 1 }), o = await r[0].array(), a = await r[1].array(), s = await r[2].array();
-  return [o, a, s];
+  ), r = h().runKernel("QKV", { x: e, kernel: n }, { heads: 1 }), a = await r[0].array(), o = await r[1].array(), s = await r[2].array();
+  return [a, o, s];
 }
 export {
-  p as execute
+  y as execute
 };

package/dist/checks/rope.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import t from "../layers/RoPECache.js";
-import { s as c, e as i } from "../index-D6Q1lPZO.js";
-import { t as p } from "../tensor4d-D7bLqGqz.js";
+import { s as c, e as i } from "../index-DOvlwCh-.js";
+import { t as p } from "../tensor4d-BARPdTaS.js";
 async function y(a) {
   await c(a);
   const o = p(

package/dist/clip_by_value-BnO7-a88.js ADDED Viewed

@@ -0,0 +1,12 @@
+import { A as a, B as m, l as y, q as B, E as c, F as f } from "./index-DOvlwCh-.js";
+function p(o, s, t) {
+  const r = m(o, "x", "clipByValue");
+  if (y(s <= t, () => `Error in clip: min (${s}) must be less than or equal to max (${t}).`), s === t)
+    return B(r.shape, s, r.dtype);
+  const n = { x: r }, e = { clipValueMin: s, clipValueMax: t };
+  return c.runKernel(f, n, e);
+}
+const E = /* @__PURE__ */ a({ clipByValue_: p });
+export {
+  E as c
+};

package/dist/complex-DjxcVmoX.js ADDED Viewed

@@ -0,0 +1,11 @@
+import { A as t, B as s, ab as n, E as m, ac as r } from "./index-DOvlwCh-.js";
+function l(o, c) {
+  const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
+  n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
+  const p = { real: a, imag: e };
+  return m.runKernel(r, p);
+}
+const i = /* @__PURE__ */ t({ complex_: l });
+export {
+  i as c
+};

package/dist/concat-BV8bt5H-.js ADDED Viewed

@@ -0,0 +1,17 @@
+import { A as s, l as a, G as p, D as i, E as l, H as f } from "./index-DOvlwCh-.js";
+function h(n, e = 0) {
+  a(n.length >= 1, () => "Pass at least one tensor to concat");
+  const t = p(n, "tensors", "concat", "string_or_numeric");
+  if (t[0].dtype === "complex64" && t.forEach((o) => {
+    if (o.dtype !== "complex64")
+      throw new Error(`Cannot concatenate complex64 tensors with a tensor
+          with dtype ${o.dtype}. `);
+  }), t.length === 1)
+    return i(t[0]);
+  const r = t, c = { axis: e };
+  return l.runKernel(f, r, c);
+}
+const u = /* @__PURE__ */ s({ concat_: h });
+export {
+  u as c
+};

package/dist/{concat_util-iBYIyuQe.js → concat_util-DpW8mL_l.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { a as s } from "./tensor-CzmOBsdf.js";
+import { l as s } from "./index-DOvlwCh-.js";
 function h(n, o) {
   const t = n[0].length;
   n.forEach((a, c) => {