@genai-fi/nanogpt 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. package/dist/Generator.d.ts +10 -5
  2. package/dist/Generator.js +11760 -146
  3. package/dist/{RealDiv-zz7FpkKX.js → RealDiv-Ds-jvL09.js} +28 -30
  4. package/dist/Reshape-Cd6e-Otn.js +14 -0
  5. package/dist/{Reshape-CHdUjC72.js → Reshape-Ct266DEk.js} +21 -23
  6. package/dist/TeachableLLM.d.ts +4 -3
  7. package/dist/TeachableLLM.js +15 -16
  8. package/dist/Trainer.d.ts +2 -2
  9. package/dist/Trainer.js +6 -6
  10. package/dist/{axis_util-BsIr9ZNu.js → axis_util-DofAuy0p.js} +1 -1
  11. package/dist/backend.js +2 -2
  12. package/dist/{backend_util-B1XRLuq9.js → backend_util-C7NWHpv7.js} +72 -73
  13. package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-B0Vls736.js} +52 -54
  14. package/dist/broadcast_to-DDaNMbX7.js +28 -0
  15. package/dist/checks/appendCache.js +2 -2
  16. package/dist/checks/attentionMask.js +3 -3
  17. package/dist/checks/gelu.js +2 -2
  18. package/dist/checks/matMulGelu.js +7 -11
  19. package/dist/checks/normRMS.js +9 -9
  20. package/dist/checks/normRMSGrad.js +3 -3
  21. package/dist/checks/packUnpack.js +2 -2
  22. package/dist/checks/qkv.js +11 -12
  23. package/dist/checks/rope.js +2 -2
  24. package/dist/clip_by_value-Dn5tzexi.js +12 -0
  25. package/dist/complex-DClmWqJt.js +11 -0
  26. package/dist/concat-C6X3AAlQ.js +17 -0
  27. package/dist/{concat_util-iBYIyuQe.js → concat_util-CHsJFZJJ.js} +1 -1
  28. package/dist/{dataset-D2P7rHAw.js → dataset-DcjWqUVQ.js} +135 -137
  29. package/dist/dropout-OxuaJz6z.js +92 -0
  30. package/dist/expand_dims-BzfJK2uc.js +11 -0
  31. package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-eS9QJ6ut.js} +1 -1
  32. package/dist/floor-DIb-lN_u.js +9 -0
  33. package/dist/gather-BcO5UQNJ.js +9 -0
  34. package/dist/{gelu-Bmhopi0J.js → gelu-DqTbCx5x.js} +10 -11
  35. package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-CJcbnKPC.js} +841 -1015
  36. package/dist/index-D0RBWjq8.js +3520 -0
  37. package/dist/{index-DRyE072i.js → index-Dj5TkmPY.js} +330 -331
  38. package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CSaumNDs.js} +132 -134
  39. package/dist/layers/BaseLayer.js +15 -16
  40. package/dist/layers/CausalSelfAttention.js +6 -6
  41. package/dist/layers/MLP.js +4 -4
  42. package/dist/layers/PositionEmbedding.js +7 -7
  43. package/dist/layers/RMSNorm.js +3 -3
  44. package/dist/layers/RoPECache.js +9 -9
  45. package/dist/layers/TiedEmbedding.js +6 -6
  46. package/dist/layers/TransformerBlock.js +1 -1
  47. package/dist/loader/loadTransformers.js +1 -1
  48. package/dist/loader/oldZipLoad.js +21 -22
  49. package/dist/log_sum_exp-VLZgbFAH.js +39 -0
  50. package/dist/main.d.ts +1 -1
  51. package/dist/main.js +49 -50
  52. package/dist/{matMul16-fEAJ4smh.js → matMul16-cDxwemKj.js} +14 -15
  53. package/dist/matMulGelu-B2s_80-H.js +163 -0
  54. package/dist/mat_mul-DxpNTCRz.js +11 -0
  55. package/dist/mod-PrOKlFxH.js +11 -0
  56. package/dist/models/NanoGPTV1.js +2 -2
  57. package/dist/models/model.js +13 -14
  58. package/dist/ones-BX_wEgzB.js +14 -0
  59. package/dist/ops/adamAdjust.js +1 -1
  60. package/dist/ops/adamMoments.js +1 -1
  61. package/dist/ops/add16.js +1 -1
  62. package/dist/ops/appendCache.js +3 -3
  63. package/dist/ops/attentionMask.js +1 -1
  64. package/dist/ops/concat16.js +2 -2
  65. package/dist/ops/cpu/adamAdjust.js +12 -13
  66. package/dist/ops/cpu/adamMoments.js +6 -7
  67. package/dist/ops/cpu/appendCache.js +7 -8
  68. package/dist/ops/cpu/attentionMask.js +11 -11
  69. package/dist/ops/cpu/fusedSoftmax.js +10 -11
  70. package/dist/ops/cpu/gatherSub.js +10 -11
  71. package/dist/ops/cpu/gelu.js +14 -15
  72. package/dist/ops/cpu/matMul16.js +6 -7
  73. package/dist/ops/cpu/matMulGelu.js +5 -6
  74. package/dist/ops/cpu/matMulMul.js +3 -4
  75. package/dist/ops/cpu/mulDropout.js +3 -4
  76. package/dist/ops/cpu/normRMS.js +11 -12
  77. package/dist/ops/cpu/qkv.js +8 -9
  78. package/dist/ops/cpu/rope.js +9 -10
  79. package/dist/ops/cpu/scatterSub.js +14 -16
  80. package/dist/ops/dot16.js +2 -2
  81. package/dist/ops/gatherSub.js +1 -1
  82. package/dist/ops/gelu.js +2 -2
  83. package/dist/ops/grads/add16.js +10 -11
  84. package/dist/ops/grads/attentionMask.js +5 -6
  85. package/dist/ops/grads/gelu.js +3 -4
  86. package/dist/ops/grads/matMul16.js +4 -5
  87. package/dist/ops/grads/matMulGelu.js +8 -9
  88. package/dist/ops/grads/normRMS.js +9 -10
  89. package/dist/ops/grads/pack16.js +4 -5
  90. package/dist/ops/grads/qkv.js +17 -19
  91. package/dist/ops/grads/rope.js +3 -5
  92. package/dist/ops/grads/softmax16.js +3 -4
  93. package/dist/ops/grads/unpack16.js +3 -4
  94. package/dist/ops/grads/utils.d.ts +1 -0
  95. package/dist/ops/grads/utils.js +8 -4
  96. package/dist/ops/matMul16.js +3 -3
  97. package/dist/ops/matMulGelu.js +2 -2
  98. package/dist/ops/matMulMul.js +1 -1
  99. package/dist/ops/mul16.js +1 -1
  100. package/dist/ops/mulDrop.js +1 -1
  101. package/dist/ops/normRMS.js +1 -1
  102. package/dist/ops/pack16.js +3 -4
  103. package/dist/ops/qkv.js +4 -8
  104. package/dist/ops/reshape16.js +16 -18
  105. package/dist/ops/rope.d.ts +1 -1
  106. package/dist/ops/rope.js +3 -8
  107. package/dist/ops/scatterSub.js +1 -1
  108. package/dist/ops/slice16.js +2 -2
  109. package/dist/ops/softmax16.js +5 -8
  110. package/dist/ops/sub16.js +1 -1
  111. package/dist/ops/sum16.js +2 -2
  112. package/dist/ops/transpose16.js +23 -24
  113. package/dist/ops/unpack16.js +2 -2
  114. package/dist/ops/webgl/adamAdjust.js +2 -3
  115. package/dist/ops/webgl/adamMoments.js +1 -2
  116. package/dist/ops/webgl/appendCache.js +1 -2
  117. package/dist/ops/webgl/attentionMask.js +5 -6
  118. package/dist/ops/webgl/fusedSoftmax.js +6 -8
  119. package/dist/ops/webgl/gatherSub.js +6 -7
  120. package/dist/ops/webgl/gelu.js +2 -3
  121. package/dist/ops/webgl/log.js +11 -12
  122. package/dist/ops/webgl/matMul16.js +15 -16
  123. package/dist/ops/webgl/matMulGelu.js +7 -111
  124. package/dist/ops/webgl/matMulMul.js +14 -15
  125. package/dist/ops/webgl/mulDropout.js +8 -9
  126. package/dist/ops/webgl/normRMS.js +7 -8
  127. package/dist/ops/webgl/qkv.js +5 -6
  128. package/dist/ops/webgl/rope.js +7 -8
  129. package/dist/ops/webgl/scatterSub.js +5 -6
  130. package/dist/ops/webgpu/adamAdjust.js +10 -12
  131. package/dist/ops/webgpu/adamMoments.js +8 -10
  132. package/dist/ops/webgpu/add16.js +8 -9
  133. package/dist/ops/webgpu/appendCache.js +23 -25
  134. package/dist/ops/webgpu/attentionMask.js +10 -12
  135. package/dist/ops/webgpu/attentionMask32_program.js +2 -2
  136. package/dist/ops/webgpu/concat16.js +12 -14
  137. package/dist/ops/webgpu/gatherSub.js +9 -11
  138. package/dist/ops/webgpu/gelu.js +28 -29
  139. package/dist/ops/webgpu/matMul16.js +26 -28
  140. package/dist/ops/webgpu/matMul16_program.js +4 -5
  141. package/dist/ops/webgpu/mul16.js +7 -8
  142. package/dist/ops/webgpu/normRMS.js +17 -19
  143. package/dist/ops/webgpu/normRMSGrad.js +21 -28
  144. package/dist/ops/webgpu/pack16.js +12 -13
  145. package/dist/ops/webgpu/pack16_program.js +2 -2
  146. package/dist/ops/webgpu/qkv.js +13 -15
  147. package/dist/ops/webgpu/rope.js +25 -27
  148. package/dist/ops/webgpu/scatterSub.js +7 -9
  149. package/dist/ops/webgpu/slice16.js +21 -23
  150. package/dist/ops/webgpu/softmax16.js +17 -19
  151. package/dist/ops/webgpu/softmax16_program.js +2 -2
  152. package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
  153. package/dist/ops/webgpu/softmax16grad.js +7 -8
  154. package/dist/ops/webgpu/sub16.js +8 -9
  155. package/dist/ops/webgpu/sum16.js +19 -21
  156. package/dist/ops/webgpu/transpose16.js +19 -20
  157. package/dist/ops/webgpu/transpose16_program.js +2 -2
  158. package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
  159. package/dist/ops/webgpu/unpack16.js +3 -4
  160. package/dist/ops/webgpu/utils/binary_op.js +7 -8
  161. package/dist/ops/webgpu/utils/reductions.js +14 -22
  162. package/dist/ops-FJapAPfm.js +476 -0
  163. package/dist/pack16-k4jq6aMX.js +39 -0
  164. package/dist/patches/webgpu_backend.js +19 -20
  165. package/dist/patches/webgpu_base.js +1 -1
  166. package/dist/patches/webgpu_program.js +15 -16
  167. package/dist/{random_width-BVV9HveY.js → random_width-UGQn4OWb.js} +2506 -2761
  168. package/dist/range-CuGvVN2c.js +10 -0
  169. package/dist/relu-Cf80uA2p.js +9 -0
  170. package/dist/reshape-CkjKPPqB.js +9 -0
  171. package/dist/resize_nearest_neighbor-DB8k9KN_.js +175 -0
  172. package/dist/rope-BmZmp9uP.js +24 -0
  173. package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-BY22Cc-C.js} +1 -1
  174. package/dist/selu_util-BuLbmbrl.js +44 -0
  175. package/dist/{shared-CHhxz-O5.js → shared-B7USJZgw.js} +1 -1
  176. package/dist/{shared-D2NP_CpY.js → shared-BQboIImQ.js} +379 -381
  177. package/dist/slice-Aqy7KbJh.js +12 -0
  178. package/dist/{slice_util-DyjSAD0u.js → slice_util-D8CQRenR.js} +7 -7
  179. package/dist/{softmax-C9JQEtnO.js → softmax-faLoUZVT.js} +4 -5
  180. package/dist/split-BNz5jcGc.js +9 -0
  181. package/dist/squeeze--YMgaAAf.js +10 -0
  182. package/dist/stack-WJK22CFn.js +11 -0
  183. package/dist/step-dXR33iOg.js +261 -0
  184. package/dist/sum-BdplSvq_.js +11 -0
  185. package/dist/{tensor-0r5yOo2R.js → tensor-BQqrDvpx.js} +1 -1
  186. package/dist/tensor1d-LxP9asMm.js +11 -0
  187. package/dist/{tensor2d-CSB4KOb0.js → tensor2d-BN1sSfQO.js} +6 -7
  188. package/dist/{tensor4d-D7bLqGqz.js → tensor4d-DVwr7pLF.js} +6 -7
  189. package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-Vi4JfLzT.js} +256 -265
  190. package/dist/tile-CvN_LyVr.js +11 -0
  191. package/dist/tokeniser/BaseTokeniser.d.ts +27 -0
  192. package/dist/tokeniser/BaseTokeniser.js +94 -0
  193. package/dist/tokeniser/CharTokeniser.d.ts +4 -3
  194. package/dist/tokeniser/CharTokeniser.js +46 -32
  195. package/dist/tokeniser/bpe.d.ts +4 -3
  196. package/dist/tokeniser/bpe.js +60 -45
  197. package/dist/tokeniser/type.d.ts +11 -0
  198. package/dist/training/Adam.js +2 -2
  199. package/dist/training/AdamExt.js +1 -1
  200. package/dist/training/DatasetBuilder.d.ts +2 -2
  201. package/dist/training/DatasetBuilder.js +32 -36
  202. package/dist/training/FullTrainer.js +1 -1
  203. package/dist/training/Trainer.d.ts +3 -3
  204. package/dist/training/Trainer.js +2 -2
  205. package/dist/training/sparseCrossEntropy.js +5 -5
  206. package/dist/transpose-JawVKyZy.js +36 -0
  207. package/dist/unsorted_segment_sum-LAbmE9G4.js +277 -0
  208. package/dist/utilities/dummy.js +3 -3
  209. package/dist/utilities/multinomialCPU.js +2 -2
  210. package/dist/utilities/packed.d.ts +1 -4
  211. package/dist/utilities/packed.js +10 -745
  212. package/dist/utilities/performance.js +1 -1
  213. package/dist/utilities/profile.js +1 -1
  214. package/dist/utilities/safetensors.js +2 -2
  215. package/dist/utilities/sentences.js +5 -5
  216. package/dist/utilities/weights.js +2 -2
  217. package/dist/{variable-DzfrwYuP.js → variable-DQ9yYgEU.js} +1 -1
  218. package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-CAE4RICo.js} +177 -171
  219. package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-BdovYhXr.js} +34 -35
  220. package/dist/zeros-DeiE2zTa.js +13 -0
  221. package/dist/zeros_like-BAz3iKru.js +721 -0
  222. package/package.json +4 -2
  223. package/dist/Reshape-CDVLyVfz.js +0 -16
  224. package/dist/broadcast_to-B0ChcDaz.js +0 -30
  225. package/dist/complex-BBiRlsVq.js +0 -13
  226. package/dist/concat-DmBLPVGC.js +0 -19
  227. package/dist/dropout-B1x1kYMa.js +0 -99
  228. package/dist/expand_dims-ouvfxQ1n.js +0 -13
  229. package/dist/gather-CH9sdacz.js +0 -10
  230. package/dist/index-D6Q1lPZO.js +0 -2157
  231. package/dist/log_sum_exp-D3ftBNY5.js +0 -41
  232. package/dist/mat_mul-C59XWcJd.js +0 -12
  233. package/dist/mod-DESSvHIU.js +0 -12
  234. package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
  235. package/dist/ones-jU9jlQvM.js +0 -15
  236. package/dist/ops-BFDtP6th.js +0 -645
  237. package/dist/pack16-CmVZs6af.js +0 -41
  238. package/dist/patches/PackedTensor.d.ts +0 -12
  239. package/dist/patches/PackedTensor.js +0 -11
  240. package/dist/patches/engine.d.ts +0 -261
  241. package/dist/patches/engine.js +0 -12
  242. package/dist/patches/tape.d.ts +0 -12
  243. package/dist/patches/tape.js +0 -5
  244. package/dist/range-ZZZD60Fx.js +0 -11
  245. package/dist/reciprocal-CrYlsAGD.js +0 -10
  246. package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
  247. package/dist/relu-BYDneVPn.js +0 -10
  248. package/dist/reshape-CaPQzFvz.js +0 -10
  249. package/dist/rope-s4W2XO9B.js +0 -32
  250. package/dist/selu_util-BGPXmd4B.js +0 -303
  251. package/dist/sin-Djs4aQiu.js +0 -16
  252. package/dist/slice-DvovR5wq.js +0 -13
  253. package/dist/split-DBck65sX.js +0 -10
  254. package/dist/squeeze-C00Ipm_7.js +0 -11
  255. package/dist/stack-ChnHwRpX.js +0 -13
  256. package/dist/sum-ywRJj3Zr.js +0 -12
  257. package/dist/tensor-CzmOBsdf.js +0 -909
  258. package/dist/tensor1d-BlUT89BP.js +0 -12
  259. package/dist/tensor_util-DfwaWayG.js +0 -523
  260. package/dist/tile-CR074jmp.js +0 -13
  261. package/dist/transpose-DH4gmHvu.js +0 -38
  262. package/dist/zeros-DBFVbpv5.js +0 -14
@@ -1,11 +1,10 @@
1
- import { s as m, i as w, K as I, a as d, z as A, q as _ } from "./tensor-CzmOBsdf.js";
2
- import { d as y, f as M, h as T, c as b, e as D, a as L, b as x, g as W } from "./axis_util-BsIr9ZNu.js";
3
- import { j as v, h as F, q as N } from "./index-D6Q1lPZO.js";
4
- import { a as C, c as P } from "./concat_util-iBYIyuQe.js";
5
- import { S as z, a as q, b as B, c as U, d as j, e as G, f as H, g as V, h as Z, i as k, j as K, k as J, l as X, m as Y, s as Q, n as ee, t as te } from "./selu_util-BGPXmd4B.js";
6
- import { s as se } from "./slice_util-DyjSAD0u.js";
7
- import { a0 as ne, u as re, w as oe } from "./tensor_util-DfwaWayG.js";
8
- import { c as ae, v as ie, a as ue } from "./scatter_nd_util-C7zXRT_h.js";
1
+ import { V as m, a9 as w, aU as I, y as d, ax as A, aB as _, $ as y, ad as M, a0 as T, aV as b, ak as D, aW as x } from "./index-D0RBWjq8.js";
2
+ import { d as L, f as W, h as v, c as F, e as N, a as C, b as P, g as z } from "./axis_util-DofAuy0p.js";
3
+ import { a as B, c as U } from "./concat_util-CHsJFZJJ.js";
4
+ import { c as V, b as G, d as H, f as j, g as q, h as Z, i as k, j as J, k as K, m as X, t as Y } from "./step-dXR33iOg.js";
5
+ import { S as Q, a as ee, b as te, g as se, c as ne, s as re } from "./selu_util-BuLbmbrl.js";
6
+ import { s as oe } from "./slice_util-D8CQRenR.js";
7
+ import { c as ae, v as ie, a as ue } from "./scatter_nd_util-BY22Cc-C.js";
9
8
  import { a as le, c as pe, b as ce, e as he, d as fe, g as ge, m as de, s as me } from "./complex_util-Yc1A_gV1.js";
10
9
  function Ee(e, t) {
11
10
  const r = e.shape.length, s = t.shape.length;
@@ -147,7 +146,7 @@ function Te(e, t, r) {
147
146
  s.push(e[n + 1] - t[n][0] - t[n][1]);
148
147
  return s;
149
148
  }
150
- const be = 0.3275911, De = 0.254829592, Le = -0.284496736, xe = 1.421413741, We = -1.453152027, ve = 1.061405429;
149
+ const be = 0.3275911, De = 0.254829592, xe = -0.284496736, Le = 1.421413741, We = -1.453152027, ve = 1.061405429;
151
150
  const E = "->", Fe = /->/g, S = ",", R = "...";
152
151
  function Ne(e, t) {
153
152
  e = e.replace(/\s/g, "");
@@ -213,22 +212,22 @@ function ze(e, t) {
213
212
  s.push([]);
214
213
  const o = [];
215
214
  for (let a = 0; a < r.length; ++a) {
216
- const u = r[a], p = Be(t, u);
215
+ const u = r[a], p = Ue(t, u);
217
216
  for (const c of p)
218
217
  o.indexOf(c) === -1 && (s[a].push(c), o.push(c));
219
218
  }
220
219
  return { path: r, steps: s };
221
220
  }
222
- function qe(e) {
221
+ function Be(e) {
223
222
  return e.every((t, r) => t === r);
224
223
  }
225
- function Be(e, t) {
224
+ function Ue(e, t) {
226
225
  const r = [];
227
226
  for (let s = 0; s < e.length; ++s)
228
227
  (e[s].length === 0 || e[s].indexOf(t) !== -1 || t === -1) && r.push(s);
229
228
  return r;
230
229
  }
231
- function Ue(e, t, r = 0) {
230
+ function Ve(e, t, r = 0) {
232
231
  let s = [];
233
232
  if (typeof t == "number")
234
233
  d(e.shape[r] % t === 0, () => "Number of splits must evenly divide the axis."), s = new Array(t).fill(e.shape[r] / t);
@@ -244,17 +243,17 @@ function Ue(e, t, r = 0) {
244
243
  }
245
244
  return s;
246
245
  }
247
- function je(e) {
246
+ function Ge(e) {
248
247
  return `Received SparseTensor with denseShape[0] = 0 but
249
248
  indices.shape[0] = ${e}`;
250
249
  }
251
- function Ge(e, t) {
250
+ function He(e, t) {
252
251
  return `indices(${e}, 0) is invalid: ${t} < 0`;
253
252
  }
254
- function He(e, t, r) {
253
+ function je(e, t, r) {
255
254
  return `indices(${e}, 0) is invalid: ${t} >= ${r}`;
256
255
  }
257
- function Ve(e, t) {
256
+ function qe(e, t) {
258
257
  return `only one output dimension may be -1, not both ${e} and ${t}`;
259
258
  }
260
259
  function Ze(e, t) {
@@ -263,12 +262,12 @@ function Ze(e, t) {
263
262
  function ke() {
264
263
  return "reshape cannot infer the missing input size for an empty tensor unless all specified input sizes are non-zero";
265
264
  }
266
- function Ke(e, t) {
265
+ function Je(e, t) {
267
266
  const r = m(e), s = m(t);
268
267
  return `Input to reshape is a SparseTensor with ${r}
269
268
  dense values, but the requested shape requires a multiple of ${s}. inputShape=${e} outputShape= ${t}`;
270
269
  }
271
- function Je(e, t) {
270
+ function Ke(e, t) {
272
271
  const r = m(e), s = m(t);
273
272
  return `Input to reshape is a tensor with ${r} dense values, but the requested shape has ${s}. inputShape=${e} outputShape=${t}`;
274
273
  }
@@ -336,11 +335,11 @@ function ot(e) {
336
335
  function at(e) {
337
336
  return e.map((t) => _(t));
338
337
  }
339
- const mt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
338
+ const dt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
340
339
  __proto__: null,
341
340
  ERF_A1: De,
342
- ERF_A2: Le,
343
- ERF_A3: xe,
341
+ ERF_A2: xe,
342
+ ERF_A3: Le,
344
343
  ERF_A4: We,
345
344
  ERF_A5: ve,
346
345
  ERF_P: be,
@@ -348,84 +347,84 @@ const mt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
348
347
  get RowPartitionType() {
349
348
  return f;
350
349
  },
351
- SELU_SCALE: z,
352
- SELU_SCALEALPHA: q,
353
- applyActivation: B,
354
- assertAndGetBroadcastShape: v,
355
- assertAxesAreInnerMostDims: y,
356
- assertParamsConsistent: C,
350
+ SELU_SCALE: Q,
351
+ SELU_SCALEALPHA: ee,
352
+ applyActivation: te,
353
+ assertAndGetBroadcastShape: y,
354
+ assertAxesAreInnerMostDims: L,
355
+ assertParamsConsistent: B,
357
356
  assignToTypedArray: le,
358
- axesAreInnerMostDims: M,
357
+ axesAreInnerMostDims: W,
359
358
  calculateShapes: ae,
360
359
  checkEinsumDimSizes: Pe,
361
- checkPadOnDimRoundingMode: U,
362
- combineLocations: T,
360
+ checkPadOnDimRoundingMode: V,
361
+ combineLocations: v,
363
362
  combineRaggedTensorToTensorShapes: Ie,
364
363
  complexWithEvenIndex: pe,
365
364
  complexWithOddIndex: ce,
366
- computeConv2DInfo: j,
367
- computeConv3DInfo: G,
368
- computeDefaultPad: H,
369
- computeDilation2DInfo: V,
365
+ computeConv2DInfo: G,
366
+ computeConv3DInfo: H,
367
+ computeDefaultPad: j,
368
+ computeDilation2DInfo: q,
370
369
  computeOptimalWindowSize: Oe,
371
- computeOutAndReduceShapes: b,
372
- computeOutShape: P,
370
+ computeOutAndReduceShapes: F,
371
+ computeOutShape: U,
373
372
  computePool2DInfo: Z,
374
373
  computePool3DInfo: k,
375
- convertConv2DDataFormat: K,
374
+ convertConv2DDataFormat: J,
376
375
  decodeEinsumEquation: Ne,
377
- eitherStridesOrDilationsAreOne: J,
378
- expandShapeToKeepDim: D,
376
+ eitherStridesOrDilationsAreOne: K,
377
+ expandShapeToKeepDim: N,
379
378
  exponent: he,
380
379
  exponents: fe,
381
380
  fromStringArrayToUint8: at,
382
381
  fromUint8ToStringArray: ot,
383
- getAxesPermutation: L,
384
- getBroadcastDims: F,
382
+ getAxesPermutation: C,
383
+ getBroadcastDims: M,
385
384
  getComplexWithIndex: ge,
386
385
  getEinsumComputePath: ze,
387
386
  getEinsumPermutation: Ce,
388
- getFusedBiasGradient: X,
389
- getFusedDyActivation: Y,
387
+ getFusedBiasGradient: se,
388
+ getFusedDyActivation: ne,
390
389
  getImageCenter: we,
391
- getInnerMostAxes: x,
390
+ getInnerMostAxes: P,
392
391
  getPermuted: _e,
393
392
  getRaggedRank: Se,
394
- getReductionAxes: N,
393
+ getReductionAxes: T,
395
394
  getReshaped: Ae,
396
395
  getReshapedPermuted: ye,
397
396
  getRowPartitionTypesHelper: $e,
398
397
  getSliceBeginCoords: Me,
399
398
  getSliceSize: Te,
400
- getSparseFillEmptyRowsIndicesDenseShapeMismatch: je,
401
- getSparseFillEmptyRowsNegativeIndexErrorMessage: Ge,
402
- getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: He,
399
+ getSparseFillEmptyRowsIndicesDenseShapeMismatch: Ge,
400
+ getSparseFillEmptyRowsNegativeIndexErrorMessage: He,
401
+ getSparseFillEmptyRowsOutOfRangeIndexErrorMessage: je,
403
402
  getSparseReshapeEmptyTensorZeroOutputDimErrorMessage: ke,
404
- getSparseReshapeInputOutputMismatchErrorMessage: Je,
405
- getSparseReshapeInputOutputMultipleErrorMessage: Ke,
406
- getSparseReshapeMultipleNegativeOneOutputDimErrorMessage: Ve,
403
+ getSparseReshapeInputOutputMismatchErrorMessage: Ke,
404
+ getSparseReshapeInputOutputMultipleErrorMessage: Je,
405
+ getSparseReshapeMultipleNegativeOneOutputDimErrorMessage: qe,
407
406
  getSparseReshapeNegativeOutputDimErrorMessage: Ze,
408
407
  getSparseSegmentReductionIndicesOutOfRangeErrorMessage: et,
409
408
  getSparseSegmentReductionNegativeSegmentIdsErrorMessage: Xe,
410
409
  getSparseSegmentReductionNonIncreasingSegmentIdsErrorMessage: Ye,
411
410
  getSparseSegmentReductionSegmentIdOutOfRangeErrorMessage: Qe,
412
- getUndoAxesPermutation: W,
413
- isIdentityPermutation: qe,
414
- log: ne,
411
+ getUndoAxesPermutation: z,
412
+ isIdentityPermutation: Be,
413
+ log: b,
415
414
  mergeRealAndImagArrays: de,
416
415
  prepareAndValidate: Ee,
417
- prepareSplitSize: Ue,
416
+ prepareSplitSize: Ve,
418
417
  segment_util: rt,
419
- shouldFuse: Q,
420
- slice_util: se,
418
+ shouldFuse: re,
419
+ slice_util: oe,
421
420
  splitRealAndImagArrays: me,
422
- stridesOrDilationsArePositive: ee,
423
- tupleValuesAreOne: te,
424
- upcastType: re,
421
+ stridesOrDilationsArePositive: X,
422
+ tupleValuesAreOne: Y,
423
+ upcastType: D,
425
424
  validateDefaultValueShape: Re,
426
425
  validateInput: ie,
427
426
  validateUpdateShape: ue,
428
- warn: oe
427
+ warn: x
429
428
  }, Symbol.toStringTag, { value: "Module" }));
430
429
  export {
431
430
  _e as A,
@@ -437,37 +436,37 @@ export {
437
436
  Pe as G,
438
437
  ze as H,
439
438
  Ce as I,
440
- qe as J,
439
+ Be as J,
441
440
  Ee as K,
442
441
  nt as L,
443
442
  we as M,
444
- Ue as N,
443
+ Ve as N,
445
444
  st as O,
446
445
  tt as P,
447
446
  f as R,
448
447
  Se as a,
449
- mt as b,
448
+ dt as b,
450
449
  Oe as c,
451
450
  Ie as d,
452
451
  at as e,
453
452
  ot as f,
454
453
  $e as g,
455
- je as h,
456
- Ge as i,
457
- He as j,
458
- Ve as k,
454
+ Ge as h,
455
+ He as i,
456
+ je as j,
457
+ qe as k,
459
458
  Ze as l,
460
459
  ke as m,
461
- Ke as n,
462
- Je as o,
460
+ Je as n,
461
+ Ke as o,
463
462
  Xe as p,
464
463
  Ye as q,
465
464
  Qe as r,
466
465
  et as s,
467
466
  De as t,
468
- Le as u,
467
+ xe as u,
469
468
  Re as v,
470
- xe as w,
469
+ Le as w,
471
470
  We as x,
472
471
  ve as y,
473
472
  Ae as z
@@ -1,23 +1,21 @@
1
- import { e as D, J as W } from "./index-D6Q1lPZO.js";
2
- import { e as g, a as _, y as O, s as x, z as $, A as F, B as K, o as Z, q as j, g as X, i as q } from "./tensor-CzmOBsdf.js";
3
- import { m as J, f as ee, P as te } from "./webgpu_program-DzaQiqel.js";
4
- import { i as se, G as N } from "./webgpu_util-0_ubCEHJ.js";
5
- import { K as re, J as ne } from "./tensor_util-DfwaWayG.js";
6
- import { m as k } from "./complex_util-Yc1A_gV1.js";
7
- const l = g();
8
- l.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
9
- l.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
10
- l.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
11
- l.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
12
- l.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
13
- l.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
14
- l.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
15
- l.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
16
- l.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
17
- l.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
18
- l.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
19
- l.registerFlag("WEBGPU_PRINT_SHADER", () => "");
20
- l.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
1
+ import { ab as g, au as $, av as K, e as D, y as _, aw as O, V as x, ax as Z, at as W, ay as F, az as j, aA as X, aB as J, ae as ee, a9 as k } from "./index-D0RBWjq8.js";
2
+ import { m as te, f as se, P as re } from "./webgpu_program-CAE4RICo.js";
3
+ import { i as ne, G as q } from "./webgpu_util-BdovYhXr.js";
4
+ import { m as N } from "./complex_util-Yc1A_gV1.js";
5
+ const d = g();
6
+ d.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
7
+ d.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
8
+ d.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
9
+ d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
10
+ d.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
11
+ d.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
12
+ d.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
13
+ d.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
14
+ d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
15
+ d.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
16
+ d.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
17
+ d.registerFlag("WEBGPU_PRINT_SHADER", () => "");
18
+ d.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
21
19
  class ae {
22
20
  constructor(e) {
23
21
  e && (this.vendor = e.vendor, this.architecture = e.architecture, this.intelGPUGeneration = this.getIntelGPUGeneration());
@@ -70,8 +68,8 @@ class ie {
70
68
  }), this.freeBuffers = /* @__PURE__ */ new Map(), this.usedBuffers = /* @__PURE__ */ new Map(), this.numUsedBuffers = 0, this.numFreeBuffers = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
71
69
  }
72
70
  }
73
- function z(d, e) {
74
- return `${d}_${e}`;
71
+ function z(l, e) {
72
+ return `${l}_${e}`;
75
73
  }
76
74
  class oe {
77
75
  constructor(e) {
@@ -122,30 +120,30 @@ class oe {
122
120
  }), this.freeTextures = /* @__PURE__ */ new Map(), this.usedTextures = /* @__PURE__ */ new Map(), this.numUsedTextures = 0, this.numFreeTextures = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
123
121
  }
124
122
  }
125
- function L(d, e, t, s) {
126
- return `${d}_${e}_${t}_${s}`;
123
+ function L(l, e, t, s) {
124
+ return `${l}_${e}_${t}_${s}`;
127
125
  }
128
- function Q(d) {
129
- if (d === "rgba8unorm")
126
+ function Q(l) {
127
+ if (l === "rgba8unorm")
130
128
  return 16;
131
- throw new Error(`${d} is not supported!`);
129
+ throw new Error(`${l} is not supported!`);
132
130
  }
133
- const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (d, e) => {
134
- const t = d.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
131
+ const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (l, e) => {
132
+ const t = l.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
135
133
  if (n.every((a) => a <= t))
136
134
  return n;
137
135
  _(n[0] > t && s.y === void 0 && s.z === void 0, () => "Dispatch size exceeds WebGPU limits in Y or Z dimension.");
138
136
  let r = Math.ceil(Math.sqrt(n[0]));
139
137
  return r > t ? (r = Math.ceil(Math.cbrt(n[0])), _(r <= t, () => "Total dispatch size exceeds WebGPU maximum."), [r, r, r]) : [r, r, 1];
140
138
  };
141
- class R extends re {
139
+ class R extends $ {
142
140
  nextDataId() {
143
141
  return R.nextDataId++;
144
142
  }
145
143
  constructor(e, t) {
146
- if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !se())
144
+ if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !ne())
147
145
  throw new Error("WebGPU is not supported on this device");
148
- this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new ne(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
146
+ this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new K(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
149
147
  device: e,
150
148
  format: "bgra8unorm"
151
149
  }), document.body.appendChild(this.dummyCanvas));
@@ -250,7 +248,7 @@ class R extends re {
250
248
  if (s != null || t.dtype === "string")
251
249
  return s;
252
250
  if (t.dtype === "complex64") {
253
- const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(k(E, B).buffer, "float32");
251
+ const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(N(E, B).buffer, "float32");
254
252
  return this.convertAndCacheOnCPU(e, y), y;
255
253
  }
256
254
  this.hasReadSyncWarned || (this.hasReadSyncWarned = !0, console.warn("The performance of synchronously reading data from GPU to CPU is poor on the webgpu backend, please use asynchronous APIs instead."));
@@ -266,7 +264,7 @@ class R extends re {
266
264
  alphaMode: r[B]
267
265
  }), y.getCurrentTexture();
268
266
  }).map((E, B) => {
269
- const y = f * 4, b = (P, S, v) => {
267
+ const y = f * 4, G = (P, S, v) => {
270
268
  this.ensureCommandEncoderReady(), this.commandEncoder.copyBufferToTexture({
271
269
  buffer: a,
272
270
  bytesPerRow: y,
@@ -281,20 +279,20 @@ class R extends re {
281
279
  willReadFrequently: !0
282
280
  });
283
281
  I.clearRect(0, 0, P, S), I.drawImage(h[B], 0, 0);
284
- const G = I.getImageData(0, 0, P, S).data, H = r[B], M = new Uint8ClampedArray(o, v, P * S * 4);
282
+ const b = I.getImageData(0, 0, P, S).data, H = r[B], M = new Uint8ClampedArray(o, v, P * S * 4);
285
283
  for (let p = 0; p < M.length; p += 4)
286
284
  if (H === "premultiplied")
287
- M[p + 3] = G[p + 3];
285
+ M[p + 3] = b[p + 3];
288
286
  else {
289
- const V = G[p];
290
- M[p] = G[p + 2], M[p + 1] = G[p + 1], M[p + 2] = V;
287
+ const V = b[p];
288
+ M[p] = b[p + 2], M[p + 1] = b[p + 1], M[p + 2] = V;
291
289
  }
292
290
  }, Y = Math.floor(u / (f * c));
293
291
  let T = f, U = c, C = 0;
294
292
  for (let P = 0; P < Y; P++)
295
- b(T, U, C), C += f * c * 4;
293
+ G(T, U, C), C += f * c * 4;
296
294
  const A = u % (f * c);
297
- U = Math.floor(A / f), U > 0 && (b(T, U, C), C += U * (f * 4)), T = A % f, T > 0 && b(T, 1, C);
295
+ U = Math.floor(A / f), U > 0 && (G(T, U, C), C += U * (f * 4)), T = A % f, T > 0 && G(T, 1, C);
298
296
  });
299
297
  const w = O(o, t.dtype);
300
298
  return this.convertAndCacheOnCPU(e, w), w;
@@ -311,7 +309,7 @@ class R extends re {
311
309
  this.read(t.complexTensorInfos.real.dataId),
312
310
  this.read(t.complexTensorInfos.imag.dataId)
313
311
  ]), a = r[0], i = r[1];
314
- n = k(a, i);
312
+ n = N(a, i);
315
313
  } else {
316
314
  const r = await this.getBufferData(t.resource);
317
315
  n = O(r, t.dtype);
@@ -339,7 +337,7 @@ class R extends re {
339
337
  refCount: 1,
340
338
  external: e.zeroCopy
341
339
  });
342
- const a = this.tensorMap.get(r), i = N(a.dtype) * x(a.shape);
340
+ const a = this.tensorMap.get(r), i = q(a.dtype) * x(a.shape);
343
341
  if (e.buffer.size < i)
344
342
  throw new Error(`GPUBuffer size(${e.buffer.size}) is smaller than tensor size(${i})!`);
345
343
  if ((e.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC))
@@ -365,7 +363,7 @@ class R extends re {
365
363
  const t = this.readSync(e.dataId);
366
364
  if (e.dtype === "string")
367
365
  try {
368
- const s = t.map((n) => $(n));
366
+ const s = t.map((n) => Z(n));
369
367
  return W(e.shape, e.dtype, s);
370
368
  } catch {
371
369
  throw new Error("Failed to decode encoded string bytes into utf-8");
@@ -385,10 +383,10 @@ class R extends re {
385
383
  kernelMs: null,
386
384
  wallMs: null
387
385
  }, u = await Promise.all(r);
388
- return i.kernelMs = K(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
386
+ return i.kernelMs = j(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
389
387
  }
390
388
  makeTensorInfo(e, t, s) {
391
- return t === "string" && s != null && s.length > 0 && Z(s[0]) && (s = s.map((r) => j(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
389
+ return t === "string" && s != null && s.length > 0 && X(s[0]) && (s = s.map((r) => J(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
392
390
  }
393
391
  tensorToBinding(e) {
394
392
  if (!e)
@@ -400,16 +398,16 @@ class R extends re {
400
398
  const t = this.tensorMap.get(e);
401
399
  if (t.resource != null)
402
400
  return;
403
- const s = N(t.dtype) * x(t.shape);
401
+ const s = q(t.dtype) * x(t.shape);
404
402
  let n;
405
403
  const r = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
406
404
  if (t.values) {
407
405
  if (n = this.bufferManager.acquireBuffer(s, r, !0), n.mapState === "unmapped") {
408
406
  const a = this.bufferManager.acquireBuffer(s, GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, !0, !1), i = a.getMappedRange();
409
- t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
407
+ t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
410
408
  } else {
411
409
  const a = n.getMappedRange();
412
- t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
410
+ t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
413
411
  }
414
412
  t.values = null;
415
413
  } else
@@ -457,7 +455,7 @@ class R extends re {
457
455
  }
458
456
  runWebGPUProgram(e, t, s, n, r) {
459
457
  if (r || (r = this.makeTensorInfo(e.outputShape, s)), x(r.shape) === 0)
460
- return this.tensorMap.get(r.dataId).values = X(r.dtype, 0), r;
458
+ return this.tensorMap.get(r.dataId).values = ee(r.dtype, 0), r;
461
459
  this.uploadToGPU(r.dataId), e.dispatch = fe(this.device, e);
462
460
  const a = t.map((u, o) => {
463
461
  if (u.dtype === "complex64")
@@ -470,9 +468,9 @@ class R extends re {
470
468
  name: e.variableNames[o]
471
469
  };
472
470
  });
473
- e.shaderKey = J(e, a, r);
471
+ e.shaderKey = te(e, a, r);
474
472
  const i = g().getBool("WEBGPU_ENGINE_COMPILE_ONLY");
475
- return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = ee(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
473
+ return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = se(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
476
474
  }
477
475
  recordAndSubmit(e, t, s, n) {
478
476
  if (e.pipeline instanceof Promise)
@@ -484,11 +482,11 @@ class R extends re {
484
482
  const h = "int32";
485
483
  a.map((m) => {
486
484
  r.push({ type: h, data: m });
487
- const w = q(m);
485
+ const w = k(m);
488
486
  r.push({ type: h, data: w });
489
487
  });
490
488
  } else {
491
- const h = q(t.shape);
489
+ const h = k(t.shape);
492
490
  r.push({ type: i, data: h });
493
491
  }
494
492
  if (e.size) {
@@ -520,7 +518,7 @@ class R extends re {
520
518
  querySet: this.querySet,
521
519
  beginningOfPassWriteIndex: 0,
522
520
  endOfPassWriteIndex: 1
523
- }, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === te.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
521
+ }, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === re.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
524
522
  }
525
523
  async getQueryTime() {
526
524
  if (!this.supportTimestampQuery)
@@ -0,0 +1,28 @@
1
+ import { q as h, u as f, w as p, x as g, E as u, T } from "./index-D0RBWjq8.js";
2
+ import { r as b } from "./reshape-CkjKPPqB.js";
3
/**
 * Implementation behind the exported `broadcastTo` op (wrapped below as
 * `broadcastTo_`): expands input `e` so that its shape matches target shape `r`.
 *
 * @param e Input value; passed through helper `f` with the labels
 *     "broadcastTo" / "x" (presumably a tensor-conversion helper — confirm
 *     against the index bundle).
 * @param r Target shape (array-like of dimension sizes).
 * @returns `g(n)` when no dimension needs repeating, otherwise the result of
 *     running kernel `T` with `{ x: n }` and `{ reps }`.
 * @throws Error when `r` has fewer dimensions than the input's rank, or when a
 *     mismatching input dimension is not 1.
 */
+ function m(e, r) {
4
+ let n = f(e, "broadcastTo", "x");
5
// Keep the shape as it was before any reshape; it is only used in the error message below.
+ const a = n.shape;
6
// `p(r)` is called for its side effect only (presumably validates the shape — confirm).
+ if (p(r), r.length < n.rank)
7
+ throw new Error(`broadcastTo(): shape.length=${r.length} < input.rank=${n.rank}.`);
8
// Target has more dimensions than the input: left-pad the input shape with 1s
// and pass it to `b` (imported from the reshape chunk) so both ranks match.
+ if (r.length > n.rank) {
9
+ const t = n.shape.slice();
10
+ for (; t.length < r.length; )
11
+ t.unshift(1);
12
+ n = b(n, t);
13
+ }
14
// `o` starts as a copy of the target shape and becomes the per-axis repeat counts.
+ const s = n.shape, o = Array.from(r);
15
+ for (let t = r.length - 1; t >= 0; t--)
16
// Dimension already matches: repeat count 1.
+ if (s[t] === r[t])
17
+ o[t] = 1;
18
// Dimension differs and is not 1: reject. Otherwise o[t] keeps the target size.
+ else if (n.shape[t] !== 1)
19
+ throw new Error(`broadcastTo(): [${a}] cannot be broadcast to [${r}].`);
20
// No axis has a repeat count above 1: skip the kernel and return `g(n)`
// (presumably a clone of the input — confirm).
+ if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
21
+ return g(n);
22
+ const i = { x: n }, c = { reps: o };
23
// Dispatch kernel `T` (presumably Tile, given the `reps` attribute — confirm).
+ return u.runKernel(T, i, c);
24
+ }
25
+ const E = /* @__PURE__ */ h({ broadcastTo_: m });
26
+ export {
27
+ E as b
28
+ };
@@ -1,5 +1,5 @@
1
- import { s, e as a } from "../index-D6Q1lPZO.js";
2
- import { t } from "../tensor4d-D7bLqGqz.js";
1
+ import { s, e as a } from "../index-D0RBWjq8.js";
2
+ import { t } from "../tensor4d-DVwr7pLF.js";
3
3
  async function u(e) {
4
4
  await s(e);
5
5
  const n = t(
@@ -1,6 +1,6 @@
1
- import { s as i, e } from "../index-D6Q1lPZO.js";
2
- import { t } from "../tensor4d-D7bLqGqz.js";
3
- import { t as a } from "../tensor2d-CSB4KOb0.js";
1
+ import { s as i, e } from "../index-D0RBWjq8.js";
2
+ import { t } from "../tensor4d-DVwr7pLF.js";
3
+ import { t as a } from "../tensor2d-BN1sSfQO.js";
4
4
  async function k(n) {
5
5
  await i(n);
6
6
  const s = t(
@@ -1,5 +1,5 @@
1
- import { s as e, e as o } from "../index-D6Q1lPZO.js";
2
- import { t as s } from "../tensor2d-CSB4KOb0.js";
1
+ import { s as e, e as o } from "../index-D0RBWjq8.js";
2
+ import { t as s } from "../tensor2d-BN1sSfQO.js";
3
3
  async function m(t) {
4
4
  await e(t);
5
5
  const r = s(
@@ -1,11 +1,7 @@
1
- import { s as n, e as s } from "../index-D6Q1lPZO.js";
2
- import "../random_width-BVV9HveY.js";
3
- import "../register_all_kernels-nvj2k7OC.js";
4
- import "../index-Cp39cXWe.js";
5
- import "../dataset-D2P7rHAw.js";
6
- import { t as e } from "../tensor2d-CSB4KOb0.js";
7
- async function f(t) {
8
- await n(t);
1
+ import { s as o, e as s } from "../index-D0RBWjq8.js";
2
+ import { t as e } from "../tensor2d-BN1sSfQO.js";
3
+ async function i(t) {
4
+ await o(t);
9
5
  const r = e(
10
6
  [
11
7
  [0.1, 0.2, 9, 10, 11],
@@ -15,7 +11,7 @@ async function f(t) {
15
11
  [0.3, 0.4, -9, -10, -11]
16
12
  ],
17
13
  [5, 5]
18
- ), o = e(
14
+ ), n = e(
19
15
  [
20
16
  [0.5, 0.6, 7e4, -8e3, 0],
21
17
  [0.7, 0.8, -7e4, 8e4, 0],
@@ -25,8 +21,8 @@ async function f(t) {
25
21
  ],
26
22
  [5, 5]
27
23
  );
28
- return await s().runKernel("MatMulGelu", { x: o, kernel: r }).array();
24
+ return await s().runKernel("MatMulGelu", { x: n, kernel: r }).array();
29
25
  }
30
26
  export {
31
- f as execute
27
+ i as execute
32
28
  };
@@ -1,14 +1,14 @@
1
- import { s as i, u as A, e as y } from "../index-D6Q1lPZO.js";
2
- import { a as h } from "../ops-BFDtP6th.js";
3
- import { t as p } from "../tensor1d-BlUT89BP.js";
4
- import { t as a } from "../tensor-0r5yOo2R.js";
1
+ import { s as u, a1 as A, e as y } from "../index-D0RBWjq8.js";
2
+ import { a as h } from "../ops-FJapAPfm.js";
3
+ import { t as p } from "../tensor1d-LxP9asMm.js";
4
+ import { t as r } from "../tensor-BQqrDvpx.js";
5
5
  const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
6
6
  async function k(t) {
7
- await i(t);
8
- const o = p(x, "float32"), n = a(w, [16, 128, 192], "float32"), s = a(M, [16, 128, 192], "float32"), e = (d, g) => {
9
- const u = y().runKernel("RMSNorm", { x: d, gamma: g });
10
- return h.meanSquaredError(u, s);
11
- }, { value: m, grads: r } = A(e)([n, o]), c = await m.array(), f = await r[0].array(), l = await r[1].array();
7
+ await u(t);
8
+ const o = p(x, "float32"), n = r(w, [16, 128, 192], "float32"), s = r(M, [16, 128, 192], "float32"), e = (d, g) => {
9
+ const i = y().runKernel("RMSNorm", { x: d, gamma: g });
10
+ return h.meanSquaredError(i, s);
11
+ }, { value: m, grads: a } = A(e)([n, o]), c = await m.array(), f = await a[0].array(), l = await a[1].array();
12
12
  return [c, f, l];
13
13
  }
14
14
  export {
@@ -1,6 +1,6 @@
1
- import { s as c, e as d } from "../index-D6Q1lPZO.js";
2
- import { t as f } from "../tensor1d-BlUT89BP.js";
3
- import { t as r } from "../tensor-0r5yOo2R.js";
1
+ import { s as c, e as d } from "../index-D0RBWjq8.js";
2
+ import { t as f } from "../tensor1d-LxP9asMm.js";
3
+ import { t as r } from "../tensor-BQqrDvpx.js";
4
4
  const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
5
5
  async function x(t) {
6
6
  await c(t);