@genai-fi/nanogpt 0.10.2 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. package/dist/Generator.js +11761 -171
  2. package/dist/{RealDiv-zz7FpkKX.js → RealDiv-KAPDe8zB.js} +23 -25
  3. package/dist/Reshape-BYkmUnAv.js +14 -0
  4. package/dist/{Reshape-CHdUjC72.js → Reshape-Zt6eb7yh.js} +18 -20
  5. package/dist/TeachableLLM.js +10 -11
  6. package/dist/{axis_util-BsIr9ZNu.js → axis_util-BaG7mf5A.js} +3 -3
  7. package/dist/backend.js +2 -2
  8. package/dist/{backend_util-B1XRLuq9.js → backend_util-RCe-rHaj.js} +72 -73
  9. package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-DE3ACOLx.js} +45 -47
  10. package/dist/broadcast_to-B3eYlZm7.js +28 -0
  11. package/dist/checks/appendCache.js +2 -2
  12. package/dist/checks/attentionMask.js +3 -3
  13. package/dist/checks/gelu.js +2 -2
  14. package/dist/checks/matMulGelu.js +7 -11
  15. package/dist/checks/normRMS.js +9 -9
  16. package/dist/checks/normRMSGrad.js +3 -3
  17. package/dist/checks/packUnpack.js +2 -2
  18. package/dist/checks/qkv.js +12 -13
  19. package/dist/checks/rope.js +2 -2
  20. package/dist/clip_by_value-BnO7-a88.js +12 -0
  21. package/dist/complex-DjxcVmoX.js +11 -0
  22. package/dist/concat-BV8bt5H-.js +17 -0
  23. package/dist/{concat_util-iBYIyuQe.js → concat_util-DpW8mL_l.js} +1 -1
  24. package/dist/{dataset-D2P7rHAw.js → dataset-BcwmTGYc.js} +137 -139
  25. package/dist/dropout-BcvN9JYi.js +92 -0
  26. package/dist/expand_dims-DT4tEPwA.js +11 -0
  27. package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-Hta_rEnm.js} +1 -1
  28. package/dist/floor-D5QdR_le.js +9 -0
  29. package/dist/gather-D3JcZUaI.js +9 -0
  30. package/dist/{gelu-Bmhopi0J.js → gelu-CjNPL4OH.js} +10 -11
  31. package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-DAOmgtXR.js} +841 -1015
  32. package/dist/{index-DRyE072i.js → index-BwexR4lA.js} +262 -263
  33. package/dist/index-DOvlwCh-.js +3520 -0
  34. package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CCzYdUZg.js} +130 -132
  35. package/dist/layers/BaseLayer.js +15 -16
  36. package/dist/layers/CausalSelfAttention.js +6 -6
  37. package/dist/layers/MLP.js +4 -4
  38. package/dist/layers/PositionEmbedding.js +7 -7
  39. package/dist/layers/RMSNorm.js +3 -3
  40. package/dist/layers/RoPECache.js +9 -9
  41. package/dist/layers/TiedEmbedding.js +6 -6
  42. package/dist/layers/TransformerBlock.js +1 -1
  43. package/dist/loader/loadTransformers.js +1 -1
  44. package/dist/loader/oldZipLoad.js +13 -14
  45. package/dist/log_sum_exp-ngO0-4pK.js +39 -0
  46. package/dist/main.js +49 -50
  47. package/dist/{matMul16-fEAJ4smh.js → matMul16-BWRSOCWB.js} +14 -15
  48. package/dist/matMulGelu-CzfgT6Wq.js +163 -0
  49. package/dist/mat_mul-SjpJRLyL.js +11 -0
  50. package/dist/mod-AnXEvvpo.js +11 -0
  51. package/dist/models/NanoGPTV1.js +2 -2
  52. package/dist/models/model.js +13 -14
  53. package/dist/ones-D2rT0xk2.js +14 -0
  54. package/dist/ops/adamAdjust.js +1 -1
  55. package/dist/ops/adamMoments.js +1 -1
  56. package/dist/ops/add16.js +1 -1
  57. package/dist/ops/appendCache.js +3 -3
  58. package/dist/ops/attentionMask.js +1 -1
  59. package/dist/ops/concat16.js +2 -2
  60. package/dist/ops/cpu/adamAdjust.js +13 -14
  61. package/dist/ops/cpu/adamMoments.js +6 -7
  62. package/dist/ops/cpu/appendCache.js +7 -8
  63. package/dist/ops/cpu/attentionMask.js +7 -7
  64. package/dist/ops/cpu/fusedSoftmax.js +10 -11
  65. package/dist/ops/cpu/gatherSub.js +9 -10
  66. package/dist/ops/cpu/gelu.js +9 -10
  67. package/dist/ops/cpu/matMul16.js +6 -7
  68. package/dist/ops/cpu/matMulGelu.js +5 -6
  69. package/dist/ops/cpu/matMulMul.js +3 -4
  70. package/dist/ops/cpu/mulDropout.js +3 -4
  71. package/dist/ops/cpu/normRMS.js +10 -11
  72. package/dist/ops/cpu/qkv.js +8 -9
  73. package/dist/ops/cpu/rope.js +5 -6
  74. package/dist/ops/cpu/scatterSub.js +17 -19
  75. package/dist/ops/dot16.js +2 -2
  76. package/dist/ops/gatherSub.js +1 -1
  77. package/dist/ops/gelu.js +2 -2
  78. package/dist/ops/grads/add16.js +11 -12
  79. package/dist/ops/grads/attentionMask.js +5 -6
  80. package/dist/ops/grads/gelu.js +3 -4
  81. package/dist/ops/grads/matMul16.js +4 -5
  82. package/dist/ops/grads/matMulGelu.js +9 -10
  83. package/dist/ops/grads/normRMS.js +7 -8
  84. package/dist/ops/grads/pack16.js +4 -5
  85. package/dist/ops/grads/qkv.js +17 -19
  86. package/dist/ops/grads/rope.js +3 -5
  87. package/dist/ops/grads/softmax16.js +3 -4
  88. package/dist/ops/grads/unpack16.js +3 -4
  89. package/dist/ops/grads/utils.d.ts +1 -0
  90. package/dist/ops/grads/utils.js +8 -4
  91. package/dist/ops/matMul16.js +3 -3
  92. package/dist/ops/matMulGelu.js +2 -2
  93. package/dist/ops/matMulMul.js +1 -1
  94. package/dist/ops/mul16.js +1 -1
  95. package/dist/ops/mulDrop.js +1 -1
  96. package/dist/ops/normRMS.js +1 -1
  97. package/dist/ops/pack16.js +3 -4
  98. package/dist/ops/qkv.js +4 -8
  99. package/dist/ops/reshape16.js +14 -16
  100. package/dist/ops/rope.d.ts +1 -1
  101. package/dist/ops/rope.js +3 -8
  102. package/dist/ops/scatterSub.js +1 -1
  103. package/dist/ops/slice16.js +2 -2
  104. package/dist/ops/softmax16.js +5 -8
  105. package/dist/ops/sub16.js +1 -1
  106. package/dist/ops/sum16.js +2 -2
  107. package/dist/ops/transpose16.js +23 -24
  108. package/dist/ops/unpack16.js +2 -2
  109. package/dist/ops/webgl/adamAdjust.js +2 -3
  110. package/dist/ops/webgl/adamMoments.js +1 -2
  111. package/dist/ops/webgl/appendCache.js +1 -2
  112. package/dist/ops/webgl/attentionMask.js +4 -5
  113. package/dist/ops/webgl/fusedSoftmax.js +4 -6
  114. package/dist/ops/webgl/gatherSub.js +6 -7
  115. package/dist/ops/webgl/gelu.js +2 -3
  116. package/dist/ops/webgl/log.js +11 -12
  117. package/dist/ops/webgl/matMul16.js +10 -11
  118. package/dist/ops/webgl/matMulGelu.js +7 -111
  119. package/dist/ops/webgl/matMulMul.js +9 -10
  120. package/dist/ops/webgl/mulDropout.js +8 -9
  121. package/dist/ops/webgl/normRMS.js +2 -3
  122. package/dist/ops/webgl/qkv.js +5 -6
  123. package/dist/ops/webgl/rope.js +7 -8
  124. package/dist/ops/webgl/scatterSub.js +5 -6
  125. package/dist/ops/webgpu/adamAdjust.js +10 -12
  126. package/dist/ops/webgpu/adamMoments.js +8 -10
  127. package/dist/ops/webgpu/add16.js +8 -9
  128. package/dist/ops/webgpu/appendCache.js +23 -25
  129. package/dist/ops/webgpu/attentionMask.js +8 -10
  130. package/dist/ops/webgpu/attentionMask32_program.js +2 -2
  131. package/dist/ops/webgpu/concat16.js +12 -14
  132. package/dist/ops/webgpu/gatherSub.js +11 -13
  133. package/dist/ops/webgpu/gelu.js +28 -29
  134. package/dist/ops/webgpu/matMul16.js +26 -28
  135. package/dist/ops/webgpu/matMul16_program.js +4 -5
  136. package/dist/ops/webgpu/mul16.js +9 -10
  137. package/dist/ops/webgpu/normRMS.js +15 -17
  138. package/dist/ops/webgpu/normRMSGrad.js +21 -28
  139. package/dist/ops/webgpu/pack16.js +12 -13
  140. package/dist/ops/webgpu/pack16_program.js +2 -2
  141. package/dist/ops/webgpu/qkv.js +16 -18
  142. package/dist/ops/webgpu/rope.js +25 -27
  143. package/dist/ops/webgpu/scatterSub.js +7 -9
  144. package/dist/ops/webgpu/slice16.js +21 -23
  145. package/dist/ops/webgpu/softmax16.js +17 -19
  146. package/dist/ops/webgpu/softmax16_program.js +2 -2
  147. package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
  148. package/dist/ops/webgpu/softmax16grad.js +7 -8
  149. package/dist/ops/webgpu/sub16.js +7 -8
  150. package/dist/ops/webgpu/sum16.js +18 -20
  151. package/dist/ops/webgpu/transpose16.js +19 -20
  152. package/dist/ops/webgpu/transpose16_program.js +2 -2
  153. package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
  154. package/dist/ops/webgpu/unpack16.js +3 -4
  155. package/dist/ops/webgpu/utils/binary_op.js +7 -8
  156. package/dist/ops/webgpu/utils/reductions.js +14 -22
  157. package/dist/ops-B5yanEdW.js +476 -0
  158. package/dist/pack16-nQ6JaLo-.js +39 -0
  159. package/dist/patches/webgpu_backend.js +19 -20
  160. package/dist/patches/webgpu_base.js +1 -1
  161. package/dist/patches/webgpu_program.js +21 -22
  162. package/dist/{random_width-BVV9HveY.js → random_width-or-CEftb.js} +2506 -2761
  163. package/dist/range-BklejeeW.js +10 -0
  164. package/dist/relu-CP0ZcxWO.js +9 -0
  165. package/dist/reshape-ByE68wS9.js +9 -0
  166. package/dist/resize_nearest_neighbor-B19mCEg2.js +175 -0
  167. package/dist/rope-Ir4mTyD1.js +24 -0
  168. package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-lvSiX8q4.js} +1 -1
  169. package/dist/selu_util-kbhpTdYD.js +44 -0
  170. package/dist/{shared-CHhxz-O5.js → shared-DT1TkE6w.js} +1 -1
  171. package/dist/{shared-D2NP_CpY.js → shared-dntlHIDQ.js} +343 -345
  172. package/dist/slice-BfEGSH82.js +12 -0
  173. package/dist/{slice_util-DyjSAD0u.js → slice_util-uTKwiEpW.js} +1 -1
  174. package/dist/{softmax-C9JQEtnO.js → softmax-CA5jFsLR.js} +4 -5
  175. package/dist/split-CVLc0w--.js +9 -0
  176. package/dist/squeeze-C7Z2srUo.js +10 -0
  177. package/dist/stack-Cf4n9h0N.js +11 -0
  178. package/dist/step-CINUs5QB.js +261 -0
  179. package/dist/sum-DWAtNGez.js +11 -0
  180. package/dist/tensor-DJoc7gJU.js +8 -0
  181. package/dist/tensor1d-D11P_7Dp.js +11 -0
  182. package/dist/{tensor2d-CSB4KOb0.js → tensor2d-Bs9wZRc7.js} +6 -7
  183. package/dist/{tensor4d-D7bLqGqz.js → tensor4d-BARPdTaS.js} +6 -7
  184. package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-y1cvNhLA.js} +255 -264
  185. package/dist/tile-mbfagpsB.js +11 -0
  186. package/dist/training/Adam.js +2 -2
  187. package/dist/training/AdamExt.js +1 -1
  188. package/dist/training/DatasetBuilder.js +2 -2
  189. package/dist/training/FullTrainer.js +1 -1
  190. package/dist/training/Trainer.js +2 -2
  191. package/dist/training/sparseCrossEntropy.js +5 -5
  192. package/dist/transpose-ClWiBS_b.js +36 -0
  193. package/dist/unsorted_segment_sum-BDDhB_E6.js +277 -0
  194. package/dist/utilities/dummy.js +3 -3
  195. package/dist/utilities/multinomialCPU.js +2 -2
  196. package/dist/utilities/packed.d.ts +1 -4
  197. package/dist/utilities/packed.js +10 -745
  198. package/dist/utilities/performance.js +1 -1
  199. package/dist/utilities/profile.js +1 -1
  200. package/dist/utilities/safetensors.js +2 -2
  201. package/dist/utilities/sentences.js +5 -5
  202. package/dist/utilities/weights.js +2 -2
  203. package/dist/{variable-DzfrwYuP.js → variable-WawDEaAb.js} +1 -1
  204. package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-DuOXPQol.js} +178 -172
  205. package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-RxEF33Rj.js} +34 -35
  206. package/dist/zeros-KnWaWf-X.js +13 -0
  207. package/dist/zeros_like-DvE73F4e.js +721 -0
  208. package/package.json +4 -2
  209. package/dist/Reshape-CDVLyVfz.js +0 -16
  210. package/dist/broadcast_to-B0ChcDaz.js +0 -30
  211. package/dist/complex-BBiRlsVq.js +0 -13
  212. package/dist/concat-DmBLPVGC.js +0 -19
  213. package/dist/dropout-B1x1kYMa.js +0 -99
  214. package/dist/expand_dims-ouvfxQ1n.js +0 -13
  215. package/dist/gather-CH9sdacz.js +0 -10
  216. package/dist/index-D6Q1lPZO.js +0 -2157
  217. package/dist/log_sum_exp-D3ftBNY5.js +0 -41
  218. package/dist/mat_mul-C59XWcJd.js +0 -12
  219. package/dist/mod-DESSvHIU.js +0 -12
  220. package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
  221. package/dist/ones-jU9jlQvM.js +0 -15
  222. package/dist/ops-BFDtP6th.js +0 -645
  223. package/dist/pack16-CmVZs6af.js +0 -41
  224. package/dist/patches/PackedTensor.d.ts +0 -12
  225. package/dist/patches/PackedTensor.js +0 -11
  226. package/dist/patches/engine.d.ts +0 -261
  227. package/dist/patches/engine.js +0 -12
  228. package/dist/patches/tape.d.ts +0 -12
  229. package/dist/patches/tape.js +0 -5
  230. package/dist/range-ZZZD60Fx.js +0 -11
  231. package/dist/reciprocal-CrYlsAGD.js +0 -10
  232. package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
  233. package/dist/relu-BYDneVPn.js +0 -10
  234. package/dist/reshape-CaPQzFvz.js +0 -10
  235. package/dist/rope-s4W2XO9B.js +0 -32
  236. package/dist/selu_util-BGPXmd4B.js +0 -303
  237. package/dist/sin-Djs4aQiu.js +0 -16
  238. package/dist/slice-DvovR5wq.js +0 -13
  239. package/dist/split-DBck65sX.js +0 -10
  240. package/dist/squeeze-C00Ipm_7.js +0 -11
  241. package/dist/stack-ChnHwRpX.js +0 -13
  242. package/dist/sum-ywRJj3Zr.js +0 -12
  243. package/dist/tensor-0r5yOo2R.js +0 -8
  244. package/dist/tensor-CzmOBsdf.js +0 -909
  245. package/dist/tensor1d-BlUT89BP.js +0 -12
  246. package/dist/tensor_util-DfwaWayG.js +0 -523
  247. package/dist/tile-CR074jmp.js +0 -13
  248. package/dist/transpose-DH4gmHvu.js +0 -38
  249. package/dist/zeros-DBFVbpv5.js +0 -14
@@ -1,23 +1,21 @@
- import { e as D, J as W } from "./index-D6Q1lPZO.js";
- import { e as g, a as _, y as O, s as x, z as $, A as F, B as K, o as Z, q as j, g as X, i as q } from "./tensor-CzmOBsdf.js";
- import { m as J, f as ee, P as te } from "./webgpu_program-DzaQiqel.js";
- import { i as se, G as N } from "./webgpu_util-0_ubCEHJ.js";
- import { K as re, J as ne } from "./tensor_util-DfwaWayG.js";
- import { m as k } from "./complex_util-Yc1A_gV1.js";
- const l = g();
- l.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
- l.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
- l.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
- l.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
- l.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
- l.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
- l.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
- l.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
- l.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
- l.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
- l.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
- l.registerFlag("WEBGPU_PRINT_SHADER", () => "");
- l.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
+ import { j as g, au as $, av as K, e as D, l as _, aw as O, h as x, ax as Z, at as W, ay as F, az as j, aA as X, aB as J, i as ee, ad as k } from "./index-DOvlwCh-.js";
+ import { m as te, f as se, P as re } from "./webgpu_program-DuOXPQol.js";
+ import { i as ne, G as q } from "./webgpu_util-RxEF33Rj.js";
+ import { m as N } from "./complex_util-Yc1A_gV1.js";
+ const d = g();
+ d.registerFlag("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE", () => 15);
+ d.registerFlag("WEBGPU_CPU_FORWARD", () => !0);
+ d.registerFlag("WEBGPU_MATMUL_PROGRAM_TYPE", () => -1);
+ d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_TRANSPOSE", () => !0);
+ d.registerFlag("WEBGPU_USE_LOW_POWER_GPU", () => !1);
+ d.registerFlag("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD", () => 1e3);
+ d.registerFlag("WEBGPU_USE_PROFILE_TOOL", () => !1);
+ d.registerFlag("WEBGPU_IMPORT_EXTERNAL_TEXTURE", () => !0);
+ d.registerFlag("WEBGPU_USE_NAIVE_CONV2D_DEBUG", () => !1);
+ d.registerFlag("WEBGPU_THRESHOLD_TO_INCREASE_WORKGROUPS_FOR_MATMUL", () => -1);
+ d.registerFlag("WEBGPU_CONV_SEPARATE_IM2COL_SHADER", () => !1);
+ d.registerFlag("WEBGPU_PRINT_SHADER", () => "");
+ d.registerFlag("WEBGPU_ENGINE_COMPILE_ONLY", () => !1);
  class ae {
  constructor(e) {
  e && (this.vendor = e.vendor, this.architecture = e.architecture, this.intelGPUGeneration = this.getIntelGPUGeneration());
@@ -70,8 +68,8 @@ class ie {
  }), this.freeBuffers = /* @__PURE__ */ new Map(), this.usedBuffers = /* @__PURE__ */ new Map(), this.numUsedBuffers = 0, this.numFreeBuffers = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
  }
  }
- function z(d, e) {
- return `${d}_${e}`;
+ function z(l, e) {
+ return `${l}_${e}`;
  }
  class oe {
  constructor(e) {
@@ -122,30 +120,30 @@ class oe {
  }), this.freeTextures = /* @__PURE__ */ new Map(), this.usedTextures = /* @__PURE__ */ new Map(), this.numUsedTextures = 0, this.numFreeTextures = 0, this.numBytesUsed = 0, this.numBytesAllocated = 0;
  }
  }
- function L(d, e, t, s) {
- return `${d}_${e}_${t}_${s}`;
+ function L(l, e, t, s) {
+ return `${l}_${e}_${t}_${s}`;
  }
- function Q(d) {
- if (d === "rgba8unorm")
+ function Q(l) {
+ if (l === "rgba8unorm")
  return 16;
- throw new Error(`${d} is not supported!`);
+ throw new Error(`${l} is not supported!`);
  }
- const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (d, e) => {
- const t = d.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
+ const ue = g().getNumber("WEBGPU_CPU_HANDOFF_SIZE_THRESHOLD"), fe = (l, e) => {
+ const t = l.limits.maxComputeWorkgroupsPerDimension, s = e.dispatchLayout, n = e.dispatch;
  if (n.every((a) => a <= t))
  return n;
  _(n[0] > t && s.y === void 0 && s.z === void 0, () => "Dispatch size exceeds WebGPU limits in Y or Z dimension.");
  let r = Math.ceil(Math.sqrt(n[0]));
  return r > t ? (r = Math.ceil(Math.cbrt(n[0])), _(r <= t, () => "Total dispatch size exceeds WebGPU maximum."), [r, r, r]) : [r, r, 1];
  };
- class R extends re {
+ class R extends $ {
  nextDataId() {
  return R.nextDataId++;
  }
  constructor(e, t) {
- if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !se())
+ if (super(), this.commandQueueOwnedIds = /* @__PURE__ */ new WeakSet(), this.dispatchCountInPass = 0, this.disposed = !1, this.downloadWaitMs = 0, this.tensorDataPendingDisposal = [], this.queryResolveBuffer = null, this.querySet = null, this.querySetCount = 2, this.stagingPendingDisposal = [], this.uniformPendingDisposal = [], this.uploadWaitMs = 0, this.hasReadSyncWarned = !1, this.hasTimestampQueryWarned = !1, !ne())
  throw new Error("WebGPU is not supported on this device");
- this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new ne(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
+ this.pipelineCache = {}, this.device = e, this.queue = e.queue, this.commandEncoder = null, this.computePassEncoder = null, this.adapterInfo = new ae(t), this.supportTimestampQuery = this.device.features.has("timestamp-query"), this.thresholdToIncreaseWorkgroups = this.adapterInfo.intelGPUGeneration >= 12 ? 16 : 8, this.bufferManager = new ie(this.device), this.textureManager = new oe(this.device), this.tensorMap = new K(this, D()), g().getBool("WEBGPU_USE_PROFILE_TOOL") && (this.dummyCanvas = document.createElement("canvas"), this.dummyCanvas.width = 1, this.dummyCanvas.height = 1, this.dummyContext = this.dummyCanvas.getContext("webgpu"), this.dummyContext.configure({
  device: e,
  format: "bgra8unorm"
  }), document.body.appendChild(this.dummyCanvas));
@@ -250,7 +248,7 @@ class R extends re {
  if (s != null || t.dtype === "string")
  return s;
  if (t.dtype === "complex64") {
- const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(k(E, B).buffer, "float32");
+ const E = this.readSync(n.real.dataId), B = this.readSync(n.imag.dataId), y = O(N(E, B).buffer, "float32");
  return this.convertAndCacheOnCPU(e, y), y;
  }
  this.hasReadSyncWarned || (this.hasReadSyncWarned = !0, console.warn("The performance of synchronously reading data from GPU to CPU is poor on the webgpu backend, please use asynchronous APIs instead."));
@@ -311,7 +309,7 @@ class R extends re {
  this.read(t.complexTensorInfos.real.dataId),
  this.read(t.complexTensorInfos.imag.dataId)
  ]), a = r[0], i = r[1];
- n = k(a, i);
+ n = N(a, i);
  } else {
  const r = await this.getBufferData(t.resource);
  n = O(r, t.dtype);
@@ -339,7 +337,7 @@ class R extends re {
  refCount: 1,
  external: e.zeroCopy
  });
- const a = this.tensorMap.get(r), i = N(a.dtype) * x(a.shape);
+ const a = this.tensorMap.get(r), i = q(a.dtype) * x(a.shape);
  if (e.buffer.size < i)
  throw new Error(`GPUBuffer size(${e.buffer.size}) is smaller than tensor size(${i})!`);
  if ((e.buffer.usage & (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC)) !== (GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC))
@@ -365,7 +363,7 @@ class R extends re {
  const t = this.readSync(e.dataId);
  if (e.dtype === "string")
  try {
- const s = t.map((n) => $(n));
+ const s = t.map((n) => Z(n));
  return W(e.shape, e.dtype, s);
  } catch {
  throw new Error("Failed to decode encoded string bytes into utf-8");
@@ -385,10 +383,10 @@ class R extends re {
  kernelMs: null,
  wallMs: null
  }, u = await Promise.all(r);
- return i.kernelMs = K(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
+ return i.kernelMs = j(u), i.getExtraProfileInfo = () => u.map((o, f) => ({ name: a[f], ms: o })).map((o) => `${o.name}: ${o.ms}`).join(", "), this.uploadWaitMs = 0, this.downloadWaitMs = 0, i;
  }
  makeTensorInfo(e, t, s) {
- return t === "string" && s != null && s.length > 0 && Z(s[0]) && (s = s.map((r) => j(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
+ return t === "string" && s != null && s.length > 0 && X(s[0]) && (s = s.map((r) => J(r))), { dataId: this.write(s, e, t), shape: e, dtype: t };
  }
  tensorToBinding(e) {
  if (!e)
@@ -400,16 +398,16 @@ class R extends re {
  const t = this.tensorMap.get(e);
  if (t.resource != null)
  return;
- const s = N(t.dtype) * x(t.shape);
+ const s = q(t.dtype) * x(t.shape);
  let n;
  const r = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST;
  if (t.values) {
  if (n = this.bufferManager.acquireBuffer(s, r, !0), n.mapState === "unmapped") {
  const a = this.bufferManager.acquireBuffer(s, GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC, !0, !1), i = a.getMappedRange();
- t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
+ t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(i).set(t.values) : new Float32Array(i).set(t.values), a.unmap(), this.ensureCommandEncoderReady(), this.endComputePassEncoder(), this.commandEncoder.copyBufferToBuffer(a, 0, n, 0, s), this.stagingPendingDisposal.push(a);
  } else {
  const a = n.getMappedRange();
- t.dtype === "int32" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
+ t.dtype === "int32" || t.dtype === "packedF16" || t.dtype === "bool" ? new Int32Array(a).set(t.values) : new Float32Array(a).set(t.values), n.unmap();
  }
  t.values = null;
  } else
@@ -457,7 +455,7 @@ class R extends re {
  }
  runWebGPUProgram(e, t, s, n, r) {
  if (r || (r = this.makeTensorInfo(e.outputShape, s)), x(r.shape) === 0)
- return this.tensorMap.get(r.dataId).values = X(r.dtype, 0), r;
+ return this.tensorMap.get(r.dataId).values = ee(r.dtype, 0), r;
  this.uploadToGPU(r.dataId), e.dispatch = fe(this.device, e);
  const a = t.map((u, o) => {
  if (u.dtype === "complex64")
@@ -470,9 +468,9 @@ class R extends re {
  name: e.variableNames[o]
  };
  });
- e.shaderKey = J(e, a, r);
+ e.shaderKey = te(e, a, r);
  const i = g().getBool("WEBGPU_ENGINE_COMPILE_ONLY");
- return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = ee(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
+ return e.shaderKey in this.pipelineCache || (this.pipelineCache[e.shaderKey] = se(this.device, e, a, r, i)), e.pipeline = this.pipelineCache[e.shaderKey], i || this.recordAndSubmit(e, r, t, n), r;
  }
  recordAndSubmit(e, t, s, n) {
  if (e.pipeline instanceof Promise)
@@ -484,11 +482,11 @@ class R extends re {
  const h = "int32";
  a.map((m) => {
  r.push({ type: h, data: m });
- const w = q(m);
+ const w = k(m);
  r.push({ type: h, data: w });
  });
  } else {
- const h = q(t.shape);
+ const h = k(t.shape);
  r.push({ type: i, data: h });
  }
  if (e.size) {
@@ -520,7 +518,7 @@ class R extends re {
  querySet: this.querySet,
  beginningOfPassWriteIndex: 0,
  endOfPassWriteIndex: 1
- }, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === te.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
+ }, this.computePassEncoder = this.commandEncoder.beginComputePass(c)) : this.computePassEncoder || (this.computePassEncoder = this.commandEncoder.beginComputePass(c)), this.computePassEncoder.setPipeline(e.pipeline), this.computePassEncoder.setBindGroup(0, o), this.computePassEncoder.dispatchWorkgroups(e.dispatch[0], e.dispatch[1], e.dispatch[2]), this.dispatchCountInPass++, (f || g().get("WEBGPU_DEFERRED_SUBMIT_BATCH_SIZE") <= this.dispatchCountInPass || e.pixelsOpType === re.DRAW) && (this.endComputePassEncoder(), f ? this.activeTimers.push({ name: e.constructor.name, query: this.getQueryTime() }) : this.submitQueue());
  }
  async getQueryTime() {
  if (!this.supportTimestampQuery)
@@ -0,0 +1,28 @@
+ import { A as h, B as f, C as p, D as g, E as u, T } from "./index-DOvlwCh-.js";
+ import { r as b } from "./reshape-ByE68wS9.js";
+ function m(e, r) {
+ let n = f(e, "broadcastTo", "x");
+ const a = n.shape;
+ if (p(r), r.length < n.rank)
+ throw new Error(`broadcastTo(): shape.length=${r.length} < input.rank=${n.rank}.`);
+ if (r.length > n.rank) {
+ const t = n.shape.slice();
+ for (; t.length < r.length; )
+ t.unshift(1);
+ n = b(n, t);
+ }
+ const s = n.shape, o = Array.from(r);
+ for (let t = r.length - 1; t >= 0; t--)
+ if (s[t] === r[t])
+ o[t] = 1;
+ else if (n.shape[t] !== 1)
+ throw new Error(`broadcastTo(): [${a}] cannot be broadcast to [${r}].`);
+ if (o.map((t, l) => t > 1 ? l : -1).filter((t) => t >= 0).length === 0)
+ return g(n);
+ const i = { x: n }, c = { reps: o };
+ return u.runKernel(T, i, c);
+ }
+ const E = /* @__PURE__ */ h({ broadcastTo_: m });
+ export {
+ E as b
+ };
@@ -1,5 +1,5 @@
- import { s, e as a } from "../index-D6Q1lPZO.js";
- import { t } from "../tensor4d-D7bLqGqz.js";
+ import { s, e as a } from "../index-DOvlwCh-.js";
+ import { t } from "../tensor4d-BARPdTaS.js";
  async function u(e) {
  await s(e);
  const n = t(
@@ -1,6 +1,6 @@
- import { s as i, e } from "../index-D6Q1lPZO.js";
- import { t } from "../tensor4d-D7bLqGqz.js";
- import { t as a } from "../tensor2d-CSB4KOb0.js";
+ import { s as i, e } from "../index-DOvlwCh-.js";
+ import { t } from "../tensor4d-BARPdTaS.js";
+ import { t as a } from "../tensor2d-Bs9wZRc7.js";
  async function k(n) {
  await i(n);
  const s = t(
@@ -1,5 +1,5 @@
- import { s as e, e as o } from "../index-D6Q1lPZO.js";
- import { t as s } from "../tensor2d-CSB4KOb0.js";
+ import { s as e, e as o } from "../index-DOvlwCh-.js";
+ import { t as s } from "../tensor2d-Bs9wZRc7.js";
  async function m(t) {
  await e(t);
  const r = s(
@@ -1,11 +1,7 @@
- import { s as n, e as s } from "../index-D6Q1lPZO.js";
- import "../random_width-BVV9HveY.js";
- import "../register_all_kernels-nvj2k7OC.js";
- import "../index-Cp39cXWe.js";
- import "../dataset-D2P7rHAw.js";
- import { t as e } from "../tensor2d-CSB4KOb0.js";
- async function f(t) {
- await n(t);
+ import { s as o, e as s } from "../index-DOvlwCh-.js";
+ import { t as e } from "../tensor2d-Bs9wZRc7.js";
+ async function i(t) {
+ await o(t);
  const r = e(
  [
  [0.1, 0.2, 9, 10, 11],
@@ -15,7 +11,7 @@ async function f(t) {
  [0.3, 0.4, -9, -10, -11]
  ],
  [5, 5]
- ), o = e(
+ ), n = e(
  [
  [0.5, 0.6, 7e4, -8e3, 0],
  [0.7, 0.8, -7e4, 8e4, 0],
@@ -25,8 +21,8 @@
  ],
  [5, 5]
  );
- return await s().runKernel("MatMulGelu", { x: o, kernel: r }).array();
+ return await s().runKernel("MatMulGelu", { x: n, kernel: r }).array();
  }
  export {
- f as execute
+ i as execute
  };
@@ -1,14 +1,14 @@
- import { s as i, u as A, e as y } from "../index-D6Q1lPZO.js";
- import { a as h } from "../ops-BFDtP6th.js";
- import { t as p } from "../tensor1d-BlUT89BP.js";
- import { t as a } from "../tensor-0r5yOo2R.js";
+ import { s as u, a5 as A, e as y } from "../index-DOvlwCh-.js";
+ import { a as h } from "../ops-B5yanEdW.js";
+ import { t as p } from "../tensor1d-D11P_7Dp.js";
+ import { t as r } from "../tensor-DJoc7gJU.js";
  const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
  async function k(t) {
- await i(t);
- const o = p(x, "float32"), n = a(w, [16, 128, 192], "float32"), s = a(M, [16, 128, 192], "float32"), e = (d, g) => {
- const u = y().runKernel("RMSNorm", { x: d, gamma: g });
- return h.meanSquaredError(u, s);
- }, { value: m, grads: r } = A(e)([n, o]), c = await m.array(), f = await r[0].array(), l = await r[1].array();
+ await u(t);
+ const o = p(x, "float32"), n = r(w, [16, 128, 192], "float32"), s = r(M, [16, 128, 192], "float32"), e = (d, g) => {
+ const i = y().runKernel("RMSNorm", { x: d, gamma: g });
+ return h.meanSquaredError(i, s);
+ }, { value: m, grads: a } = A(e)([n, o]), c = await m.array(), f = await a[0].array(), l = await a[1].array();
  return [c, f, l];
  }
  export {
@@ -1,6 +1,6 @@
- import { s as c, e as d } from "../index-D6Q1lPZO.js";
- import { t as f } from "../tensor1d-BlUT89BP.js";
- import { t as r } from "../tensor-0r5yOo2R.js";
+ import { s as c, e as d } from "../index-DOvlwCh-.js";
+ import { t as f } from "../tensor1d-D11P_7Dp.js";
+ import { t as r } from "../tensor-DJoc7gJU.js";
  const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
  async function x(t) {
  await c(t);
@@ -1,5 +1,5 @@
- import { s as a, e } from "../index-D6Q1lPZO.js";
- import { t as c } from "../tensor2d-CSB4KOb0.js";
+ import { s as a, e } from "../index-DOvlwCh-.js";
+ import { t as c } from "../tensor2d-Bs9wZRc7.js";
  async function i(n) {
  await a(n);
  const r = c(
@@ -1,19 +1,18 @@
- import { x as i, y as u, s as c, e as l } from "../index-D6Q1lPZO.js";
- import { c as m } from "../tensor-CzmOBsdf.js";
- import { t as f } from "../tensor2d-CSB4KOb0.js";
- function h(t, e, n) {
- if (m(t), e != null && e.length !== 3)
+ import { $ as i, a0 as u, a1 as c, s as l, e as h } from "../index-DOvlwCh-.js";
+ import { t as f } from "../tensor2d-Bs9wZRc7.js";
+ function m(t, e, n) {
+ if (i(t), e != null && e.length !== 3)
  throw new Error("tensor3d() requires shape to have three numbers");
- const r = i(t, n);
+ const r = u(t, n);
  if (r.length !== 3 && r.length !== 1)
  throw new Error("tensor3d() requires values to be number[][][] or flat/TypedArray");
  if (r.length === 1 && e == null)
  throw new Error("tensor3d() requires shape to be provided when `values` are a flat array");
- return u(t, e, r, n);
+ return c(t, e, r, n);
  }
- async function p(t) {
- await c(t);
- const e = h(
+ async function y(t) {
+ await l(t);
+ const e = m(
  [
  [
  [0.1, 0.2],
@@ -27,9 +26,9 @@ async function p(t) {
  [0.7, 0.8, 1.1, 1.2, 1.5, 1.6]
  ],
  [2, 6]
- ), r = l().runKernel("QKV", { x: e, kernel: n }, { heads: 1 }), o = await r[0].array(), a = await r[1].array(), s = await r[2].array();
- return [o, a, s];
+ ), r = h().runKernel("QKV", { x: e, kernel: n }, { heads: 1 }), a = await r[0].array(), o = await r[1].array(), s = await r[2].array();
+ return [a, o, s];
  }
  export {
- p as execute
+ y as execute
  };
@@ -1,6 +1,6 @@
  import t from "../layers/RoPECache.js";
- import { s as c, e as i } from "../index-D6Q1lPZO.js";
- import { t as p } from "../tensor4d-D7bLqGqz.js";
+ import { s as c, e as i } from "../index-DOvlwCh-.js";
+ import { t as p } from "../tensor4d-BARPdTaS.js";
  async function y(a) {
  await c(a);
  const o = p(
@@ -0,0 +1,12 @@
+ import { A as a, B as m, l as y, q as B, E as c, F as f } from "./index-DOvlwCh-.js";
+ function p(o, s, t) {
+ const r = m(o, "x", "clipByValue");
+ if (y(s <= t, () => `Error in clip: min (${s}) must be less than or equal to max (${t}).`), s === t)
+ return B(r.shape, s, r.dtype);
+ const n = { x: r }, e = { clipValueMin: s, clipValueMax: t };
+ return c.runKernel(f, n, e);
+ }
+ const E = /* @__PURE__ */ a({ clipByValue_: p });
+ export {
+ E as c
+ };
@@ -0,0 +1,11 @@
+ import { A as t, B as s, ab as n, E as m, ac as r } from "./index-DOvlwCh-.js";
+ function l(o, c) {
+ const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
+ n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
+ const p = { real: a, imag: e };
+ return m.runKernel(r, p);
+ }
+ const i = /* @__PURE__ */ t({ complex_: l });
+ export {
+ i as c
+ };
@@ -0,0 +1,17 @@
+ import { A as s, l as a, G as p, D as i, E as l, H as f } from "./index-DOvlwCh-.js";
+ function h(n, e = 0) {
+ a(n.length >= 1, () => "Pass at least one tensor to concat");
+ const t = p(n, "tensors", "concat", "string_or_numeric");
+ if (t[0].dtype === "complex64" && t.forEach((o) => {
+ if (o.dtype !== "complex64")
+ throw new Error(`Cannot concatenate complex64 tensors with a tensor
+ with dtype ${o.dtype}. `);
+ }), t.length === 1)
+ return i(t[0]);
+ const r = t, c = { axis: e };
+ return l.runKernel(f, r, c);
+ }
+ const u = /* @__PURE__ */ s({ concat_: h });
+ export {
+ u as c
+ };
@@ -1,4 +1,4 @@
- import { a as s } from "./tensor-CzmOBsdf.js";
+ import { l as s } from "./index-DOvlwCh-.js";
  function h(n, o) {
  const t = n[0].length;
  n.forEach((a, c) => {