@genai-fi/nanogpt 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. package/dist/Generator.d.ts +10 -5
  2. package/dist/Generator.js +11760 -146
  3. package/dist/{RealDiv-zz7FpkKX.js → RealDiv-Ds-jvL09.js} +28 -30
  4. package/dist/Reshape-Cd6e-Otn.js +14 -0
  5. package/dist/{Reshape-CHdUjC72.js → Reshape-Ct266DEk.js} +21 -23
  6. package/dist/TeachableLLM.d.ts +4 -3
  7. package/dist/TeachableLLM.js +15 -16
  8. package/dist/Trainer.d.ts +2 -2
  9. package/dist/Trainer.js +6 -6
  10. package/dist/{axis_util-BsIr9ZNu.js → axis_util-DofAuy0p.js} +1 -1
  11. package/dist/backend.js +2 -2
  12. package/dist/{backend_util-B1XRLuq9.js → backend_util-C7NWHpv7.js} +72 -73
  13. package/dist/{backend_webgpu-CqpfEImu.js → backend_webgpu-B0Vls736.js} +52 -54
  14. package/dist/broadcast_to-DDaNMbX7.js +28 -0
  15. package/dist/checks/appendCache.js +2 -2
  16. package/dist/checks/attentionMask.js +3 -3
  17. package/dist/checks/gelu.js +2 -2
  18. package/dist/checks/matMulGelu.js +7 -11
  19. package/dist/checks/normRMS.js +9 -9
  20. package/dist/checks/normRMSGrad.js +3 -3
  21. package/dist/checks/packUnpack.js +2 -2
  22. package/dist/checks/qkv.js +11 -12
  23. package/dist/checks/rope.js +2 -2
  24. package/dist/clip_by_value-Dn5tzexi.js +12 -0
  25. package/dist/complex-DClmWqJt.js +11 -0
  26. package/dist/concat-C6X3AAlQ.js +17 -0
  27. package/dist/{concat_util-iBYIyuQe.js → concat_util-CHsJFZJJ.js} +1 -1
  28. package/dist/{dataset-D2P7rHAw.js → dataset-DcjWqUVQ.js} +135 -137
  29. package/dist/dropout-OxuaJz6z.js +92 -0
  30. package/dist/expand_dims-BzfJK2uc.js +11 -0
  31. package/dist/{exports_initializers-CZSUJoVE.js → exports_initializers-eS9QJ6ut.js} +1 -1
  32. package/dist/floor-DIb-lN_u.js +9 -0
  33. package/dist/gather-BcO5UQNJ.js +9 -0
  34. package/dist/{gelu-Bmhopi0J.js → gelu-DqTbCx5x.js} +10 -11
  35. package/dist/{gpgpu_math-DsCcikas.js → gpgpu_math-CJcbnKPC.js} +841 -1015
  36. package/dist/index-D0RBWjq8.js +3520 -0
  37. package/dist/{index-DRyE072i.js → index-Dj5TkmPY.js} +330 -331
  38. package/dist/{kernel_funcs_utils-CWfOAPGO.js → kernel_funcs_utils-CSaumNDs.js} +132 -134
  39. package/dist/layers/BaseLayer.js +15 -16
  40. package/dist/layers/CausalSelfAttention.js +6 -6
  41. package/dist/layers/MLP.js +4 -4
  42. package/dist/layers/PositionEmbedding.js +7 -7
  43. package/dist/layers/RMSNorm.js +3 -3
  44. package/dist/layers/RoPECache.js +9 -9
  45. package/dist/layers/TiedEmbedding.js +6 -6
  46. package/dist/layers/TransformerBlock.js +1 -1
  47. package/dist/loader/loadTransformers.js +1 -1
  48. package/dist/loader/oldZipLoad.js +21 -22
  49. package/dist/log_sum_exp-VLZgbFAH.js +39 -0
  50. package/dist/main.d.ts +1 -1
  51. package/dist/main.js +49 -50
  52. package/dist/{matMul16-fEAJ4smh.js → matMul16-cDxwemKj.js} +14 -15
  53. package/dist/matMulGelu-B2s_80-H.js +163 -0
  54. package/dist/mat_mul-DxpNTCRz.js +11 -0
  55. package/dist/mod-PrOKlFxH.js +11 -0
  56. package/dist/models/NanoGPTV1.js +2 -2
  57. package/dist/models/model.js +13 -14
  58. package/dist/ones-BX_wEgzB.js +14 -0
  59. package/dist/ops/adamAdjust.js +1 -1
  60. package/dist/ops/adamMoments.js +1 -1
  61. package/dist/ops/add16.js +1 -1
  62. package/dist/ops/appendCache.js +3 -3
  63. package/dist/ops/attentionMask.js +1 -1
  64. package/dist/ops/concat16.js +2 -2
  65. package/dist/ops/cpu/adamAdjust.js +12 -13
  66. package/dist/ops/cpu/adamMoments.js +6 -7
  67. package/dist/ops/cpu/appendCache.js +7 -8
  68. package/dist/ops/cpu/attentionMask.js +11 -11
  69. package/dist/ops/cpu/fusedSoftmax.js +10 -11
  70. package/dist/ops/cpu/gatherSub.js +10 -11
  71. package/dist/ops/cpu/gelu.js +14 -15
  72. package/dist/ops/cpu/matMul16.js +6 -7
  73. package/dist/ops/cpu/matMulGelu.js +5 -6
  74. package/dist/ops/cpu/matMulMul.js +3 -4
  75. package/dist/ops/cpu/mulDropout.js +3 -4
  76. package/dist/ops/cpu/normRMS.js +11 -12
  77. package/dist/ops/cpu/qkv.js +8 -9
  78. package/dist/ops/cpu/rope.js +9 -10
  79. package/dist/ops/cpu/scatterSub.js +14 -16
  80. package/dist/ops/dot16.js +2 -2
  81. package/dist/ops/gatherSub.js +1 -1
  82. package/dist/ops/gelu.js +2 -2
  83. package/dist/ops/grads/add16.js +10 -11
  84. package/dist/ops/grads/attentionMask.js +5 -6
  85. package/dist/ops/grads/gelu.js +3 -4
  86. package/dist/ops/grads/matMul16.js +4 -5
  87. package/dist/ops/grads/matMulGelu.js +8 -9
  88. package/dist/ops/grads/normRMS.js +9 -10
  89. package/dist/ops/grads/pack16.js +4 -5
  90. package/dist/ops/grads/qkv.js +17 -19
  91. package/dist/ops/grads/rope.js +3 -5
  92. package/dist/ops/grads/softmax16.js +3 -4
  93. package/dist/ops/grads/unpack16.js +3 -4
  94. package/dist/ops/grads/utils.d.ts +1 -0
  95. package/dist/ops/grads/utils.js +8 -4
  96. package/dist/ops/matMul16.js +3 -3
  97. package/dist/ops/matMulGelu.js +2 -2
  98. package/dist/ops/matMulMul.js +1 -1
  99. package/dist/ops/mul16.js +1 -1
  100. package/dist/ops/mulDrop.js +1 -1
  101. package/dist/ops/normRMS.js +1 -1
  102. package/dist/ops/pack16.js +3 -4
  103. package/dist/ops/qkv.js +4 -8
  104. package/dist/ops/reshape16.js +16 -18
  105. package/dist/ops/rope.d.ts +1 -1
  106. package/dist/ops/rope.js +3 -8
  107. package/dist/ops/scatterSub.js +1 -1
  108. package/dist/ops/slice16.js +2 -2
  109. package/dist/ops/softmax16.js +5 -8
  110. package/dist/ops/sub16.js +1 -1
  111. package/dist/ops/sum16.js +2 -2
  112. package/dist/ops/transpose16.js +23 -24
  113. package/dist/ops/unpack16.js +2 -2
  114. package/dist/ops/webgl/adamAdjust.js +2 -3
  115. package/dist/ops/webgl/adamMoments.js +1 -2
  116. package/dist/ops/webgl/appendCache.js +1 -2
  117. package/dist/ops/webgl/attentionMask.js +5 -6
  118. package/dist/ops/webgl/fusedSoftmax.js +6 -8
  119. package/dist/ops/webgl/gatherSub.js +6 -7
  120. package/dist/ops/webgl/gelu.js +2 -3
  121. package/dist/ops/webgl/log.js +11 -12
  122. package/dist/ops/webgl/matMul16.js +15 -16
  123. package/dist/ops/webgl/matMulGelu.js +7 -111
  124. package/dist/ops/webgl/matMulMul.js +14 -15
  125. package/dist/ops/webgl/mulDropout.js +8 -9
  126. package/dist/ops/webgl/normRMS.js +7 -8
  127. package/dist/ops/webgl/qkv.js +5 -6
  128. package/dist/ops/webgl/rope.js +7 -8
  129. package/dist/ops/webgl/scatterSub.js +5 -6
  130. package/dist/ops/webgpu/adamAdjust.js +10 -12
  131. package/dist/ops/webgpu/adamMoments.js +8 -10
  132. package/dist/ops/webgpu/add16.js +8 -9
  133. package/dist/ops/webgpu/appendCache.js +23 -25
  134. package/dist/ops/webgpu/attentionMask.js +10 -12
  135. package/dist/ops/webgpu/attentionMask32_program.js +2 -2
  136. package/dist/ops/webgpu/concat16.js +12 -14
  137. package/dist/ops/webgpu/gatherSub.js +9 -11
  138. package/dist/ops/webgpu/gelu.js +28 -29
  139. package/dist/ops/webgpu/matMul16.js +26 -28
  140. package/dist/ops/webgpu/matMul16_program.js +4 -5
  141. package/dist/ops/webgpu/mul16.js +7 -8
  142. package/dist/ops/webgpu/normRMS.js +17 -19
  143. package/dist/ops/webgpu/normRMSGrad.js +21 -28
  144. package/dist/ops/webgpu/pack16.js +12 -13
  145. package/dist/ops/webgpu/pack16_program.js +2 -2
  146. package/dist/ops/webgpu/qkv.js +13 -15
  147. package/dist/ops/webgpu/rope.js +25 -27
  148. package/dist/ops/webgpu/scatterSub.js +7 -9
  149. package/dist/ops/webgpu/slice16.js +21 -23
  150. package/dist/ops/webgpu/softmax16.js +17 -19
  151. package/dist/ops/webgpu/softmax16_program.js +2 -2
  152. package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
  153. package/dist/ops/webgpu/softmax16grad.js +7 -8
  154. package/dist/ops/webgpu/sub16.js +8 -9
  155. package/dist/ops/webgpu/sum16.js +19 -21
  156. package/dist/ops/webgpu/transpose16.js +19 -20
  157. package/dist/ops/webgpu/transpose16_program.js +2 -2
  158. package/dist/ops/webgpu/transpose16_shared_program.js +11 -12
  159. package/dist/ops/webgpu/unpack16.js +3 -4
  160. package/dist/ops/webgpu/utils/binary_op.js +7 -8
  161. package/dist/ops/webgpu/utils/reductions.js +14 -22
  162. package/dist/ops-FJapAPfm.js +476 -0
  163. package/dist/pack16-k4jq6aMX.js +39 -0
  164. package/dist/patches/webgpu_backend.js +19 -20
  165. package/dist/patches/webgpu_base.js +1 -1
  166. package/dist/patches/webgpu_program.js +15 -16
  167. package/dist/{random_width-BVV9HveY.js → random_width-UGQn4OWb.js} +2506 -2761
  168. package/dist/range-CuGvVN2c.js +10 -0
  169. package/dist/relu-Cf80uA2p.js +9 -0
  170. package/dist/reshape-CkjKPPqB.js +9 -0
  171. package/dist/resize_nearest_neighbor-DB8k9KN_.js +175 -0
  172. package/dist/rope-BmZmp9uP.js +24 -0
  173. package/dist/{scatter_nd_util-C7zXRT_h.js → scatter_nd_util-BY22Cc-C.js} +1 -1
  174. package/dist/selu_util-BuLbmbrl.js +44 -0
  175. package/dist/{shared-CHhxz-O5.js → shared-B7USJZgw.js} +1 -1
  176. package/dist/{shared-D2NP_CpY.js → shared-BQboIImQ.js} +379 -381
  177. package/dist/slice-Aqy7KbJh.js +12 -0
  178. package/dist/{slice_util-DyjSAD0u.js → slice_util-D8CQRenR.js} +7 -7
  179. package/dist/{softmax-C9JQEtnO.js → softmax-faLoUZVT.js} +4 -5
  180. package/dist/split-BNz5jcGc.js +9 -0
  181. package/dist/squeeze--YMgaAAf.js +10 -0
  182. package/dist/stack-WJK22CFn.js +11 -0
  183. package/dist/step-dXR33iOg.js +261 -0
  184. package/dist/sum-BdplSvq_.js +11 -0
  185. package/dist/{tensor-0r5yOo2R.js → tensor-BQqrDvpx.js} +1 -1
  186. package/dist/tensor1d-LxP9asMm.js +11 -0
  187. package/dist/{tensor2d-CSB4KOb0.js → tensor2d-BN1sSfQO.js} +6 -7
  188. package/dist/{tensor4d-D7bLqGqz.js → tensor4d-DVwr7pLF.js} +6 -7
  189. package/dist/{tfjs_backend-CNkSTL0c.js → tfjs_backend-Vi4JfLzT.js} +256 -265
  190. package/dist/tile-CvN_LyVr.js +11 -0
  191. package/dist/tokeniser/BaseTokeniser.d.ts +27 -0
  192. package/dist/tokeniser/BaseTokeniser.js +94 -0
  193. package/dist/tokeniser/CharTokeniser.d.ts +4 -3
  194. package/dist/tokeniser/CharTokeniser.js +46 -32
  195. package/dist/tokeniser/bpe.d.ts +4 -3
  196. package/dist/tokeniser/bpe.js +60 -45
  197. package/dist/tokeniser/type.d.ts +11 -0
  198. package/dist/training/Adam.js +2 -2
  199. package/dist/training/AdamExt.js +1 -1
  200. package/dist/training/DatasetBuilder.d.ts +2 -2
  201. package/dist/training/DatasetBuilder.js +32 -36
  202. package/dist/training/FullTrainer.js +1 -1
  203. package/dist/training/Trainer.d.ts +3 -3
  204. package/dist/training/Trainer.js +2 -2
  205. package/dist/training/sparseCrossEntropy.js +5 -5
  206. package/dist/transpose-JawVKyZy.js +36 -0
  207. package/dist/unsorted_segment_sum-LAbmE9G4.js +277 -0
  208. package/dist/utilities/dummy.js +3 -3
  209. package/dist/utilities/multinomialCPU.js +2 -2
  210. package/dist/utilities/packed.d.ts +1 -4
  211. package/dist/utilities/packed.js +10 -745
  212. package/dist/utilities/performance.js +1 -1
  213. package/dist/utilities/profile.js +1 -1
  214. package/dist/utilities/safetensors.js +2 -2
  215. package/dist/utilities/sentences.js +5 -5
  216. package/dist/utilities/weights.js +2 -2
  217. package/dist/{variable-DzfrwYuP.js → variable-DQ9yYgEU.js} +1 -1
  218. package/dist/{webgpu_program-DzaQiqel.js → webgpu_program-CAE4RICo.js} +177 -171
  219. package/dist/{webgpu_util-0_ubCEHJ.js → webgpu_util-BdovYhXr.js} +34 -35
  220. package/dist/zeros-DeiE2zTa.js +13 -0
  221. package/dist/zeros_like-BAz3iKru.js +721 -0
  222. package/package.json +4 -2
  223. package/dist/Reshape-CDVLyVfz.js +0 -16
  224. package/dist/broadcast_to-B0ChcDaz.js +0 -30
  225. package/dist/complex-BBiRlsVq.js +0 -13
  226. package/dist/concat-DmBLPVGC.js +0 -19
  227. package/dist/dropout-B1x1kYMa.js +0 -99
  228. package/dist/expand_dims-ouvfxQ1n.js +0 -13
  229. package/dist/gather-CH9sdacz.js +0 -10
  230. package/dist/index-D6Q1lPZO.js +0 -2157
  231. package/dist/log_sum_exp-D3ftBNY5.js +0 -41
  232. package/dist/mat_mul-C59XWcJd.js +0 -12
  233. package/dist/mod-DESSvHIU.js +0 -12
  234. package/dist/mulmat_packed_gpu-Coh6qbJk.js +0 -55
  235. package/dist/ones-jU9jlQvM.js +0 -15
  236. package/dist/ops-BFDtP6th.js +0 -645
  237. package/dist/pack16-CmVZs6af.js +0 -41
  238. package/dist/patches/PackedTensor.d.ts +0 -12
  239. package/dist/patches/PackedTensor.js +0 -11
  240. package/dist/patches/engine.d.ts +0 -261
  241. package/dist/patches/engine.js +0 -12
  242. package/dist/patches/tape.d.ts +0 -12
  243. package/dist/patches/tape.js +0 -5
  244. package/dist/range-ZZZD60Fx.js +0 -11
  245. package/dist/reciprocal-CrYlsAGD.js +0 -10
  246. package/dist/register_all_kernels-nvj2k7OC.js +0 -12307
  247. package/dist/relu-BYDneVPn.js +0 -10
  248. package/dist/reshape-CaPQzFvz.js +0 -10
  249. package/dist/rope-s4W2XO9B.js +0 -32
  250. package/dist/selu_util-BGPXmd4B.js +0 -303
  251. package/dist/sin-Djs4aQiu.js +0 -16
  252. package/dist/slice-DvovR5wq.js +0 -13
  253. package/dist/split-DBck65sX.js +0 -10
  254. package/dist/squeeze-C00Ipm_7.js +0 -11
  255. package/dist/stack-ChnHwRpX.js +0 -13
  256. package/dist/sum-ywRJj3Zr.js +0 -12
  257. package/dist/tensor-CzmOBsdf.js +0 -909
  258. package/dist/tensor1d-BlUT89BP.js +0 -12
  259. package/dist/tensor_util-DfwaWayG.js +0 -523
  260. package/dist/tile-CR074jmp.js +0 -13
  261. package/dist/transpose-DH4gmHvu.js +0 -38
  262. package/dist/zeros-DBFVbpv5.js +0 -14
@@ -1,10 +1,9 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { u as r, l as e } from "../../kernel_funcs_utils-CWfOAPGO.js";
3
- import { y as s } from "../../shared-CHhxz-O5.js";
4
- import { r as N, L as l } from "../../tensor_util-DfwaWayG.js";
5
- const t = e + `
1
+ import { h as e, ao as r } from "../../index-D0RBWjq8.js";
2
+ import { u as s, l as N } from "../../kernel_funcs_utils-CSaumNDs.js";
3
+ import { y as l } from "../../shared-B7USJZgw.js";
4
+ const a = N + `
6
5
  return x < 0.0 ? NAN : log(x);
7
- `, a = `
6
+ `, t = `
8
7
  vec4 result = log(x);
9
8
  bvec4 isNaN = isnan(x);
10
9
  result.r = isNaN.r ? x.r : (x.r < 0.0 ? NAN : result.r);
@@ -12,13 +11,13 @@ const t = e + `
12
11
  result.b = isNaN.b ? x.b : (x.b < 0.0 ? NAN : result.b);
13
12
  result.a = isNaN.a ? x.a : (x.a < 0.0 ? NAN : result.a);
14
13
  return result;
15
- `, n = r({
16
- opSnippet: t,
17
- packedOpSnippet: a,
18
- cpuKernelImpl: s
14
+ `, n = s({
15
+ opSnippet: a,
16
+ packedOpSnippet: t,
17
+ cpuKernelImpl: l
19
18
  }), o = {
20
- kernelName: l,
19
+ kernelName: r,
21
20
  backendName: "webgl",
22
21
  kernelFunc: n
23
22
  };
24
- N(o);
23
+ e(o);
@@ -1,28 +1,27 @@
1
1
  import { isPackedTensor as k } from "../../utilities/packed.js";
2
- import { m as M, b as m } from "../../index-D6Q1lPZO.js";
3
- import { matMulMul as g } from "../matMulMul.js";
4
- import { matMulGelu as N } from "../matMulGelu.js";
5
- import { r as U } from "../../tensor_util-DfwaWayG.js";
6
- import { m as G } from "../../mat_mul-C59XWcJd.js";
7
- import { r as w } from "../../reshape-CaPQzFvz.js";
8
- import { t as h } from "../../transpose-DH4gmHvu.js";
9
- function P(p) {
10
- const { A: r, B: o } = p.inputs, { transposeA: l, transposeB: c, scale: u, activation: A, scaleA: f, scaleB: d, forceOutputShape: t, perm: n } = p.attrs, B = !k(r), v = !k(o);
2
+ import { h as g, m as M, b as m } from "../../index-D0RBWjq8.js";
3
+ import { matMulMul as N } from "../matMulMul.js";
4
+ import { matMulGelu as U } from "../matMulGelu.js";
5
+ import { m as G } from "../../mat_mul-DxpNTCRz.js";
6
+ import { r as h } from "../../reshape-CkjKPPqB.js";
7
+ import { t as w } from "../../transpose-JawVKyZy.js";
8
+ function P(l) {
9
+ const { A: r, B: o } = l.inputs, { transposeA: p, transposeB: c, scale: u, activation: A, scaleA: d, scaleB: f, forceOutputShape: t, perm: n } = l.attrs, B = !k(r), v = !k(o);
11
10
  if (B && v) {
12
- const a = f !== void 0 ? M(r, m(f)) : r, i = d !== void 0 ? M(o, m(d)) : o;
11
+ const a = d !== void 0 ? M(r, m(d)) : r, i = f !== void 0 ? M(o, m(f)) : o;
13
12
  let e;
14
- if (u !== void 0 ? e = g(a, i, m(u), l, c) : A === "gelu" ? e = N(a, i) : e = G(a, i, l, c), n)
13
+ if (u !== void 0 ? e = N(a, i, m(u), p, c) : A === "gelu" ? e = U(a, i) : e = G(a, i, p, c), n)
15
14
  if (t) {
16
- const s = w(e, t);
15
+ const s = h(e, t);
17
16
  e.dispose();
18
- const b = h(s, n);
17
+ const b = w(s, n);
19
18
  return s.dispose(), b;
20
19
  } else {
21
- const s = h(e, n);
20
+ const s = w(e, n);
22
21
  return e.dispose(), s;
23
22
  }
24
23
  else if (t) {
25
- const s = w(e, t);
24
+ const s = h(e, t);
26
25
  return e.dispose(), s;
27
26
  } else
28
27
  return e;
@@ -34,4 +33,4 @@ const C = {
34
33
  backendName: "webgl",
35
34
  kernelFunc: P
36
35
  };
37
- U(C);
36
+ g(C);
@@ -1,113 +1,9 @@
1
- import { t as R, e as C, j as N } from "../../index-D6Q1lPZO.js";
2
- import { r as f } from "../../Reshape-CHdUjC72.js";
3
- import { M as H } from "../../mulmat_packed_gpu-Coh6qbJk.js";
4
- import { r as E, u as O } from "../../tensor_util-DfwaWayG.js";
5
- import { m as $ } from "../../mat_mul-C59XWcJd.js";
6
- import { s as A, a as U } from "../../tensor-CzmOBsdf.js";
7
- const M = 0.7978845608028654, g = 0.044715, j = `
8
- vec4 x3 = x * x * x;
9
- vec4 inner = x + ${g} * x3;
10
- inner = ${M} * inner;
11
- inner = vec4(
12
- abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
13
- abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
14
- abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
15
- abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
16
- );
17
- inner = 0.5 * (1.0 + inner);
18
- vec4 result = x * inner;
19
- return result;
20
- `, q = `
21
- vec4 a2 = a * a;
22
- vec4 a3 = a2 * a;
23
- vec4 u = ${M} * (a + ${g} * a3);
24
- vec4 t = vec4(
25
- abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
26
- abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
27
- abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
28
- abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
29
- );
30
- vec4 sech2 = 1.0 - t * t;
31
- vec4 du_dx = ${M} * (1.0 + 3.0 * ${g} * a2);
32
- vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
33
- return dgelu * b;
34
- `, se = 1e3;
35
- function _({
36
- a: e,
37
- b: n,
38
- transposeA: s,
39
- transposeB: t,
40
- backend: a,
41
- activationSnippet: i,
42
- multiplier: o
43
- }) {
44
- const r = e.shape.length, c = n.shape.length, u = s ? e.shape[r - 2] : e.shape[r - 1], p = t ? n.shape[c - 1] : n.shape[c - 2], h = s ? e.shape[r - 1] : e.shape[r - 2], l = t ? n.shape[c - 2] : n.shape[c - 1], w = e.shape.slice(0, -2), K = n.shape.slice(0, -2), m = A(w), d = A(K), T = N(e.shape.slice(0, -2), n.shape.slice(0, -2)).concat([h, l]);
45
- U(
46
- u === p,
47
- () => `Error in matMul: inner shapes (${u}) and (${p}) of Tensors with shapes ${e.shape} and ${n.shape} and transposeA=${s} and transposeB=${t} must match.`
48
- );
49
- const v = s ? [m, u, h] : [m, h, u], x = t ? [d, l, p] : [d, p, l], S = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), b = f({ inputs: { x: n }, backend: a, attrs: { shape: x } }), D = [S, b], y = Math.max(m, d), L = i, B = O(e.dtype, n.dtype), F = new H(
50
- v,
51
- x,
52
- [y, h, l],
53
- s,
54
- t,
55
- !1,
56
- L,
57
- !!o,
58
- !1
59
- ), G = [S, b];
60
- o && G.push(o);
61
- const k = a.runWebGLProgram(F, G, B), I = f({ inputs: { x: k }, backend: a, attrs: { shape: T } });
62
- D.push(k);
63
- for (const P of D)
64
- a.disposeIntermediateTensorInfo(P);
65
- return I;
66
- }
67
- function z(e) {
68
- const { inputs: n, backend: s } = e, { x: t, kernel: a } = n;
69
- if (t === void 0 || a === void 0)
70
- throw new Error("BatchMatMul requires two input tensors.");
71
- return _({
72
- a: t,
73
- b: a,
74
- transposeA: !1,
75
- transposeB: !1,
76
- backend: s,
77
- activationSnippet: j
78
- });
79
- }
80
- const W = {
81
- kernelName: "MatMulGelu",
82
- backendName: "webgl",
83
- kernelFunc: z
84
- };
85
- E(W);
86
- function J(e) {
87
- const { dy: n, x: s, kernel: t } = e.inputs, a = e.backend;
88
- return R(() => {
89
- const i = C().makeTensorFromTensorInfo(
90
- _({
91
- a: s,
92
- b: t,
93
- transposeA: !1,
94
- transposeB: !1,
95
- backend: a,
96
- activationSnippet: q,
97
- multiplier: n
98
- })
99
- ), o = $(i, t, !1, !0), r = $(s, i, !0, !1);
100
- return [o, r];
101
- });
102
- }
103
- const Q = {
104
- kernelName: "MatMulGeluGrad",
105
- backendName: "webgl",
106
- kernelFunc: J
107
- };
108
- E(Q);
1
+ import "../../index-D0RBWjq8.js";
2
+ import "../../Reshape-Ct266DEk.js";
3
+ import { a as m, b as o, c as p } from "../../matMulGelu-B2s_80-H.js";
4
+ import "../../mat_mul-DxpNTCRz.js";
109
5
  export {
110
- se as MATMUL_SHARED_DIM_THRESHOLD,
111
- _ as batchMatMulGeluImpl,
112
- z as batchMatMulKernel
6
+ m as MATMUL_SHARED_DIM_THRESHOLD,
7
+ o as batchMatMulGeluImpl,
8
+ p as batchMatMulKernel
113
9
  };
@@ -1,29 +1,28 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { batchMatMulGeluImpl as u } from "./matMulGelu.js";
3
- import { r as c } from "../../tensor_util-DfwaWayG.js";
4
- const p = `
1
+ import { h as u } from "../../index-D0RBWjq8.js";
2
+ import { b as c } from "../../matMulGelu-B2s_80-H.js";
3
+ const M = `
5
4
  return a * b;
6
5
  `;
7
- function M(r) {
8
- const { inputs: n, backend: o, attrs: a } = r, { x: t, kernel: e, y: i } = n, { transposeA: l, transposeB: s } = a;
6
+ function p(r) {
7
+ const { inputs: n, backend: a, attrs: o } = r, { x: t, kernel: e, y: l } = n, { transposeA: s, transposeB: i } = o;
9
8
  if (t === void 0 || e === void 0)
10
9
  throw new Error("BatchMatMul requires two input tensors.");
11
- return u({
10
+ return c({
12
11
  a: t,
13
12
  b: e,
14
- transposeA: l,
15
- transposeB: s,
16
- backend: o,
17
- activationSnippet: p,
18
- multiplier: i
13
+ transposeA: s,
14
+ transposeB: i,
15
+ backend: a,
16
+ activationSnippet: M,
17
+ multiplier: l
19
18
  });
20
19
  }
21
20
  const m = {
22
21
  kernelName: "MatMulMul",
23
22
  backendName: "webgl",
24
- kernelFunc: M
23
+ kernelFunc: p
25
24
  };
26
- c(m);
25
+ u(m);
27
26
  export {
28
- M as batchMatMulKernel
27
+ p as batchMatMulKernel
29
28
  };
@@ -1,5 +1,4 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { r as m } from "../../tensor_util-DfwaWayG.js";
1
+ import { h as m } from "../../index-D0RBWjq8.js";
3
2
  class f {
4
3
  variableNames = ["a", "b"];
5
4
  outputShape;
@@ -8,8 +7,8 @@ class f {
8
7
  { name: "dropoutRate", type: "float" },
9
8
  { name: "seed", type: "float" }
10
9
  ];
11
- constructor(r, t, o) {
12
- this.outputShape = [r, t, o, o], this.userCode = `
10
+ constructor(t, r, o) {
11
+ this.outputShape = [t, r, o, o], this.userCode = `
13
12
  float random(ivec4 coords) {
14
13
  float x = float(coords.x * 4096 + coords.y * 256 + coords.z * 16 + coords.w);
15
14
  return fract(sin(seed + x) * 43758.5453123);
@@ -27,16 +26,16 @@ class f {
27
26
  `;
28
27
  }
29
28
  }
30
- function i(e) {
31
- const { inputs: r, attrs: t } = e, { a: o, b: s } = r, { dropoutRate: a, seed: c } = t, n = e.backend, d = o.shape[0], u = o.shape[2], p = o.shape[1], l = new f(d, p, u);
29
+ function b(e) {
30
+ const { inputs: t, attrs: r } = e, { a: o, b: s } = t, { dropoutRate: a, seed: c } = r, n = e.backend, d = o.shape[0], u = o.shape[2], p = o.shape[1], l = new f(d, p, u);
32
31
  return n.runWebGLProgram(l, [o, s], "float32", [
33
32
  [a ?? 0],
34
33
  [c ?? Math.random() * 1e4]
35
34
  ]);
36
35
  }
37
- const b = {
36
+ const i = {
38
37
  kernelName: "MulDropout",
39
38
  backendName: "webgl",
40
- kernelFunc: i
39
+ kernelFunc: b
41
40
  };
42
- m(b);
41
+ m(i);
@@ -1,6 +1,5 @@
1
- import { e as G } from "../../index-D6Q1lPZO.js";
2
- import { r as g } from "../../tensor_util-DfwaWayG.js";
3
- import { s as x } from "../../sum-ywRJj3Zr.js";
1
+ import { h as p, e as G } from "../../index-D0RBWjq8.js";
2
+ import { s as x } from "../../sum-BdplSvq_.js";
4
3
  class y {
5
4
  variableNames = ["x", "meanSquare", "gamma"];
6
5
  outputShape;
@@ -29,7 +28,7 @@ const C = {
29
28
  backendName: "webgl",
30
29
  kernelFunc: v
31
30
  };
32
- g(C);
31
+ p(C);
33
32
  class b {
34
33
  variableNames = ["x", "meanSquare", "dyGamma", "dyXMean"];
35
34
  outputShape;
@@ -74,14 +73,14 @@ function M(t) {
74
73
  l.dispose();
75
74
  const f = new b(n, m, u), S = r.runWebGLProgram(f, [e, d, s, i], "float32");
76
75
  s.dispose(), i.dispose();
77
- const h = new N(n, m, u), p = r.runWebGLProgram(h, [e, d, a], "float32");
76
+ const h = new N(n, m, u), g = r.runWebGLProgram(h, [e, d, a], "float32");
78
77
  d.dispose();
79
- const q = x(G().makeTensorFromTensorInfo(p), [0, 1]);
80
- return r.disposeIntermediateTensorInfo(p), [S, q];
78
+ const q = x(G().makeTensorFromTensorInfo(g), [0, 1]);
79
+ return r.disposeIntermediateTensorInfo(g), [S, q];
81
80
  }
82
81
  const k = {
83
82
  kernelName: "RMSNormGrad",
84
83
  backendName: "webgl",
85
84
  kernelFunc: M
86
85
  };
87
- g(k);
86
+ p(k);
@@ -1,6 +1,5 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { r as i } from "../../tensor_util-DfwaWayG.js";
3
- class m {
1
+ import { h as i } from "../../index-D0RBWjq8.js";
2
+ class l {
4
3
  variableNames = ["x", "kernel"];
5
4
  outputShape;
6
5
  userCode;
@@ -31,8 +30,8 @@ class m {
31
30
  `;
32
31
  }
33
32
  }
34
- function l(r) {
35
- const { x: e, kernel: t } = r.inputs, { heads: s } = r.attrs, o = r.backend, n = e.shape[0], c = e.shape[1], u = e.shape[2], a = new m(n, s, c, u);
33
+ function m(r) {
34
+ const { x: e, kernel: t } = r.inputs, { heads: s } = r.attrs, o = r.backend, n = e.shape[0], c = e.shape[1], u = e.shape[2], a = new l(n, s, c, u);
36
35
  return [
37
36
  o.runWebGLProgram(a, [e, t], "float32", [[0]]),
38
37
  o.runWebGLProgram(a, [e, t], "float32", [[1]]),
@@ -42,6 +41,6 @@ function l(r) {
42
41
  const d = {
43
42
  kernelName: "QKV",
44
43
  backendName: "webgl",
45
- kernelFunc: l
44
+ kernelFunc: m
46
45
  };
47
46
  i(d);
@@ -1,5 +1,4 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { r as h } from "../../tensor_util-DfwaWayG.js";
1
+ import { h as l } from "../../index-D0RBWjq8.js";
3
2
  class g {
4
3
  variableNames = ["x", "sin", "cos"];
5
4
  outputShape;
@@ -45,13 +44,13 @@ class g {
45
44
  `;
46
45
  }
47
46
  }
48
- function m(o) {
49
- const { x: t } = o.inputs, { pastLen: s, ropeCache: e, negSin: n } = o.attrs, a = n ? e.getNegSin() : e.getSin(), r = e.getCos(), i = o.backend, d = t.shape[0], c = t.shape[1], p = t.shape[2], u = t.shape[3], l = new g(d, c, p, u);
50
- return i.runWebGLProgram(l, [t, a, r], "float32", [[s]]);
47
+ function f(o) {
48
+ const { x: t } = o.inputs, { pastLen: s, ropeCache: e, negSin: n } = o.attrs, a = n ? e.getNegSin() : e.getSin(), r = e.getCos(), d = o.backend, i = t.shape[0], c = t.shape[1], p = t.shape[2], u = t.shape[3], h = new g(i, c, p, u);
49
+ return d.runWebGLProgram(h, [t, a, r], "float32", [[s]]);
51
50
  }
52
- const f = {
51
+ const m = {
53
52
  kernelName: "Rope",
54
53
  backendName: "webgl",
55
- kernelFunc: m
54
+ kernelFunc: f
56
55
  };
57
- h(f);
56
+ l(m);
@@ -1,11 +1,10 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { r as i } from "../../tensor_util-DfwaWayG.js";
1
+ import { h as i } from "../../index-D0RBWjq8.js";
3
2
  class u {
4
3
  variableNames = ["labels", "softmaxProbs", "dy"];
5
4
  outputShape;
6
5
  userCode;
7
- constructor(t, e) {
8
- this.outputShape = [t, e], this.userCode = `
6
+ constructor(e, t) {
7
+ this.outputShape = [e, t], this.userCode = `
9
8
  void main() {
10
9
  ivec2 coords = getOutputCoords();
11
10
  int index = int(getLabels(coords.x));
@@ -17,8 +16,8 @@ class u {
17
16
  }
18
17
  }
19
18
  function d(o) {
20
- const { logits: t, labels: e, dy: r } = o.inputs, s = o.backend, n = e.shape[0], a = t.shape[1], c = new u(n, a);
21
- return s.runWebGLProgram(c, [e, t, r], "float32");
19
+ const { logits: e, labels: t, dy: r } = o.inputs, s = o.backend, n = t.shape[0], a = e.shape[1], c = new u(n, a);
20
+ return s.runWebGLProgram(c, [t, e, r], "float32");
22
21
  }
23
22
  const b = {
24
23
  kernelName: "EfficientScatterSub",
@@ -1,8 +1,6 @@
1
- import { e as p } from "../../webgpu_program-DzaQiqel.js";
2
- import { f as d, c as l } from "../../webgpu_util-0_ubCEHJ.js";
3
- import "../../index-D6Q1lPZO.js";
4
- import { j as f } from "../../tensor-CzmOBsdf.js";
5
- import { r as c } from "../../tensor_util-DfwaWayG.js";
1
+ import { e as p } from "../../webgpu_program-CAE4RICo.js";
2
+ import { f as d, c as l } from "../../webgpu_util-BdovYhXr.js";
3
+ import { h as f, a7 as c } from "../../index-D0RBWjq8.js";
6
4
  class h {
7
5
  variableNames = ["moments", "value"];
8
6
  outputShape;
@@ -38,19 +36,19 @@ class h {
38
36
  }
39
37
  }
40
38
  function v(t) {
41
- const { moments: e, value: a } = t.inputs, { beta1: n, beta2: o, learningRate: s, epsilon: i } = t.attrs, r = t.backend;
42
- f(e.shape, [...a.shape, 2], "Error in AdamAdjust: ");
43
- const m = new h(a.shape), u = [
39
+ const { moments: e, value: a } = t.inputs, { beta1: n, beta2: s, learningRate: o, epsilon: i } = t.attrs, r = t.backend;
40
+ c(e.shape, [...a.shape, 2], "Error in AdamAdjust: ");
41
+ const u = new h(a.shape), m = [
44
42
  { type: "float32", data: [1 / n] },
45
- { type: "float32", data: [1 / o] },
46
- { type: "float32", data: [s] },
43
+ { type: "float32", data: [1 / s] },
44
+ { type: "float32", data: [o] },
47
45
  { type: "float32", data: [i] }
48
46
  ];
49
- return r.runWebGPUProgram(m, [e, a], "float32", u);
47
+ return r.runWebGPUProgram(u, [e, a], "float32", m);
50
48
  }
51
49
  const b = {
52
50
  kernelName: "AdamAdjust",
53
51
  backendName: "webgpu",
54
52
  kernelFunc: v
55
53
  };
56
- c(b);
54
+ f(b);
@@ -1,8 +1,6 @@
1
- import { e as p } from "../../webgpu_program-DzaQiqel.js";
2
- import { f as u, c as d } from "../../webgpu_util-0_ubCEHJ.js";
3
- import "../../index-D6Q1lPZO.js";
4
- import { j as f } from "../../tensor-CzmOBsdf.js";
5
- import { r as c } from "../../tensor_util-DfwaWayG.js";
1
+ import { e as u } from "../../webgpu_program-CAE4RICo.js";
2
+ import { f as p, c as d } from "../../webgpu_util-BdovYhXr.js";
3
+ import { h as c, a7 as f } from "../../index-D0RBWjq8.js";
6
4
  class l {
7
5
  variableNames = ["moments", "gradient"];
8
6
  outputShape;
@@ -15,7 +13,7 @@ class l {
15
13
  outputComponent = 2;
16
14
  variableComponents = [2, 1];
17
15
  constructor(t) {
18
- this.outputShape = t, this.dispatchLayout = u(this.outputShape.slice(0, -1)), this.dispatch = d(
16
+ this.outputShape = t, this.dispatchLayout = p(this.outputShape.slice(0, -1)), this.dispatch = d(
19
17
  this.dispatchLayout,
20
18
  this.outputShape.slice(0, -1),
21
19
  this.workgroupSize,
@@ -24,7 +22,7 @@ class l {
24
22
  }
25
23
  getUserCode() {
26
24
  return `
27
- ${p("index")} {
25
+ ${u("index")} {
28
26
  if (index < uniforms.size) {
29
27
  let m: vec2<f32> = moments[index];
30
28
 
@@ -41,7 +39,7 @@ class l {
41
39
  }
42
40
  }
43
41
  function h(e) {
44
- const { moments: t, gradient: a } = e.inputs, { beta1: n, beta2: o, lossScaling: r } = e.attrs, s = e.backend;
42
+ const { moments: t, gradient: a } = e.inputs, { beta1: n, beta2: o, lossScaling: s } = e.attrs, r = e.backend;
45
43
  if (a.dtype !== "float32")
46
44
  throw new Error(`Gradient must be float32, but got ${a.dtype}`);
47
45
  if (f(t.shape, [...a.shape, 2], "Error in AdamMoments: "), n < 0 || n >= 1)
@@ -51,9 +49,9 @@ function h(e) {
51
49
  const i = new l(t.shape), m = [
52
50
  { type: "float32", data: [n] },
53
51
  { type: "float32", data: [o] },
54
- { type: "float32", data: [1 / r] }
52
+ { type: "float32", data: [1 / s] }
55
53
  ];
56
- return s.runWebGPUProgram(i, [t, a], "float32", m);
54
+ return r.runWebGPUProgram(i, [t, a], "float32", m);
57
55
  }
58
56
  const g = {
59
57
  kernelName: "AdamMoments",
@@ -1,14 +1,13 @@
1
- import "../../index-D6Q1lPZO.js";
1
+ import { h as t } from "../../index-D0RBWjq8.js";
2
2
  import { BinaryOpProgram as p } from "./utils/binary_op.js";
3
- import { B as m } from "../../binary_op_util-pKXltfxI.js";
4
- import { r as c } from "../../tensor_util-DfwaWayG.js";
5
- function i(r) {
6
- const { a: e, b: n } = r.inputs, a = r.backend, t = new p(m.ADD, e.shape, n.shape), o = a.runWebGPUProgram(t, [e, n], "int32");
7
- return o.packed = !0, o;
3
+ import { B as s } from "../../binary_op_util-pKXltfxI.js";
4
+ function c(e) {
5
+ const { a: r, b: n } = e.inputs, a = e.backend, o = new p(s.ADD, r.shape, n.shape);
6
+ return a.runWebGPUProgram(o, [r, n], "packedF16");
8
7
  }
9
- const s = {
8
+ const m = {
10
9
  kernelName: "Add16",
11
10
  backendName: "webgpu",
12
- kernelFunc: i
11
+ kernelFunc: c
13
12
  };
14
- c(s);
13
+ t(m);
@@ -1,10 +1,8 @@
1
- import { isPackedTensor as S } from "../../utilities/packed.js";
2
- import { e as d } from "../../webgpu_program-DzaQiqel.js";
3
- import { f as u, c as m } from "../../webgpu_util-0_ubCEHJ.js";
4
- import "../../index-D6Q1lPZO.js";
5
- import { j as g } from "../../tensor-CzmOBsdf.js";
6
- import { r as x } from "../../tensor_util-DfwaWayG.js";
7
- class b {
1
+ import { isPackedTensor as T } from "../../utilities/packed.js";
2
+ import { e as p } from "../../webgpu_program-CAE4RICo.js";
3
+ import { f as d, c as u } from "../../webgpu_util-BdovYhXr.js";
4
+ import { h as S, a7 as g } from "../../index-D0RBWjq8.js";
5
+ class x {
8
6
  variableNames = ["cache", "item"];
9
7
  outputShape;
10
8
  shaderKey = "AppendCache";
@@ -13,14 +11,14 @@ class b {
13
11
  workgroupSize = [64, 1, 1];
14
12
  size = !0;
15
13
  uniforms = "cacheT: i32";
16
- constructor(e, t, s, o, i) {
17
- const a = Math.min(s + 1, i);
18
- this.shaderKey = `AppendCache_${a}`, this.outputShape = [e, t, a, o], this.dispatchLayout = u(this.outputShape), this.dispatch = m(this.dispatchLayout, this.outputShape, this.workgroupSize);
14
+ constructor(e, t, a, s, i) {
15
+ const o = Math.min(a + 1, i);
16
+ this.shaderKey = `AppendCache_${o}`, this.outputShape = [e, t, o, s], this.dispatchLayout = d(this.outputShape), this.dispatch = u(this.dispatchLayout, this.outputShape, this.workgroupSize);
19
17
  }
20
18
  getUserCode() {
21
19
  const e = this.outputShape[2];
22
20
  return `
23
- ${d("index")} {
21
+ ${p("index")} {
24
22
  if (index < uniforms.size) {
25
23
  let coords = getCoordsFromIndex(index); // [b, h, t, d]
26
24
  let b = coords[0];
@@ -48,7 +46,7 @@ class b {
48
46
  `;
49
47
  }
50
48
  }
51
- class C {
49
+ class b {
52
50
  variableNames = ["cache", "item"];
53
51
  outputShape;
54
52
  shaderKey = "AppendCache";
@@ -57,14 +55,14 @@ class C {
57
55
  workgroupSize = [64, 1, 1];
58
56
  size = !0;
59
57
  uniforms = "cacheT: i32";
60
- constructor(e, t, s, o, i) {
61
- const a = Math.min(s + 1, i);
62
- this.shaderKey = `AppendCache_${a}`, this.outputShape = [e, t, a, o], this.dispatchLayout = u(this.outputShape), this.dispatch = m(this.dispatchLayout, this.outputShape, this.workgroupSize);
58
+ constructor(e, t, a, s, i) {
59
+ const o = Math.min(a + 1, i);
60
+ this.shaderKey = `AppendCache_${o}`, this.outputShape = [e, t, o, s], this.dispatchLayout = d(this.outputShape), this.dispatch = u(this.dispatchLayout, this.outputShape, this.workgroupSize);
63
61
  }
64
62
  getUserCode() {
65
63
  const e = this.outputShape[2];
66
64
  return `
67
- ${d("index")} {
65
+ ${p("index")} {
68
66
  if (index < uniforms.size) {
69
67
  let coords = getCoordsFromIndex(index); // [b, h, t, d]
70
68
  let b = coords[0];
@@ -92,16 +90,16 @@ class C {
92
90
  `;
93
91
  }
94
92
  }
95
- function v(r) {
96
- const { cache: e, item: t } = r.inputs, { maxSize: s, pastLen: o } = r.attrs, i = r.backend, a = S(e), c = e.shape[0], n = e.shape[2], h = e.shape[1];
97
- if (g(t.shape, [c, h, 1, t.shape[3]], "Error in AppendCache: "), o < 0 || o > s)
98
- throw new Error(`Invalid pastLen value: ${o}. Must be in the range [0, ${s}].`);
99
- const l = a ? new C(c, h, n, t.shape[3], s) : new b(c, h, n, t.shape[3], s), f = [{ type: "int32", data: [o] }], T = a ? "int32" : e.dtype, p = i.runWebGPUProgram(l, [e, t], T, f);
100
- return p.packed = a, p;
93
+ function C(r) {
94
+ const { cache: e, item: t } = r.inputs, { maxSize: a, pastLen: s } = r.attrs, i = r.backend, o = T(e), c = e.shape[0], n = e.shape[2], h = e.shape[1];
95
+ if (g(t.shape, [c, h, 1, t.shape[3]], "Error in AppendCache: "), s < 0 || s > a)
96
+ throw new Error(`Invalid pastLen value: ${s}. Must be in the range [0, ${a}].`);
97
+ const m = o ? new b(c, h, n, t.shape[3], a) : new x(c, h, n, t.shape[3], a), l = [{ type: "int32", data: [s] }], f = o ? "packedF16" : e.dtype;
98
+ return i.runWebGPUProgram(m, [e, t], f, l);
101
99
  }
102
- const z = {
100
+ const v = {
103
101
  kernelName: "AppendCache",
104
102
  backendName: "webgpu",
105
- kernelFunc: v
103
+ kernelFunc: C
106
104
  };
107
- x(z);
105
+ S(v);
@@ -1,28 +1,26 @@
1
- import "../../index-D6Q1lPZO.js";
2
- import { j as d } from "../../tensor-CzmOBsdf.js";
1
+ import { h as d, a7 as b } from "../../index-D0RBWjq8.js";
3
2
  import { isPackedTensor as p } from "../../utilities/packed.js";
4
- import { b } from "../../matMul16-fEAJ4smh.js";
5
- import l from "./attentionMask32_program.js";
6
- import { r as M } from "../../tensor_util-DfwaWayG.js";
3
+ import { b as l } from "../../matMul16-cDxwemKj.js";
4
+ import M from "./attentionMask32_program.js";
7
5
  function w(n) {
8
6
  const { q: t, k: e } = n.inputs, { divisor: a, pastLen: o } = n.attrs, m = n.backend;
9
7
  if (p(t) && p(e))
10
- return b(t, e, !1, !0, { causalMask: !0, pastLen: o, scale: a });
11
- const r = t.shape[0], k = t.shape[2], s = e.shape[2], i = t.shape[1], c = t.shape[3];
12
- if (d(e.shape, [r, i, s, c], "Error in AttentionMask: "), a === 0)
8
+ return l(t, e, !1, !0, { causalMask: !0, pastLen: o, scale: a });
9
+ const s = t.shape[0], k = t.shape[2], r = e.shape[2], i = t.shape[1], c = t.shape[3];
10
+ if (b(e.shape, [s, i, r, c], "Error in AttentionMask: "), a === 0)
13
11
  throw new Error("Divisor must be non-zero in AttentionMask");
14
12
  if (o < 0)
15
13
  throw new Error("pastLen must be non-negative in AttentionMask");
16
- const u = new l(r, i, k, s, c), f = [
14
+ const u = new M(s, i, k, r, c), h = [
17
15
  { type: "float32", data: [a] },
18
16
  { type: "int32", data: [o] },
19
17
  { type: "float32", data: [Number.NEGATIVE_INFINITY] }
20
- ], h = t.dtype;
21
- return m.runWebGPUProgram(u, [t, e], h, f);
18
+ ], f = t.dtype;
19
+ return m.runWebGPUProgram(u, [t, e], f, h);
22
20
  }
23
21
  const A = {
24
22
  kernelName: "AttentionMask",
25
23
  backendName: "webgpu",
26
24
  kernelFunc: w
27
25
  };
28
- M(A);
26
+ d(A);