@genai-fi/nanogpt 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (343)
  1. package/README.md +352 -14
  2. package/dist/Generator.js +69 -78
  3. package/dist/{RealDiv-D4EzDsC0.js → RealDiv-DgA3z9oO.js} +32 -206
  4. package/dist/Reshape-CF6odzV4.js +16 -0
  5. package/dist/Reshape-_kILl6tK.js +81 -0
  6. package/dist/TeachableLLM.js +28 -22
  7. package/dist/Trainer.d.ts +2 -0
  8. package/dist/Trainer.js +3 -2
  9. package/dist/{axis_util-TbGYJ208.js → axis_util-BvHEw88j.js} +7 -23
  10. package/dist/backend.d.ts +2 -1
  11. package/dist/backend.js +10 -4
  12. package/dist/backend_util-D-rUb2ty.js +474 -0
  13. package/dist/backend_webgpu-B0u2ndUn.js +547 -0
  14. package/dist/binary_op_util-pKXltfxI.js +192 -0
  15. package/dist/broadcast_to-CwF7XIeu.js +30 -0
  16. package/dist/checks/appendCache.js +2 -2
  17. package/dist/checks/attentionMask.js +3 -3
  18. package/dist/checks/check.d.ts +1 -1
  19. package/dist/checks/check.js +8 -8
  20. package/dist/checks/gelu.js +2 -2
  21. package/dist/checks/index.d.ts +2 -0
  22. package/dist/checks/index.js +7 -5
  23. package/dist/checks/matMulGelu.js +6 -6
  24. package/dist/checks/normRMS.js +7 -7
  25. package/dist/checks/normRMSGrad.js +3 -3
  26. package/dist/checks/packUnpack.d.ts +1 -0
  27. package/dist/checks/packUnpack.js +18 -0
  28. package/dist/checks/qkv.js +12 -27
  29. package/dist/checks/rope.js +2 -2
  30. package/dist/checks/weights.js +18 -16
  31. package/dist/complex-CSlYz-2T.js +13 -0
  32. package/dist/complex_util-Yc1A_gV1.js +55 -0
  33. package/dist/concat-BHlIJeyT.js +19 -0
  34. package/dist/concat_util-DcJk7YHS.js +22 -0
  35. package/dist/data/docx.js +1 -1
  36. package/dist/data/parquet.js +2 -2
  37. package/dist/data/pdf.js +1 -1
  38. package/dist/data/textLoader.js +1 -1
  39. package/dist/{dataset-DlZtKmBq.js → dataset-0xP8GjwI.js} +136 -236
  40. package/dist/dropout-C1pM3f11.js +99 -0
  41. package/dist/expand_dims-BPG4fwBP.js +13 -0
  42. package/dist/exports_initializers-xuidcwI4.js +7 -0
  43. package/dist/gather-DykLGqmW.js +10 -0
  44. package/dist/{gelu-Bp_-935b.js → gelu-CNLFZWea.js} +11 -10
  45. package/dist/{gpgpu_math-CDaYiyE_.js → gpgpu_math-DDVJCn6-.js} +90 -265
  46. package/dist/{index-C4L8Cm77.js → index-CieiGp4Y.js} +14 -14
  47. package/dist/index-CjOj7j-u.js +7308 -0
  48. package/dist/{index-Tf7vU29b.js → index-Cp39cXWe.js} +3 -10
  49. package/dist/{index-Dwqa6Zy2.js → index-DvYrXKkX.js} +2 -2
  50. package/dist/index-ZyQhjEPo.js +2157 -0
  51. package/dist/{jszip.min-CjP2V1VV.js → jszip.min-Bz5-11Bk.js} +56 -57
  52. package/dist/kernel_funcs_utils-Dg_-E44D.js +308 -0
  53. package/dist/layers/BaseLayer.d.ts +1 -0
  54. package/dist/layers/BaseLayer.js +7 -6
  55. package/dist/layers/CausalSelfAttention.d.ts +0 -1
  56. package/dist/layers/CausalSelfAttention.js +56 -55
  57. package/dist/layers/MLP.js +15 -16
  58. package/dist/layers/PositionEmbedding.js +5 -14
  59. package/dist/layers/RMSNorm.js +3 -3
  60. package/dist/layers/RoPECache.d.ts +2 -0
  61. package/dist/layers/RoPECache.js +22 -17
  62. package/dist/layers/TiedEmbedding.js +22 -17
  63. package/dist/layers/TransformerBlock.js +21 -20
  64. package/dist/loader/load.js +1 -1
  65. package/dist/loader/loadTransformers.js +1 -1
  66. package/dist/loader/oldZipLoad.js +39 -33
  67. package/dist/loader/save.js +1 -1
  68. package/dist/log_sum_exp-DWI-76TI.js +41 -0
  69. package/dist/main.d.ts +8 -0
  70. package/dist/main.js +63 -52
  71. package/dist/matMul16--R5hOwDG.js +77 -0
  72. package/dist/mat_mul-DeAh4uTH.js +12 -0
  73. package/dist/mod-Gt1rMB4n.js +12 -0
  74. package/dist/models/NanoGPTV1.js +40 -31
  75. package/dist/models/model.d.ts +2 -0
  76. package/dist/models/model.js +37 -29
  77. package/dist/{mulmat_packed_gpu-BT60jmzP.js → mulmat_packed_gpu-BMFhLwta.js} +1 -17
  78. package/dist/{non_max_suppression_impl-CsEgBuMA.js → non_max_suppression_impl-B2W7YjZB.js} +0 -32
  79. package/dist/ones-CAMiP4I2.js +15 -0
  80. package/dist/ops/adamAdjust.js +1 -1
  81. package/dist/ops/adamMoments.d.ts +1 -1
  82. package/dist/ops/adamMoments.js +4 -4
  83. package/dist/ops/add16.d.ts +2 -0
  84. package/dist/ops/add16.js +9 -0
  85. package/dist/ops/appendCache.js +16 -9
  86. package/dist/ops/attentionMask.js +4 -4
  87. package/dist/ops/concat16.d.ts +2 -0
  88. package/dist/ops/concat16.js +9 -0
  89. package/dist/ops/cpu/adamAdjust.js +14 -13
  90. package/dist/ops/cpu/adamMoments.js +10 -9
  91. package/dist/ops/cpu/appendCache.js +9 -8
  92. package/dist/ops/cpu/attentionMask.js +15 -14
  93. package/dist/ops/cpu/fusedSoftmax.js +13 -12
  94. package/dist/ops/cpu/gatherSub.js +9 -24
  95. package/dist/ops/cpu/gelu.js +13 -12
  96. package/dist/ops/cpu/matMul16.d.ts +1 -0
  97. package/dist/ops/cpu/matMul16.js +16 -0
  98. package/dist/ops/cpu/matMulGelu.js +18 -16
  99. package/dist/ops/cpu/matMulMul.js +8 -7
  100. package/dist/ops/cpu/mulDropout.js +4 -3
  101. package/dist/ops/cpu/normRMS.js +11 -10
  102. package/dist/ops/cpu/qkv.js +17 -13
  103. package/dist/ops/cpu/rope.js +23 -22
  104. package/dist/ops/cpu/scatterSub.js +16 -30
  105. package/dist/ops/dot16.d.ts +2 -0
  106. package/dist/ops/dot16.js +42 -0
  107. package/dist/ops/gatherSub.js +1 -1
  108. package/dist/ops/gelu.js +2 -2
  109. package/dist/ops/grads/add16.d.ts +1 -0
  110. package/dist/ops/grads/add16.js +27 -0
  111. package/dist/ops/grads/attentionMask.js +12 -19
  112. package/dist/ops/grads/gelu.js +4 -3
  113. package/dist/ops/grads/matMul16.d.ts +2 -0
  114. package/dist/ops/grads/matMul16.js +9 -0
  115. package/dist/ops/grads/matMulGelu.js +8 -7
  116. package/dist/ops/grads/normRMS.js +8 -7
  117. package/dist/ops/grads/{fusedSoftmax.d.ts → pack16.d.ts} +1 -1
  118. package/dist/ops/grads/pack16.js +7 -0
  119. package/dist/ops/grads/qkv.d.ts +3 -1
  120. package/dist/ops/grads/qkv.js +28 -22
  121. package/dist/ops/grads/rope.d.ts +2 -1
  122. package/dist/ops/grads/rope.js +6 -13
  123. package/dist/ops/grads/softmax16.d.ts +2 -0
  124. package/dist/ops/grads/softmax16.js +26 -0
  125. package/dist/ops/grads/unpack16.d.ts +2 -0
  126. package/dist/ops/grads/unpack16.js +6 -0
  127. package/dist/ops/grads/utils.d.ts +3 -0
  128. package/dist/ops/grads/utils.js +10 -0
  129. package/dist/ops/matMul16.d.ts +15 -0
  130. package/dist/ops/matMul16.js +13 -0
  131. package/dist/ops/matMulGelu.js +1 -1
  132. package/dist/ops/matMulMul.js +1 -1
  133. package/dist/ops/mul16.d.ts +2 -0
  134. package/dist/ops/mul16.js +8 -0
  135. package/dist/ops/mulDrop.js +1 -1
  136. package/dist/ops/normRMS.js +1 -1
  137. package/dist/ops/pack16.d.ts +2 -0
  138. package/dist/ops/pack16.js +6 -0
  139. package/dist/ops/qkv.d.ts +1 -1
  140. package/dist/ops/qkv.js +8 -4
  141. package/dist/ops/reshape16.d.ts +2 -0
  142. package/dist/ops/reshape16.js +43 -0
  143. package/dist/ops/rope.d.ts +1 -1
  144. package/dist/ops/rope.js +8 -10
  145. package/dist/ops/scatterSub.js +1 -1
  146. package/dist/ops/slice16.d.ts +2 -0
  147. package/dist/ops/slice16.js +9 -0
  148. package/dist/ops/softmax16.d.ts +2 -0
  149. package/dist/ops/softmax16.js +12 -0
  150. package/dist/ops/sub16.d.ts +2 -0
  151. package/dist/ops/sub16.js +8 -0
  152. package/dist/ops/sum16.d.ts +2 -0
  153. package/dist/ops/sum16.js +13 -0
  154. package/dist/ops/transpose16.d.ts +3 -0
  155. package/dist/ops/transpose16.js +41 -0
  156. package/dist/ops/unpack16.d.ts +2 -0
  157. package/dist/ops/unpack16.js +6 -0
  158. package/dist/ops/webgl/adamAdjust.js +3 -2
  159. package/dist/ops/webgl/adamMoments.js +2 -1
  160. package/dist/ops/webgl/appendCache.js +2 -1
  161. package/dist/ops/webgl/attentionMask.js +5 -4
  162. package/dist/ops/webgl/fusedSoftmax.js +6 -4
  163. package/dist/ops/webgl/gatherSub.js +7 -6
  164. package/dist/ops/webgl/gelu.js +3 -2
  165. package/dist/ops/webgl/log.js +12 -27
  166. package/dist/ops/webgl/matMul16.d.ts +1 -0
  167. package/dist/ops/webgl/matMul16.js +37 -0
  168. package/dist/ops/webgl/matMulGelu.js +17 -15
  169. package/dist/ops/webgl/matMulMul.js +13 -12
  170. package/dist/ops/webgl/mulDropout.js +9 -8
  171. package/dist/ops/webgl/normRMS.js +8 -7
  172. package/dist/ops/webgl/qkv.js +6 -5
  173. package/dist/ops/webgl/rope.js +11 -10
  174. package/dist/ops/webgl/scatterSub.js +6 -5
  175. package/dist/ops/webgpu/adamAdjust.js +12 -10
  176. package/dist/ops/webgpu/adamMoments.js +27 -22
  177. package/dist/ops/webgpu/add16.d.ts +1 -0
  178. package/dist/ops/webgpu/add16.js +14 -0
  179. package/dist/ops/webgpu/appendCache.js +64 -17
  180. package/dist/ops/webgpu/attentionMask.js +19 -62
  181. package/dist/ops/webgpu/attentionMask32_program.d.ts +19 -0
  182. package/dist/ops/webgpu/attentionMask32_program.js +54 -0
  183. package/dist/ops/webgpu/concat16.d.ts +19 -0
  184. package/dist/ops/webgpu/concat16.js +128 -0
  185. package/dist/ops/webgpu/gatherSub.js +9 -7
  186. package/dist/ops/webgpu/gelu.js +78 -31
  187. package/dist/ops/webgpu/index.js +12 -0
  188. package/dist/ops/webgpu/matMul16.d.ts +1 -0
  189. package/dist/ops/webgpu/matMul16.js +58 -0
  190. package/dist/ops/webgpu/matMul16_program.d.ts +42 -0
  191. package/dist/ops/webgpu/matMul16_program.js +336 -0
  192. package/dist/ops/webgpu/mul16.d.ts +1 -0
  193. package/dist/ops/webgpu/mul16.js +14 -0
  194. package/dist/ops/webgpu/normRMS.js +21 -40
  195. package/dist/ops/webgpu/normRMS16_program.d.ts +9 -0
  196. package/dist/ops/webgpu/normRMS16_program.js +24 -0
  197. package/dist/ops/webgpu/normRMS32_program.d.ts +9 -0
  198. package/dist/ops/webgpu/normRMS32_program.js +24 -0
  199. package/dist/ops/webgpu/normRMSGrad.js +113 -64
  200. package/dist/ops/webgpu/pack16.d.ts +1 -0
  201. package/dist/ops/webgpu/pack16.js +19 -0
  202. package/dist/ops/webgpu/pack16_program.d.ts +19 -0
  203. package/dist/ops/webgpu/pack16_program.js +92 -0
  204. package/dist/ops/webgpu/qkv.js +20 -55
  205. package/dist/ops/webgpu/rope.js +77 -22
  206. package/dist/ops/webgpu/scatterSub.js +9 -7
  207. package/dist/ops/webgpu/slice16.d.ts +7 -0
  208. package/dist/ops/webgpu/slice16.js +71 -0
  209. package/dist/{variable-Bm2OFwGI.js → ops/webgpu/softmax16.d.ts} +2 -8
  210. package/dist/ops/webgpu/softmax16.js +23 -0
  211. package/dist/ops/webgpu/softmax16_program.d.ts +13 -0
  212. package/dist/ops/webgpu/softmax16_program.js +73 -0
  213. package/dist/ops/webgpu/softmax16_subgroup_program.d.ts +17 -0
  214. package/dist/ops/webgpu/softmax16_subgroup_program.js +75 -0
  215. package/dist/ops/webgpu/softmax16grad.d.ts +1 -0
  216. package/dist/ops/webgpu/softmax16grad.js +38 -0
  217. package/dist/ops/webgpu/sub16.d.ts +1 -0
  218. package/dist/ops/webgpu/sub16.js +14 -0
  219. package/dist/ops/webgpu/sum16.d.ts +1 -0
  220. package/dist/ops/webgpu/sum16.js +40 -0
  221. package/dist/ops/webgpu/transpose16.d.ts +1 -0
  222. package/dist/ops/webgpu/transpose16.js +35 -0
  223. package/dist/ops/webgpu/transpose16_program.d.ts +16 -0
  224. package/dist/ops/webgpu/transpose16_program.js +50 -0
  225. package/dist/ops/webgpu/transpose16_shared_program.d.ts +15 -0
  226. package/dist/ops/webgpu/transpose16_shared_program.js +71 -0
  227. package/dist/ops/webgpu/unpack16.d.ts +1 -0
  228. package/dist/ops/webgpu/unpack16.js +49 -0
  229. package/dist/ops/webgpu/utils/binary_op.d.ts +19 -0
  230. package/dist/ops/webgpu/utils/binary_op.js +79 -0
  231. package/dist/ops/webgpu/utils/deviceInfo.d.ts +7 -0
  232. package/dist/ops/webgpu/utils/deviceInfo.js +11 -0
  233. package/dist/ops/webgpu/utils/reductions.d.ts +32 -4
  234. package/dist/ops/webgpu/utils/reductions.js +236 -45
  235. package/dist/ops-CNI3TwqM.js +645 -0
  236. package/dist/pack16-CFUqumar.js +41 -0
  237. package/dist/{papaparse.min-C8l2Kvo1.js → papaparse.min-C0cScC2i.js} +2 -8
  238. package/dist/{parquet-C0Tlmv9c.js → parquet-BE8MU_ge.js} +201 -278
  239. package/dist/patches/PackedTensor.d.ts +12 -0
  240. package/dist/patches/PackedTensor.js +11 -0
  241. package/dist/patches/engine.d.ts +261 -0
  242. package/dist/patches/engine.js +10 -0
  243. package/dist/patches/tape.d.ts +12 -0
  244. package/dist/patches/tape.js +5 -0
  245. package/dist/patches/webgpu_backend.d.ts +18 -0
  246. package/dist/patches/webgpu_backend.js +57 -0
  247. package/dist/{tensor-CZr4dh61.js → patches/webgpu_base.d.ts} +5 -8
  248. package/dist/patches/webgpu_base.js +34 -0
  249. package/dist/patches/webgpu_program.d.ts +36 -0
  250. package/dist/patches/webgpu_program.js +401 -0
  251. package/dist/{pdf-kJD-f258.js → pdf-NIhmP3sq.js} +424 -428
  252. package/dist/random_width-DY6Kk2Dl.js +10051 -0
  253. package/dist/range-BMS52eQi.js +11 -0
  254. package/dist/reciprocal-CTmshQ9J.js +10 -0
  255. package/dist/{register_all_kernels-DIGpEwcf.js → register_all_kernels-Bwu1PTuU.js} +719 -9766
  256. package/dist/relu-yZ2-7WxU.js +10 -0
  257. package/dist/reshape-DevtBWtf.js +10 -0
  258. package/dist/rope-B5UUMsPi.js +32 -0
  259. package/dist/{scatter_nd_util-BQdz--Gn.js → scatter_nd_util-5EL-8VAQ.js} +1 -1
  260. package/dist/selu_util-D1w6yyTO.js +303 -0
  261. package/dist/{shared-DuP7ue-R.js → shared-BRksrJb3.js} +1 -17
  262. package/dist/shared-BuAXb4CI.js +2145 -0
  263. package/dist/sin-BGfy2HZo.js +16 -0
  264. package/dist/slice-D_gkkqZK.js +13 -0
  265. package/dist/slice_util-DtEldBfK.js +261 -0
  266. package/dist/softmax-ZHVebtR1.js +13 -0
  267. package/dist/split-DrfihRpZ.js +10 -0
  268. package/dist/squeeze-DZEpeblb.js +11 -0
  269. package/dist/stack-yOIAalTq.js +13 -0
  270. package/dist/sum-_fzj5ZTB.js +12 -0
  271. package/dist/tensor-DdQUJZlz.js +909 -0
  272. package/dist/tensor-f35l8Odg.js +8 -0
  273. package/dist/tensor1d-CeZuc-Rv.js +12 -0
  274. package/dist/tensor2d-G4Ys2GxX.js +15 -0
  275. package/dist/tensor4d-B8roDgtc.js +15 -0
  276. package/dist/tensor_util-DV-FP5Q3.js +523 -0
  277. package/dist/tfjs_backend-kNyO5L2d.js +653 -0
  278. package/dist/tile-BzyEiF-F.js +13 -0
  279. package/dist/tokeniser/CharTokeniser.js +1 -1
  280. package/dist/tokeniser/bpe.js +1 -1
  281. package/dist/training/Adam.d.ts +2 -1
  282. package/dist/training/Adam.js +12 -28
  283. package/dist/training/AdamExt.d.ts +1 -0
  284. package/dist/training/AdamExt.js +2 -2
  285. package/dist/training/DatasetBuilder.js +3 -20
  286. package/dist/training/FullTrainer.js +55 -48
  287. package/dist/training/Trainer.d.ts +11 -6
  288. package/dist/training/Trainer.js +51 -39
  289. package/dist/training/sparseCrossEntropy.js +3 -3
  290. package/dist/transpose-DKELTqhe.js +38 -0
  291. package/dist/utilities/arrayClose.js +7 -7
  292. package/dist/utilities/dummy.js +35 -27
  293. package/dist/utilities/multinomialCPU.js +2 -2
  294. package/dist/utilities/packed.d.ts +7 -0
  295. package/dist/utilities/packed.js +716 -0
  296. package/dist/utilities/performance.js +1 -1
  297. package/dist/utilities/profile.js +1 -1
  298. package/dist/utilities/safetensors.js +2 -2
  299. package/dist/utilities/sentences.d.ts +5 -0
  300. package/dist/utilities/sentences.js +41 -0
  301. package/dist/utilities/weights.js +2 -2
  302. package/dist/variable-Bhn5bHYv.js +7 -0
  303. package/dist/{webgpu_program-DkQJOJSd.js → webgpu_program-Cigz-7RF.js} +15 -44
  304. package/dist/webgpu_util-BBCnKm2X.js +65 -0
  305. package/dist/zeros-2gldETuK.js +14 -0
  306. package/package.json +4 -3
  307. package/dist/Reshape-Bowtk9BP.js +0 -127
  308. package/dist/Reshape-DUqYftGC.js +0 -30
  309. package/dist/backend_util-CJIiDoV1.js +0 -749
  310. package/dist/broadcast_to-DzlNweb8.js +0 -44
  311. package/dist/concat-B912vBbo.js +0 -33
  312. package/dist/dropout-C-csYCLj.js +0 -193
  313. package/dist/exports_initializers-B8iZMgQ0.js +0 -16
  314. package/dist/gather-Dnpgw-YQ.js +0 -25
  315. package/dist/index-BzFyqcy-.js +0 -4457
  316. package/dist/index-C1rx_Ajs.js +0 -12076
  317. package/dist/kernel_funcs_utils-DKLK0Mg3.js +0 -466
  318. package/dist/log_sum_exp-DO6z8tSE.js +0 -103
  319. package/dist/mat_mul-DzjTFx-u.js +0 -27
  320. package/dist/mod-Dobti4j4.js +0 -27
  321. package/dist/ones-tIJeHlq-.js +0 -29
  322. package/dist/ops/fusedSoftmax.d.ts +0 -2
  323. package/dist/ops/fusedSoftmax.js +0 -10
  324. package/dist/ops/grads/fusedSoftmax.js +0 -22
  325. package/dist/ops-LuCMAnmM.js +0 -1525
  326. package/dist/random_width-CXVRloNK.js +0 -13670
  327. package/dist/range-CWcz7xFA.js +0 -26
  328. package/dist/reciprocal-C4rNcM-S.js +0 -25
  329. package/dist/relu-BjCh_SYb.js +0 -25
  330. package/dist/reshape-CnIwVG1c.js +0 -25
  331. package/dist/selu_util-OtRzVwW5.js +0 -719
  332. package/dist/shared-DmRsFyaJ.js +0 -3134
  333. package/dist/sin-gpDNRxE0.js +0 -47
  334. package/dist/slice-d0Vo9XTN.js +0 -28
  335. package/dist/softmax-D7Jj3p_P.js +0 -28
  336. package/dist/split-DK2k5eHf.js +0 -25
  337. package/dist/stack-DFatutCx.js +0 -27
  338. package/dist/sum-CJ0ULhmt.js +0 -27
  339. package/dist/tensor1d-vML0r3q6.js +0 -27
  340. package/dist/tensor2d-D76QGjF3.js +0 -30
  341. package/dist/tensor4d-Df1WlVDY.js +0 -30
  342. package/dist/webgpu_util-pLEV9tks.js +0 -80
  343. package/dist/zeros-Bj5rMYA7.js +0 -52
@@ -0,0 +1,12 @@
1
+ import { Rank } from '@tensorflow/tfjs-core/dist/types';
2
+ import { Tensor, Variable } from '@tensorflow/tfjs-core/dist/tensor';
3
+ import { TensorInfo } from '@tensorflow/tfjs-core/dist/tensor_info';
4
+ export interface PackedTensorInfo extends TensorInfo {
5
+ packed?: boolean;
6
+ }
7
+ export declare class PackableTensor<R extends Rank = Rank> extends Tensor<R> implements PackedTensorInfo {
8
+ packed: boolean;
9
+ }
10
+ export declare class PackableVariable<R extends Rank = Rank> extends Variable<R> implements PackedTensorInfo {
11
+ packed: boolean;
12
+ }
@@ -0,0 +1,11 @@
1
+ import { T as e, V as s } from "../tensor-DdQUJZlz.js";
2
+ class r extends e {
3
+ packed = !1;
4
+ }
5
+ class c extends s {
6
+ packed = !1;
7
+ }
8
+ export {
9
+ r as PackableTensor,
10
+ c as PackableVariable
11
+ };
@@ -0,0 +1,261 @@
1
+ import { BackendTimingInfo, DataMover, KernelBackend } from '@tensorflow/tfjs-core/dist/backends/backend';
2
+ import { Environment } from '@tensorflow/tfjs-core/dist/environment';
3
+ import { NamedAttrMap } from '@tensorflow/tfjs-core/dist/kernel_registry';
4
+ import { TensorInfo, DataId } from '@tensorflow/tfjs-core/dist/tensor_info';
5
+ import { TapeNode } from '@tensorflow/tfjs-core/dist/tape';
6
+ import { DataToGPUOptions, GPUData, Tensor, TensorTracker, Variable } from '@tensorflow/tfjs-core/dist/tensor';
7
+ import { GradSaveFunc, NamedTensorMap, NamedVariableMap, TensorContainer } from '@tensorflow/tfjs-core/dist/tensor_types';
8
+ import { BackendValues, DataType, DataValues } from '@tensorflow/tfjs-core/dist/types';
9
+ /**
10
+ * A function that computes an output. The save function is for saving tensors
11
+ * computed in the forward pass, that we need in the backward pass.
12
+ */
13
+ export type ForwardFunc<T> = (backend: KernelBackend, save?: GradSaveFunc) => T;
14
+ /**
15
+ * @docalias (a: Tensor, b: Tensor,..., save?: Function) => {
16
+ * value: Tensor,
17
+ * gradFunc: (dy: Tensor, saved?: NamedTensorMap) => Tensor | Tensor[]
18
+ * }
19
+ */
20
+ export type CustomGradientFunc<T extends Tensor> = (...inputs: Array<Tensor | GradSaveFunc>) => {
21
+ value: T;
22
+ gradFunc: (dy: T, saved: Tensor[]) => Tensor | Tensor[];
23
+ };
24
+ export type MemoryInfo = {
25
+ numTensors: number;
26
+ numDataBuffers: number;
27
+ numBytes: number;
28
+ unreliable?: boolean;
29
+ reasons: string[];
30
+ };
31
+ type KernelInfo = {
32
+ name: string;
33
+ bytesAdded: number;
34
+ totalBytesSnapshot: number;
35
+ tensorsAdded: number;
36
+ totalTensorsSnapshot: number;
37
+ inputShapes: number[][];
38
+ outputShapes: number[][];
39
+ kernelTimeMs: number | {
40
+ error: string;
41
+ } | Promise<number | {
42
+ error: string;
43
+ }>;
44
+ extraInfo: string | Promise<string>;
45
+ };
46
+ export type ProfileInfo = {
47
+ newBytes: number;
48
+ newTensors: number;
49
+ peakBytes: number;
50
+ kernels: KernelInfo[];
51
+ result: TensorContainer;
52
+ kernelNames: string[];
53
+ };
54
+ export interface TimingInfo extends BackendTimingInfo {
55
+ wallMs: number;
56
+ }
57
+ /** @docalias Function */
58
+ export type ScopeFn<T extends TensorContainer> = () => T;
59
+ interface ScopeState {
60
+ track: Tensor[];
61
+ name: string;
62
+ id: number;
63
+ }
64
+ declare class EngineState {
65
+ registeredVariables: NamedVariableMap;
66
+ nextTapeNodeId: number;
67
+ numBytes: number;
68
+ numTensors: number;
69
+ numStringTensors: number;
70
+ numDataBuffers: number;
71
+ activeTape: TapeNode[];
72
+ gradientDepth: number;
73
+ kernelDepth: number;
74
+ activeScope: ScopeState;
75
+ scopeStack: ScopeState[];
76
+ /**
77
+ * Keeps track of the number of data moves during a kernel execution. We
78
+ * maintain a stack since kernels can call other kernels, recursively.
79
+ */
80
+ numDataMovesStack: number[];
81
+ nextScopeId: number;
82
+ tensorInfo: WeakMap<object, {
83
+ backend: KernelBackend;
84
+ bytes: number;
85
+ dtype: DataType;
86
+ shape: number[];
87
+ }>;
88
+ profiling: boolean;
89
+ activeProfile: ProfileInfo;
90
+ dispose(): void;
91
+ }
92
+ export declare class Engine implements TensorTracker, DataMover {
93
+ ENV: Environment;
94
+ version: string;
95
+ state: EngineState;
96
+ backendName: string;
97
+ registry: {
98
+ [id: string]: KernelBackend;
99
+ };
100
+ registryFactory: {
101
+ [id: string]: {
102
+ factory: () => KernelBackend | Promise<KernelBackend>;
103
+ priority: number;
104
+ };
105
+ };
106
+ private profiler;
107
+ private backendInstance;
108
+ private pendingBackendInit;
109
+ private pendingBackendInitId;
110
+ constructor(ENV: Environment);
111
+ ready(): Promise<void>;
112
+ get backend(): KernelBackend;
113
+ backendNames(): string[];
114
+ findBackend(backendName: string): KernelBackend;
115
+ findBackendFactory(backendName: string): () => KernelBackend | Promise<KernelBackend>;
116
+ registerBackend(backendName: string, factory: () => KernelBackend | Promise<KernelBackend>, priority?: number): boolean;
117
+ setBackend(backendName: string): Promise<boolean>;
118
+ private setupRegisteredKernels;
119
+ private disposeRegisteredKernels;
120
+ /**
121
+ * Initializes a backend by looking up the backend name in the factory
122
+ * registry and calling the factory method. Returns a boolean representing
123
+ * whether the initialization of the backend succeeded. Throws an error if
124
+ * there is no backend in the factory registry.
125
+ */
126
+ private initializeBackend;
127
+ removeBackend(backendName: string): void;
128
+ private getSortedBackends;
129
+ private initializeBackendsAndReturnBest;
130
+ moveData(backend: KernelBackend, dataId: DataId): void;
131
+ tidy<T extends TensorContainer>(nameOrFn: string | ScopeFn<T>, fn?: ScopeFn<T>): T;
132
+ private scopedRun;
133
+ private static nextTensorId;
134
+ private nextTensorId;
135
+ private static nextVariableId;
136
+ private nextVariableId;
137
+ /**
138
+ * This method is called instead of the public-facing tensor.clone() when
139
+ * saving a tensor for backwards pass. It makes sure to add the clone
140
+ * operation to the tape regardless of being called inside a kernel
141
+ * execution.
142
+ */
143
+ private clone;
144
+ /**
145
+ * Execute a kernel with the given name and return the output tensor.
146
+ *
147
+ * @param kernelName The name of the kernel to execute.
148
+ * @param inputs A map of input names to tensors.
149
+ * @param attrs A map of attribute names to their values. An attribute is a
150
+ * primitive (non-tensor) input to the kernel.
151
+ * @param inputsToSave A list of tensors, inputs to save for the backprop
152
+ * computation.
153
+ * @param outputsToSave A list of booleans, specifying which output to save
154
+ * for the backprop computation. These are booleans since the output
155
+ * tensors are not visible to the user.
156
+ */
157
+ runKernel<T extends Tensor | Tensor[]>(kernelName: string, inputs: NamedTensorMap, attrs?: NamedAttrMap): T;
158
+ private shouldCheckForMemLeaks;
159
+ private checkKernelForMemLeak;
160
+ /**
161
+ * Internal helper method to execute a kernel Func
162
+ *
163
+ * Use `runKernel` to execute kernels from outside of engine.
164
+ */
165
+ private runKernelFunc;
166
+ /**
167
+ * Saves tensors used in forward mode for use in backward mode.
168
+ *
169
+ * @param tensors the list of tensors to save.
170
+ */
171
+ private saveTensorsForBackwardMode;
172
+ /**
173
+ * Returns a list of tensors to save for a given gradient calculation.
174
+ *
175
+ * @param kernelName name of kernel to look up gradient for.
176
+ * @param inputs a map of input tensors.
177
+ * @param outputs an array of output tensors from forward mode of kernel.
178
+ */
179
+ private getTensorsForGradient;
180
+ /**
181
+ * Internal method used by public APIs for tensor creation. Makes a new
182
+ * tensor with the provided shape, dtype and values. It always
183
+ * creates a new data id and writes the values to the underlying backend.
184
+ */
185
+ makeTensor(values: DataValues, shape: number[], dtype: DataType, backend?: KernelBackend): Tensor;
186
+ /**
187
+ * Internal method used by backends. Makes a new tensor
188
+ * that is a wrapper around an existing data id. It doesn't create
189
+ * a new data id, only increments the ref count used in memory tracking.
190
+ * @deprecated
191
+ */
192
+ makeTensorFromDataId(dataId: DataId, shape: number[], dtype: DataType, backend?: KernelBackend): Tensor;
193
+ /**
194
+ * Internal method used by backends. Makes a new tensor that is a wrapper
195
+ * around an existing data id in TensorInfo. It doesn't create a new data id,
196
+ * only increments the ref count used in memory tracking.
197
+ */
198
+ makeTensorFromTensorInfo(tensorInfo: TensorInfo, backend?: KernelBackend): Tensor;
199
+ makeVariable(initialValue: Tensor, trainable?: boolean, name?: string, dtype?: DataType): Variable;
200
+ trackTensor(a: Tensor, backend: KernelBackend): void;
201
+ incRef(a: Tensor, backend: KernelBackend): void;
202
+ removeDataId(dataId: DataId, backend: KernelBackend): void;
203
+ disposeTensor(a: Tensor): void;
204
+ disposeVariables(): void;
205
+ disposeVariable(v: Variable): void;
206
+ memory(): MemoryInfo;
207
+ profile(query: () => TensorContainer | Promise<TensorContainer>): Promise<ProfileInfo>;
208
+ isTapeOn(): boolean;
209
+ private addTapeNode;
210
+ keep<T extends Tensor>(result: T): T;
211
+ private startTape;
212
+ private endTape;
213
+ /**
214
+ * Start a scope. Use this with endScope() to achieve the same functionality
215
+ * as scope() without the need for a function closure.
216
+ */
217
+ startScope(name?: string): void;
218
+ /**
219
+ * End a scope. Use this with startScope() to achieve the same functionality
220
+ * as scope() without the need for a function closure.
221
+ */
222
+ endScope(result?: TensorContainer): void;
223
+ /**
224
+ * Returns gradients of `f` with respect to each of the `xs`. The gradients
225
+ * returned are of the same length as `xs`, but some might be null if `f`
226
+ * was not a function of that `x`. It also takes optional dy to multiply the
227
+ * gradient, which defaults to `1`.
228
+ */
229
+ gradients<T extends Tensor>(f: () => T, xs: Tensor[], dy?: T, allowNoGradients?: boolean): {
230
+ value: T;
231
+ grads: Tensor[];
232
+ };
233
+ customGrad<T extends Tensor>(f: CustomGradientFunc<T>): (...args: Array<Tensor>) => T;
234
+ readSync(dataId: DataId): BackendValues;
235
+ read(dataId: DataId): Promise<BackendValues>;
236
+ readToGPU(dataId: DataId, options?: DataToGPUOptions): GPUData;
237
+ time(query: () => void): Promise<TimingInfo>;
238
+ /**
239
+ * Tracks a Tensor in the current scope to be automatically cleaned up
240
+ * when the current scope ends, and returns the value.
241
+ *
242
+ * @param result The Tensor to track in the current scope.
243
+ */
244
+ private track;
245
+ get registeredVariables(): NamedVariableMap;
246
+ /**
247
+ * Resets the engine state. Removes all backends but does not remove
248
+ * registered backend factories.
249
+ */
250
+ reset(): void;
251
+ }
252
+ export declare function getOrMakeEngine(): Engine;
253
+ export declare const ENGINE: Engine;
254
+ /**
255
+ * An implementation of the add op for use within engine and tape.
256
+ *
257
+ * This allows us to avoid a circular dependency between add.ts and engine.
258
+ * It is exported to be available in tape tests.
259
+ */
260
+ export declare function add(a: Tensor, b: Tensor): Tensor;
261
+ export {};
@@ -0,0 +1,10 @@
1
+ import "../tensor_util-DV-FP5Q3.js";
2
+ import "../tensor-DdQUJZlz.js";
3
+ import "./PackedTensor.js";
4
+ import { a as t, E, c as g, g as m } from "../utilities/packed.js";
5
+ export {
6
+ t as ENGINE,
7
+ E as Engine,
8
+ g as add,
9
+ m as getOrMakeEngine
10
+ };
@@ -0,0 +1,12 @@
1
+ import { Tensor } from '@tensorflow/tfjs-core/dist/tensor';
2
+ import { TapeNode } from '@tensorflow/tfjs-core/dist/tape';
3
+ /**
4
+ * Backpropagate gradients through the filtered TapeNodes.
5
+ *
6
+ * @param tensorAccumulatedGradientMap A map of Tensor to its gradient. This map
7
+ * is mutated by this method.
8
+ * @param filteredTape The filtered TapeNodes to backprop through.
9
+ */
10
+ export declare function backpropagateGradients(tensorAccumulatedGradientMap: {
11
+ [tensorId: number]: Tensor;
12
+ }, filteredTape: TapeNode[], tidy: (f: Function) => Tensor, add: (a: Tensor, b: Tensor) => Tensor): void;
@@ -0,0 +1,5 @@
1
+ import { b as p } from "../utilities/packed.js";
2
+ import "../tensor-DdQUJZlz.js";
3
+ export {
4
+ p as backpropagateGradients
5
+ };
@@ -0,0 +1,18 @@
1
+ import { DataType, TensorInfo } from '@tensorflow/tfjs-core';
2
+ import { WebGPUBackend } from '@tensorflow/tfjs-backend-webgpu/dist/webgpu';
3
+ import { WebGPUProgram } from './webgpu_program';
4
+ type ProgramUniform = Array<{
5
+ type: string;
6
+ data: number[];
7
+ }>;
8
+ interface ExtendedAdapterInfo extends GPUAdapterInfo {
9
+ subgroupMaxSize?: number;
10
+ subgroupMinSize?: number;
11
+ }
12
+ export default class WebGPUBackendPatch extends WebGPUBackend {
13
+ readonly subgroupMaxSize: number;
14
+ readonly subgroupMinSize: number;
15
+ constructor(device: GPUDevice, adapterInfo?: ExtendedAdapterInfo);
16
+ runWebGPUProgram(program: WebGPUProgram, inputs: TensorInfo[], outputDtype: DataType, programDefinedUniform?: ProgramUniform, output?: TensorInfo): TensorInfo;
17
+ }
18
+ export {};
@@ -0,0 +1,57 @@
1
+ import "../index-ZyQhjEPo.js";
2
+ import "../webgpu_util-BBCnKm2X.js";
3
+ import { W as c } from "../backend_webgpu-B0u2ndUn.js";
4
+ import { compileProgram as l } from "./webgpu_program.js";
5
+ import { m } from "../webgpu_program-Cigz-7RF.js";
6
+ import { s as P, g as y, e as M, a as n } from "../tensor-DdQUJZlz.js";
7
/**
 * Chooses a dispatch grid for program `s` that respects the device's
 * `maxComputeWorkgroupsPerDimension` limit.
 *
 * If every requested dimension already fits, the program's own dispatch is
 * returned unchanged. Otherwise the X count is redistributed over a square
 * `[e, e, 1]` grid, or over a cube `[e, e, e]` when the square side is still
 * too large.
 *
 * `n` is the assertion helper imported from the tensor module.
 * NOTE(review): presumably it throws with the supplied message when the
 * condition is false; confirm.
 *
 * @param h GPU device; only `limits.maxComputeWorkgroupsPerDimension` is read.
 * @param s Program; `dispatch` and `dispatchLayout` are read.
 * @returns A three-element dispatch array.
 */
const b = (h, s) => {
  const perDimensionLimit = h.limits.maxComputeWorkgroupsPerDimension;
  const layout = s.dispatchLayout;
  const requested = s.dispatch;

  const alreadyFits = requested.every((count) => count <= perDimensionLimit);
  if (alreadyFits) {
    return requested;
  }

  // Only an oversized X dimension of an X-only layout can be redistributed.
  n(
    requested[0] > perDimensionLimit && layout.y === void 0 && layout.z === void 0,
    () => "Dispatch size exceeds WebGPU limits in Y or Z dimension."
  );

  let side = Math.ceil(Math.sqrt(requested[0]));
  if (side > perDimensionLimit) {
    // A square is still too wide; fall back to a cube.
    side = Math.ceil(Math.cbrt(requested[0]));
    n(
      side <= perDimensionLimit,
      () => "Total dispatch size exceeds WebGPU maximum."
    );
    return [side, side, side];
  }
  return [side, side, 1];
};
21
/**
 * Patched WebGPU backend: records the adapter's subgroup size bounds and
 * provides its own `runWebGPUProgram`.
 */
class U extends c {
  subgroupMaxSize;
  subgroupMinSize;

  /**
   * @param device GPU device, forwarded to the base backend.
   * @param adapterInfo Optional adapter info, forwarded to the base backend.
   *     Its `subgroupMaxSize` / `subgroupMinSize` are stored, defaulting to 0
   *     when absent.
   */
  constructor(device, adapterInfo) {
    super(device, adapterInfo);
    this.subgroupMaxSize = adapterInfo?.subgroupMaxSize ?? 0;
    this.subgroupMinSize = adapterInfo?.subgroupMinSize ?? 0;
  }

  /**
   * Compiles (or fetches from the pipeline cache) and submits `program`.
   *
   * @param program Program to run; its `dispatch`, `shaderKey` and `pipeline`
   *     are overwritten here.
   * @param inputs Input tensor infos; `complex64` inputs are rejected.
   * @param outputDtype Dtype used when an output tensor has to be created.
   * @param programDefinedUniform Optional uniforms passed to `recordAndSubmit`.
   * @param output Optional existing output tensor info; created when omitted.
   * @returns The output tensor info.
   */
  runWebGPUProgram(program, inputs, outputDtype, programDefinedUniform, output) {
    if (!output) {
      output = this.makeTensorInfo(program.outputShape, outputDtype);
    }

    // Zero-size output: store the result of `y(dtype, 0)` as the values and
    // skip the GPU entirely.
    // NOTE(review): `P` looks like a shape-size helper and `y` like a
    // typed-array factory; confirm.
    if (P(output.shape) === 0) {
      this.tensorMap.get(output.dataId).values = y(output.dtype, 0);
      return output;
    }

    this.uploadToGPU(output.dataId);
    program.dispatch = b(this.device, program);

    const inputsData = inputs.map((input, index) => {
      if (input.dtype === "complex64") {
        throw new Error(
          "GPGPUProgram does not support complex64 input. For complex64 dtypes, please separate the program into real and imaginary parts."
        );
      }
      this.uploadToGPU(input.dataId);
      return {
        // Returning dtype from tensorMap because it reflects dtype
        // of underlying buffer, rather than abstract dtype.
        dtype: this.tensorMap.get(input.dataId).dtype,
        shape: input.shape,
        name: program.variableNames[index]
      };
    });

    program.shaderKey = m(program, inputsData, output);

    const compileOnly = M().getBool("WEBGPU_ENGINE_COMPILE_ONLY");
    if (!(program.shaderKey in this.pipelineCache)) {
      this.pipelineCache[program.shaderKey] = l(
        this.device,
        program,
        inputsData,
        output,
        compileOnly
      );
    }
    program.pipeline = this.pipelineCache[program.shaderKey];

    // In compile-only mode the pipeline is cached but nothing is submitted.
    if (!compileOnly) {
      this.recordAndSubmit(program, output, inputs, programDefinedUniform);
    }
    return output;
  }
}
55
+ export {
56
+ U as default
57
+ };
@@ -1,7 +1,6 @@
1
- import { C as t, D as a } from "./index-BzFyqcy-.js";
2
1
  /**
3
2
  * @license
4
- * Copyright 2018 Google LLC. All Rights Reserved.
3
+ * Copyright 2022 Google Inc. All Rights Reserved.
5
4
  * Licensed under the Apache License, Version 2.0 (the "License");
6
5
  * you may not use this file except in compliance with the License.
7
6
  * You may obtain a copy of the License at
@@ -15,10 +14,8 @@ import { C as t, D as a } from "./index-BzFyqcy-.js";
15
14
  * limitations under the License.
16
15
  * =============================================================================
17
16
  */
18
- function f(r, n, e) {
19
- const o = t(r, e);
20
- return a(r, n, o, e);
17
+ export interface GPUOptions {
18
+ powerPreference?: 'low-power' | 'high-performance';
19
+ disableSubgroups?: boolean;
21
20
  }
22
- export {
23
- f as t
24
- };
21
+ export declare function registerWebGPUBackend(options?: GPUOptions): void;
@@ -0,0 +1,34 @@
1
+ import { f as n } from "../index-ZyQhjEPo.js";
2
+ import p from "./webgpu_backend.js";
3
/**
 * Registers the patched WebGPU backend under the name "webgpu" with
 * priority 3.
 *
 * The registered factory requests an adapter, enables the optional features
 * the adapter supports ("timestamp-query", "bgra8unorm-storage" and, unless
 * disabled, "subgroups"), raises a fixed set of device limits to the
 * adapter's values, and constructs the backend from the resulting device and
 * adapter info.
 *
 * @param a Optional settings:
 *     `powerPreference` (defaults to "high-performance") and
 *     `disableSubgroups` (when truthy, "subgroups" is never requested).
 */
function c(a) {
  n(
    "webgpu",
    async () => {
      const adapterOptions = {
        powerPreference: a?.powerPreference ?? "high-performance"
      };
      console.log("Using custom WebGPU backend with power preference:", adapterOptions.powerPreference);

      // `navigator.gpu` is absent where WebGPU is unsupported, and
      // `requestAdapter` resolves to null when no adapter is available. Both
      // cases reject with one explicit error instead of an opaque TypeError.
      const gpu = typeof navigator !== "undefined" ? navigator.gpu : void 0;
      const adapter = gpu ? await gpu.requestAdapter(adapterOptions) : null;
      if (!adapter) {
        throw new Error("WebGPU is not available: no GPU adapter could be acquired.");
      }

      // `requiredFeatures` must be a flat list of feature-name strings.
      const requiredFeatures = [];
      if (adapter.features.has("timestamp-query")) {
        requiredFeatures.push("timestamp-query");
      }
      if (adapter.features.has("bgra8unorm-storage")) {
        requiredFeatures.push("bgra8unorm-storage");
      }
      if (!a?.disableSubgroups && adapter.features.has("subgroups")) {
        requiredFeatures.push("subgroups");
      }

      const adapterLimits = adapter.limits;
      const deviceDescriptor = {
        requiredFeatures,
        requiredLimits: {
          maxComputeWorkgroupStorageSize: adapterLimits.maxComputeWorkgroupStorageSize,
          maxComputeWorkgroupsPerDimension: adapterLimits.maxComputeWorkgroupsPerDimension,
          maxStorageBufferBindingSize: adapterLimits.maxStorageBufferBindingSize,
          maxBufferSize: adapterLimits.maxBufferSize,
          maxComputeWorkgroupSizeX: adapterLimits.maxComputeWorkgroupSizeX,
          maxComputeInvocationsPerWorkgroup: adapterLimits.maxComputeInvocationsPerWorkgroup
        }
      };
      const device = await adapter.requestDevice(deviceDescriptor);

      // Prefer the `info` attribute; fall back to `requestAdapterInfo()`
      // when only that method is present on the adapter.
      let adapterInfo;
      if ("info" in adapter) {
        adapterInfo = adapter.info;
      } else if ("requestAdapterInfo" in adapter) {
        adapterInfo = await adapter.requestAdapterInfo();
      }
      return new p(device, adapterInfo);
    },
    3
    /*priority*/
  );
}
32
+ export {
33
+ c as registerWebGPUBackend
34
+ };
@@ -0,0 +1,36 @@
1
+ import { DataType, TensorInfo } from '@tensorflow/tfjs-core';
2
/** Pixel-operation tags; referenced by `WebGPUProgram.pixelsOpType`. */
export declare enum PixelsOpType {
  FROM_PIXELS = 0,
  DRAW = 1,
}
6
/**
 * Shape of a compute program object as accepted by `compileProgram` and
 * `getStartHeaderString`.
 *
 * Required members are listed first, optional ones after.
 */
export interface WebGPUProgram {
  // --- required ---
  /** Workgroup counts for the three dispatch dimensions. */
  dispatch: [number, number, number];
  /** Per-dimension layout lists; `y` and `z` may be omitted. */
  dispatchLayout: {
    x: number[];
    y?: number[];
    z?: number[];
  };
  outputShape: number[];
  shaderKey: string;
  variableNames: string[];
  workgroupSize: [number, number, number];
  /** Returns a string; NOTE(review): presumably the shader body source. */
  getUserCode: () => string;

  // --- optional ---
  atomic?: boolean;
  subgroups?: boolean;
  subgroupBuiltins?: boolean;
  outputComponent?: number;
  pixelsOpType?: PixelsOpType;
  size?: boolean;
  uniforms?: string;
  variableComponents?: number[];
  workPerThread?: number;
  /** Compiled pipeline, or a promise for one. */
  pipeline?: GPUComputePipeline | Promise<GPUComputePipeline>;
}
29
/** Description of one program input: its dtype, shape and variable name. */
type InputInfo = {
  dtype: DataType;
  shape: number[];
  name: string;
};

/**
 * Builds a compute pipeline for `program` on `device`.
 *
 * @param device GPU device to compile for.
 * @param program The program to compile.
 * @param inputsData One `InputInfo` per program input.
 * @param output Tensor info of the program output.
 * @param parallelCompilation Compilation mode flag.
 *     NOTE(review): presumably selects the Promise-returning path; confirm
 *     against the implementation.
 * @returns The pipeline, or a promise resolving to it.
 */
export declare const compileProgram: (
  device: GPUDevice,
  program: WebGPUProgram,
  inputsData: InputInfo[],
  output: TensorInfo,
  parallelCompilation: boolean
) => GPUComputePipeline | Promise<GPUComputePipeline>;
35
/**
 * Returns a header string for `program`.
 *
 * @param useGlobalIndex Boolean switch passed by the caller.
 *     NOTE(review): exact effect is not visible from this declaration;
 *     confirm against the implementation.
 * @param program The program the header is generated for.
 */
export declare function getStartHeaderString(
  useGlobalIndex: boolean,
  program: WebGPUProgram
): string;
36
+ export {};