@agency-lang/whisper-local 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (643) hide show
  1. package/CMakeLists.txt +51 -0
  2. package/README.md +145 -0
  3. package/build/Release/whisper_addon.node +0 -0
  4. package/dist/src/addon.d.ts +11 -0
  5. package/dist/src/addon.js +22 -0
  6. package/dist/src/cli.d.ts +2 -0
  7. package/dist/src/cli.js +117 -0
  8. package/dist/src/ffmpeg.d.ts +11 -0
  9. package/dist/src/ffmpeg.js +154 -0
  10. package/dist/src/handleCache.d.ts +9 -0
  11. package/dist/src/handleCache.js +83 -0
  12. package/dist/src/modelManager.d.ts +12 -0
  13. package/dist/src/modelManager.js +172 -0
  14. package/dist/src/packageRoot.d.ts +8 -0
  15. package/dist/src/packageRoot.js +21 -0
  16. package/dist/src/transcribe.d.ts +2 -0
  17. package/dist/src/transcribe.js +36 -0
  18. package/dist/src/types.d.ts +11 -0
  19. package/dist/src/types.js +17 -0
  20. package/index.agency +32 -0
  21. package/models.lock.json +55 -0
  22. package/package.json +52 -0
  23. package/vendor/whisper.cpp/CMakeLists.txt +251 -0
  24. package/vendor/whisper.cpp/LICENSE +21 -0
  25. package/vendor/whisper.cpp/UPSTREAM_SHA256 +1 -0
  26. package/vendor/whisper.cpp/VERSION +1 -0
  27. package/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake +16 -0
  28. package/vendor/whisper.cpp/cmake/FindFFmpeg.cmake +163 -0
  29. package/vendor/whisper.cpp/cmake/build-info.cmake +60 -0
  30. package/vendor/whisper.cpp/cmake/git-vars.cmake +22 -0
  31. package/vendor/whisper.cpp/cmake/whisper-config.cmake.in +65 -0
  32. package/vendor/whisper.cpp/cmake/whisper.pc.in +10 -0
  33. package/vendor/whisper.cpp/ggml/CMakeLists.txt +434 -0
  34. package/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake +54 -0
  35. package/vendor/whisper.cpp/ggml/cmake/GitVars.cmake +22 -0
  36. package/vendor/whisper.cpp/ggml/cmake/common.cmake +50 -0
  37. package/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  38. package/vendor/whisper.cpp/ggml/include/ggml-alloc.h +76 -0
  39. package/vendor/whisper.cpp/ggml/include/ggml-backend.h +354 -0
  40. package/vendor/whisper.cpp/ggml/include/ggml-blas.h +25 -0
  41. package/vendor/whisper.cpp/ggml/include/ggml-cann.h +123 -0
  42. package/vendor/whisper.cpp/ggml/include/ggml-cpp.h +39 -0
  43. package/vendor/whisper.cpp/ggml/include/ggml-cpu.h +143 -0
  44. package/vendor/whisper.cpp/ggml/include/ggml-cuda.h +47 -0
  45. package/vendor/whisper.cpp/ggml/include/ggml-kompute.h +50 -0
  46. package/vendor/whisper.cpp/ggml/include/ggml-metal.h +66 -0
  47. package/vendor/whisper.cpp/ggml/include/ggml-opencl.h +26 -0
  48. package/vendor/whisper.cpp/ggml/include/ggml-opt.h +237 -0
  49. package/vendor/whisper.cpp/ggml/include/ggml-rpc.h +33 -0
  50. package/vendor/whisper.cpp/ggml/include/ggml-sycl.h +49 -0
  51. package/vendor/whisper.cpp/ggml/include/ggml-vulkan.h +29 -0
  52. package/vendor/whisper.cpp/ggml/include/ggml.h +2221 -0
  53. package/vendor/whisper.cpp/ggml/include/gguf.h +202 -0
  54. package/vendor/whisper.cpp/ggml/src/CMakeLists.txt +404 -0
  55. package/vendor/whisper.cpp/ggml/src/ggml-alloc.c +1042 -0
  56. package/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  57. package/vendor/whisper.cpp/ggml/src/ggml-amx/common.h +94 -0
  58. package/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  59. package/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  60. package/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  61. package/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h +255 -0
  62. package/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp +591 -0
  63. package/vendor/whisper.cpp/ggml/src/ggml-backend.cpp +2016 -0
  64. package/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  65. package/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  66. package/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt +75 -0
  67. package/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  68. package/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp +181 -0
  69. package/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  70. package/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +3193 -0
  71. package/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  72. package/vendor/whisper.cpp/ggml/src/ggml-cann/common.h +425 -0
  73. package/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2630 -0
  74. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  75. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  76. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp +234 -0
  77. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  78. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  79. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  80. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  81. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  82. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  83. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  84. package/vendor/whisper.cpp/ggml/src/ggml-common.h +1861 -0
  85. package/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt +584 -0
  86. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  87. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  88. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  89. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  90. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  91. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  92. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  93. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
  94. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  95. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  96. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  97. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  98. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  99. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  100. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  101. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  102. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  103. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +3284 -0
  104. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  105. package/vendor/whisper.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  106. package/vendor/whisper.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  107. package/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  108. package/vendor/whisper.cpp/ggml/src/ggml-cpu/common.h +72 -0
  109. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +511 -0
  110. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3473 -0
  111. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  112. package/vendor/whisper.cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  113. package/vendor/whisper.cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  114. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  115. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  116. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  117. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  118. package/vendor/whisper.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3593 -0
  119. package/vendor/whisper.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
  120. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ops.cpp +9085 -0
  121. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ops.h +111 -0
  122. package/vendor/whisper.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  123. package/vendor/whisper.cpp/ggml/src/ggml-cpu/quants.h +89 -0
  124. package/vendor/whisper.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
  125. package/vendor/whisper.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  126. package/vendor/whisper.cpp/ggml/src/ggml-cpu/simd-mappings.h +1006 -0
  127. package/vendor/whisper.cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  128. package/vendor/whisper.cpp/ggml/src/ggml-cpu/traits.h +38 -0
  129. package/vendor/whisper.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  130. package/vendor/whisper.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  131. package/vendor/whisper.cpp/ggml/src/ggml-cpu/vec.cpp +321 -0
  132. package/vendor/whisper.cpp/ggml/src/ggml-cpu/vec.h +973 -0
  133. package/vendor/whisper.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  134. package/vendor/whisper.cpp/ggml/src/ggml-cuda/acc.cu +61 -0
  135. package/vendor/whisper.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  136. package/vendor/whisper.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  137. package/vendor/whisper.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  138. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  139. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  140. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  141. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  142. package/vendor/whisper.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  143. package/vendor/whisper.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  144. package/vendor/whisper.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  145. package/vendor/whisper.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  146. package/vendor/whisper.cpp/ggml/src/ggml-cuda/common.cuh +812 -0
  147. package/vendor/whisper.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  148. package/vendor/whisper.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  149. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  150. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  151. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  152. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  153. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  154. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  155. package/vendor/whisper.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  156. package/vendor/whisper.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  157. package/vendor/whisper.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  158. package/vendor/whisper.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  159. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  160. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cpy.cu +705 -0
  161. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  162. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  163. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  164. package/vendor/whisper.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  165. package/vendor/whisper.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  166. package/vendor/whisper.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  167. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  168. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
  169. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  170. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  171. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  172. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  173. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  174. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  175. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  176. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  177. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn.cu +346 -0
  178. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  179. package/vendor/whisper.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  180. package/vendor/whisper.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  181. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3562 -0
  182. package/vendor/whisper.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  183. package/vendor/whisper.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  184. package/vendor/whisper.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  185. package/vendor/whisper.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  186. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  187. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmq.cu +324 -0
  188. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  189. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  190. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  191. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  192. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  193. package/vendor/whisper.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  194. package/vendor/whisper.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  195. package/vendor/whisper.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  196. package/vendor/whisper.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  197. package/vendor/whisper.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  198. package/vendor/whisper.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  199. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  200. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  201. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  202. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  203. package/vendor/whisper.cpp/ggml/src/ggml-cuda/quantize.cu +190 -0
  204. package/vendor/whisper.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  205. package/vendor/whisper.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  206. package/vendor/whisper.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  207. package/vendor/whisper.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  208. package/vendor/whisper.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  209. package/vendor/whisper.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  210. package/vendor/whisper.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  211. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  212. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  213. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
  214. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  215. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  216. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  217. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  218. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  219. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  220. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  221. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  222. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  223. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  224. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  225. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  226. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  227. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  228. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  229. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  230. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  231. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  232. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  233. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  234. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  235. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  236. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  237. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  238. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  239. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  240. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  241. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  242. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  243. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  244. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  245. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  246. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  247. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  248. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  249. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  250. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  251. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  252. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  253. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  254. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  255. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  256. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  257. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  258. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  259. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  260. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  261. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  262. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  263. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  264. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  265. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  266. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  267. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  268. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  269. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  270. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  271. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  272. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  273. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  274. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  275. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  276. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  277. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  278. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  279. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  280. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  281. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  282. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  283. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  284. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  285. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  286. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  287. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  288. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  289. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  290. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  291. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  292. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  293. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  294. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  295. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  296. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  297. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  298. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  299. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  300. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  301. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  302. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  303. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  304. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  305. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  306. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  307. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  308. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  309. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  310. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  311. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  312. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  313. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  314. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  315. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  316. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  317. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  318. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  319. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  320. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  321. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  322. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  323. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  324. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  325. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  326. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  327. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  328. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  329. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  330. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  331. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  332. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  333. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  334. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  335. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  336. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  337. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  338. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  339. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  340. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  341. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  342. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  343. package/vendor/whisper.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  344. package/vendor/whisper.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  345. package/vendor/whisper.cpp/ggml/src/ggml-cuda/unary.cu +289 -0
  346. package/vendor/whisper.cpp/ggml/src/ggml-cuda/unary.cuh +59 -0
  347. package/vendor/whisper.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  348. package/vendor/whisper.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  349. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  350. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  351. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  352. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  353. package/vendor/whisper.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  354. package/vendor/whisper.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  355. package/vendor/whisper.cpp/ggml/src/ggml-hip/CMakeLists.txt +135 -0
  356. package/vendor/whisper.cpp/ggml/src/ggml-impl.h +603 -0
  357. package/vendor/whisper.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  358. package/vendor/whisper.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  359. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  360. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  361. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  362. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  363. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  364. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  365. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  366. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  367. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  368. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  369. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  370. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  371. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  372. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  373. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  374. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  375. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  376. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  377. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  378. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  379. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  380. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  381. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  382. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  383. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  384. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  385. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  386. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  387. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  388. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  389. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  390. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  391. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  392. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  393. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  394. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  395. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  396. package/vendor/whisper.cpp/ggml/src/ggml-metal/CMakeLists.txt +121 -0
  397. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
  398. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal.m +6023 -0
  399. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal.metal +7124 -0
  400. package/vendor/whisper.cpp/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  401. package/vendor/whisper.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  402. package/vendor/whisper.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  403. package/vendor/whisper.cpp/ggml/src/ggml-opencl/CMakeLists.txt +109 -0
  404. package/vendor/whisper.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6665 -0
  405. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  406. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  407. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  408. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  409. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  410. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  411. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  412. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  413. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  414. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  415. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  416. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  417. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  418. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  419. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  420. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  421. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  422. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  423. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  424. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  425. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  426. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  427. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  428. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  429. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  430. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  431. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  432. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  433. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  434. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  435. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  436. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  437. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  438. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  439. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  440. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  441. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  442. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  443. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  444. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  445. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  446. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  447. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  448. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  449. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  450. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  451. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  452. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  453. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  454. package/vendor/whisper.cpp/ggml/src/ggml-opt.cpp +1037 -0
  455. package/vendor/whisper.cpp/ggml/src/ggml-quants.c +5230 -0
  456. package/vendor/whisper.cpp/ggml/src/ggml-quants.h +100 -0
  457. package/vendor/whisper.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  458. package/vendor/whisper.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1816 -0
  459. package/vendor/whisper.cpp/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  460. package/vendor/whisper.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  461. package/vendor/whisper.cpp/ggml/src/ggml-sycl/binbcast.cpp +344 -0
  462. package/vendor/whisper.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  463. package/vendor/whisper.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  464. package/vendor/whisper.cpp/ggml/src/ggml-sycl/common.hpp +584 -0
  465. package/vendor/whisper.cpp/ggml/src/ggml-sycl/concat.cpp +182 -0
  466. package/vendor/whisper.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  467. package/vendor/whisper.cpp/ggml/src/ggml-sycl/conv.cpp +95 -0
  468. package/vendor/whisper.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  469. package/vendor/whisper.cpp/ggml/src/ggml-sycl/convert.cpp +575 -0
  470. package/vendor/whisper.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  471. package/vendor/whisper.cpp/ggml/src/ggml-sycl/cpy.cpp +839 -0
  472. package/vendor/whisper.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  473. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dequantize.hpp +823 -0
  474. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dmmv.cpp +1144 -0
  475. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  476. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
  477. package/vendor/whisper.cpp/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
  478. package/vendor/whisper.cpp/ggml/src/ggml-sycl/element_wise.hpp +77 -0
  479. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gemm.hpp +102 -0
  480. package/vendor/whisper.cpp/ggml/src/ggml-sycl/getrows.cpp +212 -0
  481. package/vendor/whisper.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  482. package/vendor/whisper.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4608 -0
  483. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gla.cpp +106 -0
  484. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  485. package/vendor/whisper.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  486. package/vendor/whisper.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  487. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmq.cpp +3010 -0
  488. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  489. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
  490. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  491. package/vendor/whisper.cpp/ggml/src/ggml-sycl/norm.cpp +482 -0
  492. package/vendor/whisper.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  493. package/vendor/whisper.cpp/ggml/src/ggml-sycl/outprod.cpp +47 -0
  494. package/vendor/whisper.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  495. package/vendor/whisper.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  496. package/vendor/whisper.cpp/ggml/src/ggml-sycl/quants.hpp +111 -0
  497. package/vendor/whisper.cpp/ggml/src/ggml-sycl/rope.cpp +472 -0
  498. package/vendor/whisper.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  499. package/vendor/whisper.cpp/ggml/src/ggml-sycl/softmax.cpp +261 -0
  500. package/vendor/whisper.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  501. package/vendor/whisper.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  502. package/vendor/whisper.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  503. package/vendor/whisper.cpp/ggml/src/ggml-sycl/tsembd.cpp +67 -0
  504. package/vendor/whisper.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  505. package/vendor/whisper.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
  506. package/vendor/whisper.cpp/ggml/src/ggml-sycl/wkv.cpp +289 -0
  507. package/vendor/whisper.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  508. package/vendor/whisper.cpp/ggml/src/ggml-threading.cpp +12 -0
  509. package/vendor/whisper.cpp/ggml/src/ggml-threading.h +14 -0
  510. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +189 -0
  511. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  512. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10937 -0
  513. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +27 -0
  514. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  515. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  516. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  517. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  518. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  519. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  520. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  521. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  522. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  523. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  524. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  525. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  526. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  527. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  528. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  529. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  530. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  531. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  532. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  533. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  534. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  535. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  536. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  537. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  538. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  539. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  540. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  541. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  542. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  543. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  544. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  545. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  546. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  547. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  548. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  549. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  550. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  551. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  552. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  553. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  554. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  555. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  556. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  557. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  558. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  559. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  560. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  561. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  562. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  563. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  564. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  565. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  566. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  567. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  568. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  569. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  570. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  571. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  572. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  573. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  574. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  575. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  576. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  577. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  578. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  579. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  580. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  581. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  582. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  583. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  584. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  585. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  586. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  587. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  588. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  589. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  590. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  591. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  592. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  593. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  594. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  595. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  596. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  597. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  598. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  599. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  600. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  601. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  602. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  603. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  604. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  605. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  606. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  607. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  608. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  609. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  610. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  611. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  612. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  613. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  614. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  615. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  616. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  617. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  618. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  619. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  620. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  621. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  622. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  623. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  624. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +753 -0
  625. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  626. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  627. package/vendor/whisper.cpp/ggml/src/ggml.c +6601 -0
  628. package/vendor/whisper.cpp/ggml/src/ggml.cpp +26 -0
  629. package/vendor/whisper.cpp/ggml/src/gguf.cpp +1347 -0
  630. package/vendor/whisper.cpp/include/whisper.h +738 -0
  631. package/vendor/whisper.cpp/src/CMakeLists.txt +145 -0
  632. package/vendor/whisper.cpp/src/coreml/whisper-compat.h +10 -0
  633. package/vendor/whisper.cpp/src/coreml/whisper-compat.m +35 -0
  634. package/vendor/whisper.cpp/src/coreml/whisper-decoder-impl.h +158 -0
  635. package/vendor/whisper.cpp/src/coreml/whisper-decoder-impl.m +227 -0
  636. package/vendor/whisper.cpp/src/coreml/whisper-encoder-impl.h +154 -0
  637. package/vendor/whisper.cpp/src/coreml/whisper-encoder-impl.m +223 -0
  638. package/vendor/whisper.cpp/src/coreml/whisper-encoder.h +26 -0
  639. package/vendor/whisper.cpp/src/coreml/whisper-encoder.mm +73 -0
  640. package/vendor/whisper.cpp/src/openvino/whisper-openvino-encoder.cpp +108 -0
  641. package/vendor/whisper.cpp/src/openvino/whisper-openvino-encoder.h +31 -0
  642. package/vendor/whisper.cpp/src/whisper-arch.h +197 -0
  643. package/vendor/whisper.cpp/src/whisper.cpp +8969 -0
@@ -0,0 +1,30 @@
1
+ file(GLOB SRC_FILES
2
+ get_row_f32.cpp
3
+ get_row_f16.cpp
4
+ get_row_q4_0.cpp
5
+ get_row_q8_0.cpp
6
+ quantize_f32_q8_0.cpp
7
+ quantize_f16_q8_0.cpp
8
+ quantize_float_to_q4_0.cpp
9
+ dup.cpp
10
+ )
11
+
12
+ set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
13
+ set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")
14
+
15
+ if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
16
+ set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
17
+ elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
18
+ set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
19
+ else()
20
+ message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
21
+ endif()
22
+ include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
23
+
24
+ ascendc_library(ascendc_kernels STATIC
25
+ ${SRC_FILES}
26
+ )
27
+
28
+ message(STATUS "CANN: compile ascend kernels witch SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
29
+ ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
30
+ # ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
@@ -0,0 +1,19 @@
1
+ #ifndef ASCENDC_KERNELS_H
2
+ #define ASCENDC_KERNELS_H
3
+
4
+ #include "aclrtlaunch_ascendc_get_row_f32.h"
5
+ #include "aclrtlaunch_ascendc_get_row_f16.h"
6
+ #include "aclrtlaunch_ascendc_get_row_q8_0.h"
7
+ #include "aclrtlaunch_ascendc_get_row_q4_0.h"
8
+
9
+ #include "aclrtlaunch_ascendc_quantize_f32_q8_0.h"
10
+ #include "aclrtlaunch_ascendc_quantize_f16_q8_0.h"
11
+ #include "aclrtlaunch_ascendc_quantize_f16_to_q4_0.h"
12
+ #include "aclrtlaunch_ascendc_quantize_f32_to_q4_0.h"
13
+
14
+ #include "aclrtlaunch_ascendc_dup_by_rows_fp16.h"
15
+ #include "aclrtlaunch_ascendc_dup_by_rows_fp32.h"
16
+ #include "aclrtlaunch_ascendc_dup_by_rows_fp32_to_fp16.h"
17
+ #include "aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32.h"
18
+
19
+ #endif // ASCENDC_KERNELS_H
@@ -0,0 +1,234 @@
1
+ #include "kernel_operator.h"
2
+
3
+ using namespace AscendC;
4
+
5
+ #define BUFFER_NUM 2
6
+ const int64_t SUPPORTED_MAX_DIM = 65535; // currently the limit of max block dim supportted by dup kernel is 65535template <typename SRC_T, typename DST_T>
7
+
8
+ template <typename SRC_T, typename DST_T>
9
+ class DupByRows {
10
+ public:
11
+ __aicore__ inline DupByRows() {}
12
+ __aicore__ inline void init(GM_ADDR src, GM_ADDR dst, int64_t *input_ne_ub,
13
+ size_t *input_nb_ub) {
14
+ /* Dup by rows when src is contigous on first dimension and dst is
15
+ contiguous, each kernel process one row.
16
+ */
17
+
18
+ // Input has four dims.
19
+ int64_t op_block_num = GetBlockNum();
20
+ int64_t op_block_idx = GetBlockIdx();
21
+
22
+ // param
23
+ num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];
24
+ num_elem = input_ne_ub[0];
25
+
26
+ // index for (ne[1], ne[2], ne[3]): (idx_ne1, idx_ne2, idx_ne3)
27
+ idx_ne3 = op_block_idx / (input_ne_ub[1] * input_ne_ub[2]);
28
+ idx_ne2 = (op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2]))
29
+ / (input_ne_ub[1]);
30
+ idx_ne1 = op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2])
31
+ - idx_ne2 * input_ne_ub[1];
32
+
33
+ // src may not contiguous in dim [1,2,3], so stride decited by ne&nb
34
+ src_stride = input_nb_ub[3] * idx_ne3 + input_nb_ub[2] * idx_ne2
35
+ + input_nb_ub[1] * idx_ne1;
36
+
37
+ // dst is contiguous
38
+ dst_stride = op_block_idx * (input_ne_ub[0] * sizeof(DST_T));
39
+
40
+ src_gm.SetGlobalBuffer(reinterpret_cast<__gm__ SRC_T *>(src +
41
+ src_stride));
42
+ dst_gm.SetGlobalBuffer(reinterpret_cast<__gm__ DST_T *>(dst +
43
+ dst_stride));
44
+
45
+ pipe.InitBuffer(src_queue, BUFFER_NUM, (sizeof(SRC_T) * num_elem +
46
+ 32 - 1) / 32 * 32);
47
+ pipe.InitBuffer(dst_queue, BUFFER_NUM, (sizeof(DST_T) * num_elem +
48
+ 32 - 1) / 32 * 32);
49
+ }
50
+
51
+ __aicore__ inline void copy_in() {
52
+ LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
53
+ const size_t elem_per_block = 32 / sizeof(SRC_T);
54
+ size_t tail = num_elem % elem_per_block;
55
+ size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
56
+ DataCopy(src_local, src_gm, cpy_elements_len);
57
+ src_queue.EnQue(src_local);
58
+ }
59
+
60
+ __aicore__ inline void copy_out() {
61
+ LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
62
+ #ifdef ASCEND_310P
63
+ const size_t elem_per_block = 32 / sizeof(DST_T);
64
+ size_t tail = num_elem % elem_per_block;
65
+ size_t len = num_elem & ~(elem_per_block - 1);
66
+ if (len > 0) {
67
+ DataCopy(dst_gm, dst_local, len);
68
+ }
69
+ if(tail != 0) {
70
+ for (size_t i = tail; i < elem_per_block; i++) {
71
+ dst_local[len + i].SetValue(0, 0);
72
+ }
73
+ SetAtomicAdd<float>();
74
+ DataCopy(dst_gm[len], dst_local[len], elem_per_block);
75
+ SetAtomicNone();
76
+ }
77
+ #else
78
+ DataCopyExtParams dataCopyParams;
79
+ dataCopyParams.blockCount = 1;
80
+ dataCopyParams.blockLen = num_elem * sizeof(DST_T);
81
+ DataCopyPad(dst_gm, dst_local, dataCopyParams);
82
+ #endif
83
+ dst_queue.FreeTensor(dst_local);
84
+ }
85
+
86
+ __aicore__ inline void dup() {
87
+ // main process, copy one row data from src to dst.
88
+ copy_in();
89
+
90
+ LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
91
+ LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();
92
+
93
+ int32_t BLOCK_NUM = 32 / sizeof(DST_T);
94
+ DataCopy(dst_local, src_local, (num_elem + BLOCK_NUM - 1)
95
+ / BLOCK_NUM * BLOCK_NUM);
96
+ dst_queue.EnQue<DST_T>(dst_local);
97
+
98
+ src_queue.FreeTensor(src_local);
99
+ copy_out();
100
+ }
101
+
102
+ __aicore__ inline void dup_with_cast() {
103
+ // main process, copy one row data from src to dst.
104
+ // cast dtype from src to dst.
105
+ copy_in();
106
+
107
+ LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
108
+ LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();
109
+
110
+ Cast(dst_local, src_local, RoundMode::CAST_NONE, num_elem);
111
+ dst_queue.EnQue<DST_T>(dst_local);
112
+
113
+ src_queue.FreeTensor(src_local);
114
+ copy_out();
115
+ }
116
+
117
+ private:
118
+
119
+ TPipe pipe;
120
+ GlobalTensor<SRC_T> src_gm;
121
+ GlobalTensor<DST_T> dst_gm;
122
+
123
+ int64_t num_rows;
124
+ int64_t num_elem;
125
+ int64_t idx_ne3;
126
+ int64_t idx_ne2;
127
+ int64_t idx_ne1;
128
+ int64_t src_stride;
129
+ int64_t dst_stride;
130
+
131
+ TQue<QuePosition::VECIN, BUFFER_NUM> src_queue;
132
+ TQue<QuePosition::VECOUT, BUFFER_NUM> dst_queue;
133
+ };
134
+
135
+ template <typename T>
136
+ __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
137
+ auto gm_ptr = (__gm__ uint8_t *)gm;
138
+ auto ub_ptr = (uint8_t *)(ub);
139
+ for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
140
+ *ub_ptr = *gm_ptr;
141
+ }
142
+ }
143
+
144
+ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16(
145
+ GM_ADDR src_gm,
146
+ GM_ADDR dst_gm,
147
+ GM_ADDR input_ne_gm,
148
+ GM_ADDR input_nb_gm,
149
+ GM_ADDR output_ne_gm,
150
+ GM_ADDR output_nb_gm) {
151
+
152
+ int64_t input_ne_ub[4];
153
+ size_t input_nb_ub[4];
154
+ int64_t output_ne_ub[4];
155
+ size_t output_nb_ub[4];
156
+
157
+ copy_to_ub(input_ne_gm, input_ne_ub, 32);
158
+ copy_to_ub(input_nb_gm, input_nb_ub, 32);
159
+ copy_to_ub(output_ne_gm, output_ne_ub, 32);
160
+ copy_to_ub(output_nb_gm, output_nb_ub, 32);
161
+
162
+ DupByRows<half, half> op;
163
+ op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
164
+ op.dup();
165
+ }
166
+
167
+ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32(
168
+ GM_ADDR src_gm,
169
+ GM_ADDR dst_gm,
170
+ GM_ADDR input_ne_gm,
171
+ GM_ADDR input_nb_gm,
172
+ GM_ADDR output_ne_gm,
173
+ GM_ADDR output_nb_gm) {
174
+ int64_t input_ne_ub[4];
175
+ size_t input_nb_ub[4];
176
+ int64_t output_ne_ub[4];
177
+ size_t output_nb_ub[4];
178
+
179
+ copy_to_ub(input_ne_gm, input_ne_ub, 32);
180
+ copy_to_ub(input_nb_gm, input_nb_ub, 32);
181
+ copy_to_ub(output_ne_gm, output_ne_ub, 32);
182
+ copy_to_ub(output_nb_gm, output_nb_ub, 32);
183
+
184
+ DupByRows<float, float> op;
185
+ op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
186
+ op.dup();
187
+ }
188
+
189
+ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32_to_fp16(
190
+ GM_ADDR src_gm,
191
+ GM_ADDR dst_gm,
192
+ GM_ADDR input_ne_gm,
193
+ GM_ADDR input_nb_gm,
194
+ GM_ADDR output_ne_gm,
195
+ GM_ADDR output_nb_gm) {
196
+
197
+ int64_t input_ne_ub[4];
198
+ size_t input_nb_ub[4];
199
+ int64_t output_ne_ub[4];
200
+ size_t output_nb_ub[4];
201
+
202
+ copy_to_ub(input_ne_gm, input_ne_ub, 32);
203
+ copy_to_ub(input_nb_gm, input_nb_ub, 32);
204
+ copy_to_ub(output_ne_gm, output_ne_ub, 32);
205
+ copy_to_ub(output_nb_gm, output_nb_ub, 32);
206
+
207
+ DupByRows<float, half> op;
208
+ op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
209
+ op.dup_with_cast();
210
+ }
211
+
212
+ extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16_to_fp32(
213
+ GM_ADDR src_gm,
214
+ GM_ADDR dst_gm,
215
+ GM_ADDR input_ne_gm,
216
+ GM_ADDR input_nb_gm,
217
+ GM_ADDR output_ne_gm,
218
+ GM_ADDR output_nb_gm) {
219
+
220
+ // copy params from gm to ub.
221
+ int64_t input_ne_ub[4];
222
+ size_t input_nb_ub[4];
223
+ int64_t output_ne_ub[4];
224
+ size_t output_nb_ub[4];
225
+
226
+ copy_to_ub(input_ne_gm, input_ne_ub, 32);
227
+ copy_to_ub(input_nb_gm, input_nb_ub, 32);
228
+ copy_to_ub(output_ne_gm, output_ne_ub, 32);
229
+ copy_to_ub(output_nb_gm, output_nb_ub, 32);
230
+
231
+ DupByRows<half, float> op;
232
+ op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
233
+ op.dup_with_cast();
234
+ }
@@ -0,0 +1,197 @@
1
+ #include "kernel_operator.h"
2
+
3
+ // optimize me. Use template to avoid copy code.
4
+ using namespace AscendC;
5
+
6
+ #define BUFFER_NUM 2
7
+
8
+ class GET_ROW_F16 {
9
+ public:
10
+ __aicore__ inline GET_ROW_F16() {}
11
+ __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
12
+ int64_t *input_ne_ub, size_t *input_nb_ub,
13
+ int64_t *indices_ne_ub, size_t *indices_nb_ub,
14
+ int64_t *output_ne_ub, size_t *output_nb_ub) {
15
+ // TODO, use template for F16/f32
16
+ int64_t op_block_num = GetBlockNum();
17
+ op_block_idx = GetBlockIdx();
18
+
19
+ for (int i = 0; i < 4; i++) {
20
+ input_ne[i] = input_ne_ub[i];
21
+ input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
22
+
23
+ indices_ne[i] = indices_ne_ub[i];
24
+ indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
25
+
26
+ output_ne[i] = output_ne_ub[i];
27
+ output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
28
+ }
29
+
30
+ // Indices has two dims. n_elements = all rows should get.
31
+ // dr, all rows should this thread get.
32
+ uint64_t n_elements =
33
+ indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
34
+ dr = n_elements / op_block_num;
35
+
36
+ uint64_t tails = n_elements % op_block_num;
37
+ if (op_block_idx < tails) {
38
+ dr += 1;
39
+ ir = dr * op_block_idx;
40
+ } else {
41
+ ir = dr * op_block_idx + tails;
42
+ }
43
+
44
+ input_gm.SetGlobalBuffer((__gm__ half *)input);
45
+ indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
46
+ output_gm.SetGlobalBuffer((__gm__ float *)output);
47
+
48
+ uint64_t input_local_buffer_size = ((input_ne[0] * sizeof(half) + 31)
49
+ & ~31);
50
+ uint64_t output_local_buffer_size = ((input_ne[0] * sizeof(float) + 31)
51
+ & ~31);
52
+
53
+ local_buffer_elems = input_local_buffer_size / sizeof(half);
54
+
55
+ // TODO, consider long row that can't put in UB.
56
+ // All data should asign to 32. It's ok because all data is align to 32.
57
+ pipe.InitBuffer(input_queue, BUFFER_NUM, input_local_buffer_size);
58
+ pipe.InitBuffer(output_queue, BUFFER_NUM, output_local_buffer_size);
59
+ }
60
+
61
+ __aicore__ inline void copy_in(uint32_t offset, size_t len) {
62
+ size_t origin_len = len;
63
+ LocalTensor<half> input_local = input_queue.AllocTensor<half>();
64
+ const size_t elem_per_block = 32 / sizeof(half);
65
+ size_t tail = len % elem_per_block;
66
+ len = len & ~(elem_per_block - 1);
67
+ if(tail != 0) {
68
+ len += elem_per_block;
69
+ }
70
+ DataCopy(input_local, input_gm[offset], len);
71
+ input_queue.EnQue(input_local);
72
+ }
73
+
74
+ __aicore__ inline void copy_out(uint32_t offset, size_t len) {
75
+ LocalTensor<float> output_local = output_queue.DeQue<float>();
76
+ const size_t elem_per_block = 32 / sizeof(float);
77
+ size_t tail = len % elem_per_block;
78
+ len = len & ~(elem_per_block - 1);
79
+ if (len > 0) {
80
+ DataCopy(output_gm[offset], output_local, len);
81
+ }
82
+
83
+ if(tail != 0) {
84
+ #ifdef ASCEND_310P
85
+ for (size_t i = tail; i < elem_per_block; i++) {
86
+ output_local[len + i].SetValue(0, 0);
87
+ }
88
+ SetAtomicAdd<float>();
89
+ DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
90
+ SetAtomicNone();
91
+ #else
92
+ DataCopyExtParams dataCopyParams;
93
+ dataCopyParams.blockCount = 1;
94
+ dataCopyParams.blockLen = tail * sizeof(float);
95
+ DataCopyPad(output_gm[offset + len], output_local[len],
96
+ dataCopyParams);
97
+ #endif
98
+ }
99
+ output_queue.FreeTensor(output_local);
100
+ }
101
+
102
+ __aicore__ inline void calculate_row(int64_t idx) {
103
+ const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
104
+ const int64_t indices_ne1_idx =
105
+ (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
106
+ indices_ne[0];
107
+ const int64_t indices_ne0_idx =
108
+ (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
109
+ indices_ne1_idx * indices_ne[0]);
110
+
111
+ const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
112
+ indices_ne1_idx * indices_stride[1] +
113
+ indices_ne2_idx * indices_stride[2];
114
+ const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);
115
+
116
+ const int64_t input_offset = selected_row_idx * input_stride[1] +
117
+ indices_ne1_idx * input_stride[2] +
118
+ indices_ne2_idx * input_stride[3];
119
+
120
+ const int64_t output_offset = indices_ne0_idx * output_stride[1] +
121
+ indices_ne1_idx * output_stride[2] +
122
+ indices_ne2_idx * output_stride[3];
123
+
124
+ copy_in(input_offset, input_ne[0]);
125
+ LocalTensor<half> input_local = input_queue.DeQue<half>();
126
+ LocalTensor<float> output_local = output_queue.AllocTensor<float>();
127
+
128
+ Cast(output_local, input_local, RoundMode::CAST_NONE,
129
+ local_buffer_elems);
130
+ output_queue.EnQue(output_local);
131
+ copy_out(output_offset, input_ne[0]);
132
+
133
+ input_queue.FreeTensor(input_local);
134
+ }
135
+
136
+ __aicore__ inline void calculate() {
137
+ for (int64_t i = ir; i < ir + dr; i++) {
138
+ calculate_row(i);
139
+ }
140
+ }
141
+
142
+ private:
143
+ int64_t input_ne[4];
144
+ size_t input_stride[4];
145
+
146
+ int64_t indices_ne[4];
147
+ size_t indices_stride[4];
148
+
149
+ int64_t output_ne[4];
150
+ size_t output_stride[4];
151
+
152
+ size_t local_buffer_elems;
153
+
154
+ int64_t ir;
155
+ int64_t dr;
156
+
157
+ TPipe pipe;
158
+ GlobalTensor<half> input_gm;
159
+ GlobalTensor<int32_t> indices_gm;
160
+ GlobalTensor<float> output_gm;
161
+ TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
162
+ TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
163
+ int64_t op_block_idx;
164
+ };
165
+
166
+ template <typename T>
167
+ __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
168
+ auto gm_ptr = (__gm__ uint8_t *)gm;
169
+ auto ub_ptr = (uint8_t *)(ub);
170
+ for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
171
+ *ub_ptr = *gm_ptr;
172
+ }
173
+ }
174
+
175
+ extern "C" __global__ __aicore__ void ascendc_get_row_f16(
176
+ GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
177
+ GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm,
178
+ GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
179
+ int64_t input_ne_ub[4];
180
+ size_t input_nb_ub[4];
181
+ int64_t indices_ne_ub[4];
182
+ size_t indices_nb_ub[4];
183
+ int64_t output_ne_ub[4];
184
+ size_t output_nb_ub[4];
185
+
186
+ copy_to_ub(input_ne_gm, input_ne_ub, 32);
187
+ copy_to_ub(input_nb_gm, input_nb_ub, 32);
188
+ copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
189
+ copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
190
+ copy_to_ub(output_ne_gm, output_ne_ub, 32);
191
+ copy_to_ub(output_nb_gm, output_nb_ub, 32);
192
+
193
+ GET_ROW_F16 op;
194
+ op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub,
195
+ indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub);
196
+ op.calculate();
197
+ }
@@ -0,0 +1,190 @@
1
+ #include "kernel_operator.h"
2
+
3
+ // optimize me. Use template to avoid copy code.
4
+ using namespace AscendC;
5
+
6
+ #define BUFFER_NUM 2
7
+
8
+ class GET_ROW_F32 {
9
+ public:
10
+ __aicore__ inline GET_ROW_F32() {}
11
+ __aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
12
+ int64_t *input_ne_ub, size_t *input_nb_ub,
13
+ int64_t *indices_ne_ub, size_t *indices_nb_ub,
14
+ int64_t *output_ne_ub, size_t *output_nb_ub) {
15
+ int64_t op_block_num = GetBlockNum();
16
+ op_block_idx = GetBlockIdx();
17
+
18
+ for (int i = 0; i < 4; i++) {
19
+ input_ne[i] = input_ne_ub[i];
20
+ input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
21
+
22
+ indices_ne[i] = indices_ne_ub[i];
23
+ indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
24
+
25
+ output_ne[i] = output_ne_ub[i];
26
+ output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
27
+ }
28
+
29
+ // Indices has two dims. n_elements = all rows should get.
30
+ // dr, all rows should this thread get.
31
+ uint64_t n_elements =
32
+ indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
33
+ dr = n_elements / op_block_num;
34
+
35
+ uint64_t tails = n_elements % op_block_num;
36
+ if (op_block_idx < tails) {
37
+ dr += 1;
38
+ ir = dr * op_block_idx;
39
+ } else {
40
+ ir = dr * op_block_idx + tails;
41
+ }
42
+
43
+ input_gm.SetGlobalBuffer((__gm__ float *)input);
44
+ indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
45
+ output_gm.SetGlobalBuffer((__gm__ float *)output);
46
+
47
+ uint64_t local_buffer_size = ((input_ne[0] * sizeof(float) + 31) & ~31);
48
+ local_buffer_elems = local_buffer_size / sizeof(float);
49
+
50
+ // TODO, consider long row that can't put in UB.
51
+ // All data should asign to 32. It's ok because all data is align to 32.
52
+ pipe.InitBuffer(input_queue, BUFFER_NUM, local_buffer_size);
53
+ pipe.InitBuffer(output_queue, BUFFER_NUM, local_buffer_size);
54
+ }
55
+
56
+ __aicore__ inline void copy_in(uint32_t offset, size_t len) {
57
+ LocalTensor<float> input_local = input_queue.AllocTensor<float>();
58
+ const size_t elem_per_block = 32 / sizeof(float);
59
+ size_t tail = len % elem_per_block;
60
+ len = len & ~(elem_per_block - 1);
61
+ if(tail != 0) {
62
+ len += elem_per_block;
63
+ }
64
+ DataCopy(input_local, input_gm[offset], len);
65
+ input_queue.EnQue(input_local);
66
+ }
67
+
68
+ __aicore__ inline void copy_out(uint32_t offset, size_t len) {
69
+ LocalTensor<float> output_local = output_queue.DeQue<float>();
70
+ const size_t elem_per_block = 32 / sizeof(float);
71
+ size_t tail = len % elem_per_block;
72
+ len = len & ~(elem_per_block - 1);
73
+ if (len > 0) {
74
+ DataCopy(output_gm[offset], output_local, len);
75
+ }
76
+
77
+ if(tail != 0) {
78
+ #ifdef ASCEND_310P
79
+ for (size_t i = tail; i < elem_per_block; i++) {
80
+ output_local[len + i].SetValue(0, 0);
81
+ }
82
+ SetAtomicAdd<float>();
83
+ DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
84
+ SetAtomicNone();
85
+ #else
86
+ DataCopyExtParams dataCopyParams;
87
+ dataCopyParams.blockCount = 1;
88
+ dataCopyParams.blockLen = tail * sizeof(float);
89
+ DataCopyPad(output_gm[offset + len], output_local[len],
90
+ dataCopyParams);
91
+ #endif
92
+ }
93
+ output_queue.FreeTensor(output_local);
94
+ }
95
+
96
+ __aicore__ inline void calculate_row(int64_t idx) {
97
+ const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
98
+ const int64_t indices_ne1_idx =
99
+ (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
100
+ indices_ne[0];
101
+ const int64_t indices_ne0_idx =
102
+ (idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
103
+ indices_ne1_idx * indices_ne[0]);
104
+
105
+ const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
106
+ indices_ne1_idx * indices_stride[1] +
107
+ indices_ne2_idx * indices_stride[2];
108
+ const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);
109
+
110
+ const int64_t input_offset = selected_row_idx * input_stride[1] +
111
+ indices_ne1_idx * input_stride[2] +
112
+ indices_ne2_idx * input_stride[3];
113
+
114
+ const int64_t output_offset = indices_ne0_idx * output_stride[1] +
115
+ indices_ne1_idx * output_stride[2] +
116
+ indices_ne2_idx * output_stride[3];
117
+
118
+ copy_in(input_offset, input_ne[0]);
119
+ LocalTensor<float> input_local = input_queue.DeQue<float>();
120
+ LocalTensor<float> output_local = output_queue.AllocTensor<float>();
121
+
122
+ DataCopy(output_local, input_local, local_buffer_elems);
123
+ output_queue.EnQue(output_local);
124
+ copy_out(output_offset, input_ne[0]);
125
+
126
+ input_queue.FreeTensor(input_local);
127
+ }
128
+
129
+ __aicore__ inline void calculate() {
130
+ for (int64_t i = ir; i < ir + dr; i++) {
131
+ calculate_row(i);
132
+ }
133
+ }
134
+
135
+ private:
136
+ int64_t input_ne[4];
137
+ size_t input_stride[4];
138
+
139
+ int64_t indices_ne[4];
140
+ size_t indices_stride[4];
141
+
142
+ int64_t output_ne[4];
143
+ size_t output_stride[4];
144
+
145
+ size_t local_buffer_elems;
146
+
147
+ int64_t ir;
148
+ int64_t dr;
149
+
150
+ TPipe pipe;
151
+ GlobalTensor<float> input_gm;
152
+ GlobalTensor<int32_t> indices_gm;
153
+ GlobalTensor<float> output_gm;
154
+ TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
155
+ TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
156
+ int64_t op_block_idx;
157
+ };
158
+
159
+ template <typename T>
160
+ __aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
161
+ auto gm_ptr = (__gm__ uint8_t *)gm;
162
+ auto ub_ptr = (uint8_t *)(ub);
163
+ for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
164
+ *ub_ptr = *gm_ptr;
165
+ }
166
+ }
167
+
168
+ extern "C" __global__ __aicore__ void ascendc_get_row_f32(
169
+ GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
170
+ GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm,
171
+ GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
172
+ int64_t input_ne_ub[4];
173
+ size_t input_nb_ub[4];
174
+ int64_t indices_ne_ub[4];
175
+ size_t indices_nb_ub[4];
176
+ int64_t output_ne_ub[4];
177
+ size_t output_nb_ub[4];
178
+
179
+ copy_to_ub(input_ne_gm, input_ne_ub, 32);
180
+ copy_to_ub(input_nb_gm, input_nb_ub, 32);
181
+ copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
182
+ copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
183
+ copy_to_ub(output_ne_gm, output_ne_ub, 32);
184
+ copy_to_ub(output_nb_gm, output_nb_ub, 32);
185
+
186
+ GET_ROW_F32 op;
187
+ op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub,
188
+ indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub);
189
+ op.calculate();
190
+ }