@agency-lang/whisper-local 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (643)
  1. package/CMakeLists.txt +51 -0
  2. package/README.md +145 -0
  3. package/build/Release/whisper_addon.node +0 -0
  4. package/dist/src/addon.d.ts +11 -0
  5. package/dist/src/addon.js +22 -0
  6. package/dist/src/cli.d.ts +2 -0
  7. package/dist/src/cli.js +117 -0
  8. package/dist/src/ffmpeg.d.ts +11 -0
  9. package/dist/src/ffmpeg.js +154 -0
  10. package/dist/src/handleCache.d.ts +9 -0
  11. package/dist/src/handleCache.js +83 -0
  12. package/dist/src/modelManager.d.ts +12 -0
  13. package/dist/src/modelManager.js +172 -0
  14. package/dist/src/packageRoot.d.ts +8 -0
  15. package/dist/src/packageRoot.js +21 -0
  16. package/dist/src/transcribe.d.ts +2 -0
  17. package/dist/src/transcribe.js +36 -0
  18. package/dist/src/types.d.ts +11 -0
  19. package/dist/src/types.js +17 -0
  20. package/index.agency +32 -0
  21. package/models.lock.json +55 -0
  22. package/package.json +52 -0
  23. package/vendor/whisper.cpp/CMakeLists.txt +251 -0
  24. package/vendor/whisper.cpp/LICENSE +21 -0
  25. package/vendor/whisper.cpp/UPSTREAM_SHA256 +1 -0
  26. package/vendor/whisper.cpp/VERSION +1 -0
  27. package/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake +16 -0
  28. package/vendor/whisper.cpp/cmake/FindFFmpeg.cmake +163 -0
  29. package/vendor/whisper.cpp/cmake/build-info.cmake +60 -0
  30. package/vendor/whisper.cpp/cmake/git-vars.cmake +22 -0
  31. package/vendor/whisper.cpp/cmake/whisper-config.cmake.in +65 -0
  32. package/vendor/whisper.cpp/cmake/whisper.pc.in +10 -0
  33. package/vendor/whisper.cpp/ggml/CMakeLists.txt +434 -0
  34. package/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake +54 -0
  35. package/vendor/whisper.cpp/ggml/cmake/GitVars.cmake +22 -0
  36. package/vendor/whisper.cpp/ggml/cmake/common.cmake +50 -0
  37. package/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  38. package/vendor/whisper.cpp/ggml/include/ggml-alloc.h +76 -0
  39. package/vendor/whisper.cpp/ggml/include/ggml-backend.h +354 -0
  40. package/vendor/whisper.cpp/ggml/include/ggml-blas.h +25 -0
  41. package/vendor/whisper.cpp/ggml/include/ggml-cann.h +123 -0
  42. package/vendor/whisper.cpp/ggml/include/ggml-cpp.h +39 -0
  43. package/vendor/whisper.cpp/ggml/include/ggml-cpu.h +143 -0
  44. package/vendor/whisper.cpp/ggml/include/ggml-cuda.h +47 -0
  45. package/vendor/whisper.cpp/ggml/include/ggml-kompute.h +50 -0
  46. package/vendor/whisper.cpp/ggml/include/ggml-metal.h +66 -0
  47. package/vendor/whisper.cpp/ggml/include/ggml-opencl.h +26 -0
  48. package/vendor/whisper.cpp/ggml/include/ggml-opt.h +237 -0
  49. package/vendor/whisper.cpp/ggml/include/ggml-rpc.h +33 -0
  50. package/vendor/whisper.cpp/ggml/include/ggml-sycl.h +49 -0
  51. package/vendor/whisper.cpp/ggml/include/ggml-vulkan.h +29 -0
  52. package/vendor/whisper.cpp/ggml/include/ggml.h +2221 -0
  53. package/vendor/whisper.cpp/ggml/include/gguf.h +202 -0
  54. package/vendor/whisper.cpp/ggml/src/CMakeLists.txt +404 -0
  55. package/vendor/whisper.cpp/ggml/src/ggml-alloc.c +1042 -0
  56. package/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  57. package/vendor/whisper.cpp/ggml/src/ggml-amx/common.h +94 -0
  58. package/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  59. package/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  60. package/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  61. package/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h +255 -0
  62. package/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp +591 -0
  63. package/vendor/whisper.cpp/ggml/src/ggml-backend.cpp +2016 -0
  64. package/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  65. package/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  66. package/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt +75 -0
  67. package/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  68. package/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp +181 -0
  69. package/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  70. package/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +3193 -0
  71. package/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  72. package/vendor/whisper.cpp/ggml/src/ggml-cann/common.h +425 -0
  73. package/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2630 -0
  74. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  75. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  76. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp +234 -0
  77. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  78. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  79. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  80. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  81. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  82. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  83. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  84. package/vendor/whisper.cpp/ggml/src/ggml-common.h +1861 -0
  85. package/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt +584 -0
  86. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  87. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  88. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  89. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  90. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  91. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  92. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  93. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
  94. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  95. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  96. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  97. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  98. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  99. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  100. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  101. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  102. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  103. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +3284 -0
  104. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  105. package/vendor/whisper.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  106. package/vendor/whisper.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  107. package/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  108. package/vendor/whisper.cpp/ggml/src/ggml-cpu/common.h +72 -0
  109. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +511 -0
  110. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3473 -0
  111. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  112. package/vendor/whisper.cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  113. package/vendor/whisper.cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  114. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  115. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  116. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  117. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  118. package/vendor/whisper.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3593 -0
  119. package/vendor/whisper.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
  120. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ops.cpp +9085 -0
  121. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ops.h +111 -0
  122. package/vendor/whisper.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  123. package/vendor/whisper.cpp/ggml/src/ggml-cpu/quants.h +89 -0
  124. package/vendor/whisper.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
  125. package/vendor/whisper.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  126. package/vendor/whisper.cpp/ggml/src/ggml-cpu/simd-mappings.h +1006 -0
  127. package/vendor/whisper.cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  128. package/vendor/whisper.cpp/ggml/src/ggml-cpu/traits.h +38 -0
  129. package/vendor/whisper.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  130. package/vendor/whisper.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  131. package/vendor/whisper.cpp/ggml/src/ggml-cpu/vec.cpp +321 -0
  132. package/vendor/whisper.cpp/ggml/src/ggml-cpu/vec.h +973 -0
  133. package/vendor/whisper.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  134. package/vendor/whisper.cpp/ggml/src/ggml-cuda/acc.cu +61 -0
  135. package/vendor/whisper.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  136. package/vendor/whisper.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  137. package/vendor/whisper.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  138. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  139. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  140. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  141. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  142. package/vendor/whisper.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  143. package/vendor/whisper.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  144. package/vendor/whisper.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  145. package/vendor/whisper.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  146. package/vendor/whisper.cpp/ggml/src/ggml-cuda/common.cuh +812 -0
  147. package/vendor/whisper.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  148. package/vendor/whisper.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  149. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  150. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  151. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  152. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  153. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  154. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  155. package/vendor/whisper.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  156. package/vendor/whisper.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  157. package/vendor/whisper.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  158. package/vendor/whisper.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  159. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  160. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cpy.cu +705 -0
  161. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  162. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  163. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  164. package/vendor/whisper.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  165. package/vendor/whisper.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  166. package/vendor/whisper.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  167. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  168. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
  169. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  170. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  171. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  172. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  173. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  174. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  175. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  176. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  177. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn.cu +346 -0
  178. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  179. package/vendor/whisper.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  180. package/vendor/whisper.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  181. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3562 -0
  182. package/vendor/whisper.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  183. package/vendor/whisper.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  184. package/vendor/whisper.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  185. package/vendor/whisper.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  186. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  187. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmq.cu +324 -0
  188. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  189. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  190. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  191. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  192. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  193. package/vendor/whisper.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  194. package/vendor/whisper.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  195. package/vendor/whisper.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  196. package/vendor/whisper.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  197. package/vendor/whisper.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  198. package/vendor/whisper.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  199. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  200. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  201. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  202. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  203. package/vendor/whisper.cpp/ggml/src/ggml-cuda/quantize.cu +190 -0
  204. package/vendor/whisper.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  205. package/vendor/whisper.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  206. package/vendor/whisper.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  207. package/vendor/whisper.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  208. package/vendor/whisper.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  209. package/vendor/whisper.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  210. package/vendor/whisper.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  211. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  212. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  213. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
  214. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  215. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  216. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  217. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  218. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  219. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  220. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  221. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  222. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  223. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  224. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  225. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  226. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  227. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  228. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  229. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  230. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  231. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  232. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  233. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  234. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  235. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  236. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  237. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  238. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  239. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  240. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  241. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  242. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  243. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  244. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  245. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  246. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  247. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  248. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  249. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  250. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  251. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  252. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  253. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  254. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  255. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  256. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  257. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  258. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  259. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  260. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  261. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  262. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  263. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  264. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  265. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  266. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  267. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  268. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  269. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  270. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  271. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  272. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  273. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  274. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  275. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  276. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  277. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  278. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  279. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  280. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  281. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  282. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  283. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  284. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  285. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  286. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  287. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  288. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  289. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  290. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  291. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  292. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  293. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  294. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  295. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  296. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  297. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  298. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  299. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  300. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  301. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  302. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  303. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  304. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  305. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  306. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  307. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  308. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  309. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  310. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  311. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  312. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  313. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  314. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  315. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  316. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  317. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  318. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  319. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  320. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  321. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  322. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  323. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  324. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  325. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  326. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  327. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  328. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  329. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  330. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  331. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  332. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  333. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  334. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  335. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  336. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  337. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  338. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  339. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  340. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  341. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  342. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  343. package/vendor/whisper.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  344. package/vendor/whisper.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  345. package/vendor/whisper.cpp/ggml/src/ggml-cuda/unary.cu +289 -0
  346. package/vendor/whisper.cpp/ggml/src/ggml-cuda/unary.cuh +59 -0
  347. package/vendor/whisper.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  348. package/vendor/whisper.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  349. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  350. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  351. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  352. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  353. package/vendor/whisper.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  354. package/vendor/whisper.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  355. package/vendor/whisper.cpp/ggml/src/ggml-hip/CMakeLists.txt +135 -0
  356. package/vendor/whisper.cpp/ggml/src/ggml-impl.h +603 -0
  357. package/vendor/whisper.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  358. package/vendor/whisper.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  359. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  360. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  361. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  362. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  363. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  364. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  365. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  366. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  367. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  368. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  369. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  370. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  371. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  372. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  373. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  374. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  375. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  376. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  377. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  378. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  379. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  380. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  381. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  382. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  383. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  384. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  385. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  386. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  387. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  388. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  389. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  390. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  391. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  392. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  393. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  394. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  395. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  396. package/vendor/whisper.cpp/ggml/src/ggml-metal/CMakeLists.txt +121 -0
  397. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
  398. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal.m +6023 -0
  399. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal.metal +7124 -0
  400. package/vendor/whisper.cpp/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  401. package/vendor/whisper.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  402. package/vendor/whisper.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  403. package/vendor/whisper.cpp/ggml/src/ggml-opencl/CMakeLists.txt +109 -0
  404. package/vendor/whisper.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6665 -0
  405. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  406. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  407. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  408. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  409. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  410. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  411. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  412. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  413. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  414. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  415. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  416. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  417. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  418. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  419. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  420. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  421. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  422. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  423. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  424. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  425. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  426. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  427. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  428. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  429. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  430. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  431. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  432. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  433. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  434. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  435. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  436. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  437. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  438. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  439. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  440. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  441. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  442. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  443. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  444. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  445. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  446. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  447. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  448. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  449. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  450. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  451. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  452. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  453. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  454. package/vendor/whisper.cpp/ggml/src/ggml-opt.cpp +1037 -0
  455. package/vendor/whisper.cpp/ggml/src/ggml-quants.c +5230 -0
  456. package/vendor/whisper.cpp/ggml/src/ggml-quants.h +100 -0
  457. package/vendor/whisper.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  458. package/vendor/whisper.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1816 -0
  459. package/vendor/whisper.cpp/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  460. package/vendor/whisper.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  461. package/vendor/whisper.cpp/ggml/src/ggml-sycl/binbcast.cpp +344 -0
  462. package/vendor/whisper.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  463. package/vendor/whisper.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  464. package/vendor/whisper.cpp/ggml/src/ggml-sycl/common.hpp +584 -0
  465. package/vendor/whisper.cpp/ggml/src/ggml-sycl/concat.cpp +182 -0
  466. package/vendor/whisper.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  467. package/vendor/whisper.cpp/ggml/src/ggml-sycl/conv.cpp +95 -0
  468. package/vendor/whisper.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  469. package/vendor/whisper.cpp/ggml/src/ggml-sycl/convert.cpp +575 -0
  470. package/vendor/whisper.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  471. package/vendor/whisper.cpp/ggml/src/ggml-sycl/cpy.cpp +839 -0
  472. package/vendor/whisper.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  473. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dequantize.hpp +823 -0
  474. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dmmv.cpp +1144 -0
  475. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  476. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
  477. package/vendor/whisper.cpp/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
  478. package/vendor/whisper.cpp/ggml/src/ggml-sycl/element_wise.hpp +77 -0
  479. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gemm.hpp +102 -0
  480. package/vendor/whisper.cpp/ggml/src/ggml-sycl/getrows.cpp +212 -0
  481. package/vendor/whisper.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  482. package/vendor/whisper.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4608 -0
  483. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gla.cpp +106 -0
  484. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  485. package/vendor/whisper.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  486. package/vendor/whisper.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  487. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmq.cpp +3010 -0
  488. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  489. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
  490. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  491. package/vendor/whisper.cpp/ggml/src/ggml-sycl/norm.cpp +482 -0
  492. package/vendor/whisper.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  493. package/vendor/whisper.cpp/ggml/src/ggml-sycl/outprod.cpp +47 -0
  494. package/vendor/whisper.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  495. package/vendor/whisper.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  496. package/vendor/whisper.cpp/ggml/src/ggml-sycl/quants.hpp +111 -0
  497. package/vendor/whisper.cpp/ggml/src/ggml-sycl/rope.cpp +472 -0
  498. package/vendor/whisper.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  499. package/vendor/whisper.cpp/ggml/src/ggml-sycl/softmax.cpp +261 -0
  500. package/vendor/whisper.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  501. package/vendor/whisper.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  502. package/vendor/whisper.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  503. package/vendor/whisper.cpp/ggml/src/ggml-sycl/tsembd.cpp +67 -0
  504. package/vendor/whisper.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  505. package/vendor/whisper.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
  506. package/vendor/whisper.cpp/ggml/src/ggml-sycl/wkv.cpp +289 -0
  507. package/vendor/whisper.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  508. package/vendor/whisper.cpp/ggml/src/ggml-threading.cpp +12 -0
  509. package/vendor/whisper.cpp/ggml/src/ggml-threading.h +14 -0
  510. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +189 -0
  511. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  512. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10937 -0
  513. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +27 -0
  514. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  515. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  516. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  517. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  518. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  519. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  520. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  521. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  522. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  523. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  524. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  525. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  526. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  527. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  528. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  529. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  530. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  531. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  532. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  533. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  534. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  535. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  536. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  537. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  538. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  539. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  540. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  541. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  542. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  543. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  544. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  545. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  546. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  547. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  548. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  549. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  550. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  551. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  552. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  553. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  554. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  555. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  556. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  557. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  558. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  559. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  560. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  561. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  562. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  563. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  564. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  565. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  566. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  567. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  568. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  569. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  570. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  571. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  572. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  573. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  574. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  575. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  576. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  577. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  578. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  579. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  580. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  581. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  582. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  583. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  584. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  585. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  586. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  587. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  588. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  589. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  590. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  591. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  592. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  593. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  594. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  595. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  596. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  597. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  598. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  599. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  600. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  601. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  602. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  603. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  604. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  605. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  606. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  607. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  608. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  609. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  610. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  611. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  612. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  613. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  614. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  615. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  616. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  617. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  618. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  619. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  620. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  621. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  622. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  623. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  624. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +753 -0
  625. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  626. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  627. package/vendor/whisper.cpp/ggml/src/ggml.c +6601 -0
  628. package/vendor/whisper.cpp/ggml/src/ggml.cpp +26 -0
  629. package/vendor/whisper.cpp/ggml/src/gguf.cpp +1347 -0
  630. package/vendor/whisper.cpp/include/whisper.h +738 -0
  631. package/vendor/whisper.cpp/src/CMakeLists.txt +145 -0
  632. package/vendor/whisper.cpp/src/coreml/whisper-compat.h +10 -0
  633. package/vendor/whisper.cpp/src/coreml/whisper-compat.m +35 -0
  634. package/vendor/whisper.cpp/src/coreml/whisper-decoder-impl.h +158 -0
  635. package/vendor/whisper.cpp/src/coreml/whisper-decoder-impl.m +227 -0
  636. package/vendor/whisper.cpp/src/coreml/whisper-encoder-impl.h +154 -0
  637. package/vendor/whisper.cpp/src/coreml/whisper-encoder-impl.m +223 -0
  638. package/vendor/whisper.cpp/src/coreml/whisper-encoder.h +26 -0
  639. package/vendor/whisper.cpp/src/coreml/whisper-encoder.mm +73 -0
  640. package/vendor/whisper.cpp/src/openvino/whisper-openvino-encoder.cpp +108 -0
  641. package/vendor/whisper.cpp/src/openvino/whisper-openvino-encoder.h +31 -0
  642. package/vendor/whisper.cpp/src/whisper-arch.h +197 -0
  643. package/vendor/whisper.cpp/src/whisper.cpp +8969 -0
@@ -0,0 +1,1006 @@
1
+ #pragma once
2
+
3
+ #include "ggml-cpu-impl.h"
4
+
5
+ //
6
+ // simd mappings
7
+ //
8
+
9
+ // we define a common set of C macros which map to specific intrinsics based on the current architecture
10
+ // we then implement the fundamental computation operations below using only these macros
11
+ // adding support for a new architecture requires defining the corresponding SIMD macros
12
+ //
13
+ // GGML_F32_STEP / GGML_F16_STEP
14
+ // number of elements to process in a single step
15
+ //
16
+ // GGML_F32_EPR / GGML_F16_EPR
17
+ // number of elements to fit in a single register
18
+ //
19
+
20
+ #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_FMA)
21
+
22
+ #define GGML_SIMD
23
+
24
+ // F32 SVE
25
+ #define GGML_F32_EPR 8
26
+ #define DEFAULT_PG svptrue_b32()
27
+
28
+ #define GGML_F32xt svfloat32_t
29
+ #define GGML_F32xt_ZERO svdup_n_f32(0.0f)
30
+ #define GGML_F32xt_SET1(x) svdup_n_f32(x)
31
+ #define GGML_F32xt_LOAD_IMPL(pg, a, ...) svld1_f32(pg, a)
32
+ #define GGML_F32xt_LOAD(...) GGML_F32xt_LOAD_IMPL(DEFAULT_PG, __VA_ARGS__)
33
+ #define GGML_F32xt_STORE_IMPL(pg,a,b) svst1_f32(pg, a, b)
34
+ #define GGML_F32xt_STORE(...) GGML_F32xt_STORE_IMPL(DEFAULT_PG, __VA_ARGS__)
35
+ #define GGML_F32xt_FMA_IMPL(pg, a, b, c) svmad_f32_m(pg, a, b, c)
36
+ #define GGML_F32xt_FMA(...) GGML_F32xt_FMA_IMPL(DEFAULT_PG, __VA_ARGS__)
37
+ #define GGML_F32xt_ADD_IMPL(pg, a, b) svadd_f32_m(pg, a, b)
38
+ #define GGML_F32xt_ADD(...) GGML_F32xt_ADD_IMPL(DEFAULT_PG, __VA_ARGS__)
39
+ #define GGML_F32xt_MUL_IMPL(pg, a, b) svmul_f32_m(pg, a, b)
40
+ #define GGML_F32xt_MUL(...) GGML_F32xt_MUL_IMPL(DEFAULT_PG, __VA_ARGS__)
41
+ #define GGML_F32xt_REDUCE_ONE_IMPL(pg, a) svaddv(pg, a)
42
+ #define GGML_F32xt_REDUCE_ONE(...) GGML_F32xt_REDUCE_ONE_IMPL(DEFAULT_PG, __VA_ARGS__)
43
+ #define GGML_F32xt_REDUCE_IMPL(pg, res, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8) \
44
+ { \
45
+ sum1 = svadd_f32_m(DEFAULT_PG, sum1, sum2); \
46
+ sum3 = svadd_f32_m(DEFAULT_PG, sum3, sum4); \
47
+ sum5 = svadd_f32_m(DEFAULT_PG, sum5, sum6); \
48
+ sum7 = svadd_f32_m(DEFAULT_PG, sum7, sum8); \
49
+ sum1 = svadd_f32_m(DEFAULT_PG, sum1, sum3); \
50
+ sum5 = svadd_f32_m(DEFAULT_PG, sum5, sum7); \
51
+ sum1 = svadd_f32_m(DEFAULT_PG, sum1, sum5); \
52
+ (res) = (ggml_float) GGML_F32xt_REDUCE_ONE(sum1); \
53
+ }
54
+ #define GGML_F32xt_REDUCE(...) GGML_F32xt_REDUCE_IMPL(DEFAULT_PG, __VA_ARGS__)
55
+
56
+ #define GGML_F32_VEC GGML_F32xt
57
+ #define GGML_F32_VEC_ZERO GGML_F32xt_ZERO
58
+ #define GGML_F32_VEC_SET1 GGML_F32xt_SET1
59
+ #define GGML_F32_VEC_LOAD GGML_F32xt_LOAD
60
+ #define GGML_F32_VEC_STORE GGML_F32xt_STORE
61
+ #define GGML_F32_VEC_FMA GGML_F32xt_FMA
62
+ #define GGML_F32_VEC_ADD GGML_F32xt_ADD
63
+ #define GGML_F32_VEC_MUL GGML_F32xt_MUL
64
+ #define GGML_F32_VEC_REDUCE GGML_F32xt_REDUCE
65
+
66
+ // F16 NEON (F16 uses NEON intrinsics even in the SVE build)
67
+
68
+ #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
69
+ #define GGML_F16_STEP 32
70
+ #define GGML_F16_EPR 8
71
+
72
+ #define GGML_F16x8 float16x8_t
73
+ #define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
74
+ #define GGML_F16x8_SET1(x) vdupq_n_f16(x)
75
+ #define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
76
+ #define GGML_F16x8_STORE vst1q_f16
77
+ #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
78
+ #define GGML_F16x8_ADD vaddq_f16
79
+ #define GGML_F16x8_MUL vmulq_f16
80
+ #define GGML_F16x8_REDUCE(res, x) /* sum the accumulator vectors in x[] into scalar res; x[] is modified in place */ \
81
+ do { \
82
+ int offset = GGML_F16_ARR >> 1; /* start by folding the upper half onto the lower half */ \
83
+ for (int i = 0; i < offset; ++i) { \
84
+ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
85
+ } \
86
+ offset >>= 1; \
87
+ for (int i = 0; i < offset; ++i) { \
88
+ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
89
+ } \
90
+ offset >>= 1; \
91
+ for (int i = 0; i < offset; ++i) { \
92
+ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
93
+ } \
94
+ const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 ((x)[0])); /* widen both halves of x[0] to F32 */ \
95
+ const float32x4_t t1 = vcvt_f32_f16(vget_high_f16((x)[0])); \
96
+ (res) = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); /* add the halves lane-wise, then across lanes */ \
97
+ } while (0)
98
+
99
+ #define GGML_F16_VEC GGML_F16x8
100
+ #define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
101
+ #define GGML_F16_VEC_SET1 GGML_F16x8_SET1
102
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F16x8_LOAD(p)
103
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE((__fp16 *)(p), (r)[i])
104
+ #define GGML_F16_VEC_FMA GGML_F16x8_FMA
105
+ #define GGML_F16_VEC_ADD GGML_F16x8_ADD
106
+ #define GGML_F16_VEC_MUL GGML_F16x8_MUL
107
+ #define GGML_F16_VEC_REDUCE GGML_F16x8_REDUCE
108
+ #else
109
+ // if FP16 vector arithmetic is not supported, we use FP32 instead
110
+ // and take advantage of the vcvt_ functions to convert to/from FP16
111
+
112
+ #define GGML_F16_STEP 16
113
+ #define GGML_F16_EPR 4
114
+
115
+ #define GGML_F32Cx4 float32x4_t
116
+ #define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
117
+ #define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
118
+ #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
119
+ #define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
120
+ #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
121
+ #define GGML_F32Cx4_ADD vaddq_f32
122
+ #define GGML_F32Cx4_MUL vmulq_f32
123
+ #define GGML_F32Cx4_REDUCE GGML_F32x4_REDUCE // NOTE(review): GGML_F32x4_REDUCE is not defined in this SVE branch (only in the NEON #elif below) - confirm it is provided elsewhere
124
+
125
+ #define GGML_F16_VEC GGML_F32Cx4
126
+ #define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO
127
+ #define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1
128
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p)
129
+ // convert accumulator (r)[i] from F32 to F16 and store it at p; r is parenthesised so any array expression is safe
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE((__fp16 *)(p), (r)[i])
130
+ #define GGML_F16_VEC_FMA GGML_F32Cx4_FMA
131
+ #define GGML_F16_VEC_ADD GGML_F32Cx4_ADD
132
+ #define GGML_F16_VEC_MUL GGML_F32Cx4_MUL
133
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx4_REDUCE
134
+ #endif
135
+
136
+ #elif defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA)
137
+
138
+ #define GGML_SIMD
139
+
140
+ // F32 NEON
141
+
142
+ #define GGML_F32_STEP 16
143
+ #define GGML_F32_EPR 4
144
+
145
+ #define GGML_F32x4 float32x4_t
146
+ #define GGML_F32x4_ZERO vdupq_n_f32(0.0f)
147
+ #define GGML_F32x4_SET1(x) vdupq_n_f32(x)
148
+ #define GGML_F32x4_LOAD vld1q_f32
149
+ #define GGML_F32x4_STORE vst1q_f32
150
+ #define GGML_F32x4_FMA(a, b, c) vfmaq_f32(a, b, c)
151
+ #define GGML_F32x4_ADD vaddq_f32
152
+ #define GGML_F32x4_MUL vmulq_f32
153
+ #define GGML_F32x4_REDUCE_ONE(x) vaddvq_f32(x)
154
+ #define GGML_F32x4_REDUCE(res, x) \
155
+ { \
156
+ int offset = GGML_F32_ARR >> 1; \
157
+ for (int i = 0; i < offset; ++i) { \
158
+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
159
+ } \
160
+ offset >>= 1; \
161
+ for (int i = 0; i < offset; ++i) { \
162
+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
163
+ } \
164
+ offset >>= 1; \
165
+ for (int i = 0; i < offset; ++i) { \
166
+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
167
+ } \
168
+ (res) = (ggml_float) GGML_F32x4_REDUCE_ONE((x)[0]); \
169
+ }
170
+
171
+ #define GGML_F32_VEC GGML_F32x4
172
+ #define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
173
+ #define GGML_F32_VEC_SET1 GGML_F32x4_SET1
174
+ #define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
175
+ #define GGML_F32_VEC_STORE GGML_F32x4_STORE
176
+ #define GGML_F32_VEC_FMA GGML_F32x4_FMA
177
+ #define GGML_F32_VEC_ADD GGML_F32x4_ADD
178
+ #define GGML_F32_VEC_MUL GGML_F32x4_MUL
179
+ #define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
180
+
181
+ // F16 NEON
182
+
183
+ #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
184
+ #define GGML_F16_STEP 32
185
+ #define GGML_F16_EPR 8
186
+
187
+ #define GGML_F16x8 float16x8_t
188
+ #define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
189
+ #define GGML_F16x8_SET1(x) vdupq_n_f16(x)
190
+ #define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
191
+ #define GGML_F16x8_STORE vst1q_f16
192
+ #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
193
+ #define GGML_F16x8_ADD vaddq_f16
194
+ #define GGML_F16x8_MUL vmulq_f16
195
+ #define GGML_F16x8_REDUCE(res, x) /* sum the accumulator vectors in x[] into scalar res; x[] is modified in place */ \
196
+ do { \
197
+ int offset = GGML_F16_ARR >> 1; /* start by folding the upper half onto the lower half */ \
198
+ for (int i = 0; i < offset; ++i) { \
199
+ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
200
+ } \
201
+ offset >>= 1; \
202
+ for (int i = 0; i < offset; ++i) { \
203
+ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
204
+ } \
205
+ offset >>= 1; \
206
+ for (int i = 0; i < offset; ++i) { \
207
+ (x)[i] = vaddq_f16((x)[i], (x)[offset+i]); \
208
+ } \
209
+ const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 ((x)[0])); /* widen both halves of x[0] to F32 */ \
210
+ const float32x4_t t1 = vcvt_f32_f16(vget_high_f16((x)[0])); \
211
+ (res) = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); /* add the halves lane-wise, then across lanes */ \
212
+ } while (0)
213
+
214
+ #define GGML_F16_VEC GGML_F16x8
215
+ #define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
216
+ #define GGML_F16_VEC_SET1 GGML_F16x8_SET1
217
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F16x8_LOAD(p)
218
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE((__fp16 *)(p), (r)[i])
219
+ #define GGML_F16_VEC_FMA GGML_F16x8_FMA
220
+ #define GGML_F16_VEC_ADD GGML_F16x8_ADD
221
+ #define GGML_F16_VEC_MUL GGML_F16x8_MUL
222
+ #define GGML_F16_VEC_REDUCE GGML_F16x8_REDUCE
223
+ #else
224
+ // if FP16 vector arithmetic is not supported, we use FP32 instead
225
+ // and take advantage of the vcvt_ functions to convert to/from FP16
226
+
227
+ #define GGML_F16_STEP 16
228
+ #define GGML_F16_EPR 4
229
+
230
+ #define GGML_F32Cx4 float32x4_t
231
+ #define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
232
+ #define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
233
+ #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
234
+ #define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
235
+ #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
236
+ #define GGML_F32Cx4_ADD vaddq_f32
237
+ #define GGML_F32Cx4_MUL vmulq_f32
238
+ #define GGML_F32Cx4_REDUCE GGML_F32x4_REDUCE
239
+
240
+ #define GGML_F16_VEC GGML_F32Cx4
241
+ #define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO
242
+ #define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1
243
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p)
244
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE((__fp16 *)(p), r[i])
245
+ #define GGML_F16_VEC_FMA GGML_F32Cx4_FMA
246
+ #define GGML_F16_VEC_ADD GGML_F32Cx4_ADD
247
+ #define GGML_F16_VEC_MUL GGML_F32Cx4_MUL
248
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx4_REDUCE
249
+ #endif
250
+
251
+ #elif defined(__AVX512F__)
252
+
253
+ #define GGML_SIMD
254
+
255
+ // F32 AVX512
256
+
257
+ #define GGML_F32_STEP 64
258
+ #define GGML_F32_EPR 16
259
+
260
+ #define GGML_F32x16 __m512
261
+ #define GGML_F32x16_ZERO _mm512_setzero_ps()
262
+ #define GGML_F32x16_SET1(x) _mm512_set1_ps(x)
263
+ #define GGML_F32x16_LOAD _mm512_loadu_ps
264
+ #define GGML_F32x16_STORE _mm512_storeu_ps
265
+ // _mm512_fmadd_ps is defined in AVX512F so no guard is required
266
+ #define GGML_F32x16_FMA(a, b, c) _mm512_fmadd_ps(b, c, a)
267
+ #define GGML_F32x16_ADD _mm512_add_ps
268
+ #define GGML_F32x16_MUL _mm512_mul_ps
269
+ #define GGML_F32x16_REDUCE(res, x) /* sum every lane of the GGML_F32_ARR accumulators in x into res; x is overwritten */ \
270
+ do { \
271
+ int offset = GGML_F32_ARR >> 1; \
272
+ for (int i = 0; i < offset; ++i) { /* pass 1: add the upper half of the accumulators onto the lower half */ \
273
+ x[i] = _mm512_add_ps(x[i], x[offset+i]); \
274
+ } \
275
+ offset >>= 1; \
276
+ for (int i = 0; i < offset; ++i) { /* pass 2: halve again; with GGML_F32_ARR == 4 the total is now in x[0] */ \
277
+ x[i] = _mm512_add_ps(x[i], x[offset+i]); \
278
+ } \
279
+ offset >>= 1; \
280
+ for (int i = 0; i < offset; ++i) { /* pass 3: runs zero times when GGML_F32_ARR == 4 (offset is 0) */ \
281
+ x[i] = _mm512_add_ps(x[i], x[offset+i]); \
282
+ } \
283
+ res = (ggml_float) _mm512_reduce_add_ps(x[0]); /* horizontal add of the 16 lanes left in x[0] */ \
284
+ } while (0)
285
+
286
+ // TODO: is this optimal ?
287
+
288
+ #define GGML_F32_VEC GGML_F32x16
289
+ #define GGML_F32_VEC_ZERO GGML_F32x16_ZERO
290
+ #define GGML_F32_VEC_SET1 GGML_F32x16_SET1
291
+ #define GGML_F32_VEC_LOAD GGML_F32x16_LOAD
292
+ #define GGML_F32_VEC_STORE GGML_F32x16_STORE
293
+ #define GGML_F32_VEC_FMA GGML_F32x16_FMA
294
+ #define GGML_F32_VEC_ADD GGML_F32x16_ADD
295
+ #define GGML_F32_VEC_MUL GGML_F32x16_MUL
296
+ #define GGML_F32_VEC_REDUCE GGML_F32x16_REDUCE
297
+
298
+ // F16 AVX512
299
+
300
+ // F16 values are converted to F32 lanes here, the same approach the AVX branch uses
301
+
302
+ #define GGML_F16_STEP 64
303
+ #define GGML_F16_EPR 16
304
+
305
+ // AVX512 has FP16 extension (AVX512_FP16) but I don't have it on my machine so I use FP32 instead
306
+
307
+ #define GGML_F32Cx16 __m512
308
+ #define GGML_F32Cx16_ZERO _mm512_setzero_ps()
309
+ #define GGML_F32Cx16_SET1(x) _mm512_set1_ps(x)
310
+
311
+ // unlike _mm256_cvt intrinsics that require F16C, _mm512_cvt is defined in AVX512F
312
+ // so F16C guard isn't required
313
+ #define GGML_F32Cx16_LOAD(x) _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(x)))
314
+ #define GGML_F32Cx16_STORE(x, y) _mm256_storeu_si256((__m256i *)(x), _mm512_cvtps_ph(y, 0))
315
+
316
+ #define GGML_F32Cx16_FMA(a, b, c) _mm512_fmadd_ps(b, c, a)
317
+ #define GGML_F32Cx16_ADD _mm512_add_ps
318
+ #define GGML_F32Cx16_MUL _mm512_mul_ps
319
+ // F32Cx16 accumulators are plain __m512 and use the same GGML_F32_ARR register count, so reuse the F32 reduction
320
+ #define GGML_F32Cx16_REDUCE GGML_F32x16_REDUCE
335
+
336
+ #define GGML_F16_VEC GGML_F32Cx16
337
+ #define GGML_F16_VEC_ZERO GGML_F32Cx16_ZERO
338
+ #define GGML_F16_VEC_SET1 GGML_F32Cx16_SET1
339
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx16_LOAD(p)
340
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx16_STORE(p, r[i])
341
+ #define GGML_F16_VEC_FMA GGML_F32Cx16_FMA
342
+ #define GGML_F16_VEC_ADD GGML_F32Cx16_ADD
343
+ #define GGML_F16_VEC_MUL GGML_F32Cx16_MUL
344
+
345
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx16_REDUCE
346
+ #elif defined(__AVX__)
347
+
348
+ #define GGML_SIMD
349
+
350
+ // F32 AVX
351
+
352
+ #define GGML_F32_STEP 32
353
+ #define GGML_F32_EPR 8
354
+
355
+ #define GGML_F32x8 __m256
356
+ #define GGML_F32x8_ZERO _mm256_setzero_ps()
357
+ #define GGML_F32x8_SET1(x) _mm256_set1_ps(x)
358
+ #define GGML_F32x8_LOAD _mm256_loadu_ps
359
+ #define GGML_F32x8_STORE _mm256_storeu_ps
360
+ #if defined(__FMA__)
361
+ #define GGML_F32x8_FMA(a, b, c) _mm256_fmadd_ps(b, c, a)
362
+ #else
363
+ #define GGML_F32x8_FMA(a, b, c) _mm256_add_ps(_mm256_mul_ps(b, c), a)
364
+ #endif
365
+ #define GGML_F32x8_ADD _mm256_add_ps
366
+ #define GGML_F32x8_MUL _mm256_mul_ps
367
+ #define GGML_F32x8_REDUCE(res, x) /* sum every lane of the GGML_F32_ARR accumulators in x into res; x is overwritten */ \
368
+ do { \
369
+ int offset = GGML_F32_ARR >> 1; \
370
+ for (int i = 0; i < offset; ++i) { /* pass 1: add the upper half of the accumulators onto the lower half */ \
371
+ x[i] = _mm256_add_ps(x[i], x[offset+i]); \
372
+ } \
373
+ offset >>= 1; \
374
+ for (int i = 0; i < offset; ++i) { /* pass 2: halve again; with GGML_F32_ARR == 4 the total is now in x[0] */ \
375
+ x[i] = _mm256_add_ps(x[i], x[offset+i]); \
376
+ } \
377
+ offset >>= 1; \
378
+ for (int i = 0; i < offset; ++i) { /* pass 3: runs zero times when GGML_F32_ARR == 4 (offset is 0) */ \
379
+ x[i] = _mm256_add_ps(x[i], x[offset+i]); \
380
+ } \
381
+ const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), /* add the low and high 128-bit halves of x[0] */ \
382
+ _mm256_extractf128_ps(x[0], 1)); \
383
+ const __m128 t1 = _mm_hadd_ps(t0, t0); /* two horizontal adds collapse the remaining four lanes */ \
384
+ res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
385
+ } while (0)
386
+ // TODO: is this optimal ?
387
+
388
+ #define GGML_F32_VEC GGML_F32x8
389
+ #define GGML_F32_VEC_ZERO GGML_F32x8_ZERO
390
+ #define GGML_F32_VEC_SET1 GGML_F32x8_SET1
391
+ #define GGML_F32_VEC_LOAD GGML_F32x8_LOAD
392
+ #define GGML_F32_VEC_STORE GGML_F32x8_STORE
393
+ #define GGML_F32_VEC_FMA GGML_F32x8_FMA
394
+ #define GGML_F32_VEC_ADD GGML_F32x8_ADD
395
+ #define GGML_F32_VEC_MUL GGML_F32x8_MUL
396
+ #define GGML_F32_VEC_REDUCE GGML_F32x8_REDUCE
397
+
398
+ // F16 AVX
399
+
400
+ #define GGML_F16_STEP 32
401
+ #define GGML_F16_EPR 8
402
+
403
+ // F16 arithmetic is not supported by AVX, so we use F32 instead
404
+
405
+ #define GGML_F32Cx8 __m256
406
+ #define GGML_F32Cx8_ZERO _mm256_setzero_ps()
407
+ #define GGML_F32Cx8_SET1(x) _mm256_set1_ps(x)
408
+
409
+ #if defined(__F16C__)
410
+ // the _mm256_cvt intrinsics require F16C
411
+ #define GGML_F32Cx8_LOAD(x) _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(x)))
412
+ #define GGML_F32Cx8_STORE(x, y) _mm_storeu_si128((__m128i *)(x), _mm256_cvtps_ph(y, 0))
413
+ #else
414
+ static inline __m256 __avx_f32cx8_load(const ggml_fp16_t * x) {
415
+     // convert eight half-precision inputs to f32 in a scratch buffer, then load them as one __m256
416
+     float widened[8];
417
+     for (int lane = 0; lane < 8; ++lane) {
418
+         widened[lane] = GGML_FP16_TO_FP32(x[lane]);
419
+     }
420
+
421
+     return _mm256_loadu_ps(widened);
422
+ }
423
+ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
424
+     // spill the eight f32 lanes of y to scratch, then convert each one to half precision
425
+     float lanes[8];
426
+     _mm256_storeu_ps(lanes, y);
427
+     for (int k = 0; k < 8; ++k) {
428
+         x[k] = GGML_FP32_TO_FP16(lanes[k]);
429
+     }
430
+ }
431
+ #define GGML_F32Cx8_LOAD(x) __avx_f32cx8_load(x)
432
+ #define GGML_F32Cx8_STORE(x, y) __avx_f32cx8_store(x, y)
433
+ #endif
434
+
435
+ #define GGML_F32Cx8_FMA GGML_F32x8_FMA
436
+ #define GGML_F32Cx8_ADD _mm256_add_ps
437
+ #define GGML_F32Cx8_MUL _mm256_mul_ps
438
+ #define GGML_F32Cx8_REDUCE GGML_F32x8_REDUCE
439
+
440
+ #define GGML_F16_VEC GGML_F32Cx8
441
+ #define GGML_F16_VEC_ZERO GGML_F32Cx8_ZERO
442
+ #define GGML_F16_VEC_SET1 GGML_F32Cx8_SET1
443
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx8_LOAD(p)
444
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx8_STORE(p, r[i])
445
+ #define GGML_F16_VEC_FMA GGML_F32Cx8_FMA
446
+ #define GGML_F16_VEC_ADD GGML_F32Cx8_ADD
447
+ #define GGML_F16_VEC_MUL GGML_F32Cx8_MUL
448
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx8_REDUCE
449
+
450
+ #elif defined(__POWER9_VECTOR__)
451
+
452
+ #define GGML_SIMD
453
+
454
+ // F32 POWER9
455
+
456
+ #define GGML_F32_STEP 32
457
+ #define GGML_F32_EPR 4
458
+
459
+ #define GGML_F32x4 vector float
460
+ #define GGML_F32x4_ZERO {0.0f}
461
+ #define GGML_F32x4_SET1 vec_splats
462
+ #define GGML_F32x4_LOAD(p) vec_xl(0, p)
463
+ #define GGML_F32x4_STORE(p, r) vec_xst(r, 0, p)
464
+ #define GGML_F32x4_FMA(a, b, c) vec_madd(b, c, a)
465
+ #define GGML_F32x4_ADD vec_add
466
+ #define GGML_F32x4_MUL vec_mul
467
+ #define GGML_F32x4_REDUCE(res, x) \
468
+ { \
469
+ int offset = GGML_F32_ARR >> 1; \
470
+ for (int i = 0; i < offset; ++i) { \
471
+ x[i] = vec_add(x[i], x[offset+i]); \
472
+ } \
473
+ offset >>= 1; \
474
+ for (int i = 0; i < offset; ++i) { \
475
+ x[i] = vec_add(x[i], x[offset+i]); \
476
+ } \
477
+ offset >>= 1; \
478
+ for (int i = 0; i < offset; ++i) { \
479
+ x[i] = vec_add(x[i], x[offset+i]); \
480
+ } \
481
+ res = vec_extract(x[0], 0) + \
482
+ vec_extract(x[0], 1) + \
483
+ vec_extract(x[0], 2) + \
484
+ vec_extract(x[0], 3); \
485
+ }
486
+
487
+ #define GGML_F32_VEC GGML_F32x4
488
+ #define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
489
+ #define GGML_F32_VEC_SET1 GGML_F32x4_SET1
490
+ #define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
491
+ #define GGML_F32_VEC_STORE GGML_F32x4_STORE
492
+ #define GGML_F32_VEC_FMA GGML_F32x4_FMA
493
+ #define GGML_F32_VEC_ADD GGML_F32x4_ADD
494
+ #define GGML_F32_VEC_MUL GGML_F32x4_MUL
495
+ #define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
496
+
497
+ // F16 POWER9
498
+ #define GGML_F16_STEP GGML_F32_STEP
499
+ #define GGML_F16_EPR GGML_F32_EPR
500
+ #define GGML_F16_VEC GGML_F32x4
501
+ #define GGML_F16_VEC_ZERO GGML_F32x4_ZERO
502
+ #define GGML_F16_VEC_SET1 GGML_F32x4_SET1
503
+ #define GGML_F16_VEC_FMA GGML_F32x4_FMA
504
+ #define GGML_F16_VEC_ADD GGML_F32x4_ADD
505
+ #define GGML_F16_VEC_MUL GGML_F32x4_MUL
506
+ #define GGML_F16_VEC_REDUCE GGML_F32x4_REDUCE
507
+ // Use vec_xl, not vec_ld, in case the load address is not aligned. Odd i loads from p - GGML_F16_EPR and converts with vec_extract_fp32_from_shorth; even i loads from p and converts with vec_extract_fp32_from_shortl.
508
+ #define GGML_F16_VEC_LOAD(p, i) (((i) & 0x1) ? /* whole expansion and both arguments parenthesized so it is safe inside larger expressions */ \
509
+     vec_extract_fp32_from_shorth(vec_xl(0, (p) - GGML_F16_EPR)) : \
510
+     vec_extract_fp32_from_shortl(vec_xl(0, (p))))
511
+ static inline unsigned char ggml_endian_byte(int i) {
512
+     const uint16_t probe = 1; // returns byte i of the 16-bit value 1 as it is laid out in memory
513
+     return *((const unsigned char *)&probe + i);
514
+ }
515
+ #define GGML_ENDIAN_BYTE(i) ggml_endian_byte(i)
516
+ #define GGML_F16_VEC_STORE(p, r, i) \
517
+ if (i & 0x1) \
518
+ vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)], \
519
+ r[i - GGML_ENDIAN_BYTE(0)]), \
520
+ 0, p - GGML_F16_EPR)
521
+
522
+ #elif defined(__wasm_simd128__)
523
+
524
+ #define GGML_SIMD
525
+
526
+ // F32 WASM
527
+
528
+ #define GGML_F32_STEP 16
529
+ #define GGML_F32_EPR 4
530
+
531
+ #define GGML_F32x4 v128_t
532
+ #define GGML_F32x4_ZERO wasm_f32x4_splat(0.0f)
533
+ #define GGML_F32x4_SET1(x) wasm_f32x4_splat(x)
534
+ #define GGML_F32x4_LOAD wasm_v128_load
535
+ #define GGML_F32x4_STORE wasm_v128_store
536
+ #define GGML_F32x4_FMA(a, b, c) wasm_f32x4_add(wasm_f32x4_mul(b, c), a)
537
+ #define GGML_F32x4_ADD wasm_f32x4_add
538
+ #define GGML_F32x4_MUL wasm_f32x4_mul
539
+ #define GGML_F32x4_REDUCE(res, x) \
540
+ { \
541
+ int offset = GGML_F32_ARR >> 1; \
542
+ for (int i = 0; i < offset; ++i) { \
543
+ x[i] = wasm_f32x4_add(x[i], x[offset+i]); \
544
+ } \
545
+ offset >>= 1; \
546
+ for (int i = 0; i < offset; ++i) { \
547
+ x[i] = wasm_f32x4_add(x[i], x[offset+i]); \
548
+ } \
549
+ offset >>= 1; \
550
+ for (int i = 0; i < offset; ++i) { \
551
+ x[i] = wasm_f32x4_add(x[i], x[offset+i]); \
552
+ } \
553
+ res = wasm_f32x4_extract_lane(x[0], 0) + \
554
+ wasm_f32x4_extract_lane(x[0], 1) + \
555
+ wasm_f32x4_extract_lane(x[0], 2) + \
556
+ wasm_f32x4_extract_lane(x[0], 3); \
557
+ }
558
+
559
+ #define GGML_F32_VEC GGML_F32x4
560
+ #define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
561
+ #define GGML_F32_VEC_SET1 GGML_F32x4_SET1
562
+ #define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
563
+ #define GGML_F32_VEC_STORE GGML_F32x4_STORE
564
+ #define GGML_F32_VEC_FMA GGML_F32x4_FMA
565
+ #define GGML_F32_VEC_ADD GGML_F32x4_ADD
566
+ #define GGML_F32_VEC_MUL GGML_F32x4_MUL
567
+ #define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
568
+
569
+ // F16 WASM
570
+
571
+ #define GGML_F16_STEP 16
572
+ #define GGML_F16_EPR 4
573
+
574
+ inline static v128_t __wasm_f16x4_load(const ggml_fp16_t * p) {
575
+     // convert four half-precision inputs to f32 in a scratch buffer, then load them as one v128_t
576
+     float widened[4];
577
+
578
+     for (int lane = 0; lane < 4; ++lane) {
579
+         widened[lane] = GGML_FP16_TO_FP32(p[lane]);
580
+     }
581
+
582
+     return wasm_v128_load(widened);
583
+ }
584
+
585
+ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
586
+     // spill the four f32 lanes of x to scratch, then convert each one to half precision
587
+     float lanes[4];
588
+
589
+     wasm_v128_store(lanes, x);
590
+
591
+     for (int k = 0; k < 4; ++k) {
592
+         p[k] = GGML_FP32_TO_FP16(lanes[k]);
593
+     }
594
+ }
595
+
596
+ #define GGML_F16x4 v128_t
597
+ #define GGML_F16x4_ZERO wasm_f32x4_splat(0.0f)
598
+ #define GGML_F16x4_SET1(x) wasm_f32x4_splat(x)
599
+ #define GGML_F16x4_LOAD(x) __wasm_f16x4_load(x)
600
+ #define GGML_F16x4_STORE(x, y) __wasm_f16x4_store(x, y)
601
+ #define GGML_F16x4_FMA GGML_F32x4_FMA
602
+ #define GGML_F16x4_ADD wasm_f32x4_add
603
+ #define GGML_F16x4_MUL wasm_f32x4_mul
604
+ #define GGML_F16x4_REDUCE(res, x) \
605
+ { \
606
+ int offset = GGML_F16_ARR >> 1; \
607
+ for (int i = 0; i < offset; ++i) { \
608
+ x[i] = wasm_f32x4_add(x[i], x[offset+i]); \
609
+ } \
610
+ offset >>= 1; \
611
+ for (int i = 0; i < offset; ++i) { \
612
+ x[i] = wasm_f32x4_add(x[i], x[offset+i]); \
613
+ } \
614
+ offset >>= 1; \
615
+ for (int i = 0; i < offset; ++i) { \
616
+ x[i] = wasm_f32x4_add(x[i], x[offset+i]); \
617
+ } \
618
+ res = (ggml_float) (wasm_f32x4_extract_lane(x[0], 0) + \
619
+ wasm_f32x4_extract_lane(x[0], 1) + \
620
+ wasm_f32x4_extract_lane(x[0], 2) + \
621
+ wasm_f32x4_extract_lane(x[0], 3)); \
622
+ }
623
+
624
+ #define GGML_F16_VEC GGML_F16x4
625
+ #define GGML_F16_VEC_ZERO GGML_F16x4_ZERO
626
+ #define GGML_F16_VEC_SET1 GGML_F16x4_SET1
627
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F16x4_LOAD(p)
628
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F16x4_STORE(p, r[i])
629
+ #define GGML_F16_VEC_FMA GGML_F16x4_FMA
630
+ #define GGML_F16_VEC_ADD GGML_F16x4_ADD
631
+ #define GGML_F16_VEC_MUL GGML_F16x4_MUL
632
+ #define GGML_F16_VEC_REDUCE GGML_F16x4_REDUCE
633
+
634
+ #elif defined(__SSE3__)
635
+
636
+ #define GGML_SIMD
637
+
638
+ // F32 SSE
639
+
640
+ #define GGML_F32_STEP 32
641
+ #define GGML_F32_EPR 4
642
+
643
+ #define GGML_F32x4 __m128
644
+ #define GGML_F32x4_ZERO _mm_setzero_ps()
645
+ #define GGML_F32x4_SET1(x) _mm_set1_ps(x)
646
+ #define GGML_F32x4_LOAD _mm_loadu_ps
647
+ #define GGML_F32x4_STORE _mm_storeu_ps
648
+ #if defined(__FMA__)
649
+ // TODO: Does this work?
650
+ #define GGML_F32x4_FMA(a, b, c) _mm_fmadd_ps(b, c, a)
651
+ #else
652
+ #define GGML_F32x4_FMA(a, b, c) _mm_add_ps(_mm_mul_ps(b, c), a)
653
+ #endif
654
+ #define GGML_F32x4_ADD _mm_add_ps
655
+ #define GGML_F32x4_MUL _mm_mul_ps
656
+ #define GGML_F32x4_REDUCE(res, x) \
657
+ { \
658
+ int offset = GGML_F32_ARR >> 1; \
659
+ for (int i = 0; i < offset; ++i) { \
660
+ x[i] = _mm_add_ps(x[i], x[offset+i]); \
661
+ } \
662
+ offset >>= 1; \
663
+ for (int i = 0; i < offset; ++i) { \
664
+ x[i] = _mm_add_ps(x[i], x[offset+i]); \
665
+ } \
666
+ offset >>= 1; \
667
+ for (int i = 0; i < offset; ++i) { \
668
+ x[i] = _mm_add_ps(x[i], x[offset+i]); \
669
+ } \
670
+ const __m128 t0 = _mm_hadd_ps(x[0], x[0]); \
671
+ res = (ggml_float) _mm_cvtss_f32(_mm_hadd_ps(t0, t0)); \
672
+ }
673
+ // TODO: is this optimal ?
674
+
675
+ #define GGML_F32_VEC GGML_F32x4
676
+ #define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
677
+ #define GGML_F32_VEC_SET1 GGML_F32x4_SET1
678
+ #define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
679
+ #define GGML_F32_VEC_STORE GGML_F32x4_STORE
680
+ #define GGML_F32_VEC_FMA GGML_F32x4_FMA
681
+ #define GGML_F32_VEC_ADD GGML_F32x4_ADD
682
+ #define GGML_F32_VEC_MUL GGML_F32x4_MUL
683
+ #define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
684
+
685
+ // F16 SSE
686
+
687
+ #define GGML_F16_STEP 32
688
+ #define GGML_F16_EPR 4
689
+
690
+ static inline __m128 __sse_f16x4_load(const ggml_fp16_t * x) {
691
+     // convert four half-precision inputs to f32 in a scratch buffer, then load them as one __m128
692
+     float widened[4];
693
+
694
+     for (int lane = 0; lane < 4; ++lane) {
695
+         widened[lane] = GGML_FP16_TO_FP32(x[lane]);
696
+     }
697
+
698
+     return _mm_loadu_ps(widened);
699
+ }
700
+
701
+ static inline void __sse_f16x4_store(ggml_fp16_t * x, __m128 y) {
702
+     // spill the four f32 lanes of y to scratch, then convert each one to half precision
703
+     float lanes[4];
704
+
705
+     _mm_storeu_ps(lanes, y);
706
+
707
+     for (int k = 0; k < 4; ++k) {
708
+         x[k] = GGML_FP32_TO_FP16(lanes[k]);
709
+     }
710
+ }
711
+
712
+ #define GGML_F32Cx4 __m128
713
+ #define GGML_F32Cx4_ZERO _mm_setzero_ps()
714
+ #define GGML_F32Cx4_SET1(x) _mm_set1_ps(x)
715
+ #define GGML_F32Cx4_LOAD(x) __sse_f16x4_load(x)
716
+ #define GGML_F32Cx4_STORE(x, y) __sse_f16x4_store(x, y)
717
+ #define GGML_F32Cx4_FMA GGML_F32x4_FMA
718
+ #define GGML_F32Cx4_ADD _mm_add_ps
719
+ #define GGML_F32Cx4_MUL _mm_mul_ps
720
+ #define GGML_F32Cx4_REDUCE GGML_F32x4_REDUCE
721
+
722
+ #define GGML_F16_VEC GGML_F32Cx4
723
+ #define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO
724
+ #define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1
725
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p)
726
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE(p, r[i])
727
+ #define GGML_F16_VEC_FMA GGML_F32Cx4_FMA
728
+ #define GGML_F16_VEC_ADD GGML_F32Cx4_ADD
729
+ #define GGML_F16_VEC_MUL GGML_F32Cx4_MUL
730
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx4_REDUCE
731
+
732
+ #elif defined(__loongarch_asx)
733
+
734
+ #define GGML_SIMD
735
+
736
+ // F32 LASX
737
+ #define GGML_F32_STEP 32
738
+ #define GGML_F32_EPR 8
739
+
740
+ #define GGML_F32x8 __m256
741
+ #define GGML_F32x8_ZERO (__m256)__lasx_xvldi(0)
742
+ #define GGML_F32x8_SET1(x) (__m256)__lasx_xvreplfr2vr_s((x))
743
+ #define GGML_F32x8_LOAD(x) (__m256)__lasx_xvld((x), 0)
744
+ #define GGML_F32x8_STORE(x,y) __lasx_xvst((y), (x), 0)
745
+ #define GGML_F32x8_FMA(a, b, c) __lasx_xvfmadd_s(b, c, a)
746
+ #define GGML_F32x8_ADD __lasx_xvfadd_s
747
+ #define GGML_F32x8_MUL __lasx_xvfmul_s
748
+ #define GGML_F32x8_REDUCE(res, x) /* sum every lane of the GGML_F32_ARR accumulators in x into res; x is overwritten */ \
749
+ do { \
750
+ int offset = GGML_F32_ARR >> 1; \
751
+ for (int i = 0; i < offset; ++i) { /* pass 1: add the upper half of the accumulators onto the lower half */ \
752
+ x[i] = __lasx_xvfadd_s(x[i], x[offset+i]); \
753
+ } \
754
+ offset >>= 1; \
755
+ for (int i = 0; i < offset; ++i) { /* pass 2: halve again; with GGML_F32_ARR == 4 the total is now in x[0] */ \
756
+ x[i] = __lasx_xvfadd_s(x[i], x[offset+i]); \
757
+ } \
758
+ offset >>= 1; \
759
+ for (int i = 0; i < offset; ++i) { /* pass 3: runs zero times when GGML_F32_ARR == 4 (offset is 0) */ \
760
+ x[i] = __lasx_xvfadd_s(x[i], x[offset+i]); \
761
+ } \
762
+ float *tmp_p = (float *)&x[0]; /* view x[0] as eight floats and add them one by one */ \
763
+ res = tmp_p[0] + tmp_p[1] + tmp_p[2] + tmp_p[3] + tmp_p[4] + tmp_p[5] + tmp_p[6] + tmp_p[7]; \
764
+ } while (0)
765
+ // TODO: is this optimal ?
766
+
767
+ #define GGML_F32_VEC GGML_F32x8
768
+ #define GGML_F32_VEC_ZERO GGML_F32x8_ZERO
769
+ #define GGML_F32_VEC_SET1 GGML_F32x8_SET1
770
+ #define GGML_F32_VEC_LOAD GGML_F32x8_LOAD
771
+ #define GGML_F32_VEC_STORE GGML_F32x8_STORE
772
+ #define GGML_F32_VEC_FMA GGML_F32x8_FMA
773
+ #define GGML_F32_VEC_ADD GGML_F32x8_ADD
774
+ #define GGML_F32_VEC_MUL GGML_F32x8_MUL
775
+ #define GGML_F32_VEC_REDUCE GGML_F32x8_REDUCE
776
+
777
+ // F16 LASX
778
+
779
+ #define GGML_F16_STEP 32
780
+ #define GGML_F16_EPR 8
781
+
782
+ // F16 arithmetic is not supported by LASX, so we use F32 instead
783
+ // F32Cx8 holds F16 data as eight f32 lanes, so its constructors mirror the GGML_F32x8 ones
784
+ #define GGML_F32Cx8 __m256
785
+ #define GGML_F32Cx8_ZERO (__m256)__lasx_xvldi(0)
786
+ #define GGML_F32Cx8_SET1(x) (__m256)__lasx_xvreplfr2vr_s((x)) // float broadcast, same helper as GGML_F32x8_SET1; the integer replicate would convert x to int first
787
+ // Reads eight ggml_fp16_t values at x and returns them as a __m256; presumably eight f32 lanes in input order — TODO confirm against the LASX intrinsic reference
788
+ static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t * x) {
789
+ __m256i a;
790
+ memcpy(&a, x, sizeof(ggml_fp16_t) * 8); // copies only sizeof(ggml_fp16_t) * 8 bytes into a
791
+ a = __lasx_xvpermi_d(a, 0 | (1 << 4)); // NOTE(review): appears to move the two 64-bit input groups into separate 128-bit halves for the convert below — confirm
792
+ return __lasx_xvfcvtl_s_h(a);
793
+ }
794
+ // Converts y with __lasx_xvfcvt_h_s and writes sizeof(ggml_fp16_t) * 8 bytes of the result to x; presumably the eight lanes of y as half precision, in order — TODO confirm
795
+ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
796
+ __m256i a = __lasx_xvfcvt_h_s(y, y);
797
+ a = __lasx_xvpermi_d(a, 0 | (2 << 2)); // NOTE(review): appears to gather the converted values into the leading bytes that the memcpy reads — confirm
798
+ memcpy(x, &a, sizeof(ggml_fp16_t) * 8);
799
+ }
800
+ #define GGML_F32Cx8_LOAD(x) __lasx_f32cx8_load(x)
801
+ #define GGML_F32Cx8_STORE(x, y) __lasx_f32cx8_store(x, y)
802
+
803
+ #define GGML_F32Cx8_FMA GGML_F32x8_FMA
804
+ #define GGML_F32Cx8_ADD __lasx_xvfadd_s
805
+ #define GGML_F32Cx8_MUL __lasx_xvfmul_s
806
+ #define GGML_F32Cx8_REDUCE GGML_F32x8_REDUCE
807
+
808
+ #define GGML_F16_VEC GGML_F32Cx8
809
+ #define GGML_F16_VEC_ZERO GGML_F32Cx8_ZERO
810
+ #define GGML_F16_VEC_SET1 GGML_F32Cx8_SET1
811
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx8_LOAD(p)
812
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx8_STORE(p, r[i])
813
+ #define GGML_F16_VEC_FMA GGML_F32Cx8_FMA
814
+ #define GGML_F16_VEC_ADD GGML_F32Cx8_ADD
815
+ #define GGML_F16_VEC_MUL GGML_F32Cx8_MUL
816
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx8_REDUCE
817
+
818
+ #elif defined(__loongarch_sx)
819
+
820
+ #define GGML_SIMD
821
+
822
+ // F32 LSX
823
+
824
+ #define GGML_F32_STEP 32
825
+ #define GGML_F32_EPR 4
826
+
827
+ #define GGML_F32x4 __m128
828
+ #define GGML_F32x4_ZERO __lsx_vldi(0)
829
+ #define GGML_F32x4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0) // NOTE(review): looks like this inserts x into element 0 of a zero vector only, via an integer insert — confirm whether a float broadcast is intended
830
+ #define GGML_F32x4_LOAD(x) __lsx_vld((x), 0)
831
+ #define GGML_F32x4_STORE(x, y) __lsx_vst((y), (x), 0) // stores vector y at address x; macro parameters must be plain identifiers, not parenthesized
832
+ #define GGML_F32x4_FMA(a, b, c) __lsx_vfmadd_s(b, c, a)
833
+ #define GGML_F32x4_ADD __lsx_vfadd_s
834
+ #define GGML_F32x4_MUL __lsx_vfmul_s
835
+ #define GGML_F32x4_REDUCE(res, x) \
836
+ { \
837
+ int offset = GGML_F32_ARR >> 1; \
838
+ for (int i = 0; i < offset; ++i) { \
839
+ x[i] = __lsx_vfadd_s(x[i], x[offset + i]); \
840
+ } \
841
+ offset >>= 1; \
842
+ for (int i = 0; i < offset; ++i) { \
843
+ x[i] = __lsx_vfadd_s(x[i], x[offset + i]); \
844
+ } \
845
+ offset >>= 1; \
846
+ for (int i = 0; i < offset; ++i) { \
847
+ x[i] = __lsx_vfadd_s(x[i], x[offset + i]); \
848
+ } \
849
+ __m128i tmp = __lsx_vsrli_d((__m128i) x[0], 32); \
850
+ tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, x[0]); \
851
+ tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \
852
+ const __m128 t0 = __lsx_vshuf4i_w(tmp, 0x88); \
853
+ tmp = __lsx_vsrli_d((__m128i) t0, 32); \
854
+ tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, t0); \
855
+ tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \
856
+ res = (ggml_float) __lsx_vpickve2gr_w(__lsx_vshuf4i_w(tmp, 0x88), 0); \
857
+ }
858
+
859
+ #define GGML_F32_VEC GGML_F32x4
860
+ #define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
861
+ #define GGML_F32_VEC_SET1 GGML_F32x4_SET1
862
+ #define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
863
+ #define GGML_F32_VEC_STORE GGML_F32x4_STORE
864
+ #define GGML_F32_VEC_FMA GGML_F32x4_FMA
865
+ #define GGML_F32_VEC_ADD GGML_F32x4_ADD
866
+ #define GGML_F32_VEC_MUL GGML_F32x4_MUL
867
+ #define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
868
+
869
+ // F16 LSX
870
+
871
+ #define GGML_F16_STEP 32
872
+ #define GGML_F16_EPR 4
873
+
874
+ static inline __m128 __lsx_f16x4_load(const ggml_fp16_t * x) {
875
+     // convert four half-precision inputs to f32 in a scratch buffer, then load them as one __m128
876
+     float widened[4];
877
+
878
+     for (int lane = 0; lane < 4; ++lane) {
879
+         widened[lane] = GGML_FP16_TO_FP32(x[lane]);
880
+     }
881
+
882
+     return __lsx_vld(widened, 0);
883
+ }
884
+
885
+ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
886
+     // spill the four f32 lanes of y to scratch, then convert each one to half precision
887
+     float lanes[4];
888
+
889
+     __lsx_vst(y, lanes, 0);
890
+
891
+     for (int k = 0; k < 4; ++k) {
892
+         x[k] = GGML_FP32_TO_FP16(lanes[k]);
893
+     }
894
+ }
895
+
896
+ #define GGML_F32Cx4 __m128
897
+ #define GGML_F32Cx4_ZERO __lsx_vldi(0)
898
+ #define GGML_F32Cx4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
899
+ #define GGML_F32Cx4_LOAD(x) __lsx_f16x4_load(x)
900
+ #define GGML_F32Cx4_STORE(x, y) __lsx_f16x4_store(x, y)
901
+ #define GGML_F32Cx4_FMA GGML_F32x4_FMA
902
+ #define GGML_F32Cx4_ADD __lsx_vfadd_s
903
+ #define GGML_F32Cx4_MUL __lsx_vfmul_s
904
+ #define GGML_F32Cx4_REDUCE GGML_F32x4_REDUCE
905
+
906
+ #define GGML_F16_VEC GGML_F32Cx4
907
+ #define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO
908
+ #define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1
909
+ #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p)
910
+ #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE(p, r[i])
911
+ #define GGML_F16_VEC_FMA GGML_F32Cx4_FMA
912
+ #define GGML_F16_VEC_ADD GGML_F32Cx4_ADD
913
+ #define GGML_F16_VEC_MUL GGML_F32Cx4_MUL
914
+ #define GGML_F16_VEC_REDUCE GGML_F32Cx4_REDUCE
915
+
916
+ #elif defined(__VXE__) || defined(__VXE2__)
917
+
918
+ #define GGML_SIMD
919
+
920
+ // F32 s390x
921
+
922
+ #define GGML_F32_STEP 32
923
+ #define GGML_F32_EPR 4
924
+
925
+ #define GGML_F32x4 __vector float
926
+ #define GGML_F32x4_ZERO vec_splats(0.0f)
927
+ #define GGML_F32x4_SET1 vec_splats
928
+ #define GGML_F32x4_LOAD(p) vec_xl(0, p)
929
+ #define GGML_F32x4_STORE(p, r) vec_xst(r, 0, p)
930
+ #define GGML_F32x4_FMA(a, b, c) vec_madd(b, c, a)
931
+ #define GGML_F32x4_ADD vec_add
932
+ #define GGML_F32x4_MUL vec_mul
933
+ #define GGML_F32x4_REDUCE(res, x) \
934
+ { \
935
+ int offset = GGML_F32_ARR >> 1; \
936
+ for (int i = 0; i < offset; ++i) { \
937
+ x[i] = vec_add(x[i], x[offset + i]); \
938
+ } \
939
+ offset >>= 1; \
940
+ for (int i = 0; i < offset; ++i) { \
941
+ x[i] = vec_add(x[i], x[offset + i]); \
942
+ } \
943
+ offset >>= 1; \
944
+ for (int i = 0; i < offset; ++i) { \
945
+ x[i] = vec_add(x[i], x[offset + i]); \
946
+ } \
947
+ float32x4_t tmp = x[0] + vec_reve(x[0]); \
948
+ res = tmp[0] + tmp[1]; \
949
+ }
950
+
951
+ #define GGML_F32_VEC GGML_F32x4
952
+ #define GGML_F32_VEC_ZERO GGML_F32x4_ZERO
953
+ #define GGML_F32_VEC_SET1 GGML_F32x4_SET1
954
+ #define GGML_F32_VEC_LOAD GGML_F32x4_LOAD
955
+ #define GGML_F32_VEC_STORE GGML_F32x4_STORE
956
+ #define GGML_F32_VEC_FMA GGML_F32x4_FMA
957
+ #define GGML_F32_VEC_ADD GGML_F32x4_ADD
958
+ #define GGML_F32_VEC_MUL GGML_F32x4_MUL
959
+ #define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE
960
+
961
+ // F16 s390x
962
+ #define GGML_F16_STEP GGML_F32_STEP
963
+ #define GGML_F16_EPR GGML_F32_EPR
964
+ // Converts the four ggml_fp16_t values at x to float with GGML_FP16_TO_FP32 and returns them loaded as one __vector float.
965
+ static inline __vector float __lzs_f16cx4_load(const ggml_fp16_t * x) {
966
+ float tmp[4];
967
+
968
+ for (int i = 0; i < 4; i++) {
969
+ tmp[i] = GGML_FP16_TO_FP32(x[i]);
970
+ }
971
+ // the converted values are loaded from the scratch array with vec_xl
972
+ // note: keep type-cast here to prevent compiler bugs
973
+ // see: https://github.com/ggml-org/llama.cpp/issues/12846
974
+ return vec_xl(0, (const float *)(tmp));
975
+ }
976
+ // Stores the four float elements of y to a scratch array, then writes each one to x converted with GGML_FP32_TO_FP16.
977
+ static inline void __lzs_f16cx4_store(ggml_fp16_t * x, __vector float y) {
978
+ float arr[4];
979
+
980
+ // note: keep type-cast here to prevent compiler bugs
981
+ // see: https://github.com/ggml-org/llama.cpp/issues/12846
982
+ vec_xst(y, 0, (float *)(arr));
983
+ // convert each spilled float back to ggml_fp16_t
984
+ for (int i = 0; i < 4; i++) {
985
+ x[i] = GGML_FP32_TO_FP16(arr[i]);
986
+ }
987
+ }
988
+
989
+ #define GGML_F16_VEC GGML_F32x4
990
+ #define GGML_F16_VEC_ZERO GGML_F32x4_ZERO
991
+ #define GGML_F16_VEC_SET1 GGML_F32x4_SET1
992
+ #define GGML_F16_VEC_LOAD(p, i) __lzs_f16cx4_load(p)
993
+ #define GGML_F16_VEC_STORE(p, r, i) __lzs_f16cx4_store(p, r[i])
994
+ #define GGML_F16_VEC_FMA GGML_F32x4_FMA
995
+ #define GGML_F16_VEC_ADD GGML_F32x4_ADD
996
+ #define GGML_F16_VEC_MUL GGML_F32x4_MUL
997
+ #define GGML_F16_VEC_REDUCE GGML_F32x4_REDUCE
998
+
999
+ #endif
1000
+
1001
+ // GGML_F32_ARR / GGML_F16_ARR
1002
+ // number of registers to use per step
1003
+ #ifdef GGML_SIMD
1004
+ #define GGML_F32_ARR (GGML_F32_STEP/GGML_F32_EPR)
1005
+ #define GGML_F16_ARR (GGML_F16_STEP/GGML_F16_EPR)
1006
+ #endif