@agency-lang/whisper-local 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (643)
  1. package/CMakeLists.txt +51 -0
  2. package/README.md +145 -0
  3. package/build/Release/whisper_addon.node +0 -0
  4. package/dist/src/addon.d.ts +11 -0
  5. package/dist/src/addon.js +22 -0
  6. package/dist/src/cli.d.ts +2 -0
  7. package/dist/src/cli.js +117 -0
  8. package/dist/src/ffmpeg.d.ts +11 -0
  9. package/dist/src/ffmpeg.js +154 -0
  10. package/dist/src/handleCache.d.ts +9 -0
  11. package/dist/src/handleCache.js +83 -0
  12. package/dist/src/modelManager.d.ts +12 -0
  13. package/dist/src/modelManager.js +172 -0
  14. package/dist/src/packageRoot.d.ts +8 -0
  15. package/dist/src/packageRoot.js +21 -0
  16. package/dist/src/transcribe.d.ts +2 -0
  17. package/dist/src/transcribe.js +36 -0
  18. package/dist/src/types.d.ts +11 -0
  19. package/dist/src/types.js +17 -0
  20. package/index.agency +32 -0
  21. package/models.lock.json +55 -0
  22. package/package.json +52 -0
  23. package/vendor/whisper.cpp/CMakeLists.txt +251 -0
  24. package/vendor/whisper.cpp/LICENSE +21 -0
  25. package/vendor/whisper.cpp/UPSTREAM_SHA256 +1 -0
  26. package/vendor/whisper.cpp/VERSION +1 -0
  27. package/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake +16 -0
  28. package/vendor/whisper.cpp/cmake/FindFFmpeg.cmake +163 -0
  29. package/vendor/whisper.cpp/cmake/build-info.cmake +60 -0
  30. package/vendor/whisper.cpp/cmake/git-vars.cmake +22 -0
  31. package/vendor/whisper.cpp/cmake/whisper-config.cmake.in +65 -0
  32. package/vendor/whisper.cpp/cmake/whisper.pc.in +10 -0
  33. package/vendor/whisper.cpp/ggml/CMakeLists.txt +434 -0
  34. package/vendor/whisper.cpp/ggml/cmake/BuildTypes.cmake +54 -0
  35. package/vendor/whisper.cpp/ggml/cmake/GitVars.cmake +22 -0
  36. package/vendor/whisper.cpp/ggml/cmake/common.cmake +50 -0
  37. package/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  38. package/vendor/whisper.cpp/ggml/include/ggml-alloc.h +76 -0
  39. package/vendor/whisper.cpp/ggml/include/ggml-backend.h +354 -0
  40. package/vendor/whisper.cpp/ggml/include/ggml-blas.h +25 -0
  41. package/vendor/whisper.cpp/ggml/include/ggml-cann.h +123 -0
  42. package/vendor/whisper.cpp/ggml/include/ggml-cpp.h +39 -0
  43. package/vendor/whisper.cpp/ggml/include/ggml-cpu.h +143 -0
  44. package/vendor/whisper.cpp/ggml/include/ggml-cuda.h +47 -0
  45. package/vendor/whisper.cpp/ggml/include/ggml-kompute.h +50 -0
  46. package/vendor/whisper.cpp/ggml/include/ggml-metal.h +66 -0
  47. package/vendor/whisper.cpp/ggml/include/ggml-opencl.h +26 -0
  48. package/vendor/whisper.cpp/ggml/include/ggml-opt.h +237 -0
  49. package/vendor/whisper.cpp/ggml/include/ggml-rpc.h +33 -0
  50. package/vendor/whisper.cpp/ggml/include/ggml-sycl.h +49 -0
  51. package/vendor/whisper.cpp/ggml/include/ggml-vulkan.h +29 -0
  52. package/vendor/whisper.cpp/ggml/include/ggml.h +2221 -0
  53. package/vendor/whisper.cpp/ggml/include/gguf.h +202 -0
  54. package/vendor/whisper.cpp/ggml/src/CMakeLists.txt +404 -0
  55. package/vendor/whisper.cpp/ggml/src/ggml-alloc.c +1042 -0
  56. package/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  57. package/vendor/whisper.cpp/ggml/src/ggml-amx/common.h +94 -0
  58. package/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  59. package/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  60. package/vendor/whisper.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  61. package/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h +255 -0
  62. package/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp +591 -0
  63. package/vendor/whisper.cpp/ggml/src/ggml-backend.cpp +2016 -0
  64. package/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  65. package/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  66. package/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt +75 -0
  67. package/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  68. package/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp +181 -0
  69. package/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  70. package/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +3193 -0
  71. package/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  72. package/vendor/whisper.cpp/ggml/src/ggml-cann/common.h +425 -0
  73. package/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2630 -0
  74. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  75. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  76. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp +234 -0
  77. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  78. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  79. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  80. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  81. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  82. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  83. package/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  84. package/vendor/whisper.cpp/ggml/src/ggml-common.h +1861 -0
  85. package/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt +584 -0
  86. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  87. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  88. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  89. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  90. package/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  91. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  92. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  93. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
  94. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  95. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  96. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  97. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  98. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  99. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  100. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  101. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  102. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  103. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +3284 -0
  104. package/vendor/whisper.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  105. package/vendor/whisper.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  106. package/vendor/whisper.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  107. package/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  108. package/vendor/whisper.cpp/ggml/src/ggml-cpu/common.h +72 -0
  109. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +511 -0
  110. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3473 -0
  111. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  112. package/vendor/whisper.cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  113. package/vendor/whisper.cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  114. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  115. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  116. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  117. package/vendor/whisper.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  118. package/vendor/whisper.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3593 -0
  119. package/vendor/whisper.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
  120. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ops.cpp +9085 -0
  121. package/vendor/whisper.cpp/ggml/src/ggml-cpu/ops.h +111 -0
  122. package/vendor/whisper.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  123. package/vendor/whisper.cpp/ggml/src/ggml-cpu/quants.h +89 -0
  124. package/vendor/whisper.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
  125. package/vendor/whisper.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  126. package/vendor/whisper.cpp/ggml/src/ggml-cpu/simd-mappings.h +1006 -0
  127. package/vendor/whisper.cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  128. package/vendor/whisper.cpp/ggml/src/ggml-cpu/traits.h +38 -0
  129. package/vendor/whisper.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  130. package/vendor/whisper.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  131. package/vendor/whisper.cpp/ggml/src/ggml-cpu/vec.cpp +321 -0
  132. package/vendor/whisper.cpp/ggml/src/ggml-cpu/vec.h +973 -0
  133. package/vendor/whisper.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  134. package/vendor/whisper.cpp/ggml/src/ggml-cuda/acc.cu +61 -0
  135. package/vendor/whisper.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  136. package/vendor/whisper.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  137. package/vendor/whisper.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  138. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  139. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  140. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  141. package/vendor/whisper.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  142. package/vendor/whisper.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  143. package/vendor/whisper.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  144. package/vendor/whisper.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  145. package/vendor/whisper.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  146. package/vendor/whisper.cpp/ggml/src/ggml-cuda/common.cuh +812 -0
  147. package/vendor/whisper.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  148. package/vendor/whisper.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  149. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  150. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  151. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  152. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  153. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  154. package/vendor/whisper.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  155. package/vendor/whisper.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  156. package/vendor/whisper.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  157. package/vendor/whisper.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  158. package/vendor/whisper.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  159. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  160. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cpy.cu +705 -0
  161. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  162. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  163. package/vendor/whisper.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  164. package/vendor/whisper.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  165. package/vendor/whisper.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  166. package/vendor/whisper.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  167. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  168. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
  169. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  170. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  171. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  172. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  173. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  174. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  175. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  176. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  177. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn.cu +346 -0
  178. package/vendor/whisper.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  179. package/vendor/whisper.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  180. package/vendor/whisper.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  181. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3562 -0
  182. package/vendor/whisper.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  183. package/vendor/whisper.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  184. package/vendor/whisper.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  185. package/vendor/whisper.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  186. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  187. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmq.cu +324 -0
  188. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  189. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  190. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  191. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  192. package/vendor/whisper.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  193. package/vendor/whisper.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  194. package/vendor/whisper.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  195. package/vendor/whisper.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  196. package/vendor/whisper.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  197. package/vendor/whisper.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  198. package/vendor/whisper.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  199. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  200. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  201. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  202. package/vendor/whisper.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  203. package/vendor/whisper.cpp/ggml/src/ggml-cuda/quantize.cu +190 -0
  204. package/vendor/whisper.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  205. package/vendor/whisper.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  206. package/vendor/whisper.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  207. package/vendor/whisper.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  208. package/vendor/whisper.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  209. package/vendor/whisper.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  210. package/vendor/whisper.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  211. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  212. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  213. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
  214. package/vendor/whisper.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  215. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  216. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  217. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  218. package/vendor/whisper.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  219. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  220. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  221. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  222. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  223. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  224. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  225. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  226. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  227. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  228. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  229. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  230. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  231. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  232. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  233. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  234. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  235. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  236. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  237. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  238. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  239. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  240. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  241. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  242. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  243. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  244. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  245. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  246. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  247. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  248. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  249. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  250. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  251. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  252. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  253. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  254. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  255. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  256. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  257. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  258. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  259. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  260. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  261. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  262. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  263. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  264. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  265. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  266. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  267. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  268. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  269. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  270. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  271. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  272. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  273. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  274. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  275. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  276. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  277. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  278. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  279. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  280. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  281. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  282. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  283. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  284. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  285. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  286. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  287. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  288. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  289. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  290. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  291. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  292. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  293. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  294. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  295. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  296. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  297. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  298. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  299. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  300. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  301. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  302. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  303. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  304. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  305. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  306. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  307. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  308. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  309. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  310. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  311. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  312. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  313. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  314. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  315. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  316. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  317. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  318. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  319. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  320. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  321. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  322. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  323. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  324. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  325. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  326. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  327. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  328. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  329. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  330. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  331. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  332. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  333. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  334. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  335. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  336. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  337. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  338. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  339. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  340. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  341. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  342. package/vendor/whisper.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  343. package/vendor/whisper.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  344. package/vendor/whisper.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  345. package/vendor/whisper.cpp/ggml/src/ggml-cuda/unary.cu +289 -0
  346. package/vendor/whisper.cpp/ggml/src/ggml-cuda/unary.cuh +59 -0
  347. package/vendor/whisper.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  348. package/vendor/whisper.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  349. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  350. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  351. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  352. package/vendor/whisper.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  353. package/vendor/whisper.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  354. package/vendor/whisper.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  355. package/vendor/whisper.cpp/ggml/src/ggml-hip/CMakeLists.txt +135 -0
  356. package/vendor/whisper.cpp/ggml/src/ggml-impl.h +603 -0
  357. package/vendor/whisper.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  358. package/vendor/whisper.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  359. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  360. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  361. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  362. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  363. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  364. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  365. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  366. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  367. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  368. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  369. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  370. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  371. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  372. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  373. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  374. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  375. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  376. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  377. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  378. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  379. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  380. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  381. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  382. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  383. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  384. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  385. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  386. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  387. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  388. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  389. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  390. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  391. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  392. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  393. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  394. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  395. package/vendor/whisper.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  396. package/vendor/whisper.cpp/ggml/src/ggml-metal/CMakeLists.txt +121 -0
  397. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
  398. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal.m +6023 -0
  399. package/vendor/whisper.cpp/ggml/src/ggml-metal/ggml-metal.metal +7124 -0
  400. package/vendor/whisper.cpp/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  401. package/vendor/whisper.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  402. package/vendor/whisper.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  403. package/vendor/whisper.cpp/ggml/src/ggml-opencl/CMakeLists.txt +109 -0
  404. package/vendor/whisper.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6665 -0
  405. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  406. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  407. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  408. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  409. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  410. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  411. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  412. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  413. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  414. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  415. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  416. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  417. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  418. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  419. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  420. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  421. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  422. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  423. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  424. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  425. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  426. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  427. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  428. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  429. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  430. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  431. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  432. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  433. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  434. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  435. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  436. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  437. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  438. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  439. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  440. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  441. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  442. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  443. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  444. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  445. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  446. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  447. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  448. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  449. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  450. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  451. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  452. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  453. package/vendor/whisper.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  454. package/vendor/whisper.cpp/ggml/src/ggml-opt.cpp +1037 -0
  455. package/vendor/whisper.cpp/ggml/src/ggml-quants.c +5230 -0
  456. package/vendor/whisper.cpp/ggml/src/ggml-quants.h +100 -0
  457. package/vendor/whisper.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  458. package/vendor/whisper.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1816 -0
  459. package/vendor/whisper.cpp/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  460. package/vendor/whisper.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  461. package/vendor/whisper.cpp/ggml/src/ggml-sycl/binbcast.cpp +344 -0
  462. package/vendor/whisper.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  463. package/vendor/whisper.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  464. package/vendor/whisper.cpp/ggml/src/ggml-sycl/common.hpp +584 -0
  465. package/vendor/whisper.cpp/ggml/src/ggml-sycl/concat.cpp +182 -0
  466. package/vendor/whisper.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  467. package/vendor/whisper.cpp/ggml/src/ggml-sycl/conv.cpp +95 -0
  468. package/vendor/whisper.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  469. package/vendor/whisper.cpp/ggml/src/ggml-sycl/convert.cpp +575 -0
  470. package/vendor/whisper.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  471. package/vendor/whisper.cpp/ggml/src/ggml-sycl/cpy.cpp +839 -0
  472. package/vendor/whisper.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  473. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dequantize.hpp +823 -0
  474. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dmmv.cpp +1144 -0
  475. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  476. package/vendor/whisper.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
  477. package/vendor/whisper.cpp/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
  478. package/vendor/whisper.cpp/ggml/src/ggml-sycl/element_wise.hpp +77 -0
  479. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gemm.hpp +102 -0
  480. package/vendor/whisper.cpp/ggml/src/ggml-sycl/getrows.cpp +212 -0
  481. package/vendor/whisper.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  482. package/vendor/whisper.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4608 -0
  483. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gla.cpp +106 -0
  484. package/vendor/whisper.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  485. package/vendor/whisper.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  486. package/vendor/whisper.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  487. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmq.cpp +3010 -0
  488. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  489. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
  490. package/vendor/whisper.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  491. package/vendor/whisper.cpp/ggml/src/ggml-sycl/norm.cpp +482 -0
  492. package/vendor/whisper.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  493. package/vendor/whisper.cpp/ggml/src/ggml-sycl/outprod.cpp +47 -0
  494. package/vendor/whisper.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  495. package/vendor/whisper.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  496. package/vendor/whisper.cpp/ggml/src/ggml-sycl/quants.hpp +111 -0
  497. package/vendor/whisper.cpp/ggml/src/ggml-sycl/rope.cpp +472 -0
  498. package/vendor/whisper.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  499. package/vendor/whisper.cpp/ggml/src/ggml-sycl/softmax.cpp +261 -0
  500. package/vendor/whisper.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  501. package/vendor/whisper.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  502. package/vendor/whisper.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  503. package/vendor/whisper.cpp/ggml/src/ggml-sycl/tsembd.cpp +67 -0
  504. package/vendor/whisper.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  505. package/vendor/whisper.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
  506. package/vendor/whisper.cpp/ggml/src/ggml-sycl/wkv.cpp +289 -0
  507. package/vendor/whisper.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  508. package/vendor/whisper.cpp/ggml/src/ggml-threading.cpp +12 -0
  509. package/vendor/whisper.cpp/ggml/src/ggml-threading.h +14 -0
  510. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +189 -0
  511. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  512. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10937 -0
  513. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +27 -0
  514. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  515. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  516. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  517. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  518. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  519. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  520. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  521. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  522. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  523. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  524. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  525. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  526. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  527. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  528. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  529. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  530. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  531. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  532. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  533. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  534. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  535. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  536. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  537. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  538. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  539. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  540. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  541. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  542. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  543. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  544. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  545. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  546. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  547. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  548. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  549. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  550. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  551. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  552. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  553. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  554. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  555. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  556. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  557. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  558. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  559. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  560. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  561. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  562. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  563. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  564. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  565. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  566. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  567. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  568. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  569. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  570. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  571. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  572. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  573. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  574. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  575. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  576. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  577. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  578. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  579. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  580. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  581. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  582. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  583. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  584. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  585. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  586. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  587. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  588. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  589. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  590. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  591. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  592. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  593. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  594. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  595. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  596. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  597. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  598. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  599. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  600. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  601. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  602. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  603. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  604. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  605. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  606. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  607. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  608. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  609. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  610. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  611. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  612. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  613. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  614. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  615. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  616. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  617. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  618. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  619. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  620. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  621. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  622. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  623. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  624. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +753 -0
  625. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  626. package/vendor/whisper.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  627. package/vendor/whisper.cpp/ggml/src/ggml.c +6601 -0
  628. package/vendor/whisper.cpp/ggml/src/ggml.cpp +26 -0
  629. package/vendor/whisper.cpp/ggml/src/gguf.cpp +1347 -0
  630. package/vendor/whisper.cpp/include/whisper.h +738 -0
  631. package/vendor/whisper.cpp/src/CMakeLists.txt +145 -0
  632. package/vendor/whisper.cpp/src/coreml/whisper-compat.h +10 -0
  633. package/vendor/whisper.cpp/src/coreml/whisper-compat.m +35 -0
  634. package/vendor/whisper.cpp/src/coreml/whisper-decoder-impl.h +158 -0
  635. package/vendor/whisper.cpp/src/coreml/whisper-decoder-impl.m +227 -0
  636. package/vendor/whisper.cpp/src/coreml/whisper-encoder-impl.h +154 -0
  637. package/vendor/whisper.cpp/src/coreml/whisper-encoder-impl.m +223 -0
  638. package/vendor/whisper.cpp/src/coreml/whisper-encoder.h +26 -0
  639. package/vendor/whisper.cpp/src/coreml/whisper-encoder.mm +73 -0
  640. package/vendor/whisper.cpp/src/openvino/whisper-openvino-encoder.cpp +108 -0
  641. package/vendor/whisper.cpp/src/openvino/whisper-openvino-encoder.h +31 -0
  642. package/vendor/whisper.cpp/src/whisper-arch.h +197 -0
  643. package/vendor/whisper.cpp/src/whisper.cpp +8969 -0
@@ -0,0 +1,1511 @@
1
+ #include "common.hpp"
2
+ #include "ggml.h"
3
+ #include "element_wise.hpp"
4
+
5
+ static void acc_f32(const float * x, const float * y, float * dst, const int ne,
6
+ const int ne10, const int ne11, const int ne12,
7
+ const int nb1, const int nb2, int offset, const sycl::nd_item<3> &item_ct1) {
8
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
9
+ item_ct1.get_local_id(2);
10
+ if (i >= ne) {
11
+ return;
12
+ }
13
+ int src1_idx = i - offset;
14
+ int oz = src1_idx / nb2;
15
+ int oy = (src1_idx - (oz * nb2)) / nb1;
16
+ int ox = src1_idx % nb1;
17
+ if (src1_idx >= 0 && ox < ne10 && oy < ne11 && oz < ne12) {
18
+ dst[i] = x[i] + y[ox + oy * ne10 + oz * ne10 * ne11];
19
+ } else {
20
+ dst[i] = x[i];
21
+ }
22
+ }
23
+
24
+ template<typename T>
25
+ static void sgn(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
26
+ for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) {
27
+ dst[i] = x[i] > static_cast<T>(0.f) ? static_cast<T>(1.f) : ((x[i] < static_cast<T>(0.f) ? static_cast<T>(-1.f) : static_cast<T>(0.f)));
28
+ }
29
+ }
30
+
31
+ template<typename T>
32
+ static void abs_op(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
33
+ for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) {
34
+ dst[i] = sycl::fabs(x[i]);
35
+ }
36
+ }
37
+
38
+ template<typename T>
39
+ static void elu_op(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
40
+ for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) {
41
+ dst[i] = (x[i] > static_cast<T>(0.f)) ? x[i] : sycl::expm1(x[i]);
42
+ }
43
+ }
44
+
45
+ template<typename T>
46
+ static void gelu(const T * x, T * dst, const int k,
47
+ const sycl::nd_item<3> &item_ct1) {
48
+ const T GELU_COEF_A = static_cast<T>(0.044715f);
49
+ const T SQRT_2_OVER_PI = static_cast<T>(0.79788456080286535587989211986876f);
50
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
51
+ item_ct1.get_local_id(2);
52
+
53
+ if (i >= k) {
54
+ return;
55
+ }
56
+
57
+ float xi = x[i];
58
+ dst[i] = static_cast<T>(0.5f) * xi *
59
+ (static_cast<T>(1.0f) +
60
+ sycl::tanh(SQRT_2_OVER_PI * xi * (static_cast<T>(1.0f) + GELU_COEF_A * xi * xi)));
61
+ }
62
+
63
+ template<typename T>
64
+ static void silu(const T * x, T * dst, const int k,
65
+ const sycl::nd_item<3> &item_ct1) {
66
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
67
+ item_ct1.get_local_id(2);
68
+
69
+ if (i >= k) {
70
+ return;
71
+ }
72
+ dst[i] = x[i] / (static_cast<T>(1.0f) + sycl::native::exp(-x[i]));
73
+ }
74
+
75
+ template<typename T>
76
+ static void gelu_quick(const T *x, T *dst, int k,
77
+ const sycl::nd_item<3> &item_ct1) {
78
+ const float GELU_QUICK_COEF = -1.702f;
79
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
80
+ item_ct1.get_local_id(2);
81
+ if (i >= k) {
82
+ return;
83
+ }
84
+ dst[i] = x[i] * (static_cast<T>(1.0f) / (static_cast<T>(1.0f) + sycl::native::exp(GELU_QUICK_COEF * x[i])));
85
+ }
86
+
87
+ template<typename T>
88
+ static void gelu_erf(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
89
+ const T SQRT_2_INV = static_cast<T>(0.70710678118654752440084436210484f);
90
+ for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) {
91
+ auto x_i = x[i];
92
+ dst[i] = static_cast<T>(0.5f) * x_i * (static_cast<T>(1.0f) + sycl::erf(x_i * SQRT_2_INV));
93
+ }
94
+ }
95
+
96
+ template<typename T>
97
+ static void tanh(const T *x, T *dst, int k,
98
+ const sycl::nd_item<3> &item_ct1) {
99
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
100
+ item_ct1.get_local_id(2);
101
+ if (i >= k) {
102
+ return;
103
+ }
104
+ dst[i] = sycl::tanh((x[i]));
105
+ }
106
+
107
+ template<typename T>
108
+ static void relu(const T * x, T * dst, const int k,
109
+ const sycl::nd_item<3> &item_ct1) {
110
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
111
+ item_ct1.get_local_id(2);
112
+
113
+ if (i >= k) {
114
+ return;
115
+ }
116
+ dst[i] = sycl::fmax((x[i]), static_cast<T>(0));
117
+ }
118
+
119
+ template<typename T>
120
+ static void sigmoid(const T * x, T * dst, const int k,
121
+ const sycl::nd_item<3> &item_ct1) {
122
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
123
+ item_ct1.get_local_id(2);
124
+
125
+ if (i >= k) {
126
+ return;
127
+ }
128
+ dst[i] = 1.0f / (static_cast<T>(1.0f) + sycl::native::exp(-x[i]));
129
+ }
130
+
131
+ template<typename T>
132
+ static void sqrt(const T * x, T * dst, const int k,
133
+ const sycl::nd_item<3> &item_ct1) {
134
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
135
+ item_ct1.get_local_id(2);
136
+
137
+ if (i >= k) {
138
+ return;
139
+ }
140
+ dst[i] = sycl::sqrt(x[i]);
141
+ }
142
+
143
+ template<typename T>
144
+ static void sin(const T * x, T * dst, const int k,
145
+ const sycl::nd_item<3> &item_ct1) {
146
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
147
+ item_ct1.get_local_id(2);
148
+
149
+ if (i >= k) {
150
+ return;
151
+ }
152
+ dst[i] = sycl::sin(x[i]);
153
+ }
154
+
155
+ template<typename T>
156
+ static void cos(const T * x, T * dst, const int k,
157
+ const sycl::nd_item<3> &item_ct1) {
158
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
159
+ item_ct1.get_local_id(2);
160
+
161
+ if (i >= k) {
162
+ return;
163
+ }
164
+ dst[i] = sycl::cos(x[i]);
165
+ }
166
+
167
+ template<typename T>
168
+ static void hardsigmoid(const T * x, T * dst, const int k,
169
+ const sycl::nd_item<3> &item_ct1) {
170
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
171
+ item_ct1.get_local_id(2);
172
+
173
+ if (i >= k) {
174
+ return;
175
+ }
176
+ dst[i] = sycl::fmin(static_cast<T>(1.0f), sycl::fmax(static_cast<T>(0.0f), (x[i] + static_cast<T>(3.0f)) / static_cast<T>(6.0f)));
177
+ }
178
+
179
+ template<typename T>
180
+ static void hardswish(const T * x, T * dst, const int k,
181
+ const sycl::nd_item<3> &item_ct1) {
182
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
183
+ item_ct1.get_local_id(2);
184
+
185
+ if (i >= k) {
186
+ return;
187
+ }
188
+ dst[i] = x[i] * sycl::fmin(static_cast<T>(1.0f), sycl::fmax(static_cast<T>(0.0f), (x[i] + static_cast<T>(3.0f)) / static_cast<T>(6.0f)));
189
+ }
190
+
191
+ template<typename T>
192
+ static void exp(const T * x, T * dst, const int k,
193
+ const sycl::nd_item<3> &item_ct1) {
194
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
195
+ item_ct1.get_local_id(2);
196
+
197
+ if (i >= k) {
198
+ return;
199
+ }
200
+ dst[i] = sycl::exp(x[i]);
201
+ }
202
+
203
+ template<typename T>
204
+ static void log(const T * x, T * dst, const int k,
205
+ const sycl::nd_item<3> &item_ct1) {
206
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
207
+ item_ct1.get_local_id(2);
208
+
209
+ if (i >= k) {
210
+ return;
211
+ }
212
+ T xi = x[i];
213
+ if (xi <= 0) {
214
+ dst[i] = neg_infinity<T>();
215
+ } else {
216
+ dst[i] = sycl::log(xi);
217
+ }
218
+ }
219
+
220
+ template<typename T>
221
+ static void neg(const T * x, T * dst, const int k,
222
+ const sycl::nd_item<3> &item_ct1) {
223
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
224
+ item_ct1.get_local_id(2);
225
+
226
+ if (i >= k) {
227
+ return;
228
+ }
229
+ dst[i] = -x[i];
230
+ }
231
+
232
+ template<typename T>
233
+ static void step(const T * x, T * dst, const int k,
234
+ const sycl::nd_item<3> &item_ct1) {
235
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
236
+ item_ct1.get_local_id(2);
237
+
238
+ if (i >= k) {
239
+ return;
240
+ }
241
+ dst[i] = x[i] > static_cast<T>(0.0f);
242
+ }
243
+
244
+ template<typename T>
245
+ static void leaky_relu(const T *x, T *dst, const int k, const float negative_slope,
246
+ const sycl::nd_item<3> &item_ct1) {
247
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
248
+ item_ct1.get_local_id(2);
249
+ if (i >= k) {
250
+ return;
251
+ }
252
+ dst[i] = sycl::fmax((x[i]), static_cast<T>(0)) +
253
+ sycl::fmin((x[i]), static_cast<T>(0.0f)) * negative_slope;
254
+ }
255
+
256
+ template<typename T>
257
+ static void sqr(const T * x, T * dst, const int k,
258
+ const sycl::nd_item<3> &item_ct1) {
259
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
260
+ item_ct1.get_local_id(2);
261
+
262
+ if (i >= k) {
263
+ return;
264
+ }
265
+ dst[i] = x[i] * x[i];
266
+ }
267
+
268
+ template<typename T>
269
+ static void upscale(const T *x, T *dst, const int nb00, const int nb01,
270
+ const int nb02, const int nb03, const int ne10, const int ne11,
271
+ const int ne12, const int ne13, const float sf0, const float sf1,
272
+ const float sf2, const float sf3, const sycl::nd_item<1> &item_ct1) {
273
+ int index = item_ct1.get_local_id(0) +
274
+ item_ct1.get_group(0) * item_ct1.get_local_range(0);
275
+ if (index >= ne10 * ne11 * ne12 * ne13) {
276
+ return;
277
+ }
278
+ // operation
279
+ int i10 = index % ne10;
280
+ int i11 = (index / ne10) % ne11;
281
+ int i12 = (index / (ne10 * ne11)) % ne12;
282
+ int i13 = (index / (ne10 * ne11 * ne12)) % ne13;
283
+
284
+ int i00 = i10 / sf0;
285
+ int i01 = i11 / sf1;
286
+ int i02 = i12 / sf2;
287
+ int i03 = i13 / sf3;
288
+
289
+ dst[index] = *(const T *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
290
+ }
291
+
292
+ template <typename T>
293
+ static void pad(const T *x, T *dst, const int ne0, const int ne00, const int ne01, const int ne02,
294
+ const sycl::nd_item<3> &item_ct1) {
295
+ int nidx = item_ct1.get_local_id(2) +
296
+ item_ct1.get_group(2) * item_ct1.get_local_range(2);
297
+ if (nidx >= ne0) {
298
+ return;
299
+ }
300
+
301
+ // operation
302
+ int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
303
+ item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
304
+ if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) {
305
+ int offset_src = nidx + item_ct1.get_group(1) * ne00 +
306
+ item_ct1.get_group(0) * ne00 * ne01;
307
+ dst[offset_dst] = x[offset_src];
308
+ } else {
309
+ dst[offset_dst] = static_cast<T>(0.0f);
310
+ }
311
+ }
312
+
313
+
314
+ template<typename T>
315
+ static void clamp(const T * x, T * dst, const float min, const float max, const int k,
316
+ const sycl::nd_item<3> &item_ct1) {
317
+ const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
318
+ item_ct1.get_local_id(2);
319
+
320
+ if (i >= k) {
321
+ return;
322
+ }
323
+
324
+ dst[i] = x[i] < static_cast<T>(min) ? static_cast<T>(min) : (x[i] > static_cast<T>(max) ? static_cast<T>(max) : x[i]);
325
+ }
326
+
327
+ static void acc_f32_sycl(const float *x, const float *y, float *dst,
328
+ const int n_elements, const int ne10, const int ne11,
329
+ const int ne12, const int nb1, const int nb2,
330
+ const int offset, queue_ptr stream) {
331
+ int num_blocks = (n_elements + SYCL_ACC_BLOCK_SIZE - 1) / SYCL_ACC_BLOCK_SIZE;
332
+ sycl_parallel_for(stream,
333
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE),
334
+ sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE)),
335
+ [=](sycl::nd_item<3> item_ct1) {
336
+ acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, nb1, nb2, offset, item_ct1);
337
+ });
338
+ }
339
+
340
+ template<typename T>
341
+ static void gelu_sycl(const T *x, T *dst, const int k,
342
+ queue_ptr stream) {
343
+ const int num_blocks = (k + SYCL_GELU_BLOCK_SIZE - 1) / SYCL_GELU_BLOCK_SIZE;
344
+ sycl_parallel_for(stream,
345
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
346
+ sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
347
+ [=](sycl::nd_item<3> item_ct1) { gelu(x, dst, k, item_ct1); });
348
+ }
349
+
350
+ template<typename T>
351
+ static void silu_sycl(const T *x, T *dst, const int k,
352
+ queue_ptr stream) {
353
+ const int num_blocks = (k + SYCL_SILU_BLOCK_SIZE - 1) / SYCL_SILU_BLOCK_SIZE;
354
+ sycl_parallel_for(stream,
355
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SILU_BLOCK_SIZE),
356
+ sycl::range<3>(1, 1, SYCL_SILU_BLOCK_SIZE)),
357
+ [=](sycl::nd_item<3> item_ct1) { silu(x, dst, k, item_ct1); });
358
+ }
359
+
360
+ template<typename T>
361
+ static void sgn_sycl(const T * x, T * dst, const int k, queue_ptr stream) {
362
+ // hard code for now
363
+ const int num_blocks = ceil_div(k, 256);
364
+ sycl_parallel_for(
365
+ stream, sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range(1, 1, 256)), sycl::range(1, 1, 256)),
366
+ [=](sycl::nd_item<3> item_ct1) { sgn(x, dst, k, item_ct1); });
367
+ }
368
+
369
+ template<typename T>
370
+ static void abs_sycl(const T * x, T * dst, const int k, queue_ptr stream) {
371
+ // hard code for now
372
+ const int num_blocks = ceil_div(k, 256);
373
+ sycl_parallel_for(
374
+ stream,
375
+ sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, 256)), sycl::range<3>(1, 1, 256)),
376
+ [=](sycl::nd_item<3> item_ct1) { abs_op(x, dst, k, item_ct1); });
377
+ }
378
+
379
+
380
+ template<typename T>
381
+ static void elu_sycl(const T * x, T * dst, const int k, queue_ptr stream) {
382
+ // hard code for now
383
+ const int num_blocks = ceil_div(k, 256);
384
+ sycl_parallel_for(
385
+ stream,
386
+ sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, 256)), sycl::range<3>(1, 1, 256)),
387
+ [=](sycl::nd_item<3> item_ct1) { elu_op(x, dst, k, item_ct1); });
388
+ }
389
+
390
+ template<typename T>
391
+ static void gelu_quick_sycl(const T *x, T *dst, const int k,
392
+ queue_ptr stream) {
393
+ const int num_blocks = (k + SYCL_GELU_BLOCK_SIZE - 1) / SYCL_GELU_BLOCK_SIZE;
394
+ sycl_parallel_for(stream,
395
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
396
+ sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
397
+ [=](sycl::nd_item<3> item_ct1) { gelu_quick(x, dst, k, item_ct1); });
398
+ }
399
+
400
+
401
+ template<typename T>
402
+ static void gelu_erf_sycl(const T *x, T *dst, const int k,
403
+ queue_ptr stream) {
404
+ const int num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
405
+ sycl_parallel_for(stream,
406
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
407
+ sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
408
+ [=](sycl::nd_item<3> item_ct1) { gelu_erf(x, dst, k, item_ct1); });
409
+ }
410
+
411
+ template<typename T>
412
+ static void tanh_sycl(const T *x, T *dst, const int k,
413
+ queue_ptr stream) {
414
+ const int num_blocks = (k + SYCL_TANH_BLOCK_SIZE - 1) / SYCL_TANH_BLOCK_SIZE;
415
+ sycl_parallel_for(stream,
416
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_TANH_BLOCK_SIZE),
417
+ sycl::range<3>(1, 1, SYCL_TANH_BLOCK_SIZE)),
418
+ [=](sycl::nd_item<3> item_ct1) { tanh(x, dst, k, item_ct1); });
419
+ }
420
+
421
+ template<typename T>
422
+ static void relu_sycl(const T *x, T *dst, const int k,
423
+ queue_ptr stream) {
424
+ const int num_blocks = (k + SYCL_RELU_BLOCK_SIZE - 1) / SYCL_RELU_BLOCK_SIZE;
425
+ sycl_parallel_for(stream,
426
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE),
427
+ sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE)),
428
+ [=](sycl::nd_item<3> item_ct1) { relu(x, dst, k, item_ct1); });
429
+ }
430
+
431
+ template<typename T>
432
+ static void hardsigmoid_sycl(const T *x, T *dst, const int k,
433
+ queue_ptr stream) {
434
+ const int num_blocks = (k + SYCL_HARDSIGMOID_BLOCK_SIZE - 1) / SYCL_HARDSIGMOID_BLOCK_SIZE;
435
+ sycl_parallel_for(
436
+ stream,
437
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_HARDSIGMOID_BLOCK_SIZE),
438
+ sycl::range<3>(1, 1, SYCL_HARDSIGMOID_BLOCK_SIZE)),
439
+ [=](sycl::nd_item<3> item_ct1) { hardsigmoid(x, dst, k, item_ct1); });
440
+ }
441
+
442
+ template<typename T>
443
+ static void hardswish_sycl(const T *x, T *dst, const int k,
444
+ queue_ptr stream) {
445
+ const int num_blocks = (k + SYCL_HARDSWISH_BLOCK_SIZE - 1) / SYCL_HARDSWISH_BLOCK_SIZE;
446
+ sycl_parallel_for(
447
+ stream,
448
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_HARDSWISH_BLOCK_SIZE),
449
+ sycl::range<3>(1, 1, SYCL_HARDSWISH_BLOCK_SIZE)),
450
+ [=](sycl::nd_item<3> item_ct1) { hardswish(x, dst, k, item_ct1); });
451
+ }
452
+
453
+ template<typename T>
454
+ static void exp_sycl(const T *x, T *dst, const int k,
455
+ queue_ptr stream) {
456
+ const int num_blocks = (k + SYCL_EXP_BLOCK_SIZE - 1) / SYCL_EXP_BLOCK_SIZE;
457
+ sycl_parallel_for(stream,
458
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE),
459
+ sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE)),
460
+ [=](sycl::nd_item<3> item_ct1) { exp(x, dst, k, item_ct1); });
461
+ }
462
+
463
+ template<typename T>
464
+ static void log_sycl(const T *x, T *dst, const int k,
465
+ queue_ptr stream) {
466
+ const int num_blocks = (k + SYCL_EXP_BLOCK_SIZE - 1) / SYCL_EXP_BLOCK_SIZE;
467
+ sycl_parallel_for(stream,
468
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE),
469
+ sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE)),
470
+ [=](sycl::nd_item<3> item_ct1) { log(x, dst, k, item_ct1); });
471
+ }
472
+
473
+ template<typename T>
474
+ static void neg_sycl(const T *x, T *dst, const int k,
475
+ queue_ptr stream) {
476
+ const int num_blocks = (k + SYCL_NEG_BLOCK_SIZE - 1) / SYCL_NEG_BLOCK_SIZE;
477
+ sycl_parallel_for(stream,
478
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE),
479
+ sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE)),
480
+ [=](sycl::nd_item<3> item_ct1) { neg(x, dst, k, item_ct1); });
481
+ }
482
+
483
+ template<typename T>
484
+ static void step_sycl(const T *x, T *dst, const int k,
485
+ queue_ptr stream) {
486
+ const int num_blocks = (k + SYCL_NEG_BLOCK_SIZE - 1) / SYCL_NEG_BLOCK_SIZE;
487
+ sycl_parallel_for(stream,
488
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE),
489
+ sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE)),
490
+ [=](sycl::nd_item<3> item_ct1) { step(x, dst, k, item_ct1); });
491
+ }
492
+
493
+ template<typename T>
494
+ static void sigmoid_sycl(const T *x, T *dst, const int k,
495
+ queue_ptr stream) {
496
+ const int num_blocks = (k + SYCL_SIGMOID_BLOCK_SIZE - 1) / SYCL_SIGMOID_BLOCK_SIZE;
497
+ sycl_parallel_for(
498
+ stream,
499
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SIGMOID_BLOCK_SIZE),
500
+ sycl::range<3>(1, 1, SYCL_SIGMOID_BLOCK_SIZE)),
501
+ [=](sycl::nd_item<3> item_ct1) { sigmoid(x, dst, k, item_ct1); });
502
+ }
503
+
504
+ template<typename T>
505
+ static void sqrt_sycl(const T *x, T *dst, const int k,
506
+ queue_ptr stream) {
507
+ const int num_blocks = (k + SYCL_SQRT_BLOCK_SIZE - 1) / SYCL_SQRT_BLOCK_SIZE;
508
+ sycl_parallel_for(stream,
509
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SQRT_BLOCK_SIZE),
510
+ sycl::range<3>(1, 1, SYCL_SQRT_BLOCK_SIZE)),
511
+ [=](sycl::nd_item<3> item_ct1) { sqrt(x, dst, k, item_ct1); });
512
+ }
513
+
514
+ template<typename T>
515
+ static void sin_sycl(const T *x, T *dst, const int k,
516
+ queue_ptr stream) {
517
+ const int num_blocks = (k + SYCL_SIN_BLOCK_SIZE - 1) / SYCL_SIN_BLOCK_SIZE;
518
+ sycl_parallel_for(stream,
519
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE),
520
+ sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE)),
521
+ [=](sycl::nd_item<3> item_ct1) { sin(x, dst, k, item_ct1); });
522
+ }
523
+
524
+ template<typename T>
525
+ static void cos_sycl(const T *x, T *dst, const int k,
526
+ queue_ptr stream) {
527
+ const int num_blocks = (k + SYCL_SIN_BLOCK_SIZE - 1) / SYCL_SIN_BLOCK_SIZE;
528
+ sycl_parallel_for(stream,
529
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE),
530
+ sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE)),
531
+ [=](sycl::nd_item<3> item_ct1) { cos(x, dst, k, item_ct1); });
532
+ }
533
+
534
+ template<typename T>
535
+ static void leaky_relu_sycl(const T *x, T *dst, const int k,
536
+ const float negative_slope,
537
+ queue_ptr stream) {
538
+ const int num_blocks = (k + SYCL_RELU_BLOCK_SIZE - 1) / SYCL_RELU_BLOCK_SIZE;
539
+ sycl_parallel_for(stream,
540
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE),
541
+ sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE)),
542
+ [=](sycl::nd_item<3> item_ct1) { leaky_relu(x, dst, k, negative_slope, item_ct1); });
543
+ }
544
+
545
+ template<typename T>
546
+ static void sqr_sycl(const T *x, T *dst, const int k,
547
+ queue_ptr stream) {
548
+ const int num_blocks = (k + SYCL_SQR_BLOCK_SIZE - 1) / SYCL_SQR_BLOCK_SIZE;
549
+ sycl_parallel_for(stream,
550
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_SQR_BLOCK_SIZE),
551
+ sycl::range<3>(1, 1, SYCL_SQR_BLOCK_SIZE)),
552
+ [=](sycl::nd_item<3> item_ct1) { sqr(x, dst, k, item_ct1); });
553
+ }
554
+
555
+ template<typename T>
556
+ static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01,
557
+ const int nb02, const int nb03, const int ne10, const int ne11,
558
+ const int ne12, const int ne13, const float sf0, const float sf1,
559
+ const float sf2, const float sf3, queue_ptr stream) {
560
+ int dst_size = ne10 * ne11 * ne12 * ne13;
561
+ int num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE;
562
+ sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
563
+ sycl_parallel_for<1>(
564
+ stream, sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) {
565
+ upscale(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1);
566
+ });
567
+ }
568
+
569
+ template<typename T>
570
+ static void pad_sycl(const T *x, T *dst, const int ne00,
571
+ const int ne01, const int ne02, const int ne0,
572
+ const int ne1, const int ne2, queue_ptr stream) {
573
+ int num_blocks = (ne0 + SYCL_PAD_BLOCK_SIZE - 1) / SYCL_PAD_BLOCK_SIZE;
574
+ sycl::range<3> gridDim(ne2, ne1, num_blocks);
575
+ sycl_parallel_for(stream,
576
+ sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE),
577
+ sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)),
578
+ [=](sycl::nd_item<3> item_ct1) { pad(x, dst, ne0, ne00, ne01, ne02, item_ct1); });
579
+ }
580
+
581
+ template<typename T>
582
+ static void clamp_sycl(const T *x, T *dst, const float min,
583
+ const float max, const int k,
584
+ queue_ptr stream) {
585
+ const int num_blocks = (k + SYCL_CLAMP_BLOCK_SIZE - 1) / SYCL_CLAMP_BLOCK_SIZE;
586
+ sycl_parallel_for(stream,
587
+ sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE),
588
+ sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE)),
589
+ [=](sycl::nd_item<3> item_ct1) { clamp(x, dst, min, max, k, item_ct1); });
590
+ }
591
+
592
+ inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
593
+ #if defined (GGML_SYCL_F16)
594
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
595
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
596
+
597
+ #else
598
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
599
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
600
+ #endif
601
+ GGML_ASSERT(dst->src[0]->type == dst->type);
602
+ dpct::queue_ptr main_stream = ctx.stream();
603
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
604
+ switch (dst->type) {
605
+ #if defined (GGML_SYCL_F16)
606
+ case GGML_TYPE_F16:
607
+ {
608
+ auto data_pts = cast_data<sycl::half>(dst);
609
+ sgn_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
610
+ break;
611
+ }
612
+ #endif
613
+ case GGML_TYPE_F32:
614
+ {
615
+ auto data_pts = cast_data<float>(dst);
616
+ sgn_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
617
+ break;
618
+ }
619
+ default:
620
+ GGML_ABORT("GGML tensor type not supported!\n");
621
+ }
622
+ }
623
+
624
+ inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
625
+ #if defined (GGML_SYCL_F16)
626
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
627
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
628
+
629
+ #else
630
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
631
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
632
+ #endif
633
+ GGML_ASSERT(dst->src[0]->type == dst->type);
634
+ dpct::queue_ptr main_stream = ctx.stream();
635
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
636
+ switch (dst->type) {
637
+ #if defined (GGML_SYCL_F16)
638
+ case GGML_TYPE_F16:
639
+ {
640
+ auto data_pts = cast_data<sycl::half>(dst);
641
+ abs_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
642
+ break;
643
+ }
644
+ #endif
645
+ case GGML_TYPE_F32:
646
+ {
647
+ auto data_pts = cast_data<float>(dst);
648
+ abs_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
649
+ break;
650
+ }
651
+ default:
652
+ GGML_ABORT("GGML tensor type not supported!\n");
653
+ }
654
+ }
655
+
656
+
657
+ inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
658
+ #if defined (GGML_SYCL_F16)
659
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
660
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
661
+
662
+ #else
663
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
664
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
665
+ #endif
666
+ GGML_ASSERT(dst->src[0]->type == dst->type);
667
+ dpct::queue_ptr main_stream = ctx.stream();
668
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
669
+ switch (dst->type) {
670
+ #if defined (GGML_SYCL_F16)
671
+ case GGML_TYPE_F16:
672
+ {
673
+ auto data_pts = cast_data<sycl::half>(dst);
674
+ elu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
675
+ break;
676
+ }
677
+ #endif
678
+ case GGML_TYPE_F32:
679
+ {
680
+ auto data_pts = cast_data<float>(dst);
681
+ elu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
682
+ break;
683
+ }
684
+ default:
685
+ GGML_ABORT("GGML tensor type not supported!\n");
686
+ }
687
+ }
688
+
689
+ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
690
+ #if defined (GGML_SYCL_F16)
691
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
692
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
693
+ #else
694
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
695
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
696
+ #endif
697
+ GGML_ASSERT(dst->src[0]->type == dst->type);
698
+ dpct::queue_ptr main_stream = ctx.stream();
699
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
700
+ switch (dst->type) {
701
+ #if defined (GGML_SYCL_F16)
702
+ case GGML_TYPE_F16:
703
+ {
704
+ auto data_pts = cast_data<sycl::half>(dst);
705
+ silu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
706
+ break;
707
+ }
708
+ #endif
709
+ case GGML_TYPE_F32:
710
+ {
711
+ auto data_pts = cast_data<float>(dst);
712
+ silu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
713
+ break;
714
+ }
715
+ default:
716
+ GGML_ABORT("GGML tensor type not supported!\n");
717
+ }
718
+ }
719
+
720
+ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
721
+ #if defined (GGML_SYCL_F16)
722
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
723
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
724
+ #else
725
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
726
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
727
+ #endif
728
+ GGML_ASSERT(dst->src[0]->type == dst->type);
729
+ dpct::queue_ptr main_stream = ctx.stream();
730
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
731
+ switch (dst->type) {
732
+ #if defined (GGML_SYCL_F16)
733
+ case GGML_TYPE_F16:
734
+ {
735
+ auto data_pts = cast_data<sycl::half>(dst);
736
+ gelu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
737
+ break;
738
+ }
739
+ #endif
740
+ case GGML_TYPE_F32:
741
+ {
742
+ auto data_pts = cast_data<float>(dst);
743
+ gelu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
744
+ break;
745
+ }
746
+ default:
747
+ GGML_ABORT("GGML tensor type not supported!\n");
748
+ }
749
+ }
750
+
751
+ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
752
+ #if defined (GGML_SYCL_F16)
753
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
754
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
755
+ #else
756
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
757
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
758
+ #endif
759
+ GGML_ASSERT(dst->src[0]->type == dst->type);
760
+ dpct::queue_ptr main_stream = ctx.stream();
761
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
762
+ switch (dst->type) {
763
+ #if defined (GGML_SYCL_F16)
764
+ case GGML_TYPE_F16:
765
+ {
766
+ auto data_pts = cast_data<sycl::half>(dst);
767
+ gelu_quick_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
768
+ break;
769
+ }
770
+ #endif
771
+ case GGML_TYPE_F32:
772
+ {
773
+ auto data_pts = cast_data<float>(dst);
774
+ gelu_quick_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
775
+ break;
776
+ }
777
+ default:
778
+ GGML_ABORT("GGML tensor type not supported!\n");
779
+ }
780
+ }
781
+
782
+ inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
783
+ #if defined (GGML_SYCL_F16)
784
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
785
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
786
+ #else
787
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
788
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
789
+ #endif
790
+ GGML_ASSERT(dst->src[0]->type == dst->type);
791
+ dpct::queue_ptr main_stream = ctx.stream();
792
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
793
+ switch (dst->type) {
794
+ #if defined (GGML_SYCL_F16)
795
+ case GGML_TYPE_F16:
796
+ {
797
+ auto data_pts = cast_data<sycl::half>(dst);
798
+ gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
799
+ break;
800
+ }
801
+ #endif
802
+ case GGML_TYPE_F32:
803
+ {
804
+ auto data_pts = cast_data<float>(dst);
805
+ gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
806
+ break;
807
+ }
808
+ default:
809
+ GGML_ABORT("GGML tensor type not supported!\n");
810
+ }
811
+ }
812
+
813
+
814
+ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
815
+ #if defined (GGML_SYCL_F16)
816
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
817
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
818
+ #else
819
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
820
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
821
+ #endif
822
+ GGML_ASSERT(dst->src[0]->type == dst->type);
823
+ dpct::queue_ptr main_stream = ctx.stream();
824
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
825
+ switch (dst->type) {
826
+ #if defined (GGML_SYCL_F16)
827
+ case GGML_TYPE_F16:
828
+ {
829
+ auto data_pts = cast_data<sycl::half>(dst);
830
+ tanh_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
831
+ break;
832
+ }
833
+ #endif
834
+ case GGML_TYPE_F32:
835
+ {
836
+ auto data_pts = cast_data<float>(dst);
837
+ tanh_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
838
+ break;
839
+ }
840
+ default:
841
+ GGML_ABORT("GGML tensor type not supported!\n");
842
+ }
843
+ }
844
+
845
+ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
846
+ #if defined (GGML_SYCL_F16)
847
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
848
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
849
+ #else
850
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
851
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
852
+ #endif
853
+ GGML_ASSERT(dst->src[0]->type == dst->type);
854
+ dpct::queue_ptr main_stream = ctx.stream();
855
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
856
+
857
+ switch (dst->type) {
858
+ #if defined (GGML_SYCL_F16)
859
+ case GGML_TYPE_F16:
860
+ {
861
+ auto data_pts = cast_data<sycl::half>(dst);
862
+ relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
863
+ break;
864
+ }
865
+ #endif
866
+ case GGML_TYPE_F32:
867
+ {
868
+ auto data_pts = cast_data<float>(dst);
869
+ relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
870
+ break;
871
+ }
872
+ default:
873
+ GGML_ABORT("GGML tensor type not supported!\n");
874
+ }
875
+ }
876
+
877
+ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
878
+ #if defined (GGML_SYCL_F16)
879
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
880
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
881
+ #else
882
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
883
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
884
+ #endif
885
+ GGML_ASSERT(dst->src[0]->type == dst->type);
886
+
887
+ dpct::queue_ptr main_stream = ctx.stream();
888
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
889
+
890
+ switch (dst->type) {
891
+ #if defined (GGML_SYCL_F16)
892
+ case GGML_TYPE_F16:
893
+ {
894
+ auto data_pts = cast_data<sycl::half>(dst);
895
+ hardsigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
896
+ break;
897
+ }
898
+ #endif
899
+ case GGML_TYPE_F32:
900
+ {
901
+ auto data_pts = cast_data<float>(dst);
902
+ hardsigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
903
+ break;
904
+ }
905
+ default:
906
+ GGML_ABORT("GGML tensor type not supported!\n");
907
+ }
908
+ }
909
+
910
+ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
911
+ #if defined (GGML_SYCL_F16)
912
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
913
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
914
+ #else
915
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
916
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
917
+ #endif
918
+ GGML_ASSERT(dst->src[0]->type == dst->type);
919
+ dpct::queue_ptr main_stream = ctx.stream();
920
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
921
+ switch (dst->type) {
922
+ #if defined (GGML_SYCL_F16)
923
+ case GGML_TYPE_F16:
924
+ {
925
+ auto data_pts = cast_data<sycl::half>(dst);
926
+ hardswish_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
927
+ break;
928
+ }
929
+ #endif
930
+ case GGML_TYPE_F32:
931
+ {
932
+ auto data_pts = cast_data<float>(dst);
933
+ hardswish_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
934
+ break;
935
+ }
936
+ default:
937
+ GGML_ABORT("GGML tensor type not supported!\n");
938
+ }
939
+ }
940
+
941
+ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
942
+ #if defined (GGML_SYCL_F16)
943
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
944
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
945
+ #else
946
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
947
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
948
+ #endif
949
+ GGML_ASSERT(dst->src[0]->type == dst->type);
950
+ dpct::queue_ptr main_stream = ctx.stream();
951
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
952
+ switch (dst->type) {
953
+ #if defined (GGML_SYCL_F16)
954
+ case GGML_TYPE_F16:
955
+ {
956
+ auto data_pts = cast_data<sycl::half>(dst);
957
+ exp_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
958
+ break;
959
+ }
960
+ #endif
961
+ case GGML_TYPE_F32:
962
+ {
963
+ auto data_pts = cast_data<float>(dst);
964
+ exp_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
965
+ break;
966
+ }
967
+ default:
968
+ GGML_ABORT("GGML tensor type not supported!\n");
969
+ }
970
+ }
971
+
972
+ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
973
+ #if defined (GGML_SYCL_F16)
974
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
975
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
976
+ #else
977
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
978
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
979
+ #endif
980
+ GGML_ASSERT(dst->src[0]->type == dst->type);
981
+ dpct::queue_ptr main_stream = ctx.stream();
982
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
983
+ switch (dst->type) {
984
+ #if defined (GGML_SYCL_F16)
985
+ case GGML_TYPE_F16:
986
+ {
987
+ auto data_pts = cast_data<sycl::half>(dst);
988
+ log_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
989
+ break;
990
+ }
991
+ #endif
992
+ case GGML_TYPE_F32:
993
+ {
994
+ auto data_pts = cast_data<float>(dst);
995
+ log_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
996
+ break;
997
+ }
998
+ default:
999
+ GGML_ABORT("GGML tensor type not supported!\n");
1000
+ }
1001
+ }
1002
+
1003
+ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1004
+ #if defined (GGML_SYCL_F16)
1005
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1006
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1007
+ #else
1008
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1009
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1010
+ #endif
1011
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1012
+ dpct::queue_ptr main_stream = ctx.stream();
1013
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1014
+ switch (dst->type) {
1015
+ #if defined (GGML_SYCL_F16)
1016
+ case GGML_TYPE_F16:
1017
+ {
1018
+ auto data_pts = cast_data<sycl::half>(dst);
1019
+ sigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1020
+ break;
1021
+ }
1022
+ #endif
1023
+ case GGML_TYPE_F32:
1024
+ {
1025
+ auto data_pts = cast_data<float>(dst);
1026
+ sigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1027
+ break;
1028
+ }
1029
+ default:
1030
+ GGML_ABORT("GGML tensor type not supported!\n");
1031
+ }
1032
+ }
1033
+
1034
+ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1035
+ #if defined (GGML_SYCL_F16)
1036
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1037
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1038
+ #else
1039
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1040
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1041
+ #endif
1042
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1043
+
1044
+ dpct::queue_ptr main_stream = ctx.stream();
1045
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1046
+ switch (dst->type) {
1047
+ #if defined (GGML_SYCL_F16)
1048
+ case GGML_TYPE_F16:
1049
+ {
1050
+ auto data_pts = cast_data<sycl::half>(dst);
1051
+ sqrt_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1052
+ break;
1053
+ }
1054
+ #endif
1055
+ case GGML_TYPE_F32:
1056
+ {
1057
+ auto data_pts = cast_data<float>(dst);
1058
+ sqrt_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1059
+ break;
1060
+ }
1061
+ default:
1062
+ GGML_ABORT("GGML tensor type not supported!\n");
1063
+ }
1064
+ }
1065
+
1066
+ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1067
+ #if defined (GGML_SYCL_F16)
1068
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1069
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1070
+ #else
1071
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1072
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1073
+ #endif
1074
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1075
+ dpct::queue_ptr main_stream = ctx.stream();
1076
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1077
+ switch (dst->type) {
1078
+ #if defined (GGML_SYCL_F16)
1079
+ case GGML_TYPE_F16:
1080
+ {
1081
+ auto data_pts = cast_data<sycl::half>(dst);
1082
+ sin_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1083
+ break;
1084
+ }
1085
+ #endif
1086
+ case GGML_TYPE_F32:
1087
+ {
1088
+ auto data_pts = cast_data<float>(dst);
1089
+ sin_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1090
+ break;
1091
+ }
1092
+ default:
1093
+ GGML_ABORT("GGML tensor type not supported!\n");
1094
+ }
1095
+ }
1096
+
1097
+ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1098
+ #if defined (GGML_SYCL_F16)
1099
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1100
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1101
+ #else
1102
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1103
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1104
+ #endif
1105
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1106
+ dpct::queue_ptr main_stream = ctx.stream();
1107
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1108
+ switch (dst->type) {
1109
+ #if defined (GGML_SYCL_F16)
1110
+ case GGML_TYPE_F16:
1111
+ {
1112
+ auto data_pts = cast_data<sycl::half>(dst);
1113
+ cos_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1114
+ break;
1115
+ }
1116
+ #endif
1117
+ case GGML_TYPE_F32:
1118
+ {
1119
+ auto data_pts = cast_data<float>(dst);
1120
+ cos_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1121
+ break;
1122
+ }
1123
+ default:
1124
+ GGML_ABORT("GGML tensor type not supported!\n");
1125
+ }
1126
+ }
1127
+
1128
+ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1129
+ #if defined (GGML_SYCL_F16)
1130
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1131
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1132
+ #else
1133
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1134
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1135
+ #endif
1136
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1137
+ dpct::queue_ptr main_stream = ctx.stream();
1138
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1139
+ switch (dst->type) {
1140
+ #if defined (GGML_SYCL_F16)
1141
+ case GGML_TYPE_F16:
1142
+ {
1143
+ auto data_pts = cast_data<sycl::half>(dst);
1144
+ step_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1145
+ break;
1146
+ }
1147
+ #endif
1148
+ case GGML_TYPE_F32:
1149
+ {
1150
+ auto data_pts = cast_data<float>(dst);
1151
+ step_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1152
+ break;
1153
+ }
1154
+ default:
1155
+ GGML_ABORT("GGML tensor type not supported!\n");
1156
+ }
1157
+ }
1158
+
1159
+ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1160
+ #if defined (GGML_SYCL_F16)
1161
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1162
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1163
+ #else
1164
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1165
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1166
+ #endif
1167
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1168
+ dpct::queue_ptr main_stream = ctx.stream();
1169
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1170
+ switch (dst->type) {
1171
+ #if defined (GGML_SYCL_F16)
1172
+ case GGML_TYPE_F16:
1173
+ {
1174
+ auto data_pts = cast_data<sycl::half>(dst);
1175
+ neg_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1176
+ break;
1177
+ }
1178
+ #endif
1179
+ case GGML_TYPE_F32:
1180
+ {
1181
+ auto data_pts = cast_data<float>(dst);
1182
+ neg_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1183
+ break;
1184
+ }
1185
+ default:
1186
+ GGML_ABORT("GGML tensor type not supported!\n");
1187
+ }
1188
+ }
1189
+
1190
+ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1191
+ #if defined (GGML_SYCL_F16)
1192
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1193
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1194
+ #else
1195
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1196
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1197
+ #endif
1198
+
1199
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1200
+ float negative_slope;
1201
+ memcpy(&negative_slope, dst->op_params, sizeof(float));
1202
+ dpct::queue_ptr main_stream = ctx.stream();
1203
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1204
+ switch (dst->type) {
1205
+ #if defined (GGML_SYCL_F16)
1206
+ case GGML_TYPE_F16:
1207
+ {
1208
+ auto data_pts = cast_data<sycl::half>(dst);
1209
+ leaky_relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), negative_slope, main_stream);
1210
+ break;
1211
+ }
1212
+ #endif
1213
+ case GGML_TYPE_F32:
1214
+ {
1215
+ auto data_pts = cast_data<float>(dst);
1216
+ leaky_relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), negative_slope, main_stream);
1217
+ break;
1218
+ }
1219
+ default:
1220
+ GGML_ABORT("GGML tensor type not supported!\n");
1221
+ }
1222
+ }
1223
+
1224
+ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1225
+ #if defined (GGML_SYCL_F16)
1226
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1227
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1228
+ #else
1229
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1230
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1231
+ #endif
1232
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1233
+ dpct::queue_ptr main_stream = ctx.stream();
1234
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1235
+ switch (dst->type) {
1236
+ #if defined (GGML_SYCL_F16)
1237
+ case GGML_TYPE_F16:
1238
+ {
1239
+ auto data_pts = cast_data<sycl::half>(dst);
1240
+ sqr_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1241
+ break;
1242
+ }
1243
+ #endif
1244
+ case GGML_TYPE_F32:
1245
+ {
1246
+ auto data_pts = cast_data<float>(dst);
1247
+ sqr_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
1248
+ break;
1249
+ }
1250
+ default:
1251
+ GGML_ABORT("GGML tensor type not supported!\n");
1252
+ }
1253
+ }
1254
+
1255
+ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1256
+ #if defined (GGML_SYCL_F16)
1257
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1258
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1259
+ #else
1260
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1261
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1262
+ #endif
1263
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1264
+
1265
+ dpct::queue_ptr main_stream = ctx.stream();
1266
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1267
+
1268
+ const float sf0 = (float) dst->ne[0] / dst->src[0]->ne[0];
1269
+ const float sf1 = (float) dst->ne[1] / dst->src[0]->ne[1];
1270
+ const float sf2 = (float) dst->ne[2] / dst->src[0]->ne[2];
1271
+ const float sf3 = (float) dst->ne[3] / dst->src[0]->ne[3];
1272
+ switch (dst->type) {
1273
+ #if defined (GGML_SYCL_F16)
1274
+ case GGML_TYPE_F16:
1275
+ {
1276
+ auto data_pts = cast_data<sycl::half>(dst);
1277
+ upscale_sycl(data_pts.src, data_pts.dst, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2],
1278
+ dst->src[0]->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
1279
+ main_stream);
1280
+ break;
1281
+ }
1282
+ #endif
1283
+ case GGML_TYPE_F32:
1284
+ {
1285
+ auto data_pts = cast_data<float>(dst);
1286
+ upscale_sycl(data_pts.src, data_pts.dst, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2],
1287
+ dst->src[0]->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
1288
+ main_stream);
1289
+ break;
1290
+ }
1291
+ default:
1292
+ GGML_ABORT("GGML tensor type not supported!\n");
1293
+ }
1294
+ }
1295
+
1296
+ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1297
+ #if defined (GGML_SYCL_F16)
1298
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1299
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1300
+ #else
1301
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1302
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1303
+ #endif
1304
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1305
+ GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
1306
+ dpct::queue_ptr main_stream = ctx.stream();
1307
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1308
+ switch (dst->type) {
1309
+ #if defined (GGML_SYCL_F16)
1310
+ case GGML_TYPE_F16:
1311
+ {
1312
+ auto data_pts = cast_data<sycl::half>(dst);
1313
+ pad_sycl(data_pts.src, data_pts.dst, dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2], dst->ne[0],
1314
+ dst->ne[1], dst->ne[2], main_stream);
1315
+ break;
1316
+ }
1317
+ #endif
1318
+ case GGML_TYPE_F32:
1319
+ {
1320
+ auto data_pts = cast_data<float>(dst);
1321
+ pad_sycl(data_pts.src, data_pts.dst, dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2], dst->ne[0],
1322
+ dst->ne[1], dst->ne[2], main_stream);
1323
+ break;
1324
+ }
1325
+ default:
1326
+ GGML_ABORT("GGML tensor type not supported!\n");
1327
+ }
1328
+ }
1329
+
1330
+ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1331
+ #if defined(GGML_SYCL_F16)
1332
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
1333
+ GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
1334
+ #else
1335
+
1336
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1337
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
1338
+ #endif
1339
+ GGML_ASSERT(dst->src[0]->type == dst->type);
1340
+ dpct::queue_ptr main_stream = ctx.stream();
1341
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1342
+ float min;
1343
+ float max;
1344
+ memcpy(&min, dst->op_params, sizeof(float));
1345
+ memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
1346
+
1347
+ switch (dst->type) {
1348
+ #if defined(GGML_SYCL_F16)
1349
+ case GGML_TYPE_F16:
1350
+ {
1351
+ auto data_pts = cast_data<sycl::half>(dst);
1352
+ clamp_sycl(data_pts.src, data_pts.dst, min, max, ggml_nelements(dst->src[0]), main_stream);
1353
+ break;
1354
+ }
1355
+ #endif
1356
+ case GGML_TYPE_F32:
1357
+ {
1358
+ auto data_pts = cast_data<float>(dst);
1359
+ clamp_sycl(data_pts.src, data_pts.dst, min, max, ggml_nelements(dst->src[0]), main_stream);
1360
+ break;
1361
+ }
1362
+ default:
1363
+ GGML_ABORT("GGML tensor type not supported!\n");
1364
+ }
1365
+ }
1366
+
1367
+ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
1368
+
1369
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
1370
+ GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
1371
+ GGML_ASSERT( dst->type == GGML_TYPE_F32);
1372
+ GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported
1373
+ dpct::queue_ptr main_stream = ctx.stream();
1374
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
1375
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
1376
+ const float * src1_dd = static_cast<const float*>(dst->src[1]->data);
1377
+ float * dst_dd = static_cast<float *>(dst->data);
1378
+
1379
+ int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
1380
+ int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
1381
+ // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused
1382
+ int offset = dst->op_params[3] / 4; // offset in bytes
1383
+
1384
+ acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream);
1385
+ }
1386
+
1387
+
1388
+ void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1389
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1390
+ ggml_sycl_op_sqrt(ctx, dst);
1391
+ }
1392
+
1393
+ void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1394
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1395
+ ggml_sycl_op_sin(ctx, dst);
1396
+ }
1397
+
1398
+ void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1399
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1400
+ ggml_sycl_op_cos(ctx, dst);
1401
+ }
1402
+
1403
+ void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1404
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
1405
+ ggml_sycl_op_acc(ctx, dst);
1406
+ }
1407
+
1408
+ void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1409
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1410
+ ggml_sycl_op_gelu(ctx, dst);
1411
+ }
1412
+
1413
+ void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1414
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1415
+ ggml_sycl_op_silu(ctx, dst);
1416
+ }
1417
+
1418
+ void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1419
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1420
+ ggml_sycl_op_gelu_quick(ctx, dst);
1421
+ }
1422
+
1423
+ void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1424
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1425
+ ggml_sycl_op_gelu_erf(ctx, dst);
1426
+ }
1427
+
1428
+ void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1429
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1430
+ ggml_sycl_op_tanh(ctx, dst);
1431
+ }
1432
+
1433
+ void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1434
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1435
+ ggml_sycl_op_relu(ctx, dst);
1436
+ }
1437
+
1438
+ void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1439
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1440
+ ggml_sycl_op_sigmoid(ctx, dst);
1441
+ }
1442
+
1443
+ void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1444
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1445
+ ggml_sycl_op_hardsigmoid(ctx, dst);
1446
+ }
1447
+
1448
+ void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1449
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1450
+ ggml_sycl_op_hardswish(ctx, dst);
1451
+ }
1452
+
1453
+ void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1454
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1455
+ ggml_sycl_op_exp(ctx, dst);
1456
+ }
1457
+
1458
+ void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1459
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1460
+ ggml_sycl_op_log(ctx, dst);
1461
+ }
1462
+
1463
+ void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1464
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1465
+ ggml_sycl_op_neg(ctx, dst);
1466
+ }
1467
+
1468
+ void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1469
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1470
+ ggml_sycl_op_step(ctx, dst);
1471
+ }
1472
+
1473
+ void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1474
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1475
+ ggml_sycl_op_leaky_relu(ctx, dst);
1476
+ }
1477
+
1478
+ void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1479
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1480
+ ggml_sycl_op_sqr(ctx, dst);
1481
+ }
1482
+
1483
+ void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1484
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1485
+ ggml_sycl_op_upscale(ctx, dst);
1486
+ }
1487
+
1488
+ void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1489
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1490
+ ggml_sycl_op_pad(ctx, dst);
1491
+ }
1492
+
1493
+ void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1494
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1495
+ ggml_sycl_op_clamp(ctx, dst);
1496
+ }
1497
+
1498
+ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1499
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1500
+ ggml_sycl_op_sgn(ctx, dst);
1501
+ }
1502
+
1503
+ void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1504
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1505
+ ggml_sycl_op_abs(ctx, dst);
1506
+ }
1507
+
1508
+ void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1509
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
1510
+ ggml_sycl_op_elu(ctx, dst);
1511
+ }