whispercpp 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (891)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/README.md +158 -44
  4. data/ext/extconf.rb +3 -2
  5. data/ext/ruby_whisper.c +34 -6
  6. data/ext/ruby_whisper.h +67 -0
  7. data/ext/ruby_whisper_context.c +236 -144
  8. data/ext/ruby_whisper_context_params.c +163 -0
  9. data/ext/ruby_whisper_model.c +12 -13
  10. data/ext/ruby_whisper_params.c +47 -24
  11. data/ext/ruby_whisper_segment.c +84 -20
  12. data/ext/ruby_whisper_token.c +371 -0
  13. data/ext/ruby_whisper_transcribe.cpp +5 -2
  14. data/ext/ruby_whisper_vad_context.c +122 -0
  15. data/ext/ruby_whisper_vad_context_detect.cpp +51 -0
  16. data/ext/ruby_whisper_vad_params.c +0 -1
  17. data/ext/ruby_whisper_vad_segment.c +138 -0
  18. data/ext/ruby_whisper_vad_segments.c +105 -0
  19. data/ext/sources/CMakeLists.txt +4 -1
  20. data/ext/sources/bindings/javascript/package.json +1 -1
  21. data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
  22. data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
  23. data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  24. data/ext/sources/cmake/whisper-config.cmake.in +5 -40
  25. data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
  26. data/ext/sources/examples/addon.node/vad-example.js +2 -2
  27. data/ext/sources/examples/bench/bench.cpp +23 -18
  28. data/ext/sources/examples/cli/cli.cpp +129 -112
  29. data/ext/sources/examples/common-ggml.cpp +2 -0
  30. data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
  31. data/ext/sources/examples/miniaudio.h +4507 -2131
  32. data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
  33. data/ext/sources/examples/server/server.cpp +28 -15
  34. data/ext/sources/examples/talk-llama/CMakeLists.txt +8 -3
  35. data/ext/sources/examples/talk-llama/llama-adapter.cpp +5 -2
  36. data/ext/sources/examples/talk-llama/llama-adapter.h +7 -0
  37. data/ext/sources/examples/talk-llama/llama-arch.cpp +2378 -1988
  38. data/ext/sources/examples/talk-llama/llama-arch.h +109 -2
  39. data/ext/sources/examples/talk-llama/llama-batch.cpp +78 -34
  40. data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
  41. data/ext/sources/examples/talk-llama/llama-chat.cpp +100 -4
  42. data/ext/sources/examples/talk-llama/llama-chat.h +5 -0
  43. data/ext/sources/examples/talk-llama/llama-context.cpp +1088 -403
  44. data/ext/sources/examples/talk-llama/llama-context.h +70 -23
  45. data/ext/sources/examples/talk-llama/llama-cparams.h +6 -0
  46. data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
  47. data/ext/sources/examples/talk-llama/llama-grammar.cpp +295 -60
  48. data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
  49. data/ext/sources/examples/talk-llama/llama-graph.cpp +925 -155
  50. data/ext/sources/examples/talk-llama/llama-graph.h +234 -23
  51. data/ext/sources/examples/talk-llama/llama-hparams.cpp +79 -38
  52. data/ext/sources/examples/talk-llama/llama-hparams.h +118 -18
  53. data/ext/sources/examples/talk-llama/llama-impl.cpp +11 -7
  54. data/ext/sources/examples/talk-llama/llama-impl.h +14 -2
  55. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +8 -4
  56. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +405 -140
  57. data/ext/sources/examples/talk-llama/llama-kv-cache.h +24 -10
  58. data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
  59. data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
  60. data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
  61. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
  62. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +42 -31
  63. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
  64. data/ext/sources/examples/talk-llama/llama-mmap.cpp +197 -45
  65. data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
  66. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +606 -116
  67. data/ext/sources/examples/talk-llama/llama-model-loader.h +41 -5
  68. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +61 -44
  69. data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
  70. data/ext/sources/examples/talk-llama/llama-model.cpp +2756 -13643
  71. data/ext/sources/examples/talk-llama/llama-model.h +112 -18
  72. data/ext/sources/examples/talk-llama/llama-quant.cpp +582 -365
  73. data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +1409 -199
  74. data/ext/sources/examples/talk-llama/llama-sampler.h +42 -0
  75. data/ext/sources/examples/talk-llama/llama-vocab.cpp +248 -82
  76. data/ext/sources/examples/talk-llama/llama-vocab.h +50 -40
  77. data/ext/sources/examples/talk-llama/llama.cpp +802 -21
  78. data/ext/sources/examples/talk-llama/llama.h +210 -39
  79. data/ext/sources/examples/talk-llama/models/afmoe.cpp +190 -0
  80. data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
  81. data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
  82. data/ext/sources/examples/talk-llama/models/arctic.cpp +137 -0
  83. data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
  84. data/ext/sources/examples/talk-llama/models/baichuan.cpp +123 -0
  85. data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +143 -0
  86. data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +133 -0
  87. data/ext/sources/examples/talk-llama/models/bert.cpp +184 -0
  88. data/ext/sources/examples/talk-llama/models/bitnet.cpp +145 -0
  89. data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
  90. data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
  91. data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
  92. data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
  93. data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
  94. data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
  95. data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
  96. data/ext/sources/examples/talk-llama/models/dbrx.cpp +122 -0
  97. data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
  98. data/ext/sources/examples/talk-llama/models/deepseek.cpp +142 -0
  99. data/ext/sources/examples/talk-llama/models/deepseek2.cpp +262 -0
  100. data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
  101. data/ext/sources/examples/talk-llama/models/dots1.cpp +132 -0
  102. data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
  103. data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +148 -0
  104. data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
  105. data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
  106. data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
  107. data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
  108. data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
  109. data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +111 -0
  110. data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
  111. data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
  112. data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
  113. data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
  114. data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
  115. data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
  116. data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
  117. data/ext/sources/examples/talk-llama/models/glm4.cpp +157 -0
  118. data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
  119. data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
  120. data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +195 -0
  121. data/ext/sources/examples/talk-llama/models/granite.cpp +210 -0
  122. data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
  123. data/ext/sources/examples/talk-llama/models/grovemoe.cpp +139 -0
  124. data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
  125. data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +153 -0
  126. data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
  127. data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
  128. data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
  129. data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
  130. data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
  131. data/ext/sources/examples/talk-llama/models/lfm2.cpp +196 -0
  132. data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
  133. data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
  134. data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
  135. data/ext/sources/examples/talk-llama/models/llama.cpp +175 -0
  136. data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
  137. data/ext/sources/examples/talk-llama/models/mamba-base.cpp +289 -0
  138. data/ext/sources/examples/talk-llama/models/mamba.cpp +54 -0
  139. data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +129 -0
  140. data/ext/sources/examples/talk-llama/models/minicpm3.cpp +200 -0
  141. data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +123 -0
  142. data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
  143. data/ext/sources/examples/talk-llama/models/models.h +704 -0
  144. data/ext/sources/examples/talk-llama/models/modern-bert.cpp +109 -0
  145. data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
  146. data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +162 -0
  147. data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
  148. data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
  149. data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
  150. data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
  151. data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
  152. data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
  153. data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
  154. data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
  155. data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
  156. data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
  157. data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
  158. data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
  159. data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
  160. data/ext/sources/examples/talk-llama/models/plamo2.cpp +320 -0
  161. data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
  162. data/ext/sources/examples/talk-llama/models/plm.cpp +169 -0
  163. data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
  164. data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
  165. data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
  166. data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
  167. data/ext/sources/examples/talk-llama/models/qwen3.cpp +120 -0
  168. data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
  169. data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
  170. data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +131 -0
  171. data/ext/sources/examples/talk-llama/models/qwen3next.cpp +525 -0
  172. data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +140 -0
  173. data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +132 -0
  174. data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
  175. data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
  176. data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +164 -0
  177. data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
  178. data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
  179. data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +137 -0
  180. data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
  181. data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
  182. data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
  183. data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
  184. data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
  185. data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
  186. data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
  187. data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
  188. data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
  189. data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
  190. data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
  191. data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
  192. data/ext/sources/examples/talk-llama/unicode.cpp +121 -79
  193. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
  194. data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
  195. data/ext/sources/ggml/CMakeLists.txt +90 -56
  196. data/ext/sources/ggml/include/ggml-alloc.h +9 -0
  197. data/ext/sources/ggml/include/ggml-backend.h +5 -2
  198. data/ext/sources/ggml/include/ggml-cann.h +1 -1
  199. data/ext/sources/ggml/include/ggml-cpu.h +6 -0
  200. data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
  201. data/ext/sources/ggml/include/ggml-openvino.h +37 -0
  202. data/ext/sources/ggml/include/ggml-opt.h +1 -1
  203. data/ext/sources/ggml/include/ggml-rpc.h +14 -12
  204. data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
  205. data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
  206. data/ext/sources/ggml/include/ggml.h +246 -21
  207. data/ext/sources/ggml/src/CMakeLists.txt +85 -11
  208. data/ext/sources/ggml/src/ggml-alloc.c +128 -50
  209. data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
  210. data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
  211. data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
  212. data/ext/sources/ggml/src/ggml-backend-reg.cpp +54 -88
  213. data/ext/sources/ggml/src/ggml-backend.cpp +76 -23
  214. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +18 -4
  215. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +11 -11
  216. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +58 -46
  217. data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +139 -48
  218. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2427 -1785
  219. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -362
  220. data/ext/sources/ggml/src/ggml-cann/common.h +285 -211
  221. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +663 -831
  222. data/ext/sources/ggml/src/ggml-common.h +11 -0
  223. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +170 -95
  224. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -18
  225. data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
  226. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
  227. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  228. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +513 -27
  229. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +4192 -992
  230. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  231. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  232. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1761 -49
  233. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
  234. data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  235. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
  236. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
  237. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +124 -24
  238. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +157 -28
  239. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
  240. data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
  241. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +8 -3
  242. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +251 -80
  243. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +19 -0
  244. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +587 -119
  245. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
  246. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1093 -194
  247. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1284 -203
  248. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
  249. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1519 -527
  250. data/ext/sources/ggml/src/ggml-cpu/ops.h +6 -4
  251. data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
  252. data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
  253. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3632 -781
  254. data/ext/sources/ggml/src/ggml-cpu/repack.h +129 -4
  255. data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  256. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +152 -46
  257. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
  258. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +152 -1
  259. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
  260. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +140 -0
  261. data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
  262. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
  263. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
  264. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +132 -6
  265. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
  266. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +33 -31
  267. data/ext/sources/ggml/src/ggml-cuda/common.cuh +474 -85
  268. data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
  269. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
  270. data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  271. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +342 -246
  272. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
  273. data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
  274. data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  275. data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
  276. data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
  277. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +98 -74
  278. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +973 -665
  279. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
  280. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1255 -0
  281. data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +33 -40
  282. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +40 -18
  283. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  284. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +206 -45
  285. data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
  286. data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
  287. data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
  288. data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  289. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1688 -302
  290. data/ext/sources/ggml/src/ggml-cuda/mean.cu +12 -10
  291. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +908 -48
  292. data/ext/sources/ggml/src/ggml-cuda/mmf.cu +88 -20
  293. data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +502 -90
  294. data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
  295. data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
  296. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
  297. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +532 -193
  298. data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +460 -104
  299. data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +5 -2
  300. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +360 -122
  301. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +2 -1
  302. data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
  303. data/ext/sources/ggml/src/ggml-cuda/pad.cu +73 -39
  304. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +152 -1
  305. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
  306. data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
  307. data/ext/sources/ggml/src/ggml-cuda/rope.cu +364 -149
  308. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
  309. data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
  310. data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
  311. data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
  312. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +163 -41
  313. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  314. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  315. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +68 -50
  316. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
  317. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
  318. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
  319. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
  320. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
  321. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
  322. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
  323. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
  324. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  325. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  326. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  327. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  328. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  329. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  330. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  331. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  332. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  333. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +22 -4
  334. data/ext/sources/ggml/src/ggml-cuda/top-k.cu +95 -0
  335. data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
  336. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +275 -119
  337. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -7
  338. data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
  339. data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
  340. data/ext/sources/ggml/src/ggml-cuda/unary.cu +160 -11
  341. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +38 -0
  342. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
  343. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
  344. data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
  345. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -1
  346. data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
  347. data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  348. data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3325 -0
  349. data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +46 -0
  350. data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +813 -0
  351. data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  352. data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +891 -0
  353. data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  354. data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
  355. data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +713 -0
  356. data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
  357. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  358. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  359. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  360. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  361. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  362. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  363. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +155 -0
  364. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +63 -0
  365. data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  366. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  367. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
  368. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  369. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
  370. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  371. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  372. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  373. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  374. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  375. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  376. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  377. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  378. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  379. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +26 -0
  380. data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1199 -0
  381. data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2670 -0
  382. data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +497 -0
  383. data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
  384. data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +419 -0
  385. data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
  386. data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  387. data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +382 -0
  388. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  389. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  390. data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  391. data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  392. data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
  393. data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  394. data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
  395. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -13
  396. data/ext/sources/ggml/src/ggml-impl.h +129 -6
  397. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
  398. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +15 -4
  399. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
  400. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +173 -34
  401. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +912 -344
  402. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +124 -59
  403. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +588 -144
  404. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +396 -23
  405. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1724 -421
  406. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +16 -3
  407. data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +333 -114
  408. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3050 -1539
  409. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  410. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +30 -1
  411. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4279 -497
  412. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
  413. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
  414. data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  415. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +267 -0
  416. data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  417. data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  418. data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  419. data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  420. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
  421. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  422. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  423. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  424. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  425. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  426. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  427. data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  428. data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  429. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  430. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  431. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  432. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  433. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  434. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  435. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  436. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  437. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  438. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  439. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  440. data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
  441. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  442. data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  443. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  444. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
  445. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  446. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  447. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
  448. data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
  449. data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  450. data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  451. data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  452. data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  453. data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  454. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
  455. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
  456. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +39 -0
  457. data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  458. data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
  459. data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  460. data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
  461. data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  462. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
  463. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  464. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
  465. data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
  466. data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  467. data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  468. data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  469. data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  470. data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  471. data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  472. data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  473. data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  474. data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  475. data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  476. data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  477. data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  478. data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  479. data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  480. data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  481. data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  482. data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  483. data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
  484. data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  485. data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  486. data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  487. data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  488. data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  489. data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  490. data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
  491. data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
  492. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
  493. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
  494. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  495. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  496. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  497. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  498. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  499. data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
  500. data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  501. data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
  502. data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
  503. data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
  504. data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
  505. data/ext/sources/ggml/src/ggml-quants.c +96 -5
  506. data/ext/sources/ggml/src/ggml-quants.h +3 -0
  507. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
  508. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +59 -87
  509. data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +81 -0
  510. data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
  511. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +7 -0
  512. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -29
  513. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
  514. data/ext/sources/ggml/src/ggml-sycl/common.hpp +427 -20
  515. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
  516. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +103 -1
  517. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
  518. data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  519. data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  520. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
  521. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
  522. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +867 -50
  523. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +401 -358
  524. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +12 -2
  525. data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
  526. data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
  527. data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
  528. data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
  529. data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
  530. data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
  531. data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
  532. data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
  533. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +645 -155
  534. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
  535. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +221 -66
  536. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
  537. data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
  538. data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
  539. data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
  540. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  541. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  542. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +5 -0
  543. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
  544. data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  545. data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  546. data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
  547. data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
  548. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +457 -281
  549. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
  550. data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
  551. data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
  552. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
  553. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
  554. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  555. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  556. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  557. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  558. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  559. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  560. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  561. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  562. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  563. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  564. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  565. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
  566. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
  567. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
  568. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
  569. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
  570. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
  571. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
  572. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
  573. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
  574. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
  575. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
  576. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
  577. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
  578. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
  579. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
  580. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
  581. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
  582. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
  583. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
  584. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
  585. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
  586. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
  587. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
  588. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
  589. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
  590. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
  591. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
  592. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
  593. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
  594. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
  595. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
  596. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
  597. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
  598. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
  599. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
  600. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
  601. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +71 -0
  602. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
  603. data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  604. data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  605. data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  606. data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  607. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  608. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  609. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  610. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  611. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  612. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  613. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  614. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  615. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  616. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  617. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  618. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  619. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  620. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  621. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  622. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  623. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  624. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  625. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  626. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  627. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  628. data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  629. data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  630. data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  631. data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  632. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  633. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  634. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  635. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  636. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  637. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  638. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  639. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  640. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  641. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  642. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  643. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  644. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  645. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +39 -19
  646. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5994 -3055
  647. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  648. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +18 -10
  649. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  650. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  651. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  652. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  653. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  654. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  655. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  656. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  657. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  658. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  659. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  660. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  661. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
  662. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  663. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  664. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  665. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
  666. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  667. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  668. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  669. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  670. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  671. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  672. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  673. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  674. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
  675. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  676. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  677. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  678. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  679. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
  680. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  681. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
  682. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
  683. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
  684. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  685. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  686. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  687. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  688. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  689. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  690. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  691. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  692. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  693. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  694. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  695. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  696. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  697. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
  698. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  699. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  700. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  701. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
  702. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  703. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +386 -160
  704. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +82 -20
  705. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +400 -174
  706. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +123 -37
  707. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  708. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +10 -9
  709. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  710. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
  711. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  712. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  713. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  714. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  715. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  716. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  717. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
  718. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
  719. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
  720. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
  721. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
  722. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  723. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  724. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
  725. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
  726. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
  727. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
  728. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +13 -10
  729. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  730. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  731. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  732. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
  733. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +77 -29
  734. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  735. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
  736. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
  737. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
  738. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
  739. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
  740. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
  741. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
  742. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  743. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  744. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
  745. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
  746. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
  747. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
  748. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
  749. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
  750. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
  751. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +88 -105
  752. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +41 -26
  753. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
  754. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  755. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +92 -230
  756. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  757. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  758. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  759. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  760. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  761. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  762. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  763. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
  764. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  765. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
  766. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  767. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  768. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  769. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  770. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +49 -4
  771. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  772. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  773. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  774. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  775. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  776. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +8 -49
  777. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +8 -32
  778. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +8 -32
  779. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +33 -0
  780. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +8 -38
  781. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  782. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  783. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  784. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  785. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  786. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  787. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  788. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  789. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
  790. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  791. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  792. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  793. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  794. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  795. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  796. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  797. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  798. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  799. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  800. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  801. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  802. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
  803. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  804. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  805. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  806. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  807. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
  808. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  809. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  810. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  811. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
  812. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  813. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
  814. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
  815. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +384 -180
  816. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  817. data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
  818. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1374 -0
  819. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2544 -726
  820. data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  821. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  822. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  823. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  824. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
  825. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
  826. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  827. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  828. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  829. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +73 -15
  830. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  831. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
  832. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  833. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +72 -261
  834. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +766 -0
  835. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +147 -0
  836. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +196 -0
  837. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
  838. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  839. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  840. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
  841. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  842. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  843. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
  844. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  845. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  846. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  847. data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
  848. data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  849. data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  850. data/ext/sources/ggml/src/ggml.c +590 -64
  851. data/ext/sources/ggml/src/gguf.cpp +229 -44
  852. data/ext/sources/include/whisper.h +1 -0
  853. data/ext/sources/src/CMakeLists.txt +3 -1
  854. data/ext/sources/src/whisper.cpp +106 -62
  855. data/ext/sources/tests/CMakeLists.txt +2 -2
  856. data/ext/sources/tests/test-vad-full.cpp +4 -2
  857. data/ext/sources/tests/test-vad.cpp +1 -1
  858. data/extsources.rb +1 -0
  859. data/lib/whisper/model/uri.rb +17 -18
  860. data/sig/whisper.rbs +162 -4
  861. data/test/test_context_params.rb +82 -0
  862. data/test/test_params.rb +16 -8
  863. data/test/test_segment.rb +0 -1
  864. data/test/test_token.rb +81 -0
  865. data/test/test_vad.rb +1 -1
  866. data/test/test_vad_context.rb +100 -0
  867. data/test/test_vad_segment.rb +19 -0
  868. data/test/test_vad_segments.rb +16 -0
  869. data/test/test_whisper.rb +27 -0
  870. data/whispercpp.gemspec +1 -1
  871. metadata +502 -37
  872. data/ext/sources/build-xcframework.sh +0 -571
  873. data/ext/sources/examples/talk-llama/llama-sampling.h +0 -32
  874. data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
  875. data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
  876. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  877. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  878. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
  879. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
  880. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
  881. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  882. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
  883. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
  884. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
  885. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  886. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  887. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  888. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  889. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  890. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  891. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Copyright (c) 2023-2024 The ggml authors
2
+ * Copyright (c) 2023-2026 The ggml authors
3
3
  *
4
4
  * Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  * of this software and associated documentation files (the "Software"), to
@@ -23,31 +23,36 @@
23
23
  #ifndef CANN_ACLNN_OPS
24
24
  #define CANN_ACLNN_OPS
25
25
 
26
- #include <unordered_set>
27
- #include <functional>
26
+ #include "acl_tensor.h"
27
+ #include "common.h"
28
+
28
29
  #include <aclnnop/aclnn_abs.h>
29
- #include <aclnnop/aclnn_neg.h>
30
- #include <aclnnop/aclnn_exp.h>
31
30
  #include <aclnnop/aclnn_arange.h>
32
31
  #include <aclnnop/aclnn_argsort.h>
33
32
  #include <aclnnop/aclnn_cat.h>
34
33
  #include <aclnnop/aclnn_clamp.h>
34
+ #include <aclnnop/aclnn_cos.h>
35
+ #include <aclnnop/aclnn_exp.h>
35
36
  #include <aclnnop/aclnn_gelu.h>
36
37
  #include <aclnnop/aclnn_gelu_v2.h>
37
- #include <aclnnop/aclnn_sigmoid.h>
38
38
  #include <aclnnop/aclnn_hardsigmoid.h>
39
39
  #include <aclnnop/aclnn_hardswish.h>
40
40
  #include <aclnnop/aclnn_leaky_relu.h>
41
+ #include <aclnnop/aclnn_log.h>
42
+ #include <aclnnop/aclnn_logsoftmax.h>
43
+ #include <aclnnop/aclnn_neg.h>
44
+ #include <aclnnop/aclnn_norm.h>
41
45
  #include <aclnnop/aclnn_relu.h>
46
+ #include <aclnnop/aclnn_sigmoid.h>
47
+ #include <aclnnop/aclnn_sign.h>
42
48
  #include <aclnnop/aclnn_silu.h>
43
- #include <aclnnop/aclnn_tanh.h>
44
- #include <aclnnop/aclnn_sqrt.h>
45
49
  #include <aclnnop/aclnn_sin.h>
46
- #include <aclnnop/aclnn_cos.h>
47
- #include <aclnnop/aclnn_log.h>
48
- #include <aclnnop/aclnn_sign.h>
49
- #include "acl_tensor.h"
50
- #include "common.h"
50
+ #include <aclnnop/aclnn_slice.h>
51
+ #include <aclnnop/aclnn_sqrt.h>
52
+ #include <aclnnop/aclnn_tanh.h>
53
+
54
+ #include <functional>
55
+ #include <unordered_set>
51
56
 
52
57
  /**
53
58
  * @brief Repeats a ggml tensor along each dimension to match the dimensions
@@ -62,7 +67,7 @@
62
67
  * @param dst The ggml tensor representing the destination, which op is
63
68
  * GGML_OP_REPEAT and specifies the desired dimensions.
64
69
  */
65
- void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
70
+ void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
66
71
 
67
72
  /**
68
73
  * @brief Applies the Leaky ReLU activation function to a tensor using the CANN
@@ -82,7 +87,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
82
87
  * @param dst The destination tensor where the result of the Leaky ReLU
83
88
  * activation is stored, which op is `GGML_OP_LEAKY_RELU`
84
89
  */
85
- void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
90
+ void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
86
91
 
87
92
  /**
88
93
  * @brief Concatenates multiple tensors along a specified dimension using the
@@ -97,7 +102,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
97
102
  * @attention tensorList length should be 2 and the dimension using for concat
98
103
  * default to 1.
99
104
  */
100
- void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
105
+ void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
101
106
 
102
107
  /**
103
108
  * @brief Generates a sequence of evenly spaced values within a specified
@@ -113,7 +118,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
113
118
  * `start`, 'stop' and 'step' are in dst->op_params and dst->op is
114
119
  * `GGML_OP_ARANGE`.
115
120
  */
116
- void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
121
+ void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst);
117
122
 
118
123
  /**
119
124
  * @brief Applies a clamp operation to the elements of a ggml tensor using the
@@ -131,7 +136,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
131
136
  * @param dst The destination tensor where the clamped values will be stored.
132
137
  * dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
133
138
  */
134
- void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
139
+ void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst);
135
140
 
136
141
  /**
137
142
  * @brief Scales the elements of a ggml tensor by a constant factor using the
@@ -148,7 +153,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
148
153
  * @param dst The destination tensor where the scaled values will be stored.
149
154
  * dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
150
155
  */
151
- void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
156
+ void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst);
152
157
 
153
158
  /**
154
159
  * @brief Sorts the elements of a ggml tensor and returns the indices that
@@ -163,7 +168,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
163
168
  * @param dst The destination tensor where the sorted indices will be stored.
164
169
  * dst->op is `GGML_OP_ARGSORT`.
165
170
  */
166
- void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
171
+ void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
167
172
 
168
173
  /**
169
174
  * @brief Computes the Layer Normalization for a ggml tensor using the CANN
@@ -185,7 +190,67 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
185
190
  * @param dst The destination tensor where the normalized values will be stored.
186
191
  * @attention `Var` defaults to dst->ne[0].
187
192
  */
188
- void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
193
+ void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
194
+
195
+ /**
196
+ * @brief Computes the L2 Normalization for a ggml tensor using the CANN
197
+ * backend.
198
+ *
199
+ * @details This function applies the L2 Normalization operation on the
200
+ * input tensor `src` and stores the result in the destination tensor
201
+ * `dst`. L2 Normalization scales the input tensor such that the
202
+ * L2 norm along the specified dimension equals 1. This operation
203
+ * is commonly used in neural networks for feature normalization
204
+ * and vector scaling.
205
+ * The operation is defined as:
206
+ * \f[
207
+ * \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
208
+ * \f]
209
+ * The normalization is performed along the last dimension by default.
210
+ *
211
+ * @param ctx The CANN context used for operations.
212
+ * @param dst The destination tensor where the normalized values will be stored.
213
+ * @attention The normalization is performed along the last dimension of the
214
+ * input tensor by default.
215
+ */
216
+ void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
217
+
218
+ /**
219
+ * @brief Computes the Cross Entropy Loss for a ggml tensor using the CANN
220
+ * backend.
221
+ *
222
+ * @details This function computes the cross entropy loss between the predicted
223
+ * logits and target probability distributions. The operation follows
224
+ * the same computation pattern as the CPU implementation:
225
+ * 1. Applies log_softmax to the logits along the class dimension
226
+ * 2. Element-wise multiplication with target distributions
227
+ * 3. Summation along the class dimension to get per-sample losses
228
+ * 4. Global summation and scaling by -1/nr to get final loss
229
+ *
230
+ * The computation can be expressed as:
231
+ * \f[
232
+ * \text{loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \cdot \log(\text{softmax}(x_{ij}))
233
+ * \f]
234
+ * where \f$N\f$ is the total number of samples, \f$C\f$ is the number
235
+ * of classes, \f$x\f$ are the logits, and \f$y\f$ are the target
236
+ * probability distributions.
237
+ *
238
+ * @param ctx The CANN context used for operations.
239
+ * @param dst The destination tensor where the computed loss will be stored.
240
+ * This should be a scalar tensor containing the final loss value.
241
+ *
242
+ * @note This implementation computes cross entropy between probability
243
+ * distributions, not the typical classification cross entropy that
244
+ * expects class indices as targets. Both input tensors (src0 and src1)
245
+ * should have the same shape and represent probability distributions
246
+ * over the class dimension.
247
+ * @note The function expects two source tensors:
248
+ * - dst->src[0]: Logits tensor (before softmax)
249
+ * - dst->src[1]: Target probability distributions tensor
250
+ * @note The computation is performed using CANN backend operators including
251
+ * LogSoftmax, Mul, ReduceSum, and Muls for the final scaling.
252
+ */
253
+ void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst);
189
254
 
190
255
  /**
191
256
  * @brief Computes the Group Normalization for a ggml tensor using the CANN
@@ -209,7 +274,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
209
274
  *
210
275
  * @attention eps defaults to 1e-6f.
211
276
  */
212
- void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
277
+ void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
213
278
 
214
279
  /**
215
280
  * @brief Computes the accumulation of tensors using the CANN backend.
@@ -228,7 +293,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
228
293
  * @param dst The destination tensor where the accumulated values will be stored.
229
294
  * `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
230
295
  */
231
- void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
296
+ void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst);
232
297
 
233
298
  /**
234
299
  * @brief Computes the sum of elements along the last dimension of a ggml tensor
@@ -244,7 +309,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
244
309
  *
245
310
  * @attention `reduce_dims` defaults to 3, which means the last dimension.
246
311
  */
247
- void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
312
+ void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
248
313
 
249
314
  /**
250
315
  * @brief Computes the sum of elements in a ggml tensor.
@@ -258,7 +323,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
258
323
  *
259
324
  */
260
325
 
261
- void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
326
+ void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
262
327
 
263
328
  /**
264
329
  * @brief Upsamples a ggml tensor using nearest neighbor interpolation using
@@ -274,8 +339,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
274
339
  * @param dst The destination tensor where the upsampled values will be stored.
275
340
  * dst->op is `GGML_OP_UPSCALE`.
276
341
  */
277
- void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
278
- ggml_tensor* dst);
342
+ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
279
343
 
280
344
  /**
281
345
  * @brief Pads a ggml tensor to match the dimensions of the destination tensor
@@ -290,7 +354,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
290
354
  * @param dst The destination tensor, which specifies the target dimensions for
291
355
  * padding. dst->op is `GGML_OP_PAD`.
292
356
  */
293
- void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
357
+ void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst);
294
358
 
295
359
  /**
296
360
  * @brief Executes a 2D pooling operation on a ggml tensor using the CANN
@@ -307,7 +371,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
307
371
  * @param dst The destination tensor on which the pooling operation is to be
308
372
  * performed. dst->op is `GGML_OP_POOL_2D`.
309
373
  */
310
- void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
374
+ void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
311
375
 
312
376
  /**
313
377
  * @brief Duplicates a ggml tensor using the CANN backend.
@@ -326,7 +390,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
326
390
  * different shape and dst is no-contiguous.
327
391
  * @note: This func need to simplify.
328
392
  */
329
- void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
393
+ void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst);
330
394
 
331
395
  /**
332
396
  * @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
@@ -348,7 +412,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
348
412
  * @param dst The destination tensor where the normalized values will be stored.
349
413
  * dst->op is `GGML_OP_RMS_NORM`.
350
414
  */
351
- void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
415
+ void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
352
416
 
353
417
  /**
354
418
  * @brief Applies a diagonal mask to the tensor with a specified value.
@@ -363,7 +427,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
363
427
  * `GGML_OP_DIAG_MASK`
364
428
  * @param value The value to use for masking.
365
429
  */
366
- void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
430
+ void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value);
367
431
 
368
432
  /**
369
433
  * @brief Performs an image-to-column transformation on the input tensor.
@@ -378,7 +442,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float
378
442
  * @param dst The destination tensor that stores the result of the operation.
379
443
  * dst->op is `GGML_OP_IM2COL`.
380
444
  */
381
- void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
445
+ void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst);
382
446
 
383
447
  /**
384
448
  * @brief Computes time step embeddings using sine and cosine functions.
@@ -392,10 +456,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
392
456
  * @param dst The destination tensor where the result of the embedding operation
393
457
  * will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
394
458
  */
395
- void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
459
+ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst);
396
460
 
397
461
  // @see ggml_cann_dup.
398
- void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
462
+ void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
399
463
 
400
464
  /**
401
465
  * @brief Computes the softmax activation with optional masking.
@@ -417,7 +481,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
417
481
  * @param dst The destination tensor where the result will be stored. dst->op is
418
482
  * `GGML_OP_SOFTMAX`.
419
483
  */
420
- void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
484
+ void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
421
485
 
422
486
  /**
423
487
  * @brief Extracts specific rows from a tensor based on indices.
@@ -429,7 +493,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
429
493
  * @param ctx The backend CANN context for executing operations.
430
494
  * @param dst The destination tensor where the extracted rows will be stored.
431
495
  */
432
- void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
496
+ void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
433
497
 
434
498
  /**
435
499
  * @brief Writes specific rows into a tensor at positions specified by indices.
@@ -441,7 +505,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
441
505
  * @param ctx The backend CANN context for executing operations.
442
506
  * @param dst The destination tensor where the specified rows will be updated.
443
507
  */
444
- void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
508
+ void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
445
509
 
446
510
  /**
447
511
  * @brief Executes matrix multiplication for the given tensor.
@@ -454,7 +518,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
454
518
  * @param dst The destination tensor for storing the result of the matrix
455
519
  * multiplication. dst->op is `GGML_OP_MUL_MAT`.
456
520
  */
457
- void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
521
+ void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
458
522
 
459
523
  /**
460
524
  * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
@@ -477,7 +541,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
477
541
  * @note The function currently does not support cases where the freq_scale is
478
542
  * not equal 1.
479
543
  */
480
- void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
544
+ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);
481
545
 
482
546
  /**
483
547
  * @brief Computes the index of the maximum value along the specified dimension
@@ -492,7 +556,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
492
556
  * @param dst The destination tensor where the indices of the maximum values will
493
557
  * be stored. dst->op is `GGML_OP_ARGMAX`.
494
558
  */
495
- void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
559
+ void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
496
560
 
497
561
  /**
498
562
  * @brief Adds two tensors element-wise and stores the result in a destination
@@ -509,8 +573,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
509
573
  * @param acl_src1 The second source tensor.
510
574
  * @param acl_dst The destination tensor where the result will be stored.
511
575
  */
512
- void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
513
- aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
576
+ void aclnn_add(ggml_backend_cann_context & ctx,
577
+ aclTensor * acl_src0,
578
+ aclTensor * acl_src1,
579
+ aclTensor * acl_dst = nullptr);
514
580
 
515
581
  /**
516
582
  * @brief Sub two tensors element-wise and stores the result in a destination
@@ -527,8 +593,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
527
593
  * @param acl_src1 The second source tensor.
528
594
  * @param acl_dst The destination tensor where the result will be stored.
529
595
  */
530
- void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
531
- aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
596
+ void aclnn_sub(ggml_backend_cann_context & ctx,
597
+ aclTensor * acl_src0,
598
+ aclTensor * acl_src1,
599
+ aclTensor * acl_dst = nullptr);
532
600
 
533
601
  /**
534
602
  * @brief Performs element-wise multiplication of two tensors and stores the
@@ -546,8 +614,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
546
614
  * @param acl_other The second tensor for element-wise multiplication.
547
615
  * @param acl_dst The destination tensor where the result will be stored.
548
616
  */
549
- void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
550
- aclTensor* acl_other, aclTensor* acl_dst = nullptr);
617
+ void aclnn_mul(ggml_backend_cann_context & ctx,
618
+ aclTensor * acl_src,
619
+ aclTensor * acl_other,
620
+ aclTensor * acl_dst = nullptr);
551
621
 
552
622
  /**
553
623
  * @brief Matrix division, optionally in-place.
@@ -567,8 +637,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
567
637
  * @param inplace Flag indicating whether to perform the operation in-place on
568
638
  * `acl_src`.
569
639
  */
570
- void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
571
- aclTensor* acl_other, aclTensor* acl_dst = nullptr);
640
+ void aclnn_div(ggml_backend_cann_context & ctx,
641
+ aclTensor * acl_src,
642
+ aclTensor * acl_other,
643
+ aclTensor * acl_dst = nullptr);
572
644
 
573
645
  /**
574
646
  * @brief Applies element-wise cosine function to the elements of a tensor.
@@ -584,8 +656,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
584
656
  * @param acl_dst The destination tensor where the cosine results will be
585
657
  * stored.
586
658
  */
587
- void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
588
- aclTensor* acl_dst);
659
+ void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
589
660
 
590
661
  /**
591
662
  * @brief Applies element-wise sine function to the elements of a tensor.
@@ -602,8 +673,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
602
673
  * @param acl_src The source tensor on which the sine function will be applied.
603
674
  * @param acl_dst The destination tensor where the sine results will be stored.
604
675
  */
605
- void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
606
- aclTensor* acl_dst);
676
+ void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
607
677
 
608
678
  /**
609
679
  * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
@@ -621,8 +691,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
621
691
  * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
622
692
  * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
623
693
  */
624
- void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
625
- aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
694
+ void bcast_shape(ggml_tensor * src0,
695
+ ggml_tensor * src1,
696
+ ggml_tensor * dst,
697
+ acl_tensor_ptr & acl_src0,
698
+ acl_tensor_ptr & acl_src1,
699
+ acl_tensor_ptr & acl_dst);
626
700
 
627
701
  /**
628
702
  * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
@@ -637,7 +711,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
637
711
  * @param dst The destination tensor where the transposed convolution result
638
712
  * will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
639
713
  */
640
- void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
714
+ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
641
715
 
642
716
  /**
643
717
  * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
@@ -662,7 +736,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
662
736
  * @param dst The destination tensor where the ELU-activated result will be stored.
663
737
  * dst->op is expected to be `GGML_OP_ELU`.
664
738
  */
665
- void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
739
+ void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
666
740
 
667
741
  /**
668
742
  * @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
@@ -677,7 +751,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
677
751
  * @param dst The destination tensor where the mean result will be stored.
678
752
  * dst->op is expected to be `GGML_OP_MEAN`.
679
753
  */
680
- void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
754
+ void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst);
681
755
 
682
756
  /**
683
757
  * @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
@@ -692,7 +766,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
692
766
  * @param dst The destination tensor where the padded result will be stored.
693
767
  * dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
694
768
  */
695
- void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
769
+ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
696
770
 
697
771
  /**
698
772
  * @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
@@ -708,7 +782,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
708
782
  * @param dst The destination tensor where the result will be stored.
709
783
  * dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
710
784
  */
711
- void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
785
+ void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);
712
786
 
713
787
  /**
714
788
  * @brief Applies the Step activation function to a ggml tensor using the CANN backend.
@@ -723,7 +797,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
723
797
  * @param dst The destination tensor where the result will be stored.
724
798
  * dst->op is expected to be `GGML_OP_STEP`.
725
799
  */
726
- void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
800
+ void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);
727
801
 
728
802
  /**
729
803
  * @brief Performs the Flash Attention extended operator using the CANN backend.
@@ -738,167 +812,22 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
738
812
  * @param dst The destination tensor where the result will be stored.
739
813
  * dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`.
740
814
  */
741
- void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst);
742
-
743
- /*
744
- * @brief A generic wrapper for ACL resources with custom deleter support.
745
- */
746
- using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
815
+ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst);
747
816
 
748
817
  /**
749
- * @brief Trait structure used to define how to destroy a given ACL resource type.
818
+ * @brief Forward Gated Linear Attention on the CANN backend.
750
819
  *
751
- * @tparam T ACL resource type.
752
- */
753
- template<typename T>
754
- struct acl_resource_traits;
755
-
756
- /**
757
- * @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
758
- */
759
- template<>
760
- struct acl_resource_traits<aclTensor> {
761
- static void destroy(void* p) {
762
- ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
763
- }
764
- };
765
-
766
- /**
767
- * @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
768
- */
769
- template<>
770
- struct acl_resource_traits<aclIntArray> {
771
- static void destroy(void* p) {
772
- ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
773
- }
774
- };
775
-
776
- /**
777
- * @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
778
- */
779
- template<>
780
- struct acl_resource_traits<aclScalar> {
781
- static void destroy(void* p) {
782
- ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
783
- }
784
- };
785
-
786
- /**
787
- * @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
788
- */
789
- template<>
790
- struct acl_resource_traits<aclTensorList> {
791
- static void destroy(void* p) {
792
- ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
793
- }
794
- };
795
-
796
- /**
797
- * @brief Creates a generic ACL resource wrapper with proper destruction logic.
820
+ * Expects dst->src[0..4] = {k, v, q, g, s} with shape conventions:
821
+ * k, v, q, g: [D] with outer dims T x H batched as ne[2]=T, ne[1]=H
822
+ * s: initial state [B, H, D, D], where B is batch and D=C/H
823
+ * dst holds both outputs (o) and updated state; a scale factor is read from op params.
798
824
  *
799
- * @tparam T ACL resource type.
800
- * @param ptr Raw pointer to ACL resource.
801
- * @return any_acl_resource Smart pointer that handles destruction.
802
- */
803
- template<typename T>
804
- any_acl_resource make_acl_resource(T* ptr) {
805
- return any_acl_resource(
806
- static_cast<void*>(ptr),
807
- [](void* p) {
808
- acl_resource_traits<T>::destroy(p);
809
- }
810
- );
811
- }
812
-
813
- /**
814
- * @brief Registers multiple ACL resources into a vector for lifetime management.
825
+ * The kernel updates per time step l: S_new = g ⊗ S_old + k ⊗ v, then computes o = (S_new^T q) * scale.
815
826
  *
816
- * @tparam Args Variadic list of ACL resource types.
817
- * @param vec Target vector to hold ACL resources.
818
- * @param args Raw pointers to ACL resources.
819
- */
820
- template<typename... Args>
821
- void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
822
- (vec.emplace_back(make_acl_resource(args)), ...);
823
- }
824
-
825
- /**
826
- * @brief Task class that wraps the execution of an aclnn function call.
827
- */
828
- class aclnn_task : public cann_task {
829
- public:
830
- aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
831
- uint64_t workspace_size, aclOpExecutor * executor,
832
- aclrtStream stream) :
833
- aclnn_func_(aclnn_func),
834
- workspace_addr_(workspace_addr),
835
- workspace_size_(workspace_size),
836
- executor_(executor),
837
- stream_(stream) {}
838
- virtual void run_task() override {
839
- ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
840
- }
841
- private:
842
- aclnn_func_t aclnn_func_;
843
- void * workspace_addr_;
844
- uint64_t workspace_size_;
845
- aclOpExecutor * executor_;
846
- aclrtStream stream_;
847
- };
848
-
849
- /**
850
- * @brief Task class that releases ACL resources after usage.
851
- */
852
- class release_resource_task : public cann_task {
853
- public:
854
- release_resource_task(std::vector<any_acl_resource>&& resources){
855
- resource_ = std::move(resources);
856
- }
857
-
858
- virtual void run_task() override {
859
- resource_.clear();
860
- }
861
- private:
862
- std::vector<any_acl_resource> resource_;
863
- };
864
-
865
- /**
866
- * @brief Task class for performing asynchronous memory copy operations.
827
+ * @param ctx Backend context providing stream/allocator utilities.
828
+ * @param dst Output tensor; src deps are k, v, q, g, s as above.
867
829
  */
868
- class async_memcpy_task : public cann_task {
869
- public:
870
- async_memcpy_task(void* dst, const void* src, size_t size,
871
- aclrtMemcpyKind kind, aclrtStream stream)
872
- : dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
873
-
874
- virtual void run_task() override {
875
- ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
876
- }
877
- private:
878
- void* dst_;
879
- const void* src_;
880
- size_t size_;
881
- aclrtMemcpyKind kind_;
882
- aclrtStream stream_;
883
- };
884
-
885
- /**
886
- * @brief Task class for performing asynchronous memory set operations.
887
- */
888
- class async_memset_task : public cann_task {
889
- public:
890
- async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
891
- : buffer_(buffer), size_(size), value_(value), stream_(stream) {}
892
-
893
- virtual void run_task() override {
894
- ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
895
- }
896
- private:
897
- void* buffer_;
898
- size_t size_;
899
- int32_t value_;
900
- aclrtStream stream_;
901
- };
830
+ void ggml_cann_gated_linear_attn(ggml_backend_cann_context & ctx, ggml_tensor * dst);
902
831
 
903
832
  /**
904
833
  * @brief Launches an asynchronous task using the memory allocator.
@@ -918,91 +847,19 @@ class async_memset_task : public cann_task {
918
847
  * same stream are executed in queue order.
919
848
  */
920
849
 
921
- #define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
922
- do { \
923
- uint64_t workspaceSize = 0; \
924
- aclOpExecutor * executor; \
925
- void * workspaceAddr = nullptr; \
926
- ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));\
927
- /* workspace should alloced in main thread to keep malloc order when using vmm. */ \
928
- if (workspaceSize > 0) { \
929
- ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
930
- workspaceAddr = workspace_allocator.get(); \
931
- } \
932
- if (CTX.async_mode) { \
933
- auto task = \
934
- std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, \
935
- executor, CTX.stream()); \
936
- CTX.task_queue.submit_task(std::move(task)); \
937
- } else { \
938
- ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));\
939
- } \
940
- } while (0)
941
-
942
- /**
943
- * @brief Registers and releases multiple ACL resources, optionally deferring the release
944
- * using a task.
945
- *
946
- * @tparam Args Types of the ACL resources.
947
- * @param ctx Backend context which manages task submission and async mode.
948
- * @param args Pointers to ACL resources to be released.
949
- */
950
- template <typename... Args>
951
- void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
952
- std::vector<any_acl_resource> resources;
953
- register_acl_resources(resources, std::forward<Args>(args)...);
954
- if(ctx.async_mode) {
955
- auto task = std::make_unique<release_resource_task>(std::move(resources));
956
- ctx.task_queue.submit_task(std::move(task));
957
- }
958
- }
959
-
960
- /**
961
- * @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
962
- *
963
- * @param ctx Backend context containing stream and async configuration.
964
- * @param dst Destination memory address.
965
- * @param src Source memory address.
966
- * @param len Size of memory to copy (in bytes).
967
- * @param kind Type of memory copy (host-to-device, device-to-host, etc).
968
- */
969
- inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
970
- const void * src, size_t len, aclrtMemcpyKind kind) {
971
- if (ctx.async_mode) {
972
- auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
973
- ctx.task_queue.submit_task(std::move(task));
974
- } else {
975
- ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx.stream()));
976
- }
977
- }
978
-
979
- inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
980
- const void * src, size_t len, aclrtMemcpyKind kind) {
981
- if (ctx->async_mode) {
982
- auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
983
- ctx->task_queue.submit_task(std::move(task));
984
- } else {
985
- ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx->stream()));
986
- }
987
- }
988
-
989
- /**
990
- * @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
991
- *
992
- * @param ctx Backend context containing stream and async configuration.
993
- * @param buffer Memory buffer to be set.
994
- * @param size Size of the memory buffer (in bytes).
995
- * @param value Value to set in the buffer.
996
- */
997
- inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
998
- size_t size, int value) {
999
- if (ctx.async_mode) {
1000
- auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
1001
- ctx.task_queue.submit_task(std::move(task));
1002
- } else {
1003
- ACL_CHECK(aclrtMemsetAsync(buffer, size, value, size, ctx.stream()));
1004
- }
1005
- }
850
+ # define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...) \
851
+ do { \
852
+ uint64_t workspaceSize = 0; \
853
+ aclOpExecutor * executor; \
854
+ void * workspaceAddr = nullptr; \
855
+ ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
856
+ /* workspace should alloced in main thread to keep malloc order when using vmm. */ \
857
+ if (workspaceSize > 0) { \
858
+ ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize); \
859
+ workspaceAddr = workspace_allocator.get(); \
860
+ } \
861
+ ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
862
+ } while (0)
1006
863
 
1007
864
  /**
1008
865
  * @brief Performs sparse expert-based matrix multiplication using the CANN backend.
@@ -1029,7 +886,23 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
1029
886
  * @param dst The destination tensor where the expert-weighted token outputs are stored.
1030
887
  * Expected to be of shape [M, K, N, 1].
1031
888
  */
1032
- void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
889
+ void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst);
890
+
891
+ /**
892
+ * @brief Performs fused ADD + RMS_NORM operation using the CANN backend.
893
+ *
894
+ * This function fuses the ADD and RMS_NORM operations into a single kernel call
895
+ * for better performance. It first adds two input tensors (x1 + x2), then applies
896
+ * RMS normalization to the result.
897
+ *
898
+ * @param ctx The context for the CANN backend operations.
899
+ * @param dst The ADD operation node, contains the two input tensors to be added.
900
+ * @param rms_norm_tensor The RMS_NORM operation node, contains the gamma weights
901
+ * and epsilon parameter.
902
+ */
903
+ void ggml_cann_op_add_rms_norm_fused(ggml_backend_cann_context & ctx,
904
+ ggml_tensor * add_node,
905
+ ggml_tensor * rms_norm_node);
1033
906
 
1034
907
  /**
1035
908
  * @brief Check whether a tensor is a weight tensor for matrix multiplication.
@@ -1041,20 +914,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
1041
914
  *
1042
915
  * @param tensor Pointer to the target ggml_tensor object (const-qualified).
1043
916
  */
1044
- static bool is_matmul_weight(const ggml_tensor* tensor) {
1045
- std::string name = ggml_get_name(tensor);
1046
- static const std::unordered_set<std::string> weight_suffixes{
1047
- "output.weight",
1048
- "attn_q.weight",
1049
- "attn_k.weight",
1050
- "attn_v.weight",
1051
- "attn_output.weight",
1052
- "ffn_gate.weight",
1053
- "ffn_up.weight",
1054
- "ffn_down.weight"
1055
- };
1056
-
1057
- for (const auto& suffix : weight_suffixes) {
917
+ static bool is_matmul_weight(const ggml_tensor * tensor) {
918
+ std::string name = ggml_get_name(tensor);
919
+ static const std::unordered_set<std::string> weight_suffixes{ "output.weight", "attn_q.weight",
920
+ "attn_k.weight", "attn_v.weight",
921
+ "attn_output.weight", "ffn_gate.weight",
922
+ "ffn_up.weight", "ffn_down.weight" };
923
+
924
+ for (const auto & suffix : weight_suffixes) {
1058
925
  if (name.find(suffix) != std::string::npos) {
1059
926
  return true;
1060
927
  }
@@ -1078,23 +945,17 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
1078
945
  * @param ctx The CANN backend context used to manage execution and resources.
1079
946
  * @param dst The destination tensor.
1080
947
  */
1081
- template <auto binary_op>
1082
- void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1083
- ggml_tensor* src0 = dst->src[0];
1084
- ggml_tensor* src1 = dst->src[1];
948
+ template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
949
+ ggml_tensor * src0 = dst->src[0];
950
+ ggml_tensor * src1 = dst->src[1];
1085
951
 
1086
- aclTensor* acl_src0;
1087
- aclTensor* acl_src1;
1088
- aclTensor* acl_dst;
952
+ acl_tensor_ptr acl_src0, acl_src1, acl_dst;
1089
953
 
1090
954
  // Need bcast
1091
- bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
1092
- binary_op(ctx, acl_src0, acl_src1, acl_dst);
1093
-
1094
- ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
955
+ bcast_shape(src0, src1, dst, acl_src0, acl_src1, acl_dst);
956
+ binary_op(ctx, acl_src0.get(), acl_src1.get(), acl_dst.get());
1095
957
  }
1096
958
 
1097
-
1098
959
  /**
1099
960
  * @brief Applies a unary operation to an input tensor using the CANN backend.
1100
961
  *
@@ -1102,20 +963,19 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1102
963
  * and stores the result in the destination tensor.
1103
964
  *
1104
965
  * @tparam unary_op A callable with the signature:
1105
- * void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
966
+ * void(ggml_backend_cann_context&, aclTensor *, aclTensor *)
1106
967
  * where the first aclTensor is the source and the second is the destination.
1107
968
  * @param ctx The CANN backend context for managing resources and execution.
1108
969
  * @param dst The destination tensor. Its src[0] is treated as the input tensor.
1109
970
  */
1110
- template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
1111
- void ggml_cann_op_unary(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1112
- ggml_tensor* src = dst->src[0];
971
+ template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
972
+ void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
973
+ ggml_tensor * src = dst->src[0];
1113
974
 
1114
- aclTensor* acl_src = ggml_cann_create_tensor(src);
1115
- aclTensor* acl_dst = ggml_cann_create_tensor(dst);
975
+ acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
976
+ acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
1116
977
 
1117
- unary_op(ctx, acl_src, acl_dst);
1118
- ggml_cann_release_resources(ctx, acl_src, acl_dst);
978
+ unary_op(ctx, acl_src.get(), acl_dst.get());
1119
979
  }
1120
980
 
1121
981
  /**
@@ -1138,9 +998,11 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
1138
998
  *
1139
999
  * @see GGML_CANN_CALL_OP_UNARY
1140
1000
  */
1141
- void ggml_cann_op_unary(
1142
- std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
1143
- ggml_backend_cann_context& ctx, ggml_tensor* dst);
1001
+ void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
1002
+ ggml_backend_cann_context & ctx,
1003
+ ggml_tensor * dst);
1004
+
1005
+ void ggml_cann_ssm_conv(ggml_backend_cann_context & ctx, ggml_tensor * dst);
1144
1006
 
1145
1007
  /**
1146
1008
  * @brief Applies a gated (GLU-style) unary operation using the CANN backend.
@@ -1172,9 +1034,9 @@ void ggml_cann_op_unary(
1172
1034
  *
1173
1035
  * @see GGML_CANN_CALL_OP_UNARY_GATED
1174
1036
  */
1175
- void ggml_cann_op_unary_gated(
1176
- std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
1177
- ggml_backend_cann_context& ctx, ggml_tensor* dst);
1037
+ void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
1038
+ ggml_backend_cann_context & ctx,
1039
+ ggml_tensor * dst);
1178
1040
 
1179
1041
  /**
1180
1042
  * @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
@@ -1197,16 +1059,13 @@ void ggml_cann_op_unary_gated(
1197
1059
  * @see ggml_cann_op_unary
1198
1060
  * @see GGML_CANN_CALL_ACLNN_OP
1199
1061
  */
1200
- #define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
1201
- do { \
1202
- auto lambda = [](ggml_backend_cann_context& ctx, \
1203
- aclTensor* acl_src, \
1204
- aclTensor* acl_dst) { \
1205
- GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
1206
- }; \
1207
- ggml_cann_op_unary(lambda, ctx, dst); \
1208
- } \
1209
- while (0)
1062
+ # define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
1063
+ do { \
1064
+ auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
1065
+ GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
1066
+ }; \
1067
+ ggml_cann_op_unary(lambda, ctx, dst); \
1068
+ } while (0)
1210
1069
 
1211
1070
  /**
1212
1071
  * @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
@@ -1229,15 +1088,32 @@ void ggml_cann_op_unary_gated(
1229
1088
  * @see ggml_cann_op_unary_gated
1230
1089
  * @see GGML_CANN_CALL_ACLNN_OP
1231
1090
  */
1232
- #define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
1233
- do { \
1234
- auto lambda = [](ggml_backend_cann_context& ctx, \
1235
- aclTensor* acl_src, \
1236
- aclTensor* acl_dst) { \
1237
- GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
1238
- }; \
1239
- ggml_cann_op_unary_gated(lambda, ctx, dst); \
1240
- } \
1241
- while (0)
1091
+ # define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
1092
+ do { \
1093
+ auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
1094
+ GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
1095
+ }; \
1096
+ ggml_cann_op_unary_gated(lambda, ctx, dst); \
1097
+ } while (0)
1242
1098
 
1243
1099
  #endif // CANN_ACLNN_OPS
1100
+
1101
+ /**
1102
+ * @brief Performs outer product operation on two ggml tensors using the CANN backend.
1103
+ *
1104
+ * @details This function computes the outer product of two input tensors (src0 and src1)
1105
+ * and stores the result in the destination tensor. The outer product operation is defined as:
1106
+ * dst[i,j,k,l] = sum_m (src0[i,m,k,l] * src1[j,m,k,l])
1107
+ *
1108
+ * The function supports multiple data types including F32, F16. For floating-point
1109
+ * types, it uses batch matrix multiplication for efficient computation.
1110
+ *
1111
+ * The implementation handles 4D tensor broadcasting and batch processing automatically.
1112
+ *
1113
+ * @param ctx The CANN backend context for operation execution and memory management.
1114
+ * @param dst The destination ggml_tensor where the outer product result will be stored.
1115
+ * The input tensors are assumed to be `dst->src[0]` and `dst->src[1]`.
1116
+ *
1117
+ * @see GGML_CANN_CALL_ACLNN_OP for CANN operator invocation
1118
+ */
1119
+ void ggml_cann_out_prod(ggml_backend_cann_context & ctx, ggml_tensor * dst);