whispercpp 1.3.4 → 1.3.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (630)
  1. checksums.yaml +4 -4
  2. data/README.md +60 -43
  3. data/ext/extconf.rb +2 -2
  4. data/ext/ruby_whisper.c +14 -2
  5. data/ext/ruby_whisper.h +39 -0
  6. data/ext/ruby_whisper_context.c +22 -22
  7. data/ext/ruby_whisper_model.c +12 -12
  8. data/ext/ruby_whisper_params.c +47 -23
  9. data/ext/ruby_whisper_segment.c +84 -19
  10. data/ext/ruby_whisper_token.c +351 -0
  11. data/ext/ruby_whisper_transcribe.cpp +1 -1
  12. data/ext/ruby_whisper_vad_context.c +75 -0
  13. data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
  14. data/ext/ruby_whisper_vad_segment.c +139 -0
  15. data/ext/ruby_whisper_vad_segments.c +106 -0
  16. data/ext/sources/CMakeLists.txt +4 -1
  17. data/ext/sources/bindings/javascript/package.json +1 -1
  18. data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
  19. data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
  20. data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  21. data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
  22. data/ext/sources/examples/addon.node/vad-example.js +2 -2
  23. data/ext/sources/examples/cli/cli.cpp +121 -112
  24. data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
  25. data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
  26. data/ext/sources/examples/server/server.cpp +10 -11
  27. data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
  28. data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
  29. data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
  30. data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
  31. data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
  32. data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
  33. data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
  34. data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
  35. data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
  36. data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
  37. data/ext/sources/examples/talk-llama/llama-context.h +57 -9
  38. data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
  39. data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
  40. data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
  41. data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
  42. data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
  43. data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
  44. data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
  45. data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
  46. data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
  47. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
  48. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
  49. data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
  50. data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
  51. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
  52. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
  53. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
  54. data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
  55. data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
  56. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
  57. data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
  58. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
  59. data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
  60. data/ext/sources/examples/talk-llama/llama-model.h +44 -3
  61. data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
  62. data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
  63. data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
  64. data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
  65. data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
  66. data/ext/sources/examples/talk-llama/llama.cpp +729 -2
  67. data/ext/sources/examples/talk-llama/llama.h +152 -14
  68. data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
  69. data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
  70. data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
  71. data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
  72. data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
  73. data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
  74. data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
  75. data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
  76. data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
  77. data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
  78. data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
  79. data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
  80. data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
  81. data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
  82. data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
  83. data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
  84. data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
  85. data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
  86. data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
  87. data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
  88. data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
  89. data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
  90. data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
  91. data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
  92. data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
  93. data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
  94. data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
  95. data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
  96. data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
  97. data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
  98. data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
  99. data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
  100. data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
  101. data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
  102. data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
  103. data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
  104. data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
  105. data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
  106. data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
  107. data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
  108. data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
  109. data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
  110. data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
  111. data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
  112. data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
  113. data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
  114. data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
  115. data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
  116. data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
  117. data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
  118. data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
  119. data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
  120. data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
  121. data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
  122. data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
  123. data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
  124. data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
  125. data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
  126. data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
  127. data/ext/sources/examples/talk-llama/models/models.h +569 -0
  128. data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
  129. data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
  130. data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
  131. data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
  132. data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
  133. data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
  134. data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
  135. data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
  136. data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
  137. data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
  138. data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
  139. data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
  140. data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
  141. data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
  142. data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
  143. data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
  144. data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
  145. data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
  146. data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
  147. data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
  148. data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
  149. data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
  150. data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
  151. data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
  152. data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
  153. data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
  154. data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
  155. data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
  156. data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
  157. data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
  158. data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
  159. data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
  160. data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
  161. data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
  162. data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
  163. data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
  164. data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
  165. data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
  166. data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
  167. data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
  168. data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
  169. data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
  170. data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
  171. data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
  172. data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
  173. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
  174. data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
  175. data/ext/sources/ggml/CMakeLists.txt +82 -54
  176. data/ext/sources/ggml/include/ggml-alloc.h +9 -0
  177. data/ext/sources/ggml/include/ggml-backend.h +4 -1
  178. data/ext/sources/ggml/include/ggml-cpu.h +1 -0
  179. data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
  180. data/ext/sources/ggml/include/ggml-rpc.h +8 -11
  181. data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
  182. data/ext/sources/ggml/include/ggml.h +190 -12
  183. data/ext/sources/ggml/src/CMakeLists.txt +82 -11
  184. data/ext/sources/ggml/src/ggml-alloc.c +124 -41
  185. data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
  186. data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
  187. data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
  188. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
  189. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
  190. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
  191. data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
  192. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
  193. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
  194. data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
  195. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
  196. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
  197. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
  198. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  199. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  200. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
  201. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  202. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  203. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
  204. data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  205. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
  206. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
  207. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
  208. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
  209. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  210. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
  211. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
  212. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
  213. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
  214. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
  215. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
  216. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
  217. data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
  218. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
  219. data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
  220. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
  221. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
  222. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
  223. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
  224. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
  225. data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
  226. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
  227. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
  228. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
  229. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
  230. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
  231. data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
  232. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
  233. data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  234. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
  235. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
  236. data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
  237. data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  238. data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
  239. data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
  240. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
  241. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
  242. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
  243. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
  244. data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
  245. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
  246. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  247. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
  248. data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
  249. data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
  250. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
  251. data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
  252. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
  253. data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
  254. data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
  255. data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
  256. data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
  257. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
  258. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
  259. data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
  260. data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
  261. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
  262. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
  263. data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
  264. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
  265. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
  266. data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
  267. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
  268. data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
  269. data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
  270. data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
  271. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
  272. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  273. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  274. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
  275. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
  276. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  277. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  278. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  279. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  280. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  281. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  282. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  283. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  284. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  285. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
  286. data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
  287. data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
  288. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
  289. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
  290. data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
  291. data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
  292. data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
  293. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
  294. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
  295. data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
  296. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
  297. data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
  298. data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
  299. data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
  300. data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
  301. data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
  302. data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
  303. data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  304. data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
  305. data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
  306. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  307. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
  308. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
  309. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
  310. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
  311. data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  312. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
  313. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
  314. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
  315. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
  316. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
  317. data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
  318. data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
  319. data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
  320. data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
  321. data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
  322. data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
  323. data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
  324. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
  325. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  326. data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
  327. data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
  328. data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
  329. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
  330. data/ext/sources/ggml/src/ggml-impl.h +67 -6
  331. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
  332. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
  333. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
  334. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
  335. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
  336. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
  337. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
  338. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
  339. data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
  340. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
  341. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  342. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
  343. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
  344. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
  345. data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
  346. data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  347. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
  348. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  349. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  350. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  351. data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
  352. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  353. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  354. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  355. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  356. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  357. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  358. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  359. data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
  360. data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
  361. data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  362. data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  363. data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  364. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
  365. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
  366. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
  367. data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
  368. data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
  369. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
  370. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
  371. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
  372. data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
  373. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
  374. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
  375. data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  376. data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  377. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
  378. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
  379. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
  380. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
  381. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
  382. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
  383. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
  384. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
  385. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
  386. data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
  387. data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
  388. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  389. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  390. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
  391. data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  392. data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  393. data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
  394. data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
  395. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
  396. data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
  397. data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
  398. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
  399. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
  400. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  401. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  402. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
  403. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
  404. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
  405. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  406. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
  407. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  408. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  409. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  410. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  411. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  412. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  413. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  414. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  415. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  416. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  417. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  418. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  419. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
  420. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  421. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  422. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  423. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
  424. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  425. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  426. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  427. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  428. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  429. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  430. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  431. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  432. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
  433. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  434. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  435. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  436. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  437. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
  438. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  439. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
  440. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
  441. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
  442. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  443. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  444. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  445. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  446. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  447. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  448. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  449. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  450. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  451. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  452. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  453. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  454. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  455. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
  456. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  457. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  458. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
  459. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  460. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
  461. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
  462. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
  463. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
  464. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
  465. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  466. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  467. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  468. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  469. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  470. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  471. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  472. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
  473. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
  474. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
  475. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
  476. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
  477. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  478. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  479. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
  480. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
  481. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
  482. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
  483. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
  484. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  485. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  486. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  487. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
  488. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
  489. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  490. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
  491. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
  492. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
  493. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
  494. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
  495. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
  496. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
  497. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  498. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  499. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
  500. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
  501. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
  502. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
  503. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
  504. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
  505. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
  506. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
  507. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
  508. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
  509. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
  510. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
  511. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  512. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  513. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  514. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  515. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  516. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  517. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  518. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
  519. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  520. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
  521. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  522. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  523. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  524. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  525. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
  526. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  527. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  528. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  529. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
  530. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  531. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
  532. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
  533. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
  534. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
  535. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
  536. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  537. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  538. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  539. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  540. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  541. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  542. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  543. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
  544. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  545. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  546. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  547. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  548. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  549. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  550. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  551. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  552. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
  553. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  554. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  555. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  556. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
  557. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  558. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  559. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  560. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  561. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
  562. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  563. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  564. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  565. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
  566. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  567. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
  568. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
  569. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
  570. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  571. data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
  572. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
  573. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
  574. data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  575. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
  576. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
  577. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
  578. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
  579. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
  580. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  581. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
  582. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
  583. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
  584. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
  585. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
  586. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
  587. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  588. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
  589. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
  590. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  591. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
  592. data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  593. data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
  594. data/ext/sources/ggml/src/ggml.c +425 -33
  595. data/ext/sources/include/whisper.h +1 -0
  596. data/ext/sources/src/CMakeLists.txt +3 -1
  597. data/ext/sources/src/whisper.cpp +101 -35
  598. data/ext/sources/tests/CMakeLists.txt +2 -2
  599. data/ext/sources/tests/test-vad-full.cpp +4 -2
  600. data/ext/sources/tests/test-vad.cpp +1 -1
  601. data/extsources.rb +1 -0
  602. data/lib/whisper/model/uri.rb +17 -18
  603. data/sig/whisper.rbs +119 -2
  604. data/test/test_params.rb +16 -8
  605. data/test/test_segment.rb +0 -1
  606. data/test/test_token.rb +70 -0
  607. data/test/test_vad.rb +1 -1
  608. data/test/test_vad_context.rb +50 -0
  609. data/test/test_vad_segment.rb +19 -0
  610. data/test/test_vad_segments.rb +16 -0
  611. data/test/test_whisper.rb +7 -0
  612. data/whispercpp.gemspec +1 -1
  613. metadata +287 -34
  614. data/ext/sources/build-xcframework.sh +0 -571
  615. data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
  616. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  617. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  618. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
  619. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
  620. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  621. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
  622. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
  623. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
  624. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  625. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  626. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  627. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  628. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  629. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  630. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -53,13 +53,15 @@
53
53
 
54
54
  #define UNUSED GGML_UNUSED
55
55
 
56
+ // Needed for ggml_fp32_to_bf16_row()
57
+ #if defined(__AVX512BF16__)
56
58
  #if defined(_MSC_VER)
57
- #define m512bh(p) p
58
59
  #define m512i(p) p
59
60
  #else
60
- #define m512bh(p) (__m512bh)(p)
61
+ #include <immintrin.h>
61
62
  #define m512i(p) (__m512i)(p)
62
- #endif
63
+ #endif // defined(_MSC_VER)
64
+ #endif // defined(__AVX512BF16__)
63
65
 
64
66
  #if defined(__linux__) || \
65
67
  defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
@@ -124,6 +126,13 @@ static void ggml_print_backtrace_symbols(void) {
124
126
  int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
125
127
  backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
126
128
  }
129
+ #elif defined(__APPLE__)
130
+ #include <execinfo.h>
131
+ static void ggml_print_backtrace_symbols(void) {
132
+ void * trace[100];
133
+ int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
+ backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
+ }
127
136
  #else
128
137
  static void ggml_print_backtrace_symbols(void) {
129
138
  // platform not supported
@@ -135,6 +144,20 @@ void ggml_print_backtrace(void) {
135
144
  if (GGML_NO_BACKTRACE) {
136
145
  return;
137
146
  }
147
+ #if defined(__APPLE__)
148
+ // On macOS, fork+debugger attachment is problematic due to:
149
+ // 1. libdispatch "poisons" forked child processes
150
+ // 2. lldb has issues attaching to parent from forked child
151
+ // Use simple backtrace() instead to avoid Terminal.app crashes
152
+ const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
+ if (!GGML_BACKTRACE_LLDB) {
154
+ fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
+ fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
+ fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
+ ggml_print_backtrace_symbols();
158
+ return;
159
+ }
160
+ #endif
138
161
  #if defined(__linux__)
139
162
  FILE * f = fopen("/proc/self/status", "r");
140
163
  size_t size = 0;
@@ -935,6 +958,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
935
958
  "COS",
936
959
  "SUM",
937
960
  "SUM_ROWS",
961
+ "CUMSUM",
938
962
  "MEAN",
939
963
  "ARGMAX",
940
964
  "COUNT_EQUAL",
@@ -989,7 +1013,10 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
989
1013
  "ARANGE",
990
1014
  "TIMESTEP_EMBEDDING",
991
1015
  "ARGSORT",
1016
+ "TOP_K",
992
1017
  "LEAKY_RELU",
1018
+ "TRI",
1019
+ "FILL",
993
1020
 
994
1021
  "FLASH_ATTN_EXT",
995
1022
  "FLASH_ATTN_BACK",
@@ -1002,6 +1029,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
1002
1029
  "RWKV_WKV6",
1003
1030
  "GATED_LINEAR_ATTN",
1004
1031
  "RWKV_WKV7",
1032
+ "SOLVE_TRI",
1005
1033
 
1006
1034
  "UNARY",
1007
1035
 
@@ -1019,7 +1047,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
1019
1047
  "GLU",
1020
1048
  };
1021
1049
 
1022
- static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
1050
+ static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1023
1051
 
1024
1052
  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1025
1053
  "none",
@@ -1039,6 +1067,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1039
1067
  "cos(x)",
1040
1068
  "Σx",
1041
1069
  "Σx_k",
1070
+ "cumsum(x)",
1042
1071
  "Σx/n",
1043
1072
  "argmax(x)",
1044
1073
  "count_equal(x)",
@@ -1093,7 +1122,10 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1093
1122
  "arange(start, stop, step)",
1094
1123
  "timestep_embedding(timesteps, dim, max_period)",
1095
1124
  "argsort(x)",
1125
+ "top_k(x)",
1096
1126
  "leaky_relu(x)",
1127
+ "tri(x)",
1128
+ "fill(x, c)",
1097
1129
 
1098
1130
  "flash_attn_ext(x)",
1099
1131
  "flash_attn_back(x)",
@@ -1106,6 +1138,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1106
1138
  "rwkv_wkv6(k, v, r, tf, td, s)",
1107
1139
  "gated_linear_attn(k, v, q, gate, s)",
1108
1140
  "rwkv_wkv7(r, w, k, v, a, b, s)",
1141
+ "A X = B, A triangular, solve X",
1109
1142
 
1110
1143
  "unary(x)",
1111
1144
 
@@ -1123,7 +1156,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1123
1156
  "glu(x)",
1124
1157
  };
1125
1158
 
1126
- static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
1159
+ static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1127
1160
 
1128
1161
  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1129
1162
 
@@ -1142,11 +1175,17 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1142
1175
  "HARDSWISH",
1143
1176
  "HARDSIGMOID",
1144
1177
  "EXP",
1178
+ "EXPM1",
1179
+ "SOFTPLUS",
1145
1180
  "GELU_ERF",
1181
+ "XIELU",
1182
+ "FLOOR",
1183
+ "CEIL",
1184
+ "ROUND",
1185
+ "TRUNC",
1146
1186
  };
1147
1187
 
1148
- static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
1149
-
1188
+ static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1150
1189
 
1151
1190
  static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1152
1191
  "REGLU",
@@ -2254,6 +2293,30 @@ struct ggml_tensor * ggml_log_inplace(
2254
2293
  return ggml_log_impl(ctx, a, true);
2255
2294
  }
2256
2295
 
2296
+ struct ggml_tensor * ggml_expm1(
2297
+ struct ggml_context * ctx,
2298
+ struct ggml_tensor * a) {
2299
+ return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2300
+ }
2301
+
2302
+ struct ggml_tensor * ggml_expm1_inplace(
2303
+ struct ggml_context * ctx,
2304
+ struct ggml_tensor * a) {
2305
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2306
+ }
2307
+
2308
+ struct ggml_tensor * ggml_softplus(
2309
+ struct ggml_context * ctx,
2310
+ struct ggml_tensor * a) {
2311
+ return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2312
+ }
2313
+
2314
+ struct ggml_tensor * ggml_softplus_inplace(
2315
+ struct ggml_context * ctx,
2316
+ struct ggml_tensor * a) {
2317
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2318
+ }
2319
+
2257
2320
  // ggml_sin
2258
2321
 
2259
2322
  static struct ggml_tensor * ggml_sin_impl(
@@ -2337,6 +2400,21 @@ struct ggml_tensor * ggml_sum_rows(
2337
2400
  return result;
2338
2401
  }
2339
2402
 
2403
+ // ggml_cumsum
2404
+
2405
+ struct ggml_tensor * ggml_cumsum(
2406
+ struct ggml_context * ctx,
2407
+ struct ggml_tensor * a) {
2408
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
2409
+
2410
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2411
+
2412
+ result->op = GGML_OP_CUMSUM;
2413
+ result->src[0] = a;
2414
+
2415
+ return result;
2416
+ }
2417
+
2340
2418
  // ggml_mean
2341
2419
 
2342
2420
  struct ggml_tensor * ggml_mean(
@@ -2652,6 +2730,29 @@ struct ggml_tensor * ggml_silu_inplace(
2652
2730
  return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2653
2731
  }
2654
2732
 
2733
+ // ggml_xielu
2734
+
2735
+ struct ggml_tensor * ggml_xielu(
2736
+ struct ggml_context * ctx,
2737
+ struct ggml_tensor * a,
2738
+ float alpha_n,
2739
+ float alpha_p,
2740
+ float beta,
2741
+ float eps) {
2742
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2743
+
2744
+ ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2745
+ ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2746
+ ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2747
+ ggml_set_op_params_f32(result, 3, beta);
2748
+ ggml_set_op_params_f32(result, 4, eps);
2749
+
2750
+ result->op = GGML_OP_UNARY;
2751
+ result->src[0] = a;
2752
+
2753
+ return result;
2754
+ }
2755
+
2655
2756
  // ggml_silu_back
2656
2757
 
2657
2758
  struct ggml_tensor * ggml_silu_back(
@@ -2726,6 +2827,62 @@ static struct ggml_tensor * ggml_glu_impl(
2726
2827
  return result;
2727
2828
  }
2728
2829
 
2830
+ // ggml_floor
2831
+
2832
+ struct ggml_tensor * ggml_floor(
2833
+ struct ggml_context * ctx,
2834
+ struct ggml_tensor * a) {
2835
+ return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2836
+ }
2837
+
2838
+ struct ggml_tensor * ggml_floor_inplace(
2839
+ struct ggml_context * ctx,
2840
+ struct ggml_tensor * a) {
2841
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2842
+ }
2843
+
2844
+ // ggml_ceil
2845
+
2846
+ struct ggml_tensor * ggml_ceil(
2847
+ struct ggml_context * ctx,
2848
+ struct ggml_tensor * a) {
2849
+ return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2850
+ }
2851
+
2852
+ struct ggml_tensor * ggml_ceil_inplace(
2853
+ struct ggml_context * ctx,
2854
+ struct ggml_tensor * a) {
2855
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2856
+ }
2857
+
2858
+ //ggml_round
2859
+
2860
+ struct ggml_tensor * ggml_round(
2861
+ struct ggml_context * ctx,
2862
+ struct ggml_tensor * a) {
2863
+ return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2864
+ }
2865
+
2866
+ struct ggml_tensor * ggml_round_inplace(
2867
+ struct ggml_context * ctx,
2868
+ struct ggml_tensor * a) {
2869
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2870
+ }
2871
+
2872
+ //ggml_trunc
2873
+
2874
+ struct ggml_tensor * ggml_trunc(
2875
+ struct ggml_context * ctx,
2876
+ struct ggml_tensor * a) {
2877
+ return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2878
+ }
2879
+
2880
+ struct ggml_tensor * ggml_trunc_inplace(
2881
+ struct ggml_context * ctx,
2882
+ struct ggml_tensor * a) {
2883
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2884
+ }
2885
+
2729
2886
  struct ggml_tensor * ggml_glu(
2730
2887
  struct ggml_context * ctx,
2731
2888
  struct ggml_tensor * a,
@@ -3829,6 +3986,15 @@ struct ggml_tensor * ggml_soft_max_ext(
3829
3986
  return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3830
3987
  }
3831
3988
 
3989
+ struct ggml_tensor * ggml_soft_max_ext_inplace(
3990
+ struct ggml_context * ctx,
3991
+ struct ggml_tensor * a,
3992
+ struct ggml_tensor * mask,
3993
+ float scale,
3994
+ float max_bias) {
3995
+ return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
3996
+ }
3997
+
3832
3998
  void ggml_soft_max_add_sinks(
3833
3999
  struct ggml_tensor * a,
3834
4000
  struct ggml_tensor * sinks) {
@@ -4748,6 +4914,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
4748
4914
  int64_t ne3,
4749
4915
  uint32_t mode) {
4750
4916
  GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4917
+ // TODO: implement antialias for modes other than bilinear
4918
+ GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4751
4919
 
4752
4920
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4753
4921
 
@@ -4802,6 +4970,18 @@ struct ggml_tensor * ggml_pad(
4802
4970
  return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4803
4971
  }
4804
4972
 
4973
+ // ggml_pad_circular
4974
+
4975
+ struct ggml_tensor * ggml_pad_circular(
4976
+ struct ggml_context * ctx,
4977
+ struct ggml_tensor * a,
4978
+ int p0,
4979
+ int p1,
4980
+ int p2,
4981
+ int p3) {
4982
+ return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4983
+ }
4984
+
4805
4985
  struct ggml_tensor * ggml_pad_ext(
4806
4986
  struct ggml_context * ctx,
4807
4987
  struct ggml_tensor * a,
@@ -4828,6 +5008,7 @@ struct ggml_tensor * ggml_pad_ext(
4828
5008
  ggml_set_op_params_i32(result, 5, rp2);
4829
5009
  ggml_set_op_params_i32(result, 6, lp3);
4830
5010
  ggml_set_op_params_i32(result, 7, rp3);
5011
+ ggml_set_op_params_i32(result, 8, 0); // not circular by default
4831
5012
 
4832
5013
 
4833
5014
  result->op = GGML_OP_PAD;
@@ -4836,6 +5017,25 @@ struct ggml_tensor * ggml_pad_ext(
4836
5017
  return result;
4837
5018
  }
4838
5019
 
5020
+ // ggml_pad_ext_circular
5021
+
5022
+ struct ggml_tensor * ggml_pad_ext_circular(
5023
+ struct ggml_context * ctx,
5024
+ struct ggml_tensor * a,
5025
+ int lp0,
5026
+ int rp0,
5027
+ int lp1,
5028
+ int rp1,
5029
+ int lp2,
5030
+ int rp2,
5031
+ int lp3,
5032
+ int rp3
5033
+ ) {
5034
+ struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5035
+ ggml_set_op_params_i32(result, 8, 1); // circular
5036
+ return result;
5037
+ }
5038
+
4839
5039
  // ggml_pad_reflect_1d
4840
5040
 
4841
5041
  struct ggml_tensor * ggml_pad_reflect_1d(
@@ -4895,28 +5095,6 @@ struct ggml_tensor * ggml_roll(
4895
5095
  return result;
4896
5096
  }
4897
5097
 
4898
- // ggml_arange
4899
-
4900
- struct ggml_tensor * ggml_arange(
4901
- struct ggml_context * ctx,
4902
- float start,
4903
- float stop,
4904
- float step) {
4905
- GGML_ASSERT(stop > start);
4906
-
4907
- const int64_t steps = (int64_t) ceilf((stop - start) / step);
4908
-
4909
- struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
4910
-
4911
- ggml_set_op_params_f32(result, 0, start);
4912
- ggml_set_op_params_f32(result, 1, stop);
4913
- ggml_set_op_params_f32(result, 2, step);
4914
-
4915
- result->op = GGML_OP_ARANGE;
4916
-
4917
- return result;
4918
- }
4919
-
4920
5098
  // ggml_timestep_embedding
4921
5099
 
4922
5100
  struct ggml_tensor * ggml_timestep_embedding(
@@ -4936,6 +5114,61 @@ struct ggml_tensor * ggml_timestep_embedding(
4936
5114
  return result;
4937
5115
  }
4938
5116
 
5117
+ // ggml_tri
5118
+
5119
+ struct ggml_tensor * ggml_tri(
5120
+ struct ggml_context * ctx,
5121
+ struct ggml_tensor * a,
5122
+ enum ggml_tri_type type) {
5123
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
5124
+
5125
+ GGML_ASSERT(ggml_is_contiguous(a));
5126
+ GGML_ASSERT(a->ne[0] == a->ne[1]);
5127
+
5128
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5129
+
5130
+ ggml_set_op_params_i32(result, 0, type);
5131
+
5132
+ result->op = GGML_OP_TRI;
5133
+ result->src[0] = a;
5134
+
5135
+ return result;
5136
+ }
5137
+
5138
+ // ggml_fill
5139
+
5140
+ static struct ggml_tensor * ggml_fill_impl(
5141
+ struct ggml_context * ctx,
5142
+ struct ggml_tensor * a,
5143
+ float c,
5144
+ bool inplace) {
5145
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
5146
+ GGML_ASSERT(ggml_is_contiguous(a));
5147
+
5148
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5149
+
5150
+ ggml_set_op_params_f32(result, 0, c);
5151
+
5152
+ result->op = GGML_OP_FILL;
5153
+ result->src[0] = a;
5154
+
5155
+ return result;
5156
+ }
5157
+
5158
+ struct ggml_tensor * ggml_fill(
5159
+ struct ggml_context * ctx,
5160
+ struct ggml_tensor * a,
5161
+ float c) {
5162
+ return ggml_fill_impl(ctx, a, c, false);
5163
+ }
5164
+
5165
+ struct ggml_tensor * ggml_fill_inplace(
5166
+ struct ggml_context * ctx,
5167
+ struct ggml_tensor * a,
5168
+ float c) {
5169
+ return ggml_fill_impl(ctx, a, c, true);
5170
+ }
5171
+
4939
5172
  // ggml_argsort
4940
5173
 
4941
5174
  struct ggml_tensor * ggml_argsort(
@@ -4943,6 +5176,7 @@ struct ggml_tensor * ggml_argsort(
4943
5176
  struct ggml_tensor * a,
4944
5177
  enum ggml_sort_order order) {
4945
5178
  GGML_ASSERT(a->ne[0] <= INT32_MAX);
5179
+
4946
5180
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
4947
5181
 
4948
5182
  ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -4953,9 +5187,9 @@ struct ggml_tensor * ggml_argsort(
4953
5187
  return result;
4954
5188
  }
4955
5189
 
4956
- // ggml_top_k
5190
+ // ggml_argsort_top_k
4957
5191
 
4958
- struct ggml_tensor * ggml_top_k(
5192
+ struct ggml_tensor * ggml_argsort_top_k(
4959
5193
  struct ggml_context * ctx,
4960
5194
  struct ggml_tensor * a,
4961
5195
  int k) {
@@ -4971,6 +5205,44 @@ struct ggml_tensor * ggml_top_k(
4971
5205
  return result;
4972
5206
  }
4973
5207
 
5208
+ // ggml_top_k
5209
+
5210
+ struct ggml_tensor * ggml_top_k(
5211
+ struct ggml_context * ctx,
5212
+ struct ggml_tensor * a,
5213
+ int k) {
5214
+ GGML_ASSERT(a->ne[0] >= k);
5215
+
5216
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5217
+
5218
+ result->op = GGML_OP_TOP_K;
5219
+ result->src[0] = a;
5220
+
5221
+ return result;
5222
+ }
5223
+
5224
+ // ggml_arange
5225
+
5226
+ struct ggml_tensor * ggml_arange(
5227
+ struct ggml_context * ctx,
5228
+ float start,
5229
+ float stop,
5230
+ float step) {
5231
+ GGML_ASSERT(stop > start);
5232
+
5233
+ const int64_t steps = (int64_t) ceilf((stop - start) / step);
5234
+
5235
+ struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5236
+
5237
+ ggml_set_op_params_f32(result, 0, start);
5238
+ ggml_set_op_params_f32(result, 1, stop);
5239
+ ggml_set_op_params_f32(result, 2, step);
5240
+
5241
+ result->op = GGML_OP_ARANGE;
5242
+
5243
+ return result;
5244
+ }
5245
+
4974
5246
  // ggml_flash_attn_ext
4975
5247
 
4976
5248
  struct ggml_tensor * ggml_flash_attn_ext(
@@ -4990,8 +5262,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
4990
5262
 
4991
5263
  if (mask) {
4992
5264
  GGML_ASSERT(ggml_is_contiguous(mask));
4993
- GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
4994
- "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
4995
5265
  //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
4996
5266
 
4997
5267
  GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
@@ -5790,6 +6060,41 @@ struct ggml_tensor * ggml_opt_step_sgd(
5790
6060
  return result;
5791
6061
  }
5792
6062
 
6063
+ // solve_tri
6064
+
6065
+ struct ggml_tensor * ggml_solve_tri(
6066
+ struct ggml_context * ctx,
6067
+ struct ggml_tensor * a,
6068
+ struct ggml_tensor * b,
6069
+ bool left,
6070
+ bool lower,
6071
+ bool uni) {
6072
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
6073
+ GGML_ASSERT(b->type == GGML_TYPE_F32);
6074
+
6075
+ // A must be square and lower diagonal
6076
+ GGML_ASSERT(a->ne[0] == a->ne[1]);
6077
+ // B must have same outer dimension as A
6078
+ GGML_ASSERT(a->ne[1] == b->ne[1]);
6079
+
6080
+ // batch dimensions must be equal
6081
+ GGML_ASSERT(a->ne[2] == b->ne[2]);
6082
+ GGML_ASSERT(a->ne[3] == b->ne[3]);
6083
+
6084
+ GGML_ASSERT(ggml_is_contiguous(a));
6085
+ GGML_ASSERT(ggml_is_contiguous(b));
6086
+
6087
+ GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6088
+
6089
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6090
+
6091
+ result->op = GGML_OP_SOLVE_TRI;
6092
+ result->src[0] = a;
6093
+ result->src[1] = b;
6094
+
6095
+ return result;
6096
+ }
6097
+
5793
6098
  ////////////////////////////////////////////////////////////////////////////////
5794
6099
 
5795
6100
  struct ggml_hash_set ggml_hash_set_new(size_t size) {
@@ -6362,6 +6667,16 @@ static void ggml_compute_backward(
6362
6667
  ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6363
6668
  }
6364
6669
  } break;
6670
+ case GGML_UNARY_OP_EXPM1: {
6671
+ if (src0_needs_grads) {
6672
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6673
+ }
6674
+ } break;
6675
+ case GGML_UNARY_OP_SOFTPLUS: {
6676
+ if (src0_needs_grads) {
6677
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6678
+ }
6679
+ } break;
6365
6680
  default: {
6366
6681
  fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6367
6682
  __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
@@ -6872,6 +7187,78 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
6872
7187
  GGML_LOG_INFO("========================================\n");
6873
7188
  }
6874
7189
 
7190
+ static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7191
+ const int * idxs,
7192
+ int count,
7193
+ const struct ggml_tensor * tensor) {
7194
+ GGML_ASSERT(cgraph && idxs);
7195
+ for (int i = 0; i < count; ++i) {
7196
+ const int node_idx = idxs[i];
7197
+
7198
+ if (node_idx >= cgraph->n_nodes) {
7199
+ return -1;
7200
+ }
7201
+ if (cgraph->nodes[node_idx] == tensor) {
7202
+ return i;
7203
+ }
7204
+ }
7205
+ return -1;
7206
+ }
7207
+
7208
+ bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7209
+ const int * node_idxs,
7210
+ int count,
7211
+ const enum ggml_op * ops,
7212
+ const int * outputs,
7213
+ int num_outputs) {
7214
+ GGML_ASSERT(outputs && num_outputs > 0);
7215
+
7216
+ for (int i = 0; i < count; ++i) {
7217
+ if (node_idxs[i] >= cgraph->n_nodes) {
7218
+ return false;
7219
+ }
7220
+
7221
+ const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7222
+
7223
+ if (node->op != ops[i]) {
7224
+ return false;
7225
+ }
7226
+
7227
+ if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7228
+ continue;
7229
+ }
7230
+
7231
+ if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7232
+ return false;
7233
+ }
7234
+
7235
+ int subgraph_uses = 0;
7236
+ for (int j = i + 1; j < count; ++j) {
7237
+ const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7238
+ for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7239
+ if (other_node->src[src_idx] == node) {
7240
+ subgraph_uses++;
7241
+ }
7242
+ }
7243
+ }
7244
+
7245
+ if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7246
+ return false;
7247
+ }
7248
+
7249
+ // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7250
+ struct ggml_tensor * view_src = node->view_src;
7251
+ while (view_src) {
7252
+ if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7253
+ return false;
7254
+ }
7255
+ view_src = view_src->view_src;
7256
+ }
7257
+ }
7258
+
7259
+ return true;
7260
+ }
7261
+
6875
7262
  // check if node is part of the graph
6876
7263
  static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
6877
7264
  if (cgraph == NULL) {
@@ -7181,6 +7568,11 @@ size_t ggml_quantize_chunk(
7181
7568
 
7182
7569
  ////////////////////////////////////////////////////////////////////////////////
7183
7570
 
7571
+ void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7572
+ *log_callback = g_logger_state.log_callback;
7573
+ *user_data = g_logger_state.log_callback_user_data;
7574
+ }
7575
+
7184
7576
  void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7185
7577
  g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7186
7578
  g_logger_state.log_callback_user_data = user_data;
@@ -525,6 +525,7 @@ extern "C" {
525
525
  // use whisper_tokenize() to convert text to tokens
526
526
  // maximum of whisper_n_text_ctx()/2 tokens are used (typically 224)
527
527
  const char * initial_prompt;
528
+ bool carry_initial_prompt; // if true, always prepend initial_prompt to every decode window (may reduce conditioning on previous text)
528
529
  const whisper_token * prompt_tokens;
529
530
  int prompt_n_tokens;
530
531
 
@@ -79,6 +79,7 @@ if (WHISPER_COREML)
79
79
  )
80
80
 
81
81
  set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
82
+ install(TARGETS ${TARGET} LIBRARY)
82
83
  endif()
83
84
 
84
85
  if (WHISPER_OPENVINO)
@@ -125,7 +126,8 @@ if (WHISPER_EXTRA_FLAGS)
125
126
  target_compile_options(whisper PRIVATE ${WHISPER_EXTRA_FLAGS})
126
127
  endif()
127
128
 
128
- target_link_libraries(whisper PUBLIC ggml)
129
+ find_package(Threads REQUIRED)
130
+ target_link_libraries(whisper PUBLIC ggml Threads::Threads)
129
131
 
130
132
  if (WHISPER_COREML)
131
133
  target_link_libraries(whisper PRIVATE whisper.coreml)