whispercpp 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (891)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/README.md +158 -44
  4. data/ext/extconf.rb +3 -2
  5. data/ext/ruby_whisper.c +34 -6
  6. data/ext/ruby_whisper.h +67 -0
  7. data/ext/ruby_whisper_context.c +236 -144
  8. data/ext/ruby_whisper_context_params.c +163 -0
  9. data/ext/ruby_whisper_model.c +12 -13
  10. data/ext/ruby_whisper_params.c +47 -24
  11. data/ext/ruby_whisper_segment.c +84 -20
  12. data/ext/ruby_whisper_token.c +371 -0
  13. data/ext/ruby_whisper_transcribe.cpp +5 -2
  14. data/ext/ruby_whisper_vad_context.c +122 -0
  15. data/ext/ruby_whisper_vad_context_detect.cpp +51 -0
  16. data/ext/ruby_whisper_vad_params.c +0 -1
  17. data/ext/ruby_whisper_vad_segment.c +138 -0
  18. data/ext/ruby_whisper_vad_segments.c +105 -0
  19. data/ext/sources/CMakeLists.txt +4 -1
  20. data/ext/sources/bindings/javascript/package.json +1 -1
  21. data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
  22. data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
  23. data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  24. data/ext/sources/cmake/whisper-config.cmake.in +5 -40
  25. data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
  26. data/ext/sources/examples/addon.node/vad-example.js +2 -2
  27. data/ext/sources/examples/bench/bench.cpp +23 -18
  28. data/ext/sources/examples/cli/cli.cpp +129 -112
  29. data/ext/sources/examples/common-ggml.cpp +2 -0
  30. data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
  31. data/ext/sources/examples/miniaudio.h +4507 -2131
  32. data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
  33. data/ext/sources/examples/server/server.cpp +28 -15
  34. data/ext/sources/examples/talk-llama/CMakeLists.txt +8 -3
  35. data/ext/sources/examples/talk-llama/llama-adapter.cpp +5 -2
  36. data/ext/sources/examples/talk-llama/llama-adapter.h +7 -0
  37. data/ext/sources/examples/talk-llama/llama-arch.cpp +2378 -1988
  38. data/ext/sources/examples/talk-llama/llama-arch.h +109 -2
  39. data/ext/sources/examples/talk-llama/llama-batch.cpp +78 -34
  40. data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
  41. data/ext/sources/examples/talk-llama/llama-chat.cpp +100 -4
  42. data/ext/sources/examples/talk-llama/llama-chat.h +5 -0
  43. data/ext/sources/examples/talk-llama/llama-context.cpp +1088 -403
  44. data/ext/sources/examples/talk-llama/llama-context.h +70 -23
  45. data/ext/sources/examples/talk-llama/llama-cparams.h +6 -0
  46. data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
  47. data/ext/sources/examples/talk-llama/llama-grammar.cpp +295 -60
  48. data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
  49. data/ext/sources/examples/talk-llama/llama-graph.cpp +925 -155
  50. data/ext/sources/examples/talk-llama/llama-graph.h +234 -23
  51. data/ext/sources/examples/talk-llama/llama-hparams.cpp +79 -38
  52. data/ext/sources/examples/talk-llama/llama-hparams.h +118 -18
  53. data/ext/sources/examples/talk-llama/llama-impl.cpp +11 -7
  54. data/ext/sources/examples/talk-llama/llama-impl.h +14 -2
  55. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +8 -4
  56. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +405 -140
  57. data/ext/sources/examples/talk-llama/llama-kv-cache.h +24 -10
  58. data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
  59. data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
  60. data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
  61. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
  62. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +42 -31
  63. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
  64. data/ext/sources/examples/talk-llama/llama-mmap.cpp +197 -45
  65. data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
  66. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +606 -116
  67. data/ext/sources/examples/talk-llama/llama-model-loader.h +41 -5
  68. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +61 -44
  69. data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
  70. data/ext/sources/examples/talk-llama/llama-model.cpp +2756 -13643
  71. data/ext/sources/examples/talk-llama/llama-model.h +112 -18
  72. data/ext/sources/examples/talk-llama/llama-quant.cpp +582 -365
  73. data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +1409 -199
  74. data/ext/sources/examples/talk-llama/llama-sampler.h +42 -0
  75. data/ext/sources/examples/talk-llama/llama-vocab.cpp +248 -82
  76. data/ext/sources/examples/talk-llama/llama-vocab.h +50 -40
  77. data/ext/sources/examples/talk-llama/llama.cpp +802 -21
  78. data/ext/sources/examples/talk-llama/llama.h +210 -39
  79. data/ext/sources/examples/talk-llama/models/afmoe.cpp +190 -0
  80. data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
  81. data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
  82. data/ext/sources/examples/talk-llama/models/arctic.cpp +137 -0
  83. data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
  84. data/ext/sources/examples/talk-llama/models/baichuan.cpp +123 -0
  85. data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +143 -0
  86. data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +133 -0
  87. data/ext/sources/examples/talk-llama/models/bert.cpp +184 -0
  88. data/ext/sources/examples/talk-llama/models/bitnet.cpp +145 -0
  89. data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
  90. data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
  91. data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
  92. data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
  93. data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
  94. data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
  95. data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
  96. data/ext/sources/examples/talk-llama/models/dbrx.cpp +122 -0
  97. data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
  98. data/ext/sources/examples/talk-llama/models/deepseek.cpp +142 -0
  99. data/ext/sources/examples/talk-llama/models/deepseek2.cpp +262 -0
  100. data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
  101. data/ext/sources/examples/talk-llama/models/dots1.cpp +132 -0
  102. data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
  103. data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +148 -0
  104. data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
  105. data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
  106. data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
  107. data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
  108. data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
  109. data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +111 -0
  110. data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
  111. data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
  112. data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
  113. data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
  114. data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
  115. data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
  116. data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
  117. data/ext/sources/examples/talk-llama/models/glm4.cpp +157 -0
  118. data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
  119. data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
  120. data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +195 -0
  121. data/ext/sources/examples/talk-llama/models/granite.cpp +210 -0
  122. data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
  123. data/ext/sources/examples/talk-llama/models/grovemoe.cpp +139 -0
  124. data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
  125. data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +153 -0
  126. data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
  127. data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
  128. data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
  129. data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
  130. data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
  131. data/ext/sources/examples/talk-llama/models/lfm2.cpp +196 -0
  132. data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
  133. data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
  134. data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
  135. data/ext/sources/examples/talk-llama/models/llama.cpp +175 -0
  136. data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
  137. data/ext/sources/examples/talk-llama/models/mamba-base.cpp +289 -0
  138. data/ext/sources/examples/talk-llama/models/mamba.cpp +54 -0
  139. data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +129 -0
  140. data/ext/sources/examples/talk-llama/models/minicpm3.cpp +200 -0
  141. data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +123 -0
  142. data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
  143. data/ext/sources/examples/talk-llama/models/models.h +704 -0
  144. data/ext/sources/examples/talk-llama/models/modern-bert.cpp +109 -0
  145. data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
  146. data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +162 -0
  147. data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
  148. data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
  149. data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
  150. data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
  151. data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
  152. data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
  153. data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
  154. data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
  155. data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
  156. data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
  157. data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
  158. data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
  159. data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
  160. data/ext/sources/examples/talk-llama/models/plamo2.cpp +320 -0
  161. data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
  162. data/ext/sources/examples/talk-llama/models/plm.cpp +169 -0
  163. data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
  164. data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
  165. data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
  166. data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
  167. data/ext/sources/examples/talk-llama/models/qwen3.cpp +120 -0
  168. data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
  169. data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
  170. data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +131 -0
  171. data/ext/sources/examples/talk-llama/models/qwen3next.cpp +525 -0
  172. data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +140 -0
  173. data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +132 -0
  174. data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
  175. data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
  176. data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +164 -0
  177. data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
  178. data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
  179. data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +137 -0
  180. data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
  181. data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
  182. data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
  183. data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
  184. data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
  185. data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
  186. data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
  187. data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
  188. data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
  189. data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
  190. data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
  191. data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
  192. data/ext/sources/examples/talk-llama/unicode.cpp +121 -79
  193. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
  194. data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
  195. data/ext/sources/ggml/CMakeLists.txt +90 -56
  196. data/ext/sources/ggml/include/ggml-alloc.h +9 -0
  197. data/ext/sources/ggml/include/ggml-backend.h +5 -2
  198. data/ext/sources/ggml/include/ggml-cann.h +1 -1
  199. data/ext/sources/ggml/include/ggml-cpu.h +6 -0
  200. data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
  201. data/ext/sources/ggml/include/ggml-openvino.h +37 -0
  202. data/ext/sources/ggml/include/ggml-opt.h +1 -1
  203. data/ext/sources/ggml/include/ggml-rpc.h +14 -12
  204. data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
  205. data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
  206. data/ext/sources/ggml/include/ggml.h +246 -21
  207. data/ext/sources/ggml/src/CMakeLists.txt +85 -11
  208. data/ext/sources/ggml/src/ggml-alloc.c +128 -50
  209. data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
  210. data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
  211. data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
  212. data/ext/sources/ggml/src/ggml-backend-reg.cpp +54 -88
  213. data/ext/sources/ggml/src/ggml-backend.cpp +76 -23
  214. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +18 -4
  215. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +11 -11
  216. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +58 -46
  217. data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +139 -48
  218. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2427 -1785
  219. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -362
  220. data/ext/sources/ggml/src/ggml-cann/common.h +285 -211
  221. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +663 -831
  222. data/ext/sources/ggml/src/ggml-common.h +11 -0
  223. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +170 -95
  224. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -18
  225. data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
  226. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
  227. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  228. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +513 -27
  229. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +4192 -992
  230. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  231. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  232. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1761 -49
  233. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
  234. data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  235. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
  236. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
  237. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +124 -24
  238. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +157 -28
  239. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
  240. data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
  241. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +8 -3
  242. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +251 -80
  243. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +19 -0
  244. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +587 -119
  245. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
  246. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1093 -194
  247. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1284 -203
  248. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
  249. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +1519 -527
  250. data/ext/sources/ggml/src/ggml-cpu/ops.h +6 -4
  251. data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
  252. data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
  253. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +3632 -781
  254. data/ext/sources/ggml/src/ggml-cpu/repack.h +129 -4
  255. data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  256. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +152 -46
  257. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
  258. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +152 -1
  259. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
  260. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +140 -0
  261. data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
  262. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
  263. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
  264. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +132 -6
  265. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
  266. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +33 -31
  267. data/ext/sources/ggml/src/ggml-cuda/common.cuh +474 -85
  268. data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
  269. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
  270. data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  271. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +342 -246
  272. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
  273. data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
  274. data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  275. data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
  276. data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
  277. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +98 -74
  278. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +973 -665
  279. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
  280. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1255 -0
  281. data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +33 -40
  282. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +40 -18
  283. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  284. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +206 -45
  285. data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
  286. data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
  287. data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
  288. data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  289. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1688 -302
  290. data/ext/sources/ggml/src/ggml-cuda/mean.cu +12 -10
  291. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +908 -48
  292. data/ext/sources/ggml/src/ggml-cuda/mmf.cu +88 -20
  293. data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +502 -90
  294. data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
  295. data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
  296. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
  297. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +532 -193
  298. data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +460 -104
  299. data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +5 -2
  300. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +360 -122
  301. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +2 -1
  302. data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
  303. data/ext/sources/ggml/src/ggml-cuda/pad.cu +73 -39
  304. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +152 -1
  305. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
  306. data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
  307. data/ext/sources/ggml/src/ggml-cuda/rope.cu +364 -149
  308. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
  309. data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
  310. data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
  311. data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
  312. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +163 -41
  313. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  314. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  315. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +68 -50
  316. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
  317. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
  318. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
  319. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
  320. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
  321. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
  322. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
  323. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
  324. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  325. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  326. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  327. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  328. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  329. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  330. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  331. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  332. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  333. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +22 -4
  334. data/ext/sources/ggml/src/ggml-cuda/top-k.cu +95 -0
  335. data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
  336. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +275 -119
  337. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -7
  338. data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
  339. data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
  340. data/ext/sources/ggml/src/ggml-cuda/unary.cu +160 -11
  341. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +38 -0
  342. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
  343. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
  344. data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
  345. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +22 -1
  346. data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
  347. data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  348. data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3325 -0
  349. data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +46 -0
  350. data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +813 -0
  351. data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  352. data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +891 -0
  353. data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  354. data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
  355. data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +713 -0
  356. data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
  357. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  358. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  359. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  360. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  361. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  362. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  363. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +155 -0
  364. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +63 -0
  365. data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  366. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  367. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
  368. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  369. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
  370. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  371. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  372. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  373. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  374. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  375. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  376. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  377. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  378. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  379. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +26 -0
  380. data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1199 -0
  381. data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2670 -0
  382. data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +497 -0
  383. data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
  384. data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +419 -0
  385. data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
  386. data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  387. data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +382 -0
  388. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  389. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  390. data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  391. data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  392. data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
  393. data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  394. data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
  395. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -13
  396. data/ext/sources/ggml/src/ggml-impl.h +129 -6
  397. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
  398. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +15 -4
  399. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
  400. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +173 -34
  401. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +912 -344
  402. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +124 -59
  403. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +588 -144
  404. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +396 -23
  405. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1724 -421
  406. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +16 -3
  407. data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +333 -114
  408. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3050 -1539
  409. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  410. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +30 -1
  411. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4279 -497
  412. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
  413. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
  414. data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  415. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +267 -0
  416. data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  417. data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  418. data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  419. data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  420. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
  421. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  422. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  423. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  424. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  425. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  426. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  427. data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  428. data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  429. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  430. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  431. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  432. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  433. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  434. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  435. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  436. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  437. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  438. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  439. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  440. data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
  441. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  442. data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  443. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  444. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
  445. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  446. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  447. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
  448. data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
  449. data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  450. data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  451. data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  452. data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  453. data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  454. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
  455. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
  456. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +39 -0
  457. data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  458. data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
  459. data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  460. data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
  461. data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  462. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
  463. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  464. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
  465. data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
  466. data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  467. data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  468. data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  469. data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  470. data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  471. data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  472. data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  473. data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  474. data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  475. data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  476. data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  477. data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  478. data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  479. data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  480. data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  481. data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  482. data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  483. data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
  484. data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  485. data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  486. data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  487. data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  488. data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  489. data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  490. data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
  491. data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
  492. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
  493. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
  494. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  495. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  496. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  497. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  498. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  499. data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
  500. data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  501. data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
  502. data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
  503. data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
  504. data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
  505. data/ext/sources/ggml/src/ggml-quants.c +96 -5
  506. data/ext/sources/ggml/src/ggml-quants.h +3 -0
  507. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
  508. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +59 -87
  509. data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +81 -0
  510. data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
  511. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +7 -0
  512. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -29
  513. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
  514. data/ext/sources/ggml/src/ggml-sycl/common.hpp +427 -20
  515. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
  516. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +103 -1
  517. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
  518. data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  519. data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  520. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
  521. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
  522. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +867 -50
  523. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +401 -358
  524. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +12 -2
  525. data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
  526. data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
  527. data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
  528. data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
  529. data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
  530. data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
  531. data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
  532. data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
  533. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +645 -155
  534. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
  535. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +221 -66
  536. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
  537. data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
  538. data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
  539. data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
  540. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  541. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  542. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +5 -0
  543. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
  544. data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  545. data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  546. data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
  547. data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
  548. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +457 -281
  549. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
  550. data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
  551. data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
  552. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
  553. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
  554. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  555. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  556. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  557. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  558. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  559. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  560. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  561. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  562. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  563. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  564. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  565. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
  566. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
  567. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
  568. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
  569. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
  570. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
  571. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
  572. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
  573. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
  574. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
  575. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
  576. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
  577. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
  578. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
  579. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
  580. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
  581. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
  582. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
  583. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
  584. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
  585. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
  586. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
  587. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
  588. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
  589. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
  590. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
  591. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
  592. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
  593. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
  594. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
  595. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
  596. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
  597. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
  598. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
  599. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
  600. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
  601. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +71 -0
  602. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
  603. data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  604. data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  605. data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  606. data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  607. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  608. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  609. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  610. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  611. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  612. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  613. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  614. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  615. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  616. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  617. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  618. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  619. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  620. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  621. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  622. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  623. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  624. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  625. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  626. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  627. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  628. data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  629. data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  630. data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  631. data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  632. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  633. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  634. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  635. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  636. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  637. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  638. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  639. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  640. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  641. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  642. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  643. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  644. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  645. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +39 -19
  646. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5994 -3055
  647. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  648. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +18 -10
  649. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  650. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  651. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  652. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  653. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  654. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  655. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  656. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  657. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  658. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  659. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  660. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  661. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
  662. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  663. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  664. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  665. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
  666. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  667. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  668. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  669. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  670. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  671. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  672. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  673. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  674. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
  675. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  676. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  677. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  678. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  679. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
  680. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  681. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
  682. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
  683. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
  684. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  685. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  686. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  687. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  688. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  689. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  690. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  691. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  692. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  693. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  694. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  695. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  696. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  697. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
  698. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  699. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  700. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  701. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
  702. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  703. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +386 -160
  704. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +82 -20
  705. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +400 -174
  706. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +123 -37
  707. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  708. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +10 -9
  709. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  710. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
  711. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  712. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  713. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  714. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  715. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  716. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  717. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
  718. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
  719. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
  720. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
  721. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
  722. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  723. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  724. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
  725. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
  726. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
  727. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
  728. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +13 -10
  729. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  730. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  731. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  732. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
  733. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +77 -29
  734. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  735. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
  736. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
  737. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
  738. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
  739. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
  740. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
  741. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
  742. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  743. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  744. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
  745. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
  746. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
  747. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
  748. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
  749. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
  750. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
  751. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +88 -105
  752. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +41 -26
  753. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
  754. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  755. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +92 -230
  756. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  757. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  758. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  759. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  760. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  761. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  762. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  763. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
  764. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  765. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
  766. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  767. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  768. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  769. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  770. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +49 -4
  771. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  772. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  773. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  774. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  775. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  776. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +8 -49
  777. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +8 -32
  778. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +8 -32
  779. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +33 -0
  780. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +8 -38
  781. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  782. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  783. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  784. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  785. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  786. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  787. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  788. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  789. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
  790. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  791. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  792. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  793. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  794. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  795. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  796. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  797. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  798. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  799. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  800. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  801. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  802. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
  803. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  804. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  805. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  806. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  807. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
  808. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  809. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  810. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  811. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
  812. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  813. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
  814. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
  815. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +384 -180
  816. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  817. data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
  818. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1374 -0
  819. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +2544 -726
  820. data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  821. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  822. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  823. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  824. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
  825. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
  826. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  827. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  828. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  829. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +73 -15
  830. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  831. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
  832. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  833. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +72 -261
  834. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +766 -0
  835. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +147 -0
  836. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +196 -0
  837. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
  838. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  839. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  840. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
  841. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  842. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  843. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
  844. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  845. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  846. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  847. data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
  848. data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +91 -0
  849. data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  850. data/ext/sources/ggml/src/ggml.c +590 -64
  851. data/ext/sources/ggml/src/gguf.cpp +229 -44
  852. data/ext/sources/include/whisper.h +1 -0
  853. data/ext/sources/src/CMakeLists.txt +3 -1
  854. data/ext/sources/src/whisper.cpp +106 -62
  855. data/ext/sources/tests/CMakeLists.txt +2 -2
  856. data/ext/sources/tests/test-vad-full.cpp +4 -2
  857. data/ext/sources/tests/test-vad.cpp +1 -1
  858. data/extsources.rb +1 -0
  859. data/lib/whisper/model/uri.rb +17 -18
  860. data/sig/whisper.rbs +162 -4
  861. data/test/test_context_params.rb +82 -0
  862. data/test/test_params.rb +16 -8
  863. data/test/test_segment.rb +0 -1
  864. data/test/test_token.rb +81 -0
  865. data/test/test_vad.rb +1 -1
  866. data/test/test_vad_context.rb +100 -0
  867. data/test/test_vad_segment.rb +19 -0
  868. data/test/test_vad_segments.rb +16 -0
  869. data/test/test_whisper.rb +27 -0
  870. data/whispercpp.gemspec +1 -1
  871. metadata +502 -37
  872. data/ext/sources/build-xcframework.sh +0 -571
  873. data/ext/sources/examples/talk-llama/llama-sampling.h +0 -32
  874. data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
  875. data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
  876. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  877. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  878. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
  879. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
  880. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
  881. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  882. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
  883. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
  884. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
  885. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  886. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  887. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  888. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  889. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  890. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  891. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -53,13 +53,15 @@
53
53
 
54
54
  #define UNUSED GGML_UNUSED
55
55
 
56
+ // Needed for ggml_fp32_to_bf16_row()
57
+ #if defined(__AVX512BF16__)
56
58
  #if defined(_MSC_VER)
57
- #define m512bh(p) p
58
59
  #define m512i(p) p
59
60
  #else
60
- #define m512bh(p) (__m512bh)(p)
61
+ #include <immintrin.h>
61
62
  #define m512i(p) (__m512i)(p)
62
- #endif
63
+ #endif // defined(_MSC_VER)
64
+ #endif // defined(__AVX512BF16__)
63
65
 
64
66
  #if defined(__linux__) || \
65
67
  defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
@@ -124,6 +126,13 @@ static void ggml_print_backtrace_symbols(void) {
124
126
  int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
125
127
  backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
126
128
  }
129
+ #elif defined(__APPLE__)
130
+ #include <execinfo.h>
131
+ static void ggml_print_backtrace_symbols(void) {
132
+ void * trace[100];
133
+ int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
+ backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
+ }
127
136
  #else
128
137
  static void ggml_print_backtrace_symbols(void) {
129
138
  // platform not supported
@@ -135,6 +144,20 @@ void ggml_print_backtrace(void) {
135
144
  if (GGML_NO_BACKTRACE) {
136
145
  return;
137
146
  }
147
+ #if defined(__APPLE__)
148
+ // On macOS, fork+debugger attachment is problematic due to:
149
+ // 1. libdispatch "poisons" forked child processes
150
+ // 2. lldb has issues attaching to parent from forked child
151
+ // Use simple backtrace() instead to avoid Terminal.app crashes
152
+ const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
+ if (!GGML_BACKTRACE_LLDB) {
154
+ fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
+ fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
+ fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
+ ggml_print_backtrace_symbols();
158
+ return;
159
+ }
160
+ #endif
138
161
  #if defined(__linux__)
139
162
  FILE * f = fopen("/proc/self/status", "r");
140
163
  size_t size = 0;
@@ -695,6 +718,14 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
695
718
  .to_float = (ggml_to_float_t) dequantize_row_mxfp4,
696
719
  .from_float_ref = (ggml_from_float_t)quantize_row_mxfp4_ref,
697
720
  },
721
+ [GGML_TYPE_NVFP4] = {
722
+ .type_name = "nvfp4",
723
+ .blck_size = QK_NVFP4,
724
+ .type_size = sizeof(block_nvfp4),
725
+ .is_quantized = true,
726
+ .to_float = (ggml_to_float_t) dequantize_row_nvfp4,
727
+ .from_float_ref = (ggml_from_float_t)quantize_row_nvfp4_ref,
728
+ },
698
729
  [GGML_TYPE_Q2_K] = {
699
730
  .type_name = "q2_K",
700
731
  .blck_size = QK_K,
@@ -876,7 +907,8 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
876
907
  };
877
908
 
878
909
  const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
879
- GGML_ASSERT(type < GGML_TYPE_COUNT);
910
+ assert(type >= 0);
911
+ assert(type < GGML_TYPE_COUNT);
880
912
  return &type_traits[type];
881
913
  }
882
914
 
@@ -935,6 +967,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
935
967
  "COS",
936
968
  "SUM",
937
969
  "SUM_ROWS",
970
+ "CUMSUM",
938
971
  "MEAN",
939
972
  "ARGMAX",
940
973
  "COUNT_EQUAL",
@@ -989,7 +1022,10 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
989
1022
  "ARANGE",
990
1023
  "TIMESTEP_EMBEDDING",
991
1024
  "ARGSORT",
1025
+ "TOP_K",
992
1026
  "LEAKY_RELU",
1027
+ "TRI",
1028
+ "FILL",
993
1029
 
994
1030
  "FLASH_ATTN_EXT",
995
1031
  "FLASH_ATTN_BACK",
@@ -1002,6 +1038,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
1002
1038
  "RWKV_WKV6",
1003
1039
  "GATED_LINEAR_ATTN",
1004
1040
  "RWKV_WKV7",
1041
+ "SOLVE_TRI",
1042
+ "GATED_DELTA_NET",
1005
1043
 
1006
1044
  "UNARY",
1007
1045
 
@@ -1019,7 +1057,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
1019
1057
  "GLU",
1020
1058
  };
1021
1059
 
1022
- static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
1060
+ static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
1023
1061
 
1024
1062
  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1025
1063
  "none",
@@ -1039,6 +1077,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1039
1077
  "cos(x)",
1040
1078
  "Σx",
1041
1079
  "Σx_k",
1080
+ "cumsum(x)",
1042
1081
  "Σx/n",
1043
1082
  "argmax(x)",
1044
1083
  "count_equal(x)",
@@ -1093,7 +1132,10 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1093
1132
  "arange(start, stop, step)",
1094
1133
  "timestep_embedding(timesteps, dim, max_period)",
1095
1134
  "argsort(x)",
1135
+ "top_k(x)",
1096
1136
  "leaky_relu(x)",
1137
+ "tri(x)",
1138
+ "fill(x, c)",
1097
1139
 
1098
1140
  "flash_attn_ext(x)",
1099
1141
  "flash_attn_back(x)",
@@ -1106,6 +1148,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1106
1148
  "rwkv_wkv6(k, v, r, tf, td, s)",
1107
1149
  "gated_linear_attn(k, v, q, gate, s)",
1108
1150
  "rwkv_wkv7(r, w, k, v, a, b, s)",
1151
+ "A X = B, A triangular, solve X",
1152
+ "gated_delta_net(q, k, v, g, beta, s)",
1109
1153
 
1110
1154
  "unary(x)",
1111
1155
 
@@ -1123,7 +1167,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1123
1167
  "glu(x)",
1124
1168
  };
1125
1169
 
1126
- static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
1170
+ static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
1127
1171
 
1128
1172
  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1129
1173
 
@@ -1142,11 +1186,17 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1142
1186
  "HARDSWISH",
1143
1187
  "HARDSIGMOID",
1144
1188
  "EXP",
1189
+ "EXPM1",
1190
+ "SOFTPLUS",
1145
1191
  "GELU_ERF",
1192
+ "XIELU",
1193
+ "FLOOR",
1194
+ "CEIL",
1195
+ "ROUND",
1196
+ "TRUNC",
1146
1197
  };
1147
1198
 
1148
- static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
1149
-
1199
+ static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1150
1200
 
1151
1201
  static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1152
1202
  "REGLU",
@@ -1226,27 +1276,39 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1226
1276
  }
1227
1277
 
1228
1278
  int64_t ggml_blck_size(enum ggml_type type) {
1279
+ assert(type >= 0);
1280
+ assert(type < GGML_TYPE_COUNT);
1229
1281
  return type_traits[type].blck_size;
1230
1282
  }
1231
1283
 
1232
1284
  size_t ggml_type_size(enum ggml_type type) {
1285
+ assert(type >= 0);
1286
+ assert(type < GGML_TYPE_COUNT);
1233
1287
  return type_traits[type].type_size;
1234
1288
  }
1235
1289
 
1236
1290
  size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1291
+ assert(type >= 0);
1292
+ assert(type < GGML_TYPE_COUNT);
1237
1293
  assert(ne % ggml_blck_size(type) == 0);
1238
1294
  return ggml_type_size(type)*ne/ggml_blck_size(type);
1239
1295
  }
1240
1296
 
1241
1297
  double ggml_type_sizef(enum ggml_type type) {
1298
+ assert(type >= 0);
1299
+ assert(type < GGML_TYPE_COUNT);
1242
1300
  return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1243
1301
  }
1244
1302
 
1245
1303
  const char * ggml_type_name(enum ggml_type type) {
1246
- return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
1304
+ assert(type >= 0);
1305
+ assert(type < GGML_TYPE_COUNT);
1306
+ return type_traits[type].type_name;
1247
1307
  }
1248
1308
 
1249
1309
  bool ggml_is_quantized(enum ggml_type type) {
1310
+ assert(type >= 0);
1311
+ assert(type < GGML_TYPE_COUNT);
1250
1312
  return type_traits[type].is_quantized;
1251
1313
  }
1252
1314
 
@@ -1326,6 +1388,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1326
1388
  case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
1327
1389
  case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
1328
1390
  case GGML_FTYPE_MOSTLY_MXFP4: wtype = GGML_TYPE_MXFP4; break;
1391
+ case GGML_FTYPE_MOSTLY_NVFP4: wtype = GGML_TYPE_NVFP4; break;
1329
1392
  case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
1330
1393
  case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
1331
1394
  case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
@@ -1364,16 +1427,14 @@ static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1364
1427
  }
1365
1428
  next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1366
1429
  for (int i = 1; i < GGML_MAX_DIMS; i++) {
1367
- if (tensor->ne[i] != 1) {
1368
- if (i > n) {
1369
- if (tensor->nb[i] != next_nb) {
1370
- return false;
1371
- }
1372
- next_nb *= tensor->ne[i];
1373
- } else {
1374
- // this dimension does not need to be contiguous
1375
- next_nb = tensor->ne[i]*tensor->nb[i];
1430
+ if (i > n) {
1431
+ if (tensor->ne[i] != 1 && tensor->nb[i] != next_nb) {
1432
+ return false;
1376
1433
  }
1434
+ next_nb *= tensor->ne[i];
1435
+ } else {
1436
+ // this dimension does not need to be contiguous
1437
+ next_nb = tensor->ne[i]*tensor->nb[i];
1377
1438
  }
1378
1439
  }
1379
1440
  return true;
@@ -1457,6 +1518,10 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso
1457
1518
  (t0->nb[3] == t1->nb[3]);
1458
1519
  }
1459
1520
 
1521
+ bool ggml_is_view(const struct ggml_tensor * t) {
1522
+ return ggml_impl_is_view(t);
1523
+ }
1524
+
1460
1525
  // check if t1 can be represented as a repetition of t0
1461
1526
  bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1462
1527
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
@@ -1586,11 +1651,23 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
1586
1651
  const size_t cur_end = cur_offs + cur_size;
1587
1652
 
1588
1653
  // align to GGML_MEM_ALIGN
1654
+ GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
1589
1655
  size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1590
1656
 
1591
1657
  char * const mem_buffer = ctx->mem_buffer;
1592
1658
  struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1593
1659
 
1660
+ // integer overflow checks
1661
+ if (cur_end > SIZE_MAX - size_needed) {
1662
+ GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
1663
+ return NULL;
1664
+ }
1665
+ if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
1666
+ GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
1667
+ cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
1668
+ return NULL;
1669
+ }
1670
+
1594
1671
  if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1595
1672
  GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1596
1673
  __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
@@ -1659,6 +1736,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
1659
1736
  obj_alloc_size = data_size;
1660
1737
  }
1661
1738
 
1739
+ GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
1740
+
1662
1741
  struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1663
1742
  GGML_ASSERT(obj_new);
1664
1743
 
@@ -2254,6 +2333,30 @@ struct ggml_tensor * ggml_log_inplace(
2254
2333
  return ggml_log_impl(ctx, a, true);
2255
2334
  }
2256
2335
 
2336
+ struct ggml_tensor * ggml_expm1(
2337
+ struct ggml_context * ctx,
2338
+ struct ggml_tensor * a) {
2339
+ return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2340
+ }
2341
+
2342
+ struct ggml_tensor * ggml_expm1_inplace(
2343
+ struct ggml_context * ctx,
2344
+ struct ggml_tensor * a) {
2345
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2346
+ }
2347
+
2348
+ struct ggml_tensor * ggml_softplus(
2349
+ struct ggml_context * ctx,
2350
+ struct ggml_tensor * a) {
2351
+ return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2352
+ }
2353
+
2354
+ struct ggml_tensor * ggml_softplus_inplace(
2355
+ struct ggml_context * ctx,
2356
+ struct ggml_tensor * a) {
2357
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2358
+ }
2359
+
2257
2360
  // ggml_sin
2258
2361
 
2259
2362
  static struct ggml_tensor * ggml_sin_impl(
@@ -2337,6 +2440,21 @@ struct ggml_tensor * ggml_sum_rows(
2337
2440
  return result;
2338
2441
  }
2339
2442
 
2443
+ // ggml_cumsum
2444
+
2445
+ struct ggml_tensor * ggml_cumsum(
2446
+ struct ggml_context * ctx,
2447
+ struct ggml_tensor * a) {
2448
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
2449
+
2450
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2451
+
2452
+ result->op = GGML_OP_CUMSUM;
2453
+ result->src[0] = a;
2454
+
2455
+ return result;
2456
+ }
2457
+
2340
2458
  // ggml_mean
2341
2459
 
2342
2460
  struct ggml_tensor * ggml_mean(
@@ -2652,6 +2770,29 @@ struct ggml_tensor * ggml_silu_inplace(
2652
2770
  return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2653
2771
  }
2654
2772
 
2773
+ // ggml_xielu
2774
+
2775
+ struct ggml_tensor * ggml_xielu(
2776
+ struct ggml_context * ctx,
2777
+ struct ggml_tensor * a,
2778
+ float alpha_n,
2779
+ float alpha_p,
2780
+ float beta,
2781
+ float eps) {
2782
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2783
+
2784
+ ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2785
+ ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2786
+ ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2787
+ ggml_set_op_params_f32(result, 3, beta);
2788
+ ggml_set_op_params_f32(result, 4, eps);
2789
+
2790
+ result->op = GGML_OP_UNARY;
2791
+ result->src[0] = a;
2792
+
2793
+ return result;
2794
+ }
2795
+
2655
2796
  // ggml_silu_back
2656
2797
 
2657
2798
  struct ggml_tensor * ggml_silu_back(
@@ -2726,6 +2867,62 @@ static struct ggml_tensor * ggml_glu_impl(
2726
2867
  return result;
2727
2868
  }
2728
2869
 
2870
+ // ggml_floor
2871
+
2872
+ struct ggml_tensor * ggml_floor(
2873
+ struct ggml_context * ctx,
2874
+ struct ggml_tensor * a) {
2875
+ return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2876
+ }
2877
+
2878
+ struct ggml_tensor * ggml_floor_inplace(
2879
+ struct ggml_context * ctx,
2880
+ struct ggml_tensor * a) {
2881
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2882
+ }
2883
+
2884
+ // ggml_ceil
2885
+
2886
+ struct ggml_tensor * ggml_ceil(
2887
+ struct ggml_context * ctx,
2888
+ struct ggml_tensor * a) {
2889
+ return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2890
+ }
2891
+
2892
+ struct ggml_tensor * ggml_ceil_inplace(
2893
+ struct ggml_context * ctx,
2894
+ struct ggml_tensor * a) {
2895
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2896
+ }
2897
+
2898
+ // ggml_round
2899
+
2900
+ struct ggml_tensor * ggml_round(
2901
+ struct ggml_context * ctx,
2902
+ struct ggml_tensor * a) {
2903
+ return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2904
+ }
2905
+
2906
+ struct ggml_tensor * ggml_round_inplace(
2907
+ struct ggml_context * ctx,
2908
+ struct ggml_tensor * a) {
2909
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2910
+ }
2911
+
2912
+ // ggml_trunc
2913
+
2914
+ struct ggml_tensor * ggml_trunc(
2915
+ struct ggml_context * ctx,
2916
+ struct ggml_tensor * a) {
2917
+ return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2918
+ }
2919
+
2920
+ struct ggml_tensor * ggml_trunc_inplace(
2921
+ struct ggml_context * ctx,
2922
+ struct ggml_tensor * a) {
2923
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2924
+ }
2925
+
2729
2926
  struct ggml_tensor * ggml_glu(
2730
2927
  struct ggml_context * ctx,
2731
2928
  struct ggml_tensor * a,
@@ -3284,7 +3481,8 @@ struct ggml_tensor * ggml_cast(
3284
3481
 
3285
3482
  result->op = GGML_OP_CPY;
3286
3483
  result->src[0] = a;
3287
- result->src[1] = result;
3484
+ result->src[1] = result; // note: this self-reference might seem redundant, but it's actually needed by some
3485
+ // backends for consistency with ggml_cpy_impl() above
3288
3486
 
3289
3487
  return result;
3290
3488
  }
@@ -3829,6 +4027,15 @@ struct ggml_tensor * ggml_soft_max_ext(
3829
4027
  return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3830
4028
  }
3831
4029
 
4030
+ struct ggml_tensor * ggml_soft_max_ext_inplace(
4031
+ struct ggml_context * ctx,
4032
+ struct ggml_tensor * a,
4033
+ struct ggml_tensor * mask,
4034
+ float scale,
4035
+ float max_bias) {
4036
+ return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
4037
+ }
4038
+
3832
4039
  void ggml_soft_max_add_sinks(
3833
4040
  struct ggml_tensor * a,
3834
4041
  struct ggml_tensor * sinks) {
@@ -4672,6 +4879,8 @@ struct ggml_tensor * ggml_pool_1d(
4672
4879
  a->ne[2],
4673
4880
  a->ne[3],
4674
4881
  };
4882
+ GGML_ASSERT(ne[0] > 0);
4883
+
4675
4884
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4676
4885
 
4677
4886
  int32_t params[] = { op, k0, s0, p0 };
@@ -4702,6 +4911,9 @@ struct ggml_tensor * ggml_pool_2d(
4702
4911
  a->ne[2],
4703
4912
  a->ne[3],
4704
4913
  };
4914
+ GGML_ASSERT(ne[0] > 0);
4915
+ GGML_ASSERT(ne[1] > 0);
4916
+
4705
4917
  result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4706
4918
 
4707
4919
  int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
@@ -4748,6 +4960,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
4748
4960
  int64_t ne3,
4749
4961
  uint32_t mode) {
4750
4962
  GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4963
+ // TODO: implement antialias for modes other than bilinear
4964
+ GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4751
4965
 
4752
4966
  struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4753
4967
 
@@ -4802,6 +5016,18 @@ struct ggml_tensor * ggml_pad(
4802
5016
  return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4803
5017
  }
4804
5018
 
5019
+ // ggml_pad_circular
5020
+
5021
+ struct ggml_tensor * ggml_pad_circular(
5022
+ struct ggml_context * ctx,
5023
+ struct ggml_tensor * a,
5024
+ int p0,
5025
+ int p1,
5026
+ int p2,
5027
+ int p3) {
5028
+ return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5029
+ }
5030
+
4805
5031
  struct ggml_tensor * ggml_pad_ext(
4806
5032
  struct ggml_context * ctx,
4807
5033
  struct ggml_tensor * a,
@@ -4828,6 +5054,7 @@ struct ggml_tensor * ggml_pad_ext(
4828
5054
  ggml_set_op_params_i32(result, 5, rp2);
4829
5055
  ggml_set_op_params_i32(result, 6, lp3);
4830
5056
  ggml_set_op_params_i32(result, 7, rp3);
5057
+ ggml_set_op_params_i32(result, 8, 0); // not circular by default
4831
5058
 
4832
5059
 
4833
5060
  result->op = GGML_OP_PAD;
@@ -4836,6 +5063,25 @@ struct ggml_tensor * ggml_pad_ext(
4836
5063
  return result;
4837
5064
  }
4838
5065
 
5066
+ // ggml_pad_ext_circular
5067
+
5068
+ struct ggml_tensor * ggml_pad_ext_circular(
5069
+ struct ggml_context * ctx,
5070
+ struct ggml_tensor * a,
5071
+ int lp0,
5072
+ int rp0,
5073
+ int lp1,
5074
+ int rp1,
5075
+ int lp2,
5076
+ int rp2,
5077
+ int lp3,
5078
+ int rp3
5079
+ ) {
5080
+ struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5081
+ ggml_set_op_params_i32(result, 8, 1); // circular
5082
+ return result;
5083
+ }
5084
+
4839
5085
  // ggml_pad_reflect_1d
4840
5086
 
4841
5087
  struct ggml_tensor * ggml_pad_reflect_1d(
@@ -4895,28 +5141,6 @@ struct ggml_tensor * ggml_roll(
4895
5141
  return result;
4896
5142
  }
4897
5143
 
4898
- // ggml_arange
4899
-
4900
- struct ggml_tensor * ggml_arange(
4901
- struct ggml_context * ctx,
4902
- float start,
4903
- float stop,
4904
- float step) {
4905
- GGML_ASSERT(stop > start);
4906
-
4907
- const int64_t steps = (int64_t) ceilf((stop - start) / step);
4908
-
4909
- struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
4910
-
4911
- ggml_set_op_params_f32(result, 0, start);
4912
- ggml_set_op_params_f32(result, 1, stop);
4913
- ggml_set_op_params_f32(result, 2, step);
4914
-
4915
- result->op = GGML_OP_ARANGE;
4916
-
4917
- return result;
4918
- }
4919
-
4920
5144
  // ggml_timestep_embedding
4921
5145
 
4922
5146
  struct ggml_tensor * ggml_timestep_embedding(
@@ -4936,6 +5160,61 @@ struct ggml_tensor * ggml_timestep_embedding(
4936
5160
  return result;
4937
5161
  }
4938
5162
 
5163
+ // ggml_tri
5164
+
5165
+ struct ggml_tensor * ggml_tri(
5166
+ struct ggml_context * ctx,
5167
+ struct ggml_tensor * a,
5168
+ enum ggml_tri_type type) {
5169
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
5170
+
5171
+ GGML_ASSERT(ggml_is_contiguous(a));
5172
+ GGML_ASSERT(a->ne[0] == a->ne[1]);
5173
+
5174
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5175
+
5176
+ ggml_set_op_params_i32(result, 0, type);
5177
+
5178
+ result->op = GGML_OP_TRI;
5179
+ result->src[0] = a;
5180
+
5181
+ return result;
5182
+ }
5183
+
5184
+ // ggml_fill
5185
+
5186
+ static struct ggml_tensor * ggml_fill_impl(
5187
+ struct ggml_context * ctx,
5188
+ struct ggml_tensor * a,
5189
+ float c,
5190
+ bool inplace) {
5191
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
5192
+ GGML_ASSERT(ggml_is_contiguous(a));
5193
+
5194
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5195
+
5196
+ ggml_set_op_params_f32(result, 0, c);
5197
+
5198
+ result->op = GGML_OP_FILL;
5199
+ result->src[0] = a;
5200
+
5201
+ return result;
5202
+ }
5203
+
5204
+ struct ggml_tensor * ggml_fill(
5205
+ struct ggml_context * ctx,
5206
+ struct ggml_tensor * a,
5207
+ float c) {
5208
+ return ggml_fill_impl(ctx, a, c, false);
5209
+ }
5210
+
5211
+ struct ggml_tensor * ggml_fill_inplace(
5212
+ struct ggml_context * ctx,
5213
+ struct ggml_tensor * a,
5214
+ float c) {
5215
+ return ggml_fill_impl(ctx, a, c, true);
5216
+ }
5217
+
4939
5218
  // ggml_argsort
4940
5219
 
4941
5220
  struct ggml_tensor * ggml_argsort(
@@ -4943,6 +5222,7 @@ struct ggml_tensor * ggml_argsort(
4943
5222
  struct ggml_tensor * a,
4944
5223
  enum ggml_sort_order order) {
4945
5224
  GGML_ASSERT(a->ne[0] <= INT32_MAX);
5225
+
4946
5226
  struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
4947
5227
 
4948
5228
  ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -4953,9 +5233,9 @@ struct ggml_tensor * ggml_argsort(
4953
5233
  return result;
4954
5234
  }
4955
5235
 
4956
- // ggml_top_k
5236
+ // ggml_argsort_top_k
4957
5237
 
4958
- struct ggml_tensor * ggml_top_k(
5238
+ struct ggml_tensor * ggml_argsort_top_k(
4959
5239
  struct ggml_context * ctx,
4960
5240
  struct ggml_tensor * a,
4961
5241
  int k) {
@@ -4971,6 +5251,44 @@ struct ggml_tensor * ggml_top_k(
4971
5251
  return result;
4972
5252
  }
4973
5253
 
5254
+ // ggml_top_k
5255
+
5256
+ struct ggml_tensor * ggml_top_k(
5257
+ struct ggml_context * ctx,
5258
+ struct ggml_tensor * a,
5259
+ int k) {
5260
+ GGML_ASSERT(a->ne[0] >= k);
5261
+
5262
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5263
+
5264
+ result->op = GGML_OP_TOP_K;
5265
+ result->src[0] = a;
5266
+
5267
+ return result;
5268
+ }
5269
+
5270
+ // ggml_arange
5271
+
5272
+ struct ggml_tensor * ggml_arange(
5273
+ struct ggml_context * ctx,
5274
+ float start,
5275
+ float stop,
5276
+ float step) {
5277
+ GGML_ASSERT(stop > start);
5278
+
5279
+ const int64_t steps = (int64_t) ceilf((stop - start) / step);
5280
+
5281
+ struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5282
+
5283
+ ggml_set_op_params_f32(result, 0, start);
5284
+ ggml_set_op_params_f32(result, 1, stop);
5285
+ ggml_set_op_params_f32(result, 2, step);
5286
+
5287
+ result->op = GGML_OP_ARANGE;
5288
+
5289
+ return result;
5290
+ }
5291
+
4974
5292
  // ggml_flash_attn_ext
4975
5293
 
4976
5294
  struct ggml_tensor * ggml_flash_attn_ext(
@@ -4990,8 +5308,6 @@ struct ggml_tensor * ggml_flash_attn_ext(
4990
5308
 
4991
5309
  if (mask) {
4992
5310
  GGML_ASSERT(ggml_is_contiguous(mask));
4993
- GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
4994
- "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
4995
5311
  //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
4996
5312
 
4997
5313
  GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
@@ -5473,7 +5789,7 @@ static struct ggml_tensor * ggml_unary_impl(
5473
5789
  struct ggml_tensor * a,
5474
5790
  enum ggml_unary_op op,
5475
5791
  bool inplace) {
5476
- GGML_ASSERT(ggml_is_contiguous_1(a));
5792
+ GGML_ASSERT(ggml_is_contiguous_rows(a));
5477
5793
 
5478
5794
  struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5479
5795
 
@@ -5790,6 +6106,92 @@ struct ggml_tensor * ggml_opt_step_sgd(
5790
6106
  return result;
5791
6107
  }
5792
6108
 
6109
+ // ggml_solve_tri
6110
+
6111
+ struct ggml_tensor * ggml_solve_tri(
6112
+ struct ggml_context * ctx,
6113
+ struct ggml_tensor * a,
6114
+ struct ggml_tensor * b,
6115
+ bool left,
6116
+ bool lower,
6117
+ bool uni) {
6118
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
6119
+ GGML_ASSERT(b->type == GGML_TYPE_F32);
6120
+
6121
+ // A must be square and lower triangular
6122
+ GGML_ASSERT(a->ne[0] == a->ne[1]);
6123
+ // B must have same outer dimension as A
6124
+ GGML_ASSERT(a->ne[1] == b->ne[1]);
6125
+
6126
+ // batch dimensions must be equal
6127
+ GGML_ASSERT(a->ne[2] == b->ne[2]);
6128
+ GGML_ASSERT(a->ne[3] == b->ne[3]);
6129
+
6130
+ GGML_ASSERT(ggml_is_contiguous(a));
6131
+ GGML_ASSERT(ggml_is_contiguous(b));
6132
+
6133
+ GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6134
+
6135
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6136
+
6137
+ result->op = GGML_OP_SOLVE_TRI;
6138
+ result->src[0] = a;
6139
+ result->src[1] = b;
6140
+
6141
+ return result;
6142
+ }
6143
+
6144
+ // ggml_gated_delta_net
6145
+
6146
+ struct ggml_tensor * ggml_gated_delta_net(
6147
+ struct ggml_context * ctx,
6148
+ struct ggml_tensor * q,
6149
+ struct ggml_tensor * k,
6150
+ struct ggml_tensor * v,
6151
+ struct ggml_tensor * g,
6152
+ struct ggml_tensor * beta,
6153
+ struct ggml_tensor * state) {
6154
+ GGML_ASSERT(ggml_is_contiguous_rows(q));
6155
+ GGML_ASSERT(ggml_is_contiguous_rows(k));
6156
+ GGML_ASSERT(ggml_is_contiguous_rows(v));
6157
+ GGML_ASSERT(ggml_is_contiguous(g));
6158
+ GGML_ASSERT(ggml_is_contiguous(beta));
6159
+ GGML_ASSERT(ggml_is_contiguous(state));
6160
+
6161
+ GGML_ASSERT(q->type == GGML_TYPE_F32);
6162
+ GGML_ASSERT(k->type == GGML_TYPE_F32);
6163
+ GGML_ASSERT(v->type == GGML_TYPE_F32);
6164
+ GGML_ASSERT(g->type == GGML_TYPE_F32);
6165
+ GGML_ASSERT(beta->type == GGML_TYPE_F32);
6166
+ GGML_ASSERT(state->type == GGML_TYPE_F32);
6167
+
6168
+ const int64_t S_v = v->ne[0];
6169
+ const int64_t H = v->ne[1];
6170
+ const int64_t n_tokens = v->ne[2];
6171
+ const int64_t n_seqs = v->ne[3];
6172
+
6173
+ // gate: scalar [1, H, T, B] or vector [S_v, H, T, B] (KDA)
6174
+ GGML_ASSERT(g->ne[0] == 1 || g->ne[0] == S_v);
6175
+ GGML_ASSERT(beta->ne[0] == 1);
6176
+
6177
+ GGML_ASSERT(ggml_nelements(state) == S_v * S_v * H * n_seqs);
6178
+
6179
+ // concat output and new_state into a single tensor
6180
+ // output: S_v * H * n_tokens * n_seqs, state: S_v * S_v * H * n_seqs
6181
+ const int64_t ne[4] = { S_v * H, n_tokens * n_seqs + S_v * n_seqs, 1, 1 };
6182
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6183
+
6184
+ result->op = GGML_OP_GATED_DELTA_NET;
6185
+ result->src[0] = q;
6186
+ result->src[1] = k;
6187
+ result->src[2] = v;
6188
+ result->src[3] = g;
6189
+ result->src[4] = beta;
6190
+ result->src[5] = state;
6191
+
6192
+ return result;
6193
+ }
6194
+
5793
6195
  ////////////////////////////////////////////////////////////////////////////////
5794
6196
 
5795
6197
  struct ggml_hash_set ggml_hash_set_new(size_t size) {
@@ -6251,7 +6653,7 @@ static void ggml_compute_backward(
6251
6653
  case GGML_OP_DIAG_MASK_INF: {
6252
6654
  if (src0_needs_grads) {
6253
6655
  /* ggml_diag_mask_inf_impl() shouldn't be here */
6254
- /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
6656
+ /* ref: https://github.com/ggml-org/llama.cpp/pull/4203#discussion_r1412377992 */
6255
6657
  const int n_past = ((const int32_t *) tensor->op_params)[0];
6256
6658
  ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6257
6659
  }
@@ -6362,6 +6764,16 @@ static void ggml_compute_backward(
6362
6764
  ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6363
6765
  }
6364
6766
  } break;
6767
+ case GGML_UNARY_OP_EXPM1: {
6768
+ if (src0_needs_grads) {
6769
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6770
+ }
6771
+ } break;
6772
+ case GGML_UNARY_OP_SOFTPLUS: {
6773
+ if (src0_needs_grads) {
6774
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6775
+ }
6776
+ } break;
6365
6777
  default: {
6366
6778
  fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6367
6779
  __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
@@ -6405,20 +6817,35 @@ static void ggml_compute_backward(
6405
6817
  GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6406
6818
  }
6407
6819
 
6408
- static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
6409
- // check if already visited
6410
- size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6820
+ static size_t ggml_visit_parents_graph(struct ggml_cgraph * cgraph, struct ggml_tensor * node, bool compute) {
6821
+ if (node->op != GGML_OP_NONE && compute) {
6822
+ node->flags |= GGML_TENSOR_FLAG_COMPUTE;
6823
+ }
6824
+
6825
+ const size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6411
6826
  GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6412
- if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6413
- // This is the first time we see this node in the current graph.
6414
- cgraph->visited_hash_set.keys[node_hash_pos] = node;
6415
- ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6416
- cgraph->use_counts[node_hash_pos] = 0;
6417
- } else {
6827
+
6828
+ if (ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6418
6829
  // already visited
6830
+
6831
+ if (compute) {
6832
+ // the node is already in the graph: still propagate the compute flag to any sources that do not have it yet
6833
+ for (int i = 0; i < GGML_MAX_SRC; ++i) {
6834
+ struct ggml_tensor * src = node->src[i];
6835
+ if (src && ((src->flags & GGML_TENSOR_FLAG_COMPUTE) == 0)) {
6836
+ ggml_visit_parents_graph(cgraph, src, true);
6837
+ }
6838
+ }
6839
+ }
6840
+
6419
6841
  return node_hash_pos;
6420
6842
  }
6421
6843
 
6844
+ // This is the first time we see this node in the current graph.
6845
+ cgraph->visited_hash_set.keys[node_hash_pos] = node;
6846
+ ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6847
+ cgraph->use_counts[node_hash_pos] = 0;
6848
+
6422
6849
  for (int i = 0; i < GGML_MAX_SRC; ++i) {
6423
6850
  const int k =
6424
6851
  (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
@@ -6427,7 +6854,7 @@ static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor
6427
6854
 
6428
6855
  struct ggml_tensor * src = node->src[k];
6429
6856
  if (src) {
6430
- size_t src_hash_pos = ggml_visit_parents(cgraph, src);
6857
+ const size_t src_hash_pos = ggml_visit_parents_graph(cgraph, src, compute);
6431
6858
 
6432
6859
  // Update the use count for this operand.
6433
6860
  cgraph->use_counts[src_hash_pos]++;
@@ -6458,17 +6885,17 @@ static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor
6458
6885
  return node_hash_pos;
6459
6886
  }
6460
6887
 
6461
- static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
6888
+ static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand, bool compute) {
6462
6889
  if (!expand) {
6463
6890
  // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6464
6891
  ggml_graph_clear(cgraph);
6465
6892
  }
6466
6893
 
6467
- const int n0 = cgraph->n_nodes;
6894
+ const int n_old = cgraph->n_nodes;
6468
6895
 
6469
- ggml_visit_parents(cgraph, tensor);
6896
+ ggml_visit_parents_graph(cgraph, tensor, compute);
6470
6897
 
6471
- const int n_new = cgraph->n_nodes - n0;
6898
+ const int n_new = cgraph->n_nodes - n_old;
6472
6899
  GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6473
6900
 
6474
6901
  if (n_new > 0) {
@@ -6477,8 +6904,22 @@ static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_ten
6477
6904
  }
6478
6905
  }
6479
6906
 
6907
// Expand the forward graph with every tensor in `tensors`, requesting the
// compute flag only for the one at position `idx`.
//
// Each of the n_tensors candidates is handed to ggml_build_forward_impl with
// expand = true; the `compute` argument is true for tensors[idx] and false for
// all the others.
//
// Asserts that idx lies in [0, n_tensors). Returns tensors[idx].
struct ggml_tensor * ggml_build_forward_select(
        struct ggml_cgraph  * cgraph,
        struct ggml_tensor ** tensors,
        int                   n_tensors,
        int                   idx) {
    GGML_ASSERT(idx >= 0 && idx < n_tensors);

    int pos = 0;
    while (pos < n_tensors) {
        // only the selected tensor is visited with compute enabled
        const bool selected = (pos == idx);

        ggml_build_forward_impl(cgraph, tensors[pos], true, selected);
        pos++;
    }

    return tensors[idx];
}
6920
+
6480
6921
  void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6481
- ggml_build_forward_impl(cgraph, tensor, true);
6922
+ ggml_build_forward_impl(cgraph, tensor, true, true);
6482
6923
  }
6483
6924
 
6484
6925
  void ggml_build_backward_expand(
@@ -6872,6 +7313,82 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
6872
7313
  GGML_LOG_INFO("========================================\n");
6873
7314
  }
6874
7315
 
7316
+ static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7317
+ const int * idxs,
7318
+ int count,
7319
+ const struct ggml_tensor * tensor) {
7320
+ GGML_ASSERT(cgraph && idxs);
7321
+ for (int i = 0; i < count; ++i) {
7322
+ const int node_idx = idxs[i];
7323
+
7324
+ if (node_idx >= cgraph->n_nodes) {
7325
+ return -1;
7326
+ }
7327
+ if (cgraph->nodes[node_idx] == tensor) {
7328
+ return i;
7329
+ }
7330
+ }
7331
+ return -1;
7332
+ }
7333
+
7334
+ bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7335
+ const int * node_idxs,
7336
+ int count,
7337
+ const enum ggml_op * ops,
7338
+ const int * outputs,
7339
+ int num_outputs) {
7340
+ GGML_ASSERT(outputs && num_outputs > 0);
7341
+
7342
+ for (int i = 0; i < count; ++i) {
7343
+ if (node_idxs[i] >= cgraph->n_nodes) {
7344
+ return false;
7345
+ }
7346
+
7347
+ const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7348
+
7349
+ if (node->op != ops[i]) {
7350
+ return false;
7351
+ }
7352
+
7353
+ if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
7354
+ return false;
7355
+ }
7356
+
7357
+ if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7358
+ continue;
7359
+ }
7360
+
7361
+ if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7362
+ return false;
7363
+ }
7364
+
7365
+ int subgraph_uses = 0;
7366
+ for (int j = i + 1; j < count; ++j) {
7367
+ const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7368
+ for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7369
+ if (other_node->src[src_idx] == node) {
7370
+ subgraph_uses++;
7371
+ }
7372
+ }
7373
+ }
7374
+
7375
+ if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7376
+ return false;
7377
+ }
7378
+
7379
+ // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7380
+ struct ggml_tensor * view_src = node->view_src;
7381
+ while (view_src) {
7382
+ if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7383
+ return false;
7384
+ }
7385
+ view_src = view_src->view_src;
7386
+ }
7387
+ }
7388
+
7389
+ return true;
7390
+ }
7391
+
6875
7392
  // check if node is part of the graph
6876
7393
  static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
6877
7394
  if (cgraph == NULL) {
@@ -6918,7 +7435,7 @@ static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node,
6918
7435
  label);
6919
7436
  }
6920
7437
 
6921
- void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
7438
+ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename) {
6922
7439
  char color[16];
6923
7440
 
6924
7441
  FILE * fp = ggml_fopen(filename, "w");
@@ -6939,7 +7456,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
6939
7456
  if (node->flags & GGML_TENSOR_FLAG_PARAM) {
6940
7457
  snprintf(color, sizeof(color), "yellow");
6941
7458
  } else if (grad) {
6942
- if (ggml_graph_find(gf, node)) {
7459
+ if (ggml_graph_find(cgraph, node)) {
6943
7460
  snprintf(color, sizeof(color), "green");
6944
7461
  } else {
6945
7462
  snprintf(color, sizeof(color), "lightblue");
@@ -7091,8 +7608,11 @@ void ggml_quantize_free(void) {
7091
7608
 
7092
7609
  iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7093
7610
  iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7611
+ iq2xs_free_impl(GGML_TYPE_IQ2_S);
7094
7612
  iq2xs_free_impl(GGML_TYPE_IQ1_S);
7613
+ iq2xs_free_impl(GGML_TYPE_IQ1_M);
7095
7614
  iq3xs_free_impl(256);
7615
+ iq3xs_free_impl(512);
7096
7616
 
7097
7617
  ggml_critical_section_end();
7098
7618
  }
@@ -7136,6 +7656,7 @@ size_t ggml_quantize_chunk(
7136
7656
  case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7137
7657
  case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7138
7658
  case GGML_TYPE_MXFP4: result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7659
+ case GGML_TYPE_NVFP4: result = quantize_nvfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7139
7660
  case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7140
7661
  case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7141
7662
  case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
@@ -7181,6 +7702,11 @@ size_t ggml_quantize_chunk(
7181
7702
 
7182
7703
  ////////////////////////////////////////////////////////////////////////////////
7183
7704
 
7705
+ void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7706
+ *log_callback = g_logger_state.log_callback;
7707
+ *user_data = g_logger_state.log_callback_user_data;
7708
+ }
7709
+
7184
7710
  void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7185
7711
  g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7186
7712
  g_logger_state.log_callback_user_data = user_data;