whispercpp 1.3.2 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (664)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -3
  3. data/README.md +71 -14
  4. data/Rakefile +20 -7
  5. data/ext/.gitignore +4 -6
  6. data/ext/dependencies.rb +36 -24
  7. data/ext/extconf.rb +1 -1
  8. data/ext/options.rb +48 -184
  9. data/ext/ruby_whisper.c +18 -0
  10. data/ext/ruby_whisper_context.c +43 -12
  11. data/ext/ruby_whisper_model.c +1 -1
  12. data/ext/ruby_whisper_params.c +59 -27
  13. data/ext/ruby_whisper_segment.c +81 -4
  14. data/ext/ruby_whisper_transcribe.cpp +13 -7
  15. data/ext/ruby_whisper_vad_params.c +1 -1
  16. data/ext/sources/CMakeLists.txt +5 -1
  17. data/ext/sources/bindings/javascript/package.json +1 -1
  18. data/ext/sources/build-xcframework.sh +24 -0
  19. data/ext/sources/examples/CMakeLists.txt +1 -0
  20. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +120 -24
  21. data/ext/sources/examples/addon.node/addon.cpp +154 -35
  22. data/ext/sources/examples/addon.node/index.js +10 -5
  23. data/ext/sources/examples/addon.node/vad-example.js +132 -0
  24. data/ext/sources/examples/bench/bench.cpp +29 -18
  25. data/ext/sources/examples/bench.wasm/index-tmpl.html +10 -9
  26. data/ext/sources/examples/cli/cli.cpp +7 -4
  27. data/ext/sources/examples/command/command.cpp +58 -32
  28. data/ext/sources/examples/command.wasm/index-tmpl.html +5 -4
  29. data/ext/sources/examples/common-ggml.cpp +2 -0
  30. data/ext/sources/examples/common-whisper.cpp +14 -7
  31. data/ext/sources/examples/lsp/lsp.cpp +21 -17
  32. data/ext/sources/examples/quantize/quantize.cpp +3 -0
  33. data/ext/sources/examples/server/CMakeLists.txt +3 -0
  34. data/ext/sources/examples/server/server.cpp +193 -35
  35. data/ext/sources/examples/server.py +6 -1
  36. data/ext/sources/examples/stream/stream.cpp +10 -2
  37. data/ext/sources/examples/stream.wasm/emscripten.cpp +6 -6
  38. data/ext/sources/examples/stream.wasm/index-tmpl.html +82 -5
  39. data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -0
  40. data/ext/sources/examples/talk-llama/llama-adapter.cpp +101 -4
  41. data/ext/sources/examples/talk-llama/llama-adapter.h +6 -0
  42. data/ext/sources/examples/talk-llama/llama-arch.cpp +756 -15
  43. data/ext/sources/examples/talk-llama/llama-arch.h +85 -1
  44. data/ext/sources/examples/talk-llama/llama-batch.cpp +773 -272
  45. data/ext/sources/examples/talk-llama/llama-batch.h +126 -55
  46. data/ext/sources/examples/talk-llama/llama-chat.cpp +150 -13
  47. data/ext/sources/examples/talk-llama/llama-chat.h +8 -0
  48. data/ext/sources/examples/talk-llama/llama-context.cpp +814 -542
  49. data/ext/sources/examples/talk-llama/llama-context.h +68 -32
  50. data/ext/sources/examples/talk-llama/llama-cparams.cpp +1 -1
  51. data/ext/sources/examples/talk-llama/llama-cparams.h +4 -4
  52. data/ext/sources/examples/talk-llama/llama-graph.cpp +787 -440
  53. data/ext/sources/examples/talk-llama/llama-graph.h +333 -153
  54. data/ext/sources/examples/talk-llama/llama-hparams.cpp +128 -6
  55. data/ext/sources/examples/talk-llama/llama-hparams.h +80 -17
  56. data/ext/sources/examples/talk-llama/llama-impl.h +2 -0
  57. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +326 -0
  58. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.h +137 -0
  59. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +1248 -1967
  60. data/ext/sources/examples/talk-llama/llama-kv-cache.h +218 -345
  61. data/ext/sources/examples/talk-llama/llama-kv-cells.h +164 -52
  62. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +266 -0
  63. data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +139 -0
  64. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1154 -0
  65. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +182 -0
  66. data/ext/sources/examples/talk-llama/llama-memory.cpp +58 -0
  67. data/ext/sources/examples/talk-llama/llama-memory.h +94 -4
  68. data/ext/sources/examples/talk-llama/llama-mmap.cpp +1 -1
  69. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +44 -17
  70. data/ext/sources/examples/talk-llama/llama-model-loader.h +3 -2
  71. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +1 -0
  72. data/ext/sources/examples/talk-llama/llama-model.cpp +11377 -5248
  73. data/ext/sources/examples/talk-llama/llama-model.h +87 -9
  74. data/ext/sources/examples/talk-llama/llama-quant.cpp +137 -16
  75. data/ext/sources/examples/talk-llama/llama-sampling.cpp +226 -126
  76. data/ext/sources/examples/talk-llama/llama-vocab.cpp +502 -38
  77. data/ext/sources/examples/talk-llama/llama-vocab.h +46 -0
  78. data/ext/sources/examples/talk-llama/llama.cpp +76 -17
  79. data/ext/sources/examples/talk-llama/llama.h +176 -151
  80. data/ext/sources/examples/talk-llama/talk-llama.cpp +11 -6
  81. data/ext/sources/examples/talk-llama/unicode.cpp +212 -0
  82. data/ext/sources/examples/talk-llama/unicode.h +45 -0
  83. data/ext/sources/examples/vad-speech-segments/speech.cpp +6 -0
  84. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +6 -2
  85. data/ext/sources/examples/whisper.wasm/index-tmpl.html +17 -16
  86. data/ext/sources/ggml/CMakeLists.txt +106 -33
  87. data/ext/sources/ggml/cmake/common.cmake +24 -0
  88. data/ext/sources/ggml/cmake/ggml-config.cmake.in +132 -93
  89. data/ext/sources/ggml/include/ggml-backend.h +18 -2
  90. data/ext/sources/ggml/include/ggml-cpu.h +2 -0
  91. data/ext/sources/ggml/include/ggml-metal.h +1 -6
  92. data/ext/sources/ggml/include/ggml-opt.h +25 -6
  93. data/ext/sources/ggml/include/ggml-webgpu.h +19 -0
  94. data/ext/sources/ggml/include/ggml-zdnn.h +17 -0
  95. data/ext/sources/ggml/include/ggml.h +365 -21
  96. data/ext/sources/ggml/src/CMakeLists.txt +98 -25
  97. data/ext/sources/ggml/src/ggml-alloc.c +265 -141
  98. data/ext/sources/ggml/src/ggml-backend-impl.h +4 -1
  99. data/ext/sources/ggml/src/ggml-backend-reg.cpp +35 -13
  100. data/ext/sources/ggml/src/ggml-backend.cpp +266 -60
  101. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +4 -4
  102. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -4
  103. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +15 -0
  104. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
  105. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +903 -717
  106. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +143 -25
  107. data/ext/sources/ggml/src/ggml-cann/common.h +149 -2
  108. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +521 -78
  109. data/ext/sources/ggml/src/ggml-common.h +21 -0
  110. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +165 -50
  111. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +5 -3
  112. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  113. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  114. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +3650 -0
  115. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1891 -0
  116. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2160 -0
  117. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  118. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  119. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1897 -0
  120. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  121. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  122. data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  123. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  124. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  125. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +214 -0
  126. data/ext/sources/ggml/src/ggml-cpu/common.h +18 -3
  127. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +23 -7
  128. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +179 -110
  129. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +44 -33
  130. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  131. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +152 -18
  132. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +7 -1
  133. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +228 -98
  134. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +532 -1124
  135. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  136. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +3374 -2081
  137. data/ext/sources/ggml/src/ggml-cpu/ops.h +13 -8
  138. data/ext/sources/ggml/src/ggml-cpu/quants.c +1193 -0
  139. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +34 -0
  140. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1982 -0
  141. data/ext/sources/ggml/src/ggml-cpu/repack.h +120 -0
  142. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +367 -46
  143. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
  144. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  145. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  146. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  147. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +3 -3
  148. data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +1 -1
  149. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +272 -35
  150. data/ext/sources/ggml/src/ggml-cpu/vec.h +794 -142
  151. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +20 -16
  152. data/ext/sources/ggml/src/ggml-cuda/add-id.cu +58 -0
  153. data/ext/sources/ggml/src/ggml-cuda/add-id.cuh +3 -0
  154. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +330 -191
  155. data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +2 -0
  156. data/ext/sources/ggml/src/ggml-cuda/common.cuh +291 -81
  157. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +1 -4
  158. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  159. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  160. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  161. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  162. data/ext/sources/ggml/src/ggml-cuda/conv2d.cu +166 -0
  163. data/ext/sources/ggml/src/ggml-cuda/conv2d.cuh +5 -0
  164. data/ext/sources/ggml/src/ggml-cuda/convert.cu +117 -22
  165. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +20 -0
  166. data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +217 -0
  167. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +64 -307
  168. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  169. data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +14 -40
  170. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +499 -368
  171. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +142 -93
  172. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +755 -0
  173. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +3 -0
  174. data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +593 -0
  175. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +90 -50
  176. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +185 -198
  177. data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +2 -0
  178. data/ext/sources/ggml/src/ggml-cuda/getrows.cu +50 -39
  179. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +636 -222
  180. data/ext/sources/ggml/src/ggml-cuda/im2col.cu +196 -35
  181. data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +1 -0
  182. data/ext/sources/ggml/src/ggml-cuda/mean.cu +73 -0
  183. data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
  184. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +198 -45
  185. data/ext/sources/ggml/src/ggml-cuda/mmf.cu +123 -0
  186. data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +496 -0
  187. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +206 -57
  188. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +1262 -721
  189. data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +506 -0
  190. data/ext/sources/ggml/src/ggml-cuda/{mmv.cuh → mmvf.cuh} +4 -5
  191. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +64 -73
  192. data/ext/sources/ggml/src/ggml-cuda/norm.cu +284 -12
  193. data/ext/sources/ggml/src/ggml-cuda/norm.cuh +7 -0
  194. data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cu +49 -0
  195. data/ext/sources/ggml/src/ggml-cuda/opt-step-sgd.cuh +5 -0
  196. data/ext/sources/ggml/src/ggml-cuda/pad.cu +46 -23
  197. data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cu +91 -0
  198. data/ext/sources/ggml/src/ggml-cuda/pad_reflect_1d.cuh +5 -0
  199. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +12 -10
  200. data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +53 -0
  201. data/ext/sources/ggml/src/ggml-cuda/roll.cu +67 -0
  202. data/ext/sources/ggml/src/ggml-cuda/roll.cuh +5 -0
  203. data/ext/sources/ggml/src/ggml-cuda/rope.cu +21 -27
  204. data/ext/sources/ggml/src/ggml-cuda/scale.cu +14 -11
  205. data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +276 -0
  206. data/ext/sources/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  207. data/ext/sources/ggml/src/ggml-cuda/softcap.cu +34 -0
  208. data/ext/sources/ggml/src/ggml-cuda/softcap.cuh +5 -0
  209. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +126 -59
  210. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  211. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +322 -98
  212. data/ext/sources/ggml/src/ggml-cuda/sum.cu +6 -10
  213. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +23 -19
  214. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  215. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  216. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  217. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  218. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  219. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  220. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  221. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  222. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  223. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  224. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  225. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  226. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  227. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  228. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  229. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  230. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  231. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  232. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  233. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  234. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  235. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  236. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  237. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  238. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  239. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  240. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  241. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  242. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  243. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  244. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  245. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  246. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  247. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  248. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  249. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  250. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  251. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +21 -18
  252. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  253. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  254. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  255. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  256. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  257. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  258. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  259. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  260. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  261. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  262. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  263. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  264. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  265. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  266. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  267. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  268. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-mxfp4.cu +5 -0
  269. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +259 -0
  270. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +14 -0
  271. data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +3 -3
  272. data/ext/sources/ggml/src/ggml-cuda/unary.cu +179 -0
  273. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +15 -0
  274. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +92 -6
  275. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +110 -22
  276. data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
  277. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +58 -36
  278. data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +4 -3
  279. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +14 -2
  280. data/ext/sources/ggml/src/ggml-impl.h +229 -175
  281. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +21 -17
  282. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
  283. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  284. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
  285. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +600 -0
  286. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +1376 -0
  287. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +226 -0
  288. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +1308 -0
  289. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +163 -63
  290. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +3158 -0
  291. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +82 -0
  292. data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +718 -0
  293. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +3208 -1575
  294. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +18 -8
  295. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +2 -2
  296. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +32 -0
  297. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +4430 -792
  298. data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +107 -0
  299. data/ext/sources/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  300. data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  301. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  302. data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  303. data/ext/sources/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  304. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +84 -0
  305. data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  306. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  307. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +370 -0
  308. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  309. data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  310. data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  311. data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  312. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
  313. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
  314. data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +73 -0
  315. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  316. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +132 -0
  317. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +133 -0
  318. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  319. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  320. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  321. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  322. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  323. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  324. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  325. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  326. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  327. data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +80 -0
  328. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  329. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  330. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
  331. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  332. data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +189 -0
  333. data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  334. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +34 -13
  335. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +34 -13
  336. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +34 -13
  337. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +34 -13
  338. data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  339. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  340. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  341. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +20 -0
  342. data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  343. data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  344. data/ext/sources/ggml/src/ggml-opt.cpp +97 -41
  345. data/ext/sources/ggml/src/ggml-quants.c +117 -24
  346. data/ext/sources/ggml/src/ggml-quants.h +6 -0
  347. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +85 -62
  348. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
  349. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +2 -0
  350. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +9 -0
  351. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +6 -0
  352. data/ext/sources/ggml/src/ggml-sycl/common.hpp +20 -48
  353. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +13 -17
  354. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +21 -2
  355. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +116 -211
  356. data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +213 -1
  357. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  358. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +700 -1041
  359. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +20 -9
  360. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +17 -26
  361. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +2 -96
  362. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +393 -250
  363. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +1 -1
  364. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +32 -8
  365. data/ext/sources/ggml/src/ggml-sycl/quantize.hpp +133 -0
  366. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +38 -11
  367. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +125 -21
  368. data/ext/sources/ggml/src/ggml-sycl/set_rows.cpp +234 -0
  369. data/ext/sources/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  370. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  371. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  372. data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +4 -3
  373. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +105 -17
  374. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +36 -32
  375. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4198 -1145
  376. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  377. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +41 -1
  378. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  379. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +13 -4
  380. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +39 -29
  381. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +349 -0
  382. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  383. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +2 -2
  384. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +66 -12
  385. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +154 -0
  386. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +21 -0
  387. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
  388. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +2 -1
  389. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +6 -5
  390. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +4 -2
  391. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  392. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
  393. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  394. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
  395. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
  396. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  397. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
  398. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +69 -24
  399. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +60 -20
  400. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +98 -42
  401. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +64 -27
  402. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +74 -13
  403. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  404. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  405. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  406. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  407. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +4 -17
  408. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +19 -10
  409. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +25 -15
  410. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +19 -0
  411. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  412. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  413. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  414. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +18 -14
  415. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +126 -0
  416. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +65 -1
  417. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +11 -7
  418. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
  419. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +144 -531
  420. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +206 -38
  421. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp +556 -0
  422. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +12 -5
  423. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +15 -9
  424. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +111 -0
  425. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  426. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +24 -3
  427. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +53 -3
  428. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  429. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +64 -11
  430. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  431. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  432. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
  433. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  434. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  435. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  436. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
  437. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  438. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +29 -7
  439. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +4 -0
  440. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  441. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +38 -5
  442. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  443. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  444. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +4 -3
  445. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +101 -9
  446. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  447. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp +25 -0
  448. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +338 -71
  449. data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
  450. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1558 -0
  451. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +44 -0
  452. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +41 -0
  453. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
  454. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
  455. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
  456. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +124 -0
  457. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
  458. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  459. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +44 -0
  460. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +41 -0
  461. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +907 -0
  462. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +57 -0
  463. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +48 -0
  464. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +81 -0
  465. data/ext/sources/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  466. data/ext/sources/ggml/src/ggml-zdnn/common.hpp +59 -0
  467. data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +628 -0
  468. data/ext/sources/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  469. data/ext/sources/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  470. data/ext/sources/ggml/src/ggml-zdnn/utils.cpp +79 -0
  471. data/ext/sources/ggml/src/ggml-zdnn/utils.hpp +19 -0
  472. data/ext/sources/ggml/src/ggml.c +802 -142
  473. data/ext/sources/ggml/src/ggml.cpp +26 -0
  474. data/ext/sources/ggml/src/gguf.cpp +32 -4
  475. data/ext/sources/include/whisper.h +2 -0
  476. data/ext/sources/src/CMakeLists.txt +2 -0
  477. data/ext/sources/src/coreml/whisper-compat.h +10 -0
  478. data/ext/sources/src/coreml/whisper-compat.m +35 -0
  479. data/ext/sources/src/coreml/whisper-decoder-impl.m +1 -0
  480. data/ext/sources/src/coreml/whisper-encoder-impl.m +1 -0
  481. data/ext/sources/src/whisper.cpp +241 -215
  482. data/ext/sources/tests/CMakeLists.txt +8 -1
  483. data/ext/sources/tests/test-vad-full.cpp +3 -3
  484. data/ext/sources/tests/test-vad.cpp +2 -2
  485. data/extsources.rb +15 -9
  486. data/lib/whisper/context.rb +15 -0
  487. data/lib/whisper/model/uri.rb +57 -2
  488. data/lib/whisper/segment.rb +58 -0
  489. data/sig/whisper.rbs +75 -38
  490. data/{tests → test}/helper.rb +1 -12
  491. data/{tests → test}/test_model.rb +9 -0
  492. data/test/test_package.rb +51 -0
  493. data/{tests → test}/test_params.rb +8 -0
  494. data/test/test_segment.rb +146 -0
  495. data/{tests → test}/test_whisper.rb +70 -0
  496. data/whispercpp.gemspec +2 -3
  497. metadata +246 -191
  498. data/ext/sources/.dockerignore +0 -3
  499. data/ext/sources/.github/workflows/bindings-ruby.yml +0 -21
  500. data/ext/sources/ci/run.sh +0 -336
  501. data/ext/sources/close-issue.yml +0 -28
  502. data/ext/sources/ggml/include/ggml-kompute.h +0 -50
  503. data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  504. data/ext/sources/ggml/src/ggml-amx/common.h +0 -94
  505. data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
  506. data/ext/sources/ggml/src/ggml-amx/mmq.cpp +0 -2510
  507. data/ext/sources/ggml/src/ggml-amx/mmq.h +0 -17
  508. data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  509. data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  510. data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  511. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  512. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  513. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  514. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  515. data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  516. data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  517. data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  518. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -6431
  519. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  520. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13747
  521. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -357
  522. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
  523. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -365
  524. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
  525. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -482
  526. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -472
  527. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +0 -336
  528. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  529. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  530. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  531. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  532. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  533. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  534. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  535. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  536. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  537. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  538. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  539. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  540. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  541. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  542. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  543. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  544. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  545. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  546. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  547. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  548. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  549. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  550. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  551. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  552. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  553. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  554. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  555. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  556. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  557. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  558. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  559. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  560. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  561. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  562. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  563. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  564. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  565. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  566. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  567. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  568. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  569. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  570. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  571. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  572. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  573. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  574. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  575. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  576. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  577. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  578. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  579. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  580. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  581. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  582. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  583. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  584. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  585. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  586. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  587. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  588. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  589. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  590. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  591. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  592. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  593. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  594. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  595. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  596. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  597. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  598. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  599. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  600. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  601. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  602. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  603. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  604. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  605. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  606. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  607. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  608. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  609. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  610. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  611. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  612. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  613. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  614. data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  615. data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  616. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  617. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  618. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  619. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  620. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  621. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  622. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  623. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  624. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  625. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  626. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  627. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  628. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  629. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  630. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  631. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  632. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  633. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  634. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  635. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  636. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  637. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  638. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  639. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  640. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  641. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  642. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  643. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  644. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  645. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  646. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  647. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  648. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  649. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  650. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  651. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  652. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
  653. data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +0 -5998
  654. data/tests/test_package.rb +0 -46
  655. data/tests/test_segment.rb +0 -74
  656. /data/ext/sources/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  657. /data/ext/sources/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  658. /data/{tests → test}/jfk_reader/.gitignore +0 -0
  659. /data/{tests → test}/jfk_reader/extconf.rb +0 -0
  660. /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
  661. /data/{tests → test}/test_callback.rb +0 -0
  662. /data/{tests → test}/test_error.rb +0 -0
  663. /data/{tests → test}/test_vad.rb +0 -0
  664. /data/{tests → test}/test_vad_params.rb +0 -0
@@ -241,7 +241,16 @@
241
241
  #define GGML_ROPE_TYPE_MROPE 8
242
242
  #define GGML_ROPE_TYPE_VISION 24
243
243
 
244
+ #define GGML_MROPE_SECTIONS 4
245
+
244
246
  #define GGML_UNUSED(x) (void)(x)
247
+ #ifdef __CUDACC__
248
+ template<typename... Args>
249
+ __host__ __device__ constexpr inline void ggml_unused_vars_impl(Args&&...) noexcept {}
250
+ #define GGML_UNUSED_VARS(...) ggml_unused_vars_impl(__VA_ARGS__)
251
+ #else
252
+ #define GGML_UNUSED_VARS(...) do { (void)sizeof((__VA_ARGS__, 0)); } while(0)
253
+ #endif // __CUDACC__
245
254
 
246
255
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
247
256
 
@@ -275,19 +284,19 @@
275
284
  // GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
276
285
  //
277
286
  #define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
278
- const type prefix##0 = (pointer)->array[0]; \
287
+ const type prefix##0 = (pointer) ? (pointer)->array[0] : 0; \
279
288
  GGML_UNUSED(prefix##0);
280
289
  #define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
281
290
  GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
282
- const type prefix##1 = (pointer)->array[1]; \
291
+ const type prefix##1 = (pointer) ? (pointer)->array[1] : 0; \
283
292
  GGML_UNUSED(prefix##1);
284
293
  #define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
285
294
  GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
286
- const type prefix##2 = (pointer)->array[2]; \
295
+ const type prefix##2 = (pointer) ? (pointer)->array[2] : 0; \
287
296
  GGML_UNUSED(prefix##2);
288
297
  #define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
289
298
  GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
290
- const type prefix##3 = (pointer)->array[3]; \
299
+ const type prefix##3 = (pointer) ? (pointer)->array[3] : 0; \
291
300
  GGML_UNUSED(prefix##3);
292
301
 
293
302
  #define GGML_TENSOR_UNARY_OP_LOCALS \
@@ -304,6 +313,16 @@
304
313
  GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
305
314
  GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
306
315
 
316
+ #define GGML_TENSOR_TERNARY_OP_LOCALS \
317
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
318
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
319
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
320
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
321
+ GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
322
+ GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
323
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
324
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
325
+
307
326
  #define GGML_TENSOR_BINARY_OP_LOCALS01 \
308
327
  GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
309
328
  GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
@@ -314,6 +333,13 @@
314
333
  extern "C" {
315
334
  #endif
316
335
 
336
+ // Function type used in fatal error callbacks
337
+ typedef void (*ggml_abort_callback_t)(const char * error_message);
338
+
339
+ // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
340
+ // Returns the old callback for chaining
341
+ GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
342
+
317
343
  GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
318
344
  GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
319
345
 
@@ -388,7 +414,8 @@ extern "C" {
388
414
  // GGML_TYPE_IQ4_NL_4_4 = 36,
389
415
  // GGML_TYPE_IQ4_NL_4_8 = 37,
390
416
  // GGML_TYPE_IQ4_NL_8_8 = 38,
391
- GGML_TYPE_COUNT = 39,
417
+ GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
418
+ GGML_TYPE_COUNT = 40,
392
419
  };
393
420
 
394
421
  // precision
@@ -423,6 +450,7 @@ extern "C" {
423
450
  GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
424
451
  GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
425
452
  GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
453
+ GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
426
454
  };
427
455
 
428
456
  // available tensor operations:
@@ -431,6 +459,7 @@ extern "C" {
431
459
 
432
460
  GGML_OP_DUP,
433
461
  GGML_OP_ADD,
462
+ GGML_OP_ADD_ID,
434
463
  GGML_OP_ADD1,
435
464
  GGML_OP_ACC,
436
465
  GGML_OP_SUB,
@@ -470,6 +499,7 @@ extern "C" {
470
499
  GGML_OP_TRANSPOSE,
471
500
  GGML_OP_GET_ROWS,
472
501
  GGML_OP_GET_ROWS_BACK,
502
+ GGML_OP_SET_ROWS,
473
503
  GGML_OP_DIAG,
474
504
  GGML_OP_DIAG_MASK_INF,
475
505
  GGML_OP_DIAG_MASK_ZERO,
@@ -481,14 +511,18 @@ extern "C" {
481
511
  GGML_OP_CONV_TRANSPOSE_1D,
482
512
  GGML_OP_IM2COL,
483
513
  GGML_OP_IM2COL_BACK,
514
+ GGML_OP_IM2COL_3D,
515
+ GGML_OP_CONV_2D,
516
+ GGML_OP_CONV_3D,
484
517
  GGML_OP_CONV_2D_DW,
485
518
  GGML_OP_CONV_TRANSPOSE_2D,
486
519
  GGML_OP_POOL_1D,
487
520
  GGML_OP_POOL_2D,
488
521
  GGML_OP_POOL_2D_BACK,
489
- GGML_OP_UPSCALE, // nearest interpolate
522
+ GGML_OP_UPSCALE,
490
523
  GGML_OP_PAD,
491
524
  GGML_OP_PAD_REFLECT_1D,
525
+ GGML_OP_ROLL,
492
526
  GGML_OP_ARANGE,
493
527
  GGML_OP_TIMESTEP_EMBEDDING,
494
528
  GGML_OP_ARGSORT,
@@ -517,6 +551,9 @@ extern "C" {
517
551
  GGML_OP_CROSS_ENTROPY_LOSS,
518
552
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
519
553
  GGML_OP_OPT_STEP_ADAMW,
554
+ GGML_OP_OPT_STEP_SGD,
555
+
556
+ GGML_OP_GLU,
520
557
 
521
558
  GGML_OP_COUNT,
522
559
  };
@@ -541,6 +578,17 @@ extern "C" {
541
578
  GGML_UNARY_OP_COUNT,
542
579
  };
543
580
 
581
+ enum ggml_glu_op {
582
+ GGML_GLU_OP_REGLU,
583
+ GGML_GLU_OP_GEGLU,
584
+ GGML_GLU_OP_SWIGLU,
585
+ GGML_GLU_OP_SWIGLU_OAI,
586
+ GGML_GLU_OP_GEGLU_ERF,
587
+ GGML_GLU_OP_GEGLU_QUICK,
588
+
589
+ GGML_GLU_OP_COUNT,
590
+ };
591
+
544
592
  enum ggml_object_type {
545
593
  GGML_OBJECT_TYPE_TENSOR,
546
594
  GGML_OBJECT_TYPE_GRAPH,
@@ -626,6 +674,9 @@ extern "C" {
626
674
 
627
675
  // misc
628
676
 
677
+ GGML_API const char * ggml_version(void);
678
+ GGML_API const char * ggml_commit(void);
679
+
629
680
  GGML_API void ggml_time_init(void); // call this once at the beginning of the program
630
681
  GGML_API int64_t ggml_time_ms(void);
631
682
  GGML_API int64_t ggml_time_us(void);
@@ -656,6 +707,7 @@ extern "C" {
656
707
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
657
708
 
658
709
  GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
710
+ GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
659
711
  GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
660
712
 
661
713
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
@@ -686,6 +738,9 @@ extern "C" {
686
738
  // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
687
739
  GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
688
740
 
741
+ // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
742
+ GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
743
+
689
744
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
690
745
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
691
746
 
@@ -757,6 +812,7 @@ extern "C" {
757
812
  GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
758
813
 
759
814
  GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
815
+ GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
760
816
 
761
817
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
762
818
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
@@ -801,6 +857,13 @@ extern "C" {
801
857
  struct ggml_tensor * b,
802
858
  enum ggml_type type);
803
859
 
860
+ // dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
861
+ GGML_API struct ggml_tensor * ggml_add_id(
862
+ struct ggml_context * ctx,
863
+ struct ggml_tensor * a,
864
+ struct ggml_tensor * b,
865
+ struct ggml_tensor * ids);
866
+
804
867
  GGML_API struct ggml_tensor * ggml_add1(
805
868
  struct ggml_context * ctx,
806
869
  struct ggml_tensor * a,
@@ -935,6 +998,15 @@ extern "C" {
935
998
  struct ggml_tensor * a,
936
999
  struct ggml_tensor * b);
937
1000
 
1001
+ // repeat a to the specified shape
1002
+ GGML_API struct ggml_tensor * ggml_repeat_4d(
1003
+ struct ggml_context * ctx,
1004
+ struct ggml_tensor * a,
1005
+ int64_t ne0,
1006
+ int64_t ne1,
1007
+ int64_t ne2,
1008
+ int64_t ne3);
1009
+
938
1010
  // sums repetitions in a into shape of b
939
1011
  GGML_API struct ggml_tensor * ggml_repeat_back(
940
1012
  struct ggml_context * ctx,
@@ -1076,6 +1148,96 @@ extern "C" {
1076
1148
  struct ggml_context * ctx,
1077
1149
  struct ggml_tensor * a);
1078
1150
 
1151
+ // gated linear unit ops
1152
+ // A: n columns, r rows,
1153
+ // result is n / 2 columns, r rows,
1154
+ // expects gate in second half of row, unless swapped is true
1155
+ GGML_API struct ggml_tensor * ggml_glu(
1156
+ struct ggml_context * ctx,
1157
+ struct ggml_tensor * a,
1158
+ enum ggml_glu_op op,
1159
+ bool swapped);
1160
+
1161
+ GGML_API struct ggml_tensor * ggml_reglu(
1162
+ struct ggml_context * ctx,
1163
+ struct ggml_tensor * a);
1164
+
1165
+ GGML_API struct ggml_tensor * ggml_reglu_swapped(
1166
+ struct ggml_context * ctx,
1167
+ struct ggml_tensor * a);
1168
+
1169
+ GGML_API struct ggml_tensor * ggml_geglu(
1170
+ struct ggml_context * ctx,
1171
+ struct ggml_tensor * a);
1172
+
1173
+ GGML_API struct ggml_tensor * ggml_geglu_swapped(
1174
+ struct ggml_context * ctx,
1175
+ struct ggml_tensor * a);
1176
+
1177
+ GGML_API struct ggml_tensor * ggml_swiglu(
1178
+ struct ggml_context * ctx,
1179
+ struct ggml_tensor * a);
1180
+
1181
+ GGML_API struct ggml_tensor * ggml_swiglu_swapped(
1182
+ struct ggml_context * ctx,
1183
+ struct ggml_tensor * a);
1184
+
1185
+ GGML_API struct ggml_tensor * ggml_geglu_erf(
1186
+ struct ggml_context * ctx,
1187
+ struct ggml_tensor * a);
1188
+
1189
+ GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
1190
+ struct ggml_context * ctx,
1191
+ struct ggml_tensor * a);
1192
+
1193
+ GGML_API struct ggml_tensor * ggml_geglu_quick(
1194
+ struct ggml_context * ctx,
1195
+ struct ggml_tensor * a);
1196
+
1197
+ GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
1198
+ struct ggml_context * ctx,
1199
+ struct ggml_tensor * a);
1200
+
1201
+ // A: n columns, r rows,
1202
+ // B: n columns, r rows,
1203
+ GGML_API struct ggml_tensor * ggml_glu_split(
1204
+ struct ggml_context * ctx,
1205
+ struct ggml_tensor * a,
1206
+ struct ggml_tensor * b,
1207
+ enum ggml_glu_op op);
1208
+
1209
+ GGML_API struct ggml_tensor * ggml_reglu_split(
1210
+ struct ggml_context * ctx,
1211
+ struct ggml_tensor * a,
1212
+ struct ggml_tensor * b);
1213
+
1214
+ GGML_API struct ggml_tensor * ggml_geglu_split(
1215
+ struct ggml_context * ctx,
1216
+ struct ggml_tensor * a,
1217
+ struct ggml_tensor * b);
1218
+
1219
+ GGML_API struct ggml_tensor * ggml_swiglu_split(
1220
+ struct ggml_context * ctx,
1221
+ struct ggml_tensor * a,
1222
+ struct ggml_tensor * b);
1223
+
1224
+ GGML_API struct ggml_tensor * ggml_geglu_erf_split(
1225
+ struct ggml_context * ctx,
1226
+ struct ggml_tensor * a,
1227
+ struct ggml_tensor * b);
1228
+
1229
+ GGML_API struct ggml_tensor * ggml_geglu_quick_split(
1230
+ struct ggml_context * ctx,
1231
+ struct ggml_tensor * a,
1232
+ struct ggml_tensor * b);
1233
+
1234
+ GGML_API struct ggml_tensor * ggml_swiglu_oai(
1235
+ struct ggml_context * ctx,
1236
+ struct ggml_tensor * a,
1237
+ struct ggml_tensor * b,
1238
+ float alpha,
1239
+ float limit);
1240
+
1079
1241
  // normalize along rows
1080
1242
  GGML_API struct ggml_tensor * ggml_norm(
1081
1243
  struct ggml_context * ctx,
@@ -1175,6 +1337,19 @@ extern "C" {
1175
1337
  struct ggml_tensor * a,
1176
1338
  float s);
1177
1339
 
1340
+ // x = s * a + b
1341
+ GGML_API struct ggml_tensor * ggml_scale_bias(
1342
+ struct ggml_context * ctx,
1343
+ struct ggml_tensor * a,
1344
+ float s,
1345
+ float b);
1346
+
1347
+ GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
1348
+ struct ggml_context * ctx,
1349
+ struct ggml_tensor * a,
1350
+ float s,
1351
+ float b);
1352
+
1178
1353
  // b -> view(a,offset,nb1,nb2,3), return modified a
1179
1354
  GGML_API struct ggml_tensor * ggml_set(
1180
1355
  struct ggml_context * ctx,
@@ -1229,6 +1404,7 @@ extern "C" {
1229
1404
  struct ggml_tensor * a,
1230
1405
  struct ggml_tensor * b);
1231
1406
 
1407
+ // note: casting from f32 to i32 will discard the fractional part
1232
1408
  GGML_API struct ggml_tensor * ggml_cast(
1233
1409
  struct ggml_context * ctx,
1234
1410
  struct ggml_tensor * a,
@@ -1353,7 +1529,11 @@ extern "C" {
1353
1529
  struct ggml_context * ctx,
1354
1530
  struct ggml_tensor * a);
1355
1531
 
1356
- // supports 3D: a->ne[2] == b->ne[1]
1532
+ // supports 4D a:
1533
+ // a [n_embd, ne1, ne2, ne3]
1534
+ // b I32 [n_rows, ne2, ne3, 1]
1535
+ //
1536
+ // return [n_embd, n_rows, ne2, ne3]
1357
1537
  GGML_API struct ggml_tensor * ggml_get_rows(
1358
1538
  struct ggml_context * ctx,
1359
1539
  struct ggml_tensor * a, // data
@@ -1365,6 +1545,23 @@ extern "C" {
1365
1545
  struct ggml_tensor * b, // row indices
1366
1546
  struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
1367
1547
 
1548
+ // a TD [n_embd, ne1, ne2, ne3]
1549
+ // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
1550
+ // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
1551
+ //
1552
+ // undefined behavior if destination rows overlap
1553
+ //
1554
+ // broadcast:
1555
+ // ne2 % ne11 == 0
1556
+ // ne3 % ne12 == 0
1557
+ //
1558
+ // return view(a)
1559
+ GGML_API struct ggml_tensor * ggml_set_rows(
1560
+ struct ggml_context * ctx,
1561
+ struct ggml_tensor * a, // destination
1562
+ struct ggml_tensor * b, // source
1563
+ struct ggml_tensor * c); // row indices
1564
+
1368
1565
  GGML_API struct ggml_tensor * ggml_diag(
1369
1566
  struct ggml_context * ctx,
1370
1567
  struct ggml_tensor * a);
@@ -1402,8 +1599,14 @@ extern "C" {
1402
1599
  struct ggml_context * ctx,
1403
1600
  struct ggml_tensor * a);
1404
1601
 
1602
+ // a [ne0, ne01, ne02, ne03]
1603
+ // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
1604
+ //
1605
+ // broadcast:
1606
+ // ne02 % ne12 == 0
1607
+ // ne03 % ne13 == 0
1608
+ //
1405
1609
  // fused soft_max(a*scale + mask*(ALiBi slope))
1406
- // mask is optional
1407
1610
  // max_bias = 0.0f for no ALiBi
1408
1611
  GGML_API struct ggml_tensor * ggml_soft_max_ext(
1409
1612
  struct ggml_context * ctx,
@@ -1412,6 +1615,10 @@ extern "C" {
1412
1615
  float scale,
1413
1616
  float max_bias);
1414
1617
 
1618
+ GGML_API void ggml_soft_max_add_sinks(
1619
+ struct ggml_tensor * a,
1620
+ struct ggml_tensor * sinks);
1621
+
1415
1622
  GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
1416
1623
  struct ggml_context * ctx,
1417
1624
  struct ggml_tensor * a,
@@ -1470,7 +1677,7 @@ extern "C" {
1470
1677
  struct ggml_tensor * b,
1471
1678
  struct ggml_tensor * c,
1472
1679
  int n_dims,
1473
- int sections[4],
1680
+ int sections[GGML_MROPE_SECTIONS],
1474
1681
  int mode,
1475
1682
  int n_ctx_orig,
1476
1683
  float freq_base,
@@ -1496,6 +1703,22 @@ extern "C" {
1496
1703
  float beta_fast,
1497
1704
  float beta_slow);
1498
1705
 
1706
+ GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
1707
+ struct ggml_context * ctx,
1708
+ struct ggml_tensor * a,
1709
+ struct ggml_tensor * b,
1710
+ struct ggml_tensor * c,
1711
+ int n_dims,
1712
+ int sections[GGML_MROPE_SECTIONS],
1713
+ int mode,
1714
+ int n_ctx_orig,
1715
+ float freq_base,
1716
+ float freq_scale,
1717
+ float ext_factor,
1718
+ float attn_factor,
1719
+ float beta_fast,
1720
+ float beta_slow);
1721
+
1499
1722
  GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
1500
1723
  struct ggml_context * ctx,
1501
1724
  struct ggml_tensor * a,
@@ -1653,6 +1876,41 @@ extern "C" {
1653
1876
  int d0, // dilation dimension 0
1654
1877
  int d1); // dilation dimension 1
1655
1878
 
1879
+ GGML_API struct ggml_tensor * ggml_im2col_3d(
1880
+ struct ggml_context * ctx,
1881
+ struct ggml_tensor * a,
1882
+ struct ggml_tensor * b,
1883
+ int64_t IC,
1884
+ int s0, // stride width
1885
+ int s1, // stride height
1886
+ int s2, // stride depth
1887
+ int p0, // padding width
1888
+ int p1, // padding height
1889
+ int p2, // padding depth
1890
+ int d0, // dilation width
1891
+ int d1, // dilation height
1892
+ int d2, // dilation depth
1893
+ enum ggml_type dst_type);
1894
+
1895
+ // a: [OC*IC, KD, KH, KW]
1896
+ // b: [N*IC, ID, IH, IW]
1897
+ // result: [N*OC, OD, OH, OW]
1898
+ GGML_API struct ggml_tensor * ggml_conv_3d(
1899
+ struct ggml_context * ctx,
1900
+ struct ggml_tensor * a,
1901
+ struct ggml_tensor * b,
1902
+ int64_t IC,
1903
+ int s0, // stride width
1904
+ int s1, // stride height
1905
+ int s2, // stride depth
1906
+ int p0, // padding width
1907
+ int p1, // padding height
1908
+ int p2, // padding depth
1909
+ int d0, // dilation width
1910
+ int d1, // dilation height
1911
+ int d2 // dilation depth
1912
+ );
1913
+
1656
1914
  // kernel size is a->ne[0] x a->ne[1]
1657
1915
  // stride is equal to kernel size
1658
1916
  // padding is zero
@@ -1713,6 +1971,34 @@ extern "C" {
1713
1971
  struct ggml_tensor * b,
1714
1972
  int stride);
1715
1973
 
1974
+ GGML_API struct ggml_tensor * ggml_conv_2d_direct(
1975
+ struct ggml_context * ctx,
1976
+ struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
1977
+ struct ggml_tensor * b, // input data [W, H, C, N]
1978
+ int s0, // stride dimension 0
1979
+ int s1, // stride dimension 1
1980
+ int p0, // padding dimension 0
1981
+ int p1, // padding dimension 1
1982
+ int d0, // dilation dimension 0
1983
+ int d1); // dilation dimension 1
1984
+
1985
+ GGML_API struct ggml_tensor * ggml_conv_3d_direct(
1986
+ struct ggml_context * ctx,
1987
+ struct ggml_tensor * a, // kernel [KW, KH, KD, IC * OC]
1988
+ struct ggml_tensor * b, // input [W, H, D, C * N]
1989
+ int s0, // stride
1990
+ int s1,
1991
+ int s2,
1992
+ int p0, // padding
1993
+ int p1,
1994
+ int p2,
1995
+ int d0, // dilation
1996
+ int d1,
1997
+ int d2,
1998
+ int n_channels,
1999
+ int n_batch,
2000
+ int n_channels_out);
2001
+
1716
2002
  enum ggml_op_pool {
1717
2003
  GGML_OP_POOL_MAX,
1718
2004
  GGML_OP_POOL_AVG,
@@ -1755,6 +2041,12 @@ extern "C" {
1755
2041
  enum ggml_scale_mode {
1756
2042
  GGML_SCALE_MODE_NEAREST = 0,
1757
2043
  GGML_SCALE_MODE_BILINEAR = 1,
2044
+
2045
+ GGML_SCALE_MODE_COUNT
2046
+ };
2047
+
2048
+ enum ggml_scale_flag {
2049
+ GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
1758
2050
  };
1759
2051
 
1760
2052
  // interpolate
@@ -1767,14 +2059,26 @@ extern "C" {
1767
2059
 
1768
2060
  // interpolate
1769
2061
  // interpolate scale to specified dimensions
1770
- GGML_API struct ggml_tensor * ggml_upscale_ext(
2062
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
1771
2063
  struct ggml_context * ctx,
1772
2064
  struct ggml_tensor * a,
1773
2065
  int ne0,
1774
2066
  int ne1,
1775
2067
  int ne2,
1776
2068
  int ne3,
1777
- enum ggml_scale_mode mode);
2069
+ enum ggml_scale_mode mode),
2070
+ "use ggml_interpolate instead");
2071
+
2072
+ // Up- or downsamples the input to the specified size.
2073
+ // 2D scale modes (e.g. bilinear) are applied to the first two dimensions.
2074
+ GGML_API struct ggml_tensor * ggml_interpolate(
2075
+ struct ggml_context * ctx,
2076
+ struct ggml_tensor * a,
2077
+ int64_t ne0,
2078
+ int64_t ne1,
2079
+ int64_t ne2,
2080
+ int64_t ne3,
2081
+ uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
1778
2082
 
1779
2083
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1780
2084
  GGML_API struct ggml_tensor * ggml_pad(
@@ -1785,6 +2089,19 @@ extern "C" {
1785
2089
  int p2,
1786
2090
  int p3);
1787
2091
 
2092
+ GGML_API struct ggml_tensor * ggml_pad_ext(
2093
+ struct ggml_context * ctx,
2094
+ struct ggml_tensor * a,
2095
+ int lp0,
2096
+ int rp0,
2097
+ int lp1,
2098
+ int rp1,
2099
+ int lp2,
2100
+ int rp2,
2101
+ int lp3,
2102
+ int rp3
2103
+ );
2104
+
1788
2105
  // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
1789
2106
  GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
1790
2107
  struct ggml_context * ctx,
@@ -1792,6 +2109,17 @@ extern "C" {
1792
2109
  int p0,
1793
2110
  int p1);
1794
2111
 
2112
+ // Move tensor elements by an offset given for each dimension. Elements that
2113
+ // are shifted beyond the last position are wrapped around to the beginning.
2114
+ GGML_API struct ggml_tensor * ggml_roll(
2115
+ struct ggml_context * ctx,
2116
+ struct ggml_tensor * a,
2117
+ int shift0,
2118
+ int shift1,
2119
+ int shift2,
2120
+ int shift3);
2121
+
2122
+
1795
2123
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1796
2124
  // timesteps: [N,]
1797
2125
  // return: [N, dim]
@@ -1826,11 +2154,17 @@ extern "C" {
1826
2154
 
1827
2155
  #define GGML_KQ_MASK_PAD 64
1828
2156
 
1829
- // q: [n_embd_k, n_batch, n_head, 1]
1830
- // k: [n_embd_k, n_kv, n_head_kv, 1]
1831
- // v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
1832
- // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1833
- // res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
2157
+ // q: [n_embd_k, n_batch, n_head, ne3 ]
2158
+ // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
2159
+ // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
2160
+ // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
2161
+ // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
2162
+ //
2163
+ // broadcast:
2164
+ // n_head % n_head_kv == 0
2165
+ // n_head % ne32 == 0
2166
+ // ne3 % ne33 == 0
2167
+ //
1834
2168
  GGML_API struct ggml_tensor * ggml_flash_attn_ext(
1835
2169
  struct ggml_context * ctx,
1836
2170
  struct ggml_tensor * q,
@@ -1848,6 +2182,10 @@ extern "C" {
1848
2182
  GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
1849
2183
  const struct ggml_tensor * a);
1850
2184
 
2185
+ GGML_API void ggml_flash_attn_ext_add_sinks(
2186
+ struct ggml_tensor * a,
2187
+ struct ggml_tensor * sinks);
2188
+
1851
2189
  // TODO: needs to be adapted to ggml_flash_attn_ext
1852
2190
  GGML_API struct ggml_tensor * ggml_flash_attn_back(
1853
2191
  struct ggml_context * ctx,
@@ -1869,7 +2207,8 @@ extern "C" {
1869
2207
  struct ggml_tensor * dt,
1870
2208
  struct ggml_tensor * A,
1871
2209
  struct ggml_tensor * B,
1872
- struct ggml_tensor * C);
2210
+ struct ggml_tensor * C,
2211
+ struct ggml_tensor * ids);
1873
2212
 
1874
2213
  // partition into non-overlapping windows with padding if needed
1875
2214
  // example:
@@ -2052,7 +2391,14 @@ extern "C" {
2052
2391
  struct ggml_tensor * grad,
2053
2392
  struct ggml_tensor * m,
2054
2393
  struct ggml_tensor * v,
2055
- struct ggml_tensor * adamw_params); // parameters such a the learning rate
2394
+ struct ggml_tensor * adamw_params); // parameters such as the learning rate
2395
+
2396
+ // stochastic gradient descent step (with weight decay)
2397
+ GGML_API struct ggml_tensor * ggml_opt_step_sgd(
2398
+ struct ggml_context * ctx,
2399
+ struct ggml_tensor * a,
2400
+ struct ggml_tensor * grad,
2401
+ struct ggml_tensor * sgd_params); // alpha, weight decay
2056
2402
 
2057
2403
  //
2058
2404
  // automatic differentiation
@@ -2086,9 +2432,6 @@ extern "C" {
2086
2432
  GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2087
2433
  GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2088
2434
 
2089
- GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
2090
- GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
2091
-
2092
2435
  // print info and performance information for the graph
2093
2436
  GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
2094
2437
 
@@ -2172,6 +2515,7 @@ extern "C" {
2172
2515
 
2173
2516
  // scheduling priorities
2174
2517
  enum ggml_sched_priority {
2518
+ GGML_SCHED_PRIO_LOW = -1,
2175
2519
  GGML_SCHED_PRIO_NORMAL,
2176
2520
  GGML_SCHED_PRIO_MEDIUM,
2177
2521
  GGML_SCHED_PRIO_HIGH,