whispercpp 1.3.1 → 1.3.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (857)
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -3
  3. data/README.md +161 -43
  4. data/Rakefile +45 -13
  5. data/ext/.gitignore +4 -8
  6. data/ext/dependencies.rb +73 -0
  7. data/ext/extconf.rb +21 -198
  8. data/ext/options.rb +85 -0
  9. data/ext/ruby_whisper.c +177 -0
  10. data/ext/ruby_whisper.h +17 -2
  11. data/ext/ruby_whisper_context.c +672 -0
  12. data/ext/ruby_whisper_error.c +52 -0
  13. data/ext/ruby_whisper_model.c +232 -0
  14. data/ext/ruby_whisper_params.c +1303 -0
  15. data/ext/ruby_whisper_segment.c +220 -0
  16. data/ext/ruby_whisper_transcribe.cpp +93 -0
  17. data/ext/ruby_whisper_vad_params.c +288 -0
  18. data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
  19. data/ext/sources/CMakeLists.txt +255 -0
  20. data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
  21. data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
  22. data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
  23. data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
  24. data/ext/sources/bindings/javascript/package.json +26 -0
  25. data/ext/sources/bindings/javascript/whisper.js +19 -0
  26. data/ext/sources/build-xcframework.sh +547 -0
  27. data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
  28. data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
  29. data/ext/sources/cmake/build-info.cmake +60 -0
  30. data/ext/sources/cmake/git-vars.cmake +22 -0
  31. data/ext/sources/cmake/whisper-config.cmake.in +65 -0
  32. data/ext/sources/cmake/whisper.pc.in +10 -0
  33. data/ext/sources/examples/CMakeLists.txt +124 -0
  34. data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
  35. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +133 -0
  36. data/ext/sources/examples/addon.node/addon.cpp +557 -0
  37. data/ext/sources/examples/addon.node/index.js +57 -0
  38. data/ext/sources/examples/addon.node/package.json +16 -0
  39. data/ext/sources/examples/addon.node/vad-example.js +132 -0
  40. data/ext/sources/examples/bench/CMakeLists.txt +8 -0
  41. data/ext/sources/examples/bench/bench.cpp +176 -0
  42. data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
  43. data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
  44. data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
  45. data/ext/sources/examples/cli/CMakeLists.txt +8 -0
  46. data/ext/sources/examples/cli/cli.cpp +1295 -0
  47. data/ext/sources/examples/coi-serviceworker.js +146 -0
  48. data/ext/sources/examples/command/CMakeLists.txt +10 -0
  49. data/ext/sources/examples/command/command.cpp +800 -0
  50. data/ext/sources/examples/command/commands.txt +9 -0
  51. data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
  52. data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
  53. data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
  54. data/ext/sources/examples/common-ggml.cpp +238 -0
  55. data/ext/sources/examples/common-ggml.h +18 -0
  56. data/ext/sources/examples/common-sdl.cpp +227 -0
  57. data/ext/sources/examples/common-sdl.h +49 -0
  58. data/ext/sources/examples/common-whisper.cpp +175 -0
  59. data/ext/sources/examples/common-whisper.h +24 -0
  60. data/ext/sources/examples/common.cpp +675 -0
  61. data/ext/sources/examples/common.h +322 -0
  62. data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
  63. data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
  64. data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
  65. data/ext/sources/examples/generate-karaoke.sh +57 -0
  66. data/ext/sources/examples/grammar-parser.cpp +423 -0
  67. data/ext/sources/examples/grammar-parser.h +29 -0
  68. data/ext/sources/examples/helpers.js +191 -0
  69. data/ext/sources/examples/json.hpp +24596 -0
  70. data/ext/sources/examples/livestream.sh +112 -0
  71. data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
  72. data/ext/sources/examples/lsp/lsp.cpp +469 -0
  73. data/ext/sources/examples/lsp/whisper.vim +362 -0
  74. data/ext/sources/examples/miniaudio.h +93468 -0
  75. data/ext/sources/examples/python/test_whisper_processor.py +7 -0
  76. data/ext/sources/examples/python/whisper_processor.py +54 -0
  77. data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
  78. data/ext/sources/examples/quantize/quantize.cpp +226 -0
  79. data/ext/sources/examples/server/CMakeLists.txt +15 -0
  80. data/ext/sources/examples/server/bench.js +29 -0
  81. data/ext/sources/examples/server/httplib.h +10497 -0
  82. data/ext/sources/examples/server/server.cpp +1238 -0
  83. data/ext/sources/examples/server.py +115 -0
  84. data/ext/sources/examples/stb_vorbis.c +5584 -0
  85. data/ext/sources/examples/stream/CMakeLists.txt +10 -0
  86. data/ext/sources/examples/stream/stream.cpp +435 -0
  87. data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
  88. data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
  89. data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
  90. data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
  91. data/ext/sources/examples/sycl/build.sh +22 -0
  92. data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
  93. data/ext/sources/examples/sycl/run-whisper.sh +17 -0
  94. data/ext/sources/examples/talk-llama/CMakeLists.txt +43 -0
  95. data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
  96. data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
  97. data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
  98. data/ext/sources/examples/talk-llama/llama-arch.cpp +1914 -0
  99. data/ext/sources/examples/talk-llama/llama-arch.h +464 -0
  100. data/ext/sources/examples/talk-llama/llama-batch.cpp +843 -0
  101. data/ext/sources/examples/talk-llama/llama-batch.h +147 -0
  102. data/ext/sources/examples/talk-llama/llama-chat.cpp +685 -0
  103. data/ext/sources/examples/talk-llama/llama-chat.h +59 -0
  104. data/ext/sources/examples/talk-llama/llama-context.cpp +2845 -0
  105. data/ext/sources/examples/talk-llama/llama-context.h +297 -0
  106. data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
  107. data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
  108. data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
  109. data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
  110. data/ext/sources/examples/talk-llama/llama-graph.cpp +1693 -0
  111. data/ext/sources/examples/talk-llama/llama-graph.h +710 -0
  112. data/ext/sources/examples/talk-llama/llama-hparams.cpp +103 -0
  113. data/ext/sources/examples/talk-llama/llama-hparams.h +207 -0
  114. data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
  115. data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
  116. data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
  117. data/ext/sources/examples/talk-llama/llama-io.h +35 -0
  118. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
  119. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
  120. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
  121. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
  122. data/ext/sources/examples/talk-llama/llama-kv-cache.h +44 -0
  123. data/ext/sources/examples/talk-llama/llama-kv-cells.h +439 -0
  124. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
  125. data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
  126. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
  127. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
  128. data/ext/sources/examples/talk-llama/llama-memory.cpp +59 -0
  129. data/ext/sources/examples/talk-llama/llama-memory.h +116 -0
  130. data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
  131. data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
  132. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1163 -0
  133. data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
  134. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +282 -0
  135. data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
  136. data/ext/sources/examples/talk-llama/llama-model.cpp +15114 -0
  137. data/ext/sources/examples/talk-llama/llama-model.h +452 -0
  138. data/ext/sources/examples/talk-llama/llama-quant.cpp +1049 -0
  139. data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
  140. data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
  141. data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
  142. data/ext/sources/examples/talk-llama/llama-vocab.cpp +3377 -0
  143. data/ext/sources/examples/talk-llama/llama-vocab.h +132 -0
  144. data/ext/sources/examples/talk-llama/llama.cpp +358 -0
  145. data/ext/sources/examples/talk-llama/llama.h +1484 -0
  146. data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
  147. data/ext/sources/examples/talk-llama/speak +40 -0
  148. data/ext/sources/examples/talk-llama/speak.bat +1 -0
  149. data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
  150. data/ext/sources/examples/talk-llama/talk-llama.cpp +810 -0
  151. data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
  152. data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
  153. data/ext/sources/examples/talk-llama/unicode.cpp +854 -0
  154. data/ext/sources/examples/talk-llama/unicode.h +66 -0
  155. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
  156. data/ext/sources/examples/vad-speech-segments/speech.cpp +149 -0
  157. data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
  158. data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
  159. data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
  160. data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
  161. data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
  162. data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
  163. data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
  164. data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
  165. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +251 -0
  166. data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
  167. data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
  168. data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
  169. data/ext/sources/ggml/CMakeLists.txt +435 -0
  170. data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
  171. data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
  172. data/ext/sources/ggml/cmake/common.cmake +50 -0
  173. data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
  174. data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
  175. data/ext/{ggml → sources/ggml}/include/ggml-backend.h +10 -8
  176. data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
  177. data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +11 -1
  178. data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
  179. data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
  180. data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
  181. data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
  182. data/ext/{ggml → sources/ggml}/include/ggml.h +325 -269
  183. data/ext/sources/ggml/include/gguf.h +202 -0
  184. data/ext/sources/ggml/src/CMakeLists.txt +404 -0
  185. data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
  186. data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  187. data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
  188. data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +92 -53
  189. data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +69 -34
  190. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  191. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +75 -0
  192. data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
  193. data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
  194. data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
  195. data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
  196. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  197. data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +140 -1
  198. data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +588 -146
  199. data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  200. data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
  201. data/ext/{ggml → sources/ggml}/src/ggml-common.h +16 -8
  202. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +597 -0
  203. data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +3 -2
  204. data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +11 -10
  205. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  206. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  207. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  208. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  209. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  210. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  211. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  212. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  213. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  214. data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  215. data/ext/{ggml/src/ggml-cpu/cpu-feats-x86.cpp → sources/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp} +5 -1
  216. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  217. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +3285 -0
  218. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  219. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  220. data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
  221. data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  222. data/ext/sources/ggml/src/ggml-cpu/common.h +73 -0
  223. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +172 -41
  224. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3551 -0
  225. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +78 -25
  226. data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.cpp → sources/ggml/src/ggml-cpu/hbm.cpp} +1 -1
  227. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  228. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  229. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  230. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  231. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3594 -0
  232. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
  233. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +9786 -0
  234. data/ext/sources/ggml/src/ggml-cpu/ops.h +118 -0
  235. data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
  236. data/ext/{ggml/src/ggml-cpu/ggml-cpu-quants.h → sources/ggml/src/ggml-cpu/quants.h} +26 -0
  237. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
  238. data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
  239. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +1184 -0
  240. data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.cpp → sources/ggml/src/ggml-cpu/traits.cpp} +1 -1
  241. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  242. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
  243. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +345 -0
  244. data/ext/sources/ggml/src/ggml-cpu/vec.h +1027 -0
  245. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  246. data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
  247. data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
  248. data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
  249. data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
  250. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
  251. data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
  252. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
  253. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
  254. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
  255. data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  256. data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
  257. data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
  258. data/ext/sources/ggml/src/ggml-cuda/common.cuh +851 -0
  259. data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
  260. data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
  261. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  262. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  263. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  264. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  265. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  266. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  267. data/ext/sources/ggml/src/ggml-cuda/convert.cu +752 -0
  268. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +31 -0
  269. data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
  270. data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  271. data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  272. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
  273. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
  274. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  275. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  276. data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  277. data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
  278. data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  279. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  280. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
  281. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  282. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  283. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  284. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  285. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  286. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  287. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +638 -0
  288. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  289. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
  290. data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
  291. data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
  292. data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
  293. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3647 -0
  294. data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
  295. data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
  296. data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
  297. data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
  298. data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
  299. data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
  300. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
  301. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
  302. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  303. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +506 -0
  304. data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +11 -0
  305. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
  306. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  307. data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
  308. data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
  309. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  310. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  311. data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
  312. data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  313. data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
  314. data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
  315. data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
  316. data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  317. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
  318. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
  319. data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
  320. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
  321. data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
  322. data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
  323. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
  324. data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
  325. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  326. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  327. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
  328. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  329. data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
  330. data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
  331. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +26 -0
  332. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  333. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  334. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  335. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  336. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  337. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  338. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  339. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  340. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  341. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  342. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  343. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  344. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  345. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  346. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  347. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  348. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  349. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  350. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  351. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  352. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  353. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  354. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  355. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  356. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  357. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  358. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  359. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  360. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  361. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  362. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  363. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  364. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  365. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  366. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  367. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  368. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  369. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  370. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  371. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  372. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  373. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  374. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  375. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  376. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  377. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  378. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  379. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  380. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  381. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  382. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  383. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  384. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  385. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  386. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  387. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  388. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  389. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  390. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  391. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  392. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  393. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  394. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  395. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  396. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  397. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  398. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  399. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  400. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  401. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  402. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  403. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  404. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  405. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  406. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  407. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  408. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  409. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  410. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  411. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  412. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  413. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  414. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  415. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  416. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  417. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  418. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  419. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  420. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  421. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  422. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  423. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  424. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  425. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  426. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  427. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  428. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  429. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  430. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  431. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  432. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  433. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  434. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  435. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  436. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  437. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  438. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  439. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  440. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  441. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  442. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  443. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  444. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  445. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  446. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  447. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  448. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  449. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  450. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  451. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  452. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  453. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  454. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  455. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  456. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  457. data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
  458. data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  459. data/ext/sources/ggml/src/ggml-cuda/unary.cu +378 -0
  460. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +66 -0
  461. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
  462. data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
  463. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  464. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
  465. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
  466. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
  467. data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
  468. data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
  469. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +135 -0
  470. data/ext/{ggml → sources/ggml}/src/ggml-impl.h +147 -158
  471. data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  472. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  473. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  474. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  475. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  476. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  477. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  478. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  479. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  480. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  481. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  482. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  483. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  484. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  485. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  486. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  487. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  488. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  489. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  490. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  491. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  492. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  493. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  494. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  495. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  496. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  497. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  498. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  499. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  500. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  501. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  502. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  503. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  504. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  505. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  506. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  507. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  508. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  509. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +121 -0
  510. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +649 -0
  511. data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2504 -1108
  512. data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +2102 -1463
  513. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  514. data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
  515. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
  516. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +110 -0
  517. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +6494 -0
  518. data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  519. data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  520. data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  521. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  522. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  523. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  524. data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  525. data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  526. data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  527. data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  528. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  529. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  530. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  531. data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
  532. data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  533. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  534. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  535. data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  536. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  537. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  538. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  539. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  540. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  541. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  542. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  543. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  544. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  545. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  546. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  547. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  548. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  549. data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  550. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  551. data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  552. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  553. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  554. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  555. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  556. data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  557. data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  558. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  559. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  560. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  561. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  562. data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  563. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  564. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  565. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  566. data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  567. data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  568. data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
  569. data/ext/{ggml → sources/ggml}/src/ggml-quants.c +120 -128
  570. data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  571. data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +494 -84
  572. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  573. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
  574. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +344 -0
  575. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  576. data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
  577. data/ext/sources/ggml/src/ggml-sycl/common.hpp +561 -0
  578. data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +56 -70
  579. data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
  580. data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +8 -12
  581. data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
  582. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +575 -0
  583. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
  584. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +839 -0
  585. data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
  586. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +823 -0
  587. data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +188 -67
  588. data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  589. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
  590. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1120 -0
  591. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +84 -0
  592. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +102 -0
  593. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +212 -0
  594. data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
  595. data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1197 -1295
  596. data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
  597. data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
  598. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
  599. data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
  600. data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +60 -81
  601. data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
  602. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
  603. data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  604. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +482 -0
  605. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
  606. data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
  607. data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
  608. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
  609. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +111 -0
  610. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +472 -0
  611. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
  612. data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +38 -28
  613. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
  614. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +15 -0
  615. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +26 -0
  616. data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +6 -11
  617. data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  618. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
  619. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +289 -0
  620. data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
  621. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +200 -0
  622. data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  623. data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3822 -1335
  624. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  625. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  626. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  627. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  628. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  629. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  630. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  631. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  632. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  633. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  634. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  635. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  636. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  637. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  638. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  639. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  640. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  641. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  642. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  643. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  644. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  645. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  646. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  647. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  648. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  649. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  650. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  651. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  652. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  653. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  654. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  655. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  656. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  657. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  658. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  659. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  660. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  661. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  662. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  663. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  664. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  665. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  666. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  667. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  668. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  669. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  670. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  671. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  672. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  673. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  674. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  675. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  676. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  677. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  678. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  679. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  680. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  681. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  682. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  683. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  684. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  685. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  686. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  687. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  688. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  689. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  690. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  691. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  692. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  693. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  694. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  695. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  696. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  697. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  698. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  699. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  700. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  701. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  702. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  703. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  704. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  705. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  706. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  707. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  708. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  709. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  710. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  711. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  712. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  713. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  714. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +61 -0
  715. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  716. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  717. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  718. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  719. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  720. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  721. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  722. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  723. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  724. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  725. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  726. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  727. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  728. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  729. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  730. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  731. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  732. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  733. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  734. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  735. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  736. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  737. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  738. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  739. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  740. data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +203 -36
  741. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  742. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  743. data/ext/{ggml → sources/ggml}/src/ggml.c +918 -1782
  744. data/ext/sources/ggml/src/ggml.cpp +26 -0
  745. data/ext/sources/ggml/src/gguf.cpp +1351 -0
  746. data/ext/{include → sources/include}/whisper.h +70 -2
  747. data/ext/sources/src/CMakeLists.txt +145 -0
  748. data/ext/sources/src/coreml/whisper-compat.h +10 -0
  749. data/ext/sources/src/coreml/whisper-compat.m +35 -0
  750. data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
  751. data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +36 -10
  752. data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
  753. data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +29 -3
  754. data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
  755. data/ext/sources/src/whisper-arch.h +197 -0
  756. data/ext/{src → sources/src}/whisper.cpp +1966 -386
  757. data/ext/sources/tests/CMakeLists.txt +105 -0
  758. data/ext/sources/tests/earnings21/eval.mk +58 -0
  759. data/ext/sources/tests/earnings21/eval.py +68 -0
  760. data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
  761. data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
  762. data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
  763. data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
  764. data/ext/sources/tests/earnings21/requirements.txt +6 -0
  765. data/ext/sources/tests/en-0-ref.txt +1 -0
  766. data/ext/sources/tests/en-1-ref.txt +1 -0
  767. data/ext/sources/tests/en-2-ref.txt +1 -0
  768. data/ext/sources/tests/es-0-ref.txt +1 -0
  769. data/ext/sources/tests/librispeech/eval.mk +39 -0
  770. data/ext/sources/tests/librispeech/eval.py +47 -0
  771. data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
  772. data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
  773. data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
  774. data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
  775. data/ext/sources/tests/librispeech/requirements.txt +6 -0
  776. data/ext/sources/tests/run-tests.sh +130 -0
  777. data/ext/sources/tests/test-c.c +3 -0
  778. data/ext/sources/tests/test-vad-full.cpp +54 -0
  779. data/ext/sources/tests/test-vad.cpp +83 -0
  780. data/ext/sources/tests/test-whisper.js +58 -0
  781. data/extsources.rb +39 -5
  782. data/lib/whisper/context.rb +15 -0
  783. data/lib/whisper/model/uri.rb +202 -126
  784. data/lib/whisper/segment.rb +58 -0
  785. data/sig/whisper.rbs +510 -0
  786. data/test/helper.rb +24 -0
  787. data/{tests → test}/test_callback.rb +45 -3
  788. data/{tests → test}/test_error.rb +2 -2
  789. data/{tests → test}/test_model.rb +47 -0
  790. data/test/test_package.rb +51 -0
  791. data/test/test_params.rb +297 -0
  792. data/test/test_segment.rb +146 -0
  793. data/test/test_vad.rb +19 -0
  794. data/test/test_vad_params.rb +103 -0
  795. data/{tests → test}/test_whisper.rb +106 -36
  796. data/whispercpp.gemspec +5 -5
  797. metadata +837 -134
  798. data/ext/cpu.mk +0 -9
  799. data/ext/examples/dr_wav.h +0 -8815
  800. data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
  801. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
  802. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  803. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -10835
  804. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
  805. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
  806. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
  807. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
  808. data/ext/ggml/src/ggml-sycl/convert.cpp +0 -547
  809. data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
  810. data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
  811. data/ext/ggml/src/ggml-sycl/mmvq.cpp +0 -1015
  812. data/ext/ggml/src/ggml-sycl/norm.cpp +0 -378
  813. data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
  814. data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
  815. data/ext/metal-embed.mk +0 -17
  816. data/ext/metal.mk +0 -6
  817. data/ext/ruby_whisper.cpp +0 -1909
  818. data/ext/scripts/get-flags.mk +0 -38
  819. data/lib/whisper.rb +0 -2
  820. data/tests/helper.rb +0 -7
  821. data/tests/test_package.rb +0 -31
  822. data/tests/test_params.rb +0 -160
  823. data/tests/test_segment.rb +0 -83
  824. /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
  825. /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
  826. /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
  827. /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
  828. /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
  829. /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
  830. /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
  831. /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
  832. /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
  833. /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
  834. /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
  835. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
  836. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
  837. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
  838. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
  839. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
  840. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
  841. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
  842. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
  843. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
  844. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
  845. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
  846. /data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.h → sources/ggml/src/ggml-cpu/hbm.h} +0 -0
  847. /data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.h → sources/ggml/src/ggml-cpu/traits.h} +0 -0
  848. /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
  849. /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
  850. /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
  851. /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
  852. /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
  853. /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
  854. /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
  855. /data/{tests → test}/jfk_reader/.gitignore +0 -0
  856. /data/{tests → test}/jfk_reader/extconf.rb +0 -0
  857. /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
@@ -198,7 +198,7 @@
198
198
 
199
199
  #ifndef __GNUC__
200
200
  # define GGML_ATTRIBUTE_FORMAT(...)
201
- #elif defined(__MINGW32__)
201
+ #elif defined(__MINGW32__) && !defined(__clang__)
202
202
  # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
203
203
  #else
204
204
  # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@@ -241,12 +241,6 @@
241
241
  #define GGML_ROPE_TYPE_MROPE 8
242
242
  #define GGML_ROPE_TYPE_VISION 24
243
243
 
244
- #define GGUF_MAGIC "GGUF"
245
-
246
- #define GGUF_VERSION 3
247
-
248
- #define GGUF_DEFAULT_ALIGNMENT 32
249
-
250
244
  #define GGML_UNUSED(x) (void)(x)
251
245
 
252
246
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -399,14 +393,8 @@ extern "C" {
399
393
 
400
394
  // precision
401
395
  enum ggml_prec {
402
- GGML_PREC_DEFAULT,
403
- GGML_PREC_F32,
404
- };
405
-
406
- enum ggml_backend_type {
407
- GGML_BACKEND_TYPE_CPU = 0,
408
- GGML_BACKEND_TYPE_GPU = 10,
409
- GGML_BACKEND_TYPE_GPU_SPLIT = 20,
396
+ GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default
397
+ GGML_PREC_F32 = 10,
410
398
  };
411
399
 
412
400
  // model file types
@@ -466,6 +454,7 @@ extern "C" {
466
454
  GGML_OP_RMS_NORM,
467
455
  GGML_OP_RMS_NORM_BACK,
468
456
  GGML_OP_GROUP_NORM,
457
+ GGML_OP_L2_NORM,
469
458
 
470
459
  GGML_OP_MUL_MAT,
471
460
  GGML_OP_MUL_MAT_ID,
@@ -481,6 +470,7 @@ extern "C" {
481
470
  GGML_OP_TRANSPOSE,
482
471
  GGML_OP_GET_ROWS,
483
472
  GGML_OP_GET_ROWS_BACK,
473
+ GGML_OP_SET_ROWS,
484
474
  GGML_OP_DIAG,
485
475
  GGML_OP_DIAG_MASK_INF,
486
476
  GGML_OP_DIAG_MASK_ZERO,
@@ -492,6 +482,8 @@ extern "C" {
492
482
  GGML_OP_CONV_TRANSPOSE_1D,
493
483
  GGML_OP_IM2COL,
494
484
  GGML_OP_IM2COL_BACK,
485
+ GGML_OP_CONV_2D,
486
+ GGML_OP_CONV_2D_DW,
495
487
  GGML_OP_CONV_TRANSPOSE_2D,
496
488
  GGML_OP_POOL_1D,
497
489
  GGML_OP_POOL_2D,
@@ -499,6 +491,7 @@ extern "C" {
499
491
  GGML_OP_UPSCALE, // nearest interpolate
500
492
  GGML_OP_PAD,
501
493
  GGML_OP_PAD_REFLECT_1D,
494
+ GGML_OP_ROLL,
502
495
  GGML_OP_ARANGE,
503
496
  GGML_OP_TIMESTEP_EMBEDDING,
504
497
  GGML_OP_ARGSORT,
@@ -513,24 +506,23 @@ extern "C" {
513
506
  GGML_OP_GET_REL_POS,
514
507
  GGML_OP_ADD_REL_POS,
515
508
  GGML_OP_RWKV_WKV6,
509
+ GGML_OP_GATED_LINEAR_ATTN,
510
+ GGML_OP_RWKV_WKV7,
516
511
 
517
512
  GGML_OP_UNARY,
518
513
 
519
- GGML_OP_MAP_UNARY,
520
- GGML_OP_MAP_BINARY,
521
-
522
- GGML_OP_MAP_CUSTOM1_F32,
523
- GGML_OP_MAP_CUSTOM2_F32,
524
- GGML_OP_MAP_CUSTOM3_F32,
525
-
526
514
  GGML_OP_MAP_CUSTOM1,
527
515
  GGML_OP_MAP_CUSTOM2,
528
516
  GGML_OP_MAP_CUSTOM3,
529
517
 
518
+ GGML_OP_CUSTOM,
519
+
530
520
  GGML_OP_CROSS_ENTROPY_LOSS,
531
521
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
532
522
  GGML_OP_OPT_STEP_ADAMW,
533
523
 
524
+ GGML_OP_GLU,
525
+
534
526
  GGML_OP_COUNT,
535
527
  };
536
528
 
@@ -549,10 +541,19 @@ extern "C" {
549
541
  GGML_UNARY_OP_HARDSWISH,
550
542
  GGML_UNARY_OP_HARDSIGMOID,
551
543
  GGML_UNARY_OP_EXP,
544
+ GGML_UNARY_OP_GELU_ERF,
552
545
 
553
546
  GGML_UNARY_OP_COUNT,
554
547
  };
555
548
 
549
+ enum ggml_glu_op {
550
+ GGML_GLU_OP_REGLU,
551
+ GGML_GLU_OP_GEGLU,
552
+ GGML_GLU_OP_SWIGLU,
553
+
554
+ GGML_GLU_OP_COUNT,
555
+ };
556
+
556
557
  enum ggml_object_type {
557
558
  GGML_OBJECT_TYPE_TENSOR,
558
559
  GGML_OBJECT_TYPE_GRAPH,
@@ -587,8 +588,6 @@ extern "C" {
587
588
  struct ggml_tensor {
588
589
  enum ggml_type type;
589
590
 
590
- GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
591
-
592
591
  struct ggml_backend_buffer * buffer;
593
592
 
594
593
  int64_t ne[GGML_MAX_DIMS]; // number of elements
@@ -670,6 +669,7 @@ extern "C" {
670
669
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
671
670
 
672
671
  GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
672
+ GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
673
673
  GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
674
674
 
675
675
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
@@ -688,11 +688,21 @@ extern "C" {
688
688
  GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
689
689
  GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
690
690
 
691
+ // returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
691
692
  GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
692
693
  GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
693
694
  GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
694
695
  GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
695
696
 
697
+ // returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
698
+ GGML_API bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor);
699
+
700
+ // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
701
+ GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
702
+
703
+ // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
704
+ GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
705
+
696
706
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
697
707
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
698
708
 
@@ -764,6 +774,7 @@ extern "C" {
764
774
  GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
765
775
 
766
776
  GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
777
+ GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
767
778
 
768
779
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
769
780
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
@@ -776,7 +787,7 @@ extern "C" {
776
787
  // Tensor flags
777
788
  GGML_API void ggml_set_input(struct ggml_tensor * tensor);
778
789
  GGML_API void ggml_set_output(struct ggml_tensor * tensor);
779
- GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
790
+ GGML_API void ggml_set_param(struct ggml_tensor * tensor);
780
791
  GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
781
792
 
782
793
  //
@@ -942,11 +953,20 @@ extern "C" {
942
953
  struct ggml_tensor * a,
943
954
  struct ggml_tensor * b);
944
955
 
956
+ // repeat a to the specified shape
957
+ GGML_API struct ggml_tensor * ggml_repeat_4d(
958
+ struct ggml_context * ctx,
959
+ struct ggml_tensor * a,
960
+ int64_t ne0,
961
+ int64_t ne1,
962
+ int64_t ne2,
963
+ int64_t ne3);
964
+
945
965
  // sums repetitions in a into shape of b
946
966
  GGML_API struct ggml_tensor * ggml_repeat_back(
947
967
  struct ggml_context * ctx,
948
968
  struct ggml_tensor * a,
949
- struct ggml_tensor * b);
969
+ struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
950
970
 
951
971
  // concat a and b along dim
952
972
  // used in stable-diffusion
@@ -1032,6 +1052,16 @@ extern "C" {
1032
1052
  struct ggml_context * ctx,
1033
1053
  struct ggml_tensor * a);
1034
1054
 
1055
+ // GELU using erf (error function) when possible
1056
+ // some backends may fallback to approximation based on Abramowitz and Stegun formula
1057
+ GGML_API struct ggml_tensor * ggml_gelu_erf(
1058
+ struct ggml_context * ctx,
1059
+ struct ggml_tensor * a);
1060
+
1061
+ GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
1062
+ struct ggml_context * ctx,
1063
+ struct ggml_tensor * a);
1064
+
1035
1065
  GGML_API struct ggml_tensor * ggml_gelu_quick(
1036
1066
  struct ggml_context * ctx,
1037
1067
  struct ggml_tensor * a);
@@ -1073,6 +1103,63 @@ extern "C" {
1073
1103
  struct ggml_context * ctx,
1074
1104
  struct ggml_tensor * a);
1075
1105
 
1106
+ // gated linear unit ops
1107
+ // A: n columns, r rows,
1108
+ // result is n / 2 columns, r rows,
1109
+ // expects gate in second half of row, unless swapped is true
1110
+ GGML_API struct ggml_tensor * ggml_glu(
1111
+ struct ggml_context * ctx,
1112
+ struct ggml_tensor * a,
1113
+ enum ggml_glu_op op,
1114
+ bool swapped);
1115
+
1116
+ GGML_API struct ggml_tensor * ggml_reglu(
1117
+ struct ggml_context * ctx,
1118
+ struct ggml_tensor * a);
1119
+
1120
+ GGML_API struct ggml_tensor * ggml_reglu_swapped(
1121
+ struct ggml_context * ctx,
1122
+ struct ggml_tensor * a);
1123
+
1124
+ GGML_API struct ggml_tensor * ggml_geglu(
1125
+ struct ggml_context * ctx,
1126
+ struct ggml_tensor * a);
1127
+
1128
+ GGML_API struct ggml_tensor * ggml_geglu_swapped(
1129
+ struct ggml_context * ctx,
1130
+ struct ggml_tensor * a);
1131
+
1132
+ GGML_API struct ggml_tensor * ggml_swiglu(
1133
+ struct ggml_context * ctx,
1134
+ struct ggml_tensor * a);
1135
+
1136
+ GGML_API struct ggml_tensor * ggml_swiglu_swapped(
1137
+ struct ggml_context * ctx,
1138
+ struct ggml_tensor * a);
1139
+
1140
+ // A: n columns, r rows,
1141
+ // B: n columns, r rows,
1142
+ GGML_API struct ggml_tensor * ggml_glu_split(
1143
+ struct ggml_context * ctx,
1144
+ struct ggml_tensor * a,
1145
+ struct ggml_tensor * b,
1146
+ enum ggml_glu_op op);
1147
+
1148
+ GGML_API struct ggml_tensor * ggml_reglu_split(
1149
+ struct ggml_context * ctx,
1150
+ struct ggml_tensor * a,
1151
+ struct ggml_tensor * b);
1152
+
1153
+ GGML_API struct ggml_tensor * ggml_geglu_split(
1154
+ struct ggml_context * ctx,
1155
+ struct ggml_tensor * a,
1156
+ struct ggml_tensor * b);
1157
+
1158
+ GGML_API struct ggml_tensor * ggml_swiglu_split(
1159
+ struct ggml_context * ctx,
1160
+ struct ggml_tensor * a,
1161
+ struct ggml_tensor * b);
1162
+
1076
1163
  // normalize along rows
1077
1164
  GGML_API struct ggml_tensor * ggml_norm(
1078
1165
  struct ggml_context * ctx,
@@ -1108,6 +1195,18 @@ extern "C" {
1108
1195
  int n_groups,
1109
1196
  float eps);
1110
1197
 
1198
+ // l2 normalize along rows
1199
+ // used in rwkv v7
1200
+ GGML_API struct ggml_tensor * ggml_l2_norm(
1201
+ struct ggml_context * ctx,
1202
+ struct ggml_tensor * a,
1203
+ float eps);
1204
+
1205
+ GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
1206
+ struct ggml_context * ctx,
1207
+ struct ggml_tensor * a,
1208
+ float eps);
1209
+
1111
1210
  // a - x
1112
1211
  // b - dy
1113
1212
  GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -1350,6 +1449,23 @@ extern "C" {
1350
1449
  struct ggml_tensor * b, // row indices
1351
1450
  struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
1352
1451
 
1452
+ // a TD [n_embd, ne1, ne2, ne3]
1453
+ // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
1454
+ // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
1455
+ //
1456
+ // undefined behavior if destination rows overlap
1457
+ //
1458
+ // broadcast:
1459
+ // ne2 % ne11 == 0
1460
+ // ne3 % ne12 == 0
1461
+ //
1462
+ // return view(a)
1463
+ GGML_API struct ggml_tensor * ggml_set_rows(
1464
+ struct ggml_context * ctx,
1465
+ struct ggml_tensor * a, // destination
1466
+ struct ggml_tensor * b, // source
1467
+ struct ggml_tensor * c); // row indices
1468
+
1353
1469
  GGML_API struct ggml_tensor * ggml_diag(
1354
1470
  struct ggml_context * ctx,
1355
1471
  struct ggml_tensor * a);
@@ -1397,16 +1513,20 @@ extern "C" {
1397
1513
  float scale,
1398
1514
  float max_bias);
1399
1515
 
1400
- GGML_API struct ggml_tensor * ggml_soft_max_back(
1516
+ GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
1401
1517
  struct ggml_context * ctx,
1402
1518
  struct ggml_tensor * a,
1403
- struct ggml_tensor * b);
1519
+ struct ggml_tensor * b,
1520
+ float scale,
1521
+ float max_bias);
1404
1522
 
1405
1523
  // in-place, returns view(a)
1406
- GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
1524
+ GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
1407
1525
  struct ggml_context * ctx,
1408
1526
  struct ggml_tensor * a,
1409
- struct ggml_tensor * b);
1527
+ struct ggml_tensor * b,
1528
+ float scale,
1529
+ float max_bias);
1410
1530
 
1411
1531
  // rotary position embedding
1412
1532
  // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
@@ -1513,7 +1633,7 @@ extern "C" {
1513
1633
 
1514
1634
  // rotary position embedding backward, i.e compute dx from dy
1515
1635
  // a - dy
1516
- GGML_API struct ggml_tensor * ggml_rope_back(
1636
+ GGML_API struct ggml_tensor * ggml_rope_ext_back(
1517
1637
  struct ggml_context * ctx,
1518
1638
  struct ggml_tensor * a, // gradients of ggml_rope result
1519
1639
  struct ggml_tensor * b, // positions
@@ -1528,6 +1648,23 @@ extern "C" {
1528
1648
  float beta_fast,
1529
1649
  float beta_slow);
1530
1650
 
1651
+ GGML_API struct ggml_tensor * ggml_rope_multi_back(
1652
+ struct ggml_context * ctx,
1653
+ struct ggml_tensor * a,
1654
+ struct ggml_tensor * b,
1655
+ struct ggml_tensor * c,
1656
+ int n_dims,
1657
+ int sections[4],
1658
+ int mode,
1659
+ int n_ctx_orig,
1660
+ float freq_base,
1661
+ float freq_scale,
1662
+ float ext_factor,
1663
+ float attn_factor,
1664
+ float beta_fast,
1665
+ float beta_slow);
1666
+
1667
+
1531
1668
  // clamp
1532
1669
  // in-place, returns view(a)
1533
1670
  GGML_API struct ggml_tensor * ggml_clamp(
@@ -1564,17 +1701,6 @@ extern "C" {
1564
1701
  int d1, // dilation dimension 1
1565
1702
  bool is_2D);
1566
1703
 
1567
- GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
1568
- struct ggml_context * ctx,
1569
- struct ggml_tensor * a, // convolution kernel
1570
- struct ggml_tensor * b, // data
1571
- int s0, // stride dimension 0
1572
- int s1, // stride dimension 1
1573
- int p0, // padding dimension 0
1574
- int p1, // padding dimension 1
1575
- int d0, // dilation dimension 0
1576
- int d1); // dilation dimension 1
1577
-
1578
1704
  GGML_API struct ggml_tensor * ggml_conv_1d(
1579
1705
  struct ggml_context * ctx,
1580
1706
  struct ggml_tensor * a, // convolution kernel
@@ -1592,6 +1718,23 @@ extern "C" {
1592
1718
  int s, // stride
1593
1719
  int d); // dilation
1594
1720
 
1721
+ // depthwise
1722
+ // TODO: this is very likely wrong for some cases! - needs more testing
1723
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw(
1724
+ struct ggml_context * ctx,
1725
+ struct ggml_tensor * a, // convolution kernel
1726
+ struct ggml_tensor * b, // data
1727
+ int s0, // stride
1728
+ int p0, // padding
1729
+ int d0); // dilation
1730
+
1731
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
1732
+ struct ggml_context * ctx,
1733
+ struct ggml_tensor * a, // convolution kernel
1734
+ struct ggml_tensor * b, // data
1735
+ int s0, // stride
1736
+ int d0); // dilation
1737
+
1595
1738
  GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
1596
1739
  struct ggml_context * ctx,
1597
1740
  struct ggml_tensor * a, // convolution kernel
@@ -1611,7 +1754,6 @@ extern "C" {
1611
1754
  int d0, // dilation dimension 0
1612
1755
  int d1); // dilation dimension 1
1613
1756
 
1614
-
1615
1757
  // kernel size is a->ne[0] x a->ne[1]
1616
1758
  // stride is equal to kernel size
1617
1759
  // padding is zero
@@ -1638,12 +1780,51 @@ extern "C" {
1638
1780
  struct ggml_tensor * a,
1639
1781
  struct ggml_tensor * b);
1640
1782
 
1783
+ // depthwise (via im2col and mul_mat)
1784
+ GGML_API struct ggml_tensor * ggml_conv_2d_dw(
1785
+ struct ggml_context * ctx,
1786
+ struct ggml_tensor * a, // convolution kernel
1787
+ struct ggml_tensor * b, // data
1788
+ int s0, // stride dimension 0
1789
+ int s1, // stride dimension 1
1790
+ int p0, // padding dimension 0
1791
+ int p1, // padding dimension 1
1792
+ int d0, // dilation dimension 0
1793
+ int d1); // dilation dimension 1
1794
+
1795
+ // Depthwise 2D convolution
1796
+ // may be faster than ggml_conv_2d_dw, but not available in all backends
1797
+ // a: KW KH 1 C convolution kernel
1798
+ // b: W H C N input data
1799
+ // res: W_out H_out C N
1800
+ GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
1801
+ struct ggml_context * ctx,
1802
+ struct ggml_tensor * a,
1803
+ struct ggml_tensor * b,
1804
+ int stride0,
1805
+ int stride1,
1806
+ int pad0,
1807
+ int pad1,
1808
+ int dilation0,
1809
+ int dilation1);
1810
+
1641
1811
  GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
1642
1812
  struct ggml_context * ctx,
1643
1813
  struct ggml_tensor * a,
1644
1814
  struct ggml_tensor * b,
1645
1815
  int stride);
1646
1816
 
1817
+ GGML_API struct ggml_tensor * ggml_conv_2d_direct(
1818
+ struct ggml_context * ctx,
1819
+ struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
1820
+ struct ggml_tensor * b, // input data [W, H, C, N]
1821
+ int s0, // stride dimension 0
1822
+ int s1, // stride dimension 1
1823
+ int p0, // padding dimension 0
1824
+ int p1, // padding dimension 1
1825
+ int d0, // dilation dimension 0
1826
+ int d1); // dilation dimension 1
1827
+
1647
1828
  enum ggml_op_pool {
1648
1829
  GGML_OP_POOL_MAX,
1649
1830
  GGML_OP_POOL_AVG,
@@ -1683,24 +1864,47 @@ extern "C" {
1683
1864
  float p0,
1684
1865
  float p1);
1685
1866
 
1686
- // nearest interpolate
1867
+ enum ggml_scale_mode {
1868
+ GGML_SCALE_MODE_NEAREST = 0,
1869
+ GGML_SCALE_MODE_BILINEAR = 1,
1870
+
1871
+ GGML_SCALE_MODE_COUNT
1872
+ };
1873
+
1874
+ enum ggml_scale_flag {
1875
+ GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
1876
+ };
1877
+
1878
+ // interpolate
1687
1879
  // multiplies ne0 and ne1 by scale factor
1688
- // used in stable-diffusion
1689
1880
  GGML_API struct ggml_tensor * ggml_upscale(
1690
1881
  struct ggml_context * ctx,
1691
1882
  struct ggml_tensor * a,
1692
- int scale_factor);
1883
+ int scale_factor,
1884
+ enum ggml_scale_mode mode);
1693
1885
 
1694
- // nearest interpolate
1695
- // nearest interpolate to specified dimensions
1696
- // used in tortoise.cpp
1697
- GGML_API struct ggml_tensor * ggml_upscale_ext(
1886
+ // interpolate
1887
+ // interpolate scale to specified dimensions
1888
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
1698
1889
  struct ggml_context * ctx,
1699
1890
  struct ggml_tensor * a,
1700
1891
  int ne0,
1701
1892
  int ne1,
1702
1893
  int ne2,
1703
- int ne3);
1894
+ int ne3,
1895
+ enum ggml_scale_mode mode),
1896
+ "use ggml_interpolate instead");
1897
+
1898
+ // Up- or downsamples the input to the specified size.
1899
+ // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
1900
+ GGML_API struct ggml_tensor * ggml_interpolate(
1901
+ struct ggml_context * ctx,
1902
+ struct ggml_tensor * a,
1903
+ int64_t ne0,
1904
+ int64_t ne1,
1905
+ int64_t ne2,
1906
+ int64_t ne3,
1907
+ uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
1704
1908
 
1705
1909
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1706
1910
  GGML_API struct ggml_tensor * ggml_pad(
@@ -1718,6 +1922,17 @@ extern "C" {
1718
1922
  int p0,
1719
1923
  int p1);
1720
1924
 
1925
+ // Move tensor elements by an offset given for each dimension. Elements that
1926
+ // are shifted beyond the last position are wrapped around to the beginning.
1927
+ GGML_API struct ggml_tensor * ggml_roll(
1928
+ struct ggml_context * ctx,
1929
+ struct ggml_tensor * a,
1930
+ int shift0,
1931
+ int shift1,
1932
+ int shift2,
1933
+ int shift3);
1934
+
1935
+
1721
1936
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1722
1937
  // timesteps: [N,]
1723
1938
  // return: [N, dim]
@@ -1750,13 +1965,13 @@ extern "C" {
1750
1965
  struct ggml_tensor * a,
1751
1966
  int k);
1752
1967
 
1753
- #define GGML_KQ_MASK_PAD 32
1968
+ #define GGML_KQ_MASK_PAD 64
1754
1969
 
1755
- // q: [n_embd, n_batch, n_head, 1]
1756
- // k: [n_embd, n_kv, n_head_kv, 1]
1757
- // v: [n_embd, n_kv, n_head_kv, 1] !! not transposed !!
1758
- // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1759
- // res: [n_embd, n_head, n_batch, 1] !! permuted !!
1970
+ // q: [n_embd_k, n_batch, n_head, 1]
1971
+ // k: [n_embd_k, n_kv, n_head_kv, 1]
1972
+ // v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
1973
+ // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1974
+ // res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
1760
1975
  GGML_API struct ggml_tensor * ggml_flash_attn_ext(
1761
1976
  struct ggml_context * ctx,
1762
1977
  struct ggml_tensor * q,
@@ -1856,84 +2071,26 @@ extern "C" {
1856
2071
  struct ggml_tensor * td,
1857
2072
  struct ggml_tensor * state);
1858
2073
 
1859
- // custom operators
2074
+ GGML_API struct ggml_tensor * ggml_gated_linear_attn(
2075
+ struct ggml_context * ctx,
2076
+ struct ggml_tensor * k,
2077
+ struct ggml_tensor * v,
2078
+ struct ggml_tensor * q,
2079
+ struct ggml_tensor * g,
2080
+ struct ggml_tensor * state,
2081
+ float scale);
2082
+
2083
+ GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
2084
+ struct ggml_context * ctx,
2085
+ struct ggml_tensor * r,
2086
+ struct ggml_tensor * w,
2087
+ struct ggml_tensor * k,
2088
+ struct ggml_tensor * v,
2089
+ struct ggml_tensor * a,
2090
+ struct ggml_tensor * b,
2091
+ struct ggml_tensor * state);
1860
2092
 
1861
- typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1862
- typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1863
-
1864
- typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1865
- typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1866
- typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1867
-
1868
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
1869
- struct ggml_context * ctx,
1870
- struct ggml_tensor * a,
1871
- ggml_unary_op_f32_t fun),
1872
- "use ggml_map_custom1 instead");
1873
-
1874
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1875
- struct ggml_context * ctx,
1876
- struct ggml_tensor * a,
1877
- ggml_unary_op_f32_t fun),
1878
- "use ggml_map_custom1_inplace instead");
1879
-
1880
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
1881
- struct ggml_context * ctx,
1882
- struct ggml_tensor * a,
1883
- struct ggml_tensor * b,
1884
- ggml_binary_op_f32_t fun),
1885
- "use ggml_map_custom2 instead");
1886
-
1887
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1888
- struct ggml_context * ctx,
1889
- struct ggml_tensor * a,
1890
- struct ggml_tensor * b,
1891
- ggml_binary_op_f32_t fun),
1892
- "use ggml_map_custom2_inplace instead");
1893
-
1894
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1895
- struct ggml_context * ctx,
1896
- struct ggml_tensor * a,
1897
- ggml_custom1_op_f32_t fun),
1898
- "use ggml_map_custom1 instead");
1899
-
1900
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1901
- struct ggml_context * ctx,
1902
- struct ggml_tensor * a,
1903
- ggml_custom1_op_f32_t fun),
1904
- "use ggml_map_custom1_inplace instead");
1905
-
1906
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1907
- struct ggml_context * ctx,
1908
- struct ggml_tensor * a,
1909
- struct ggml_tensor * b,
1910
- ggml_custom2_op_f32_t fun),
1911
- "use ggml_map_custom2 instead");
1912
-
1913
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1914
- struct ggml_context * ctx,
1915
- struct ggml_tensor * a,
1916
- struct ggml_tensor * b,
1917
- ggml_custom2_op_f32_t fun),
1918
- "use ggml_map_custom2_inplace instead");
1919
-
1920
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1921
- struct ggml_context * ctx,
1922
- struct ggml_tensor * a,
1923
- struct ggml_tensor * b,
1924
- struct ggml_tensor * c,
1925
- ggml_custom3_op_f32_t fun),
1926
- "use ggml_map_custom3 instead");
1927
-
1928
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1929
- struct ggml_context * ctx,
1930
- struct ggml_tensor * a,
1931
- struct ggml_tensor * b,
1932
- struct ggml_tensor * c,
1933
- ggml_custom3_op_f32_t fun),
1934
- "use ggml_map_custom3_inplace instead");
1935
-
1936
- // custom operators v2
2093
+ // custom operators
1937
2094
 
1938
2095
  typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
1939
2096
  typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
@@ -1990,6 +2147,30 @@ extern "C" {
1990
2147
  int n_tasks,
1991
2148
  void * userdata);
1992
2149
 
2150
+ typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata);
2151
+
2152
+ GGML_API struct ggml_tensor * ggml_custom_4d(
2153
+ struct ggml_context * ctx,
2154
+ enum ggml_type type,
2155
+ int64_t ne0,
2156
+ int64_t ne1,
2157
+ int64_t ne2,
2158
+ int64_t ne3,
2159
+ struct ggml_tensor ** args,
2160
+ int n_args,
2161
+ ggml_custom_op_t fun,
2162
+ int n_tasks,
2163
+ void * userdata);
2164
+
2165
+ GGML_API struct ggml_tensor * ggml_custom_inplace(
2166
+ struct ggml_context * ctx,
2167
+ struct ggml_tensor * a,
2168
+ struct ggml_tensor ** args,
2169
+ int n_args,
2170
+ ggml_custom_op_t fun,
2171
+ int n_tasks,
2172
+ void * userdata);
2173
+
1993
2174
  // loss function
1994
2175
 
1995
2176
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
@@ -2020,15 +2201,14 @@ extern "C" {
2020
2201
 
2021
2202
  GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
2022
2203
  GGML_API void ggml_build_backward_expand(
2023
- struct ggml_context * ctx_static, // context for static gradients (loss + gradient accumulation)
2024
- struct ggml_context * ctx_compute, // context for gradient computation
2025
- struct ggml_cgraph * cgraph,
2026
- bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
2204
+ struct ggml_context * ctx, // context for gradient computation
2205
+ struct ggml_cgraph * cgraph,
2206
+ struct ggml_tensor ** grad_accs);
2027
2207
 
2028
2208
  // graph allocation in a context
2029
2209
  GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
2030
2210
  GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
2031
- GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
2211
+ GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
2032
2212
  GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
2033
2213
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
2034
2214
  GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
@@ -2047,9 +2227,6 @@ extern "C" {
2047
2227
  GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2048
2228
  GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2049
2229
 
2050
- GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
2051
- GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
2052
-
2053
2230
  // print info and performance information for the graph
2054
2231
  GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
2055
2232
 
@@ -2094,132 +2271,6 @@ extern "C" {
2094
2271
  int64_t n_per_row,
2095
2272
  const float * imatrix);
2096
2273
 
2097
- //
2098
- // gguf
2099
- //
2100
-
2101
- enum gguf_type {
2102
- GGUF_TYPE_UINT8 = 0,
2103
- GGUF_TYPE_INT8 = 1,
2104
- GGUF_TYPE_UINT16 = 2,
2105
- GGUF_TYPE_INT16 = 3,
2106
- GGUF_TYPE_UINT32 = 4,
2107
- GGUF_TYPE_INT32 = 5,
2108
- GGUF_TYPE_FLOAT32 = 6,
2109
- GGUF_TYPE_BOOL = 7,
2110
- GGUF_TYPE_STRING = 8,
2111
- GGUF_TYPE_ARRAY = 9,
2112
- GGUF_TYPE_UINT64 = 10,
2113
- GGUF_TYPE_INT64 = 11,
2114
- GGUF_TYPE_FLOAT64 = 12,
2115
- GGUF_TYPE_COUNT, // marks the end of the enum
2116
- };
2117
-
2118
- struct gguf_context;
2119
-
2120
- struct gguf_init_params {
2121
- bool no_alloc;
2122
-
2123
- // if not NULL, create a ggml_context and allocate the tensor data in it
2124
- struct ggml_context ** ctx;
2125
- };
2126
-
2127
- GGML_API struct gguf_context * gguf_init_empty(void);
2128
- GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
2129
- //GGML_API struct gguf_context * gguf_init_from_buffer(..);
2130
-
2131
- GGML_API void gguf_free(struct gguf_context * ctx);
2132
-
2133
- GGML_API const char * gguf_type_name(enum gguf_type type);
2134
-
2135
- GGML_API int gguf_get_version (const struct gguf_context * ctx);
2136
- GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
2137
- GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
2138
- GGML_API void * gguf_get_data (const struct gguf_context * ctx);
2139
-
2140
- GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
2141
- GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
2142
- GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
2143
-
2144
- GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
2145
- GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
2146
-
2147
- // will abort if the wrong type is used for the key
2148
- GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
2149
- GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
2150
- GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
2151
- GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
2152
- GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
2153
- GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
2154
- GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
2155
- GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
2156
- GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
2157
- GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
2158
- GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
2159
- GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
2160
- GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
2161
- GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
2162
- GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2163
- GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
2164
-
2165
- GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2166
- GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2167
- GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2168
- GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2169
- GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
2170
-
2171
- // removes key if it exists
2172
- GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
2173
-
2174
- // overrides existing values or adds a new one
2175
- GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
2176
- GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
2177
- GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
2178
- GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
2179
- GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
2180
- GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
2181
- GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
2182
- GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
2183
- GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
2184
- GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
2185
- GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
2186
- GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
2187
- GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
2188
- GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
2189
-
2190
- // set or add KV pairs from another context
2191
- GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
2192
-
2193
- // manage tensor info
2194
- GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
2195
- GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
2196
- GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
2197
-
2198
- // writing gguf files can be done in 2 ways:
2199
- //
2200
- // - write the entire gguf_context to a binary file in a single pass:
2201
- //
2202
- // gguf_write_to_file(ctx, fname);
2203
- //
2204
- // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
2205
- //
2206
- // FILE * f = fopen(fname, "wb");
2207
- // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
2208
- // fwrite(f, ...);
2209
- // void * data = gguf_meta_get_meta_data(ctx);
2210
- // fseek(f, 0, SEEK_SET);
2211
- // fwrite(f, data, gguf_get_meta_size(ctx));
2212
- // free(data);
2213
- // fclose(f);
2214
- //
2215
-
2216
- // write the entire context to a binary file
2217
- GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
2218
-
2219
- // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
2220
- GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
2221
- GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
2222
-
2223
2274
  #ifdef __cplusplus
2224
2275
  // restrict not standard in C++
2225
2276
  # if defined(__GNUC__)
@@ -2232,7 +2283,11 @@ extern "C" {
2232
2283
  # define GGML_RESTRICT
2233
2284
  # endif
2234
2285
  #else
2235
- # define GGML_RESTRICT restrict
2286
+ # if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
2287
+ # define GGML_RESTRICT __restrict
2288
+ # else
2289
+ # define GGML_RESTRICT restrict
2290
+ # endif
2236
2291
  #endif
2237
2292
  typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2238
2293
  typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -2255,6 +2310,7 @@ extern "C" {
2255
2310
 
2256
2311
  // scheduling priorities
2257
2312
  enum ggml_sched_priority {
2313
+ GGML_SCHED_PRIO_LOW = -1,
2258
2314
  GGML_SCHED_PRIO_NORMAL,
2259
2315
  GGML_SCHED_PRIO_MEDIUM,
2260
2316
  GGML_SCHED_PRIO_HIGH,