whispercpp 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (787)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/LICENSE +1 -1
  4. data/README.md +216 -424
  5. data/Rakefile +79 -11
  6. data/ext/.gitignore +11 -0
  7. data/ext/dependencies.rb +61 -0
  8. data/ext/extconf.rb +18 -26
  9. data/ext/options.rb +221 -0
  10. data/ext/ruby_whisper.c +159 -0
  11. data/ext/ruby_whisper.h +27 -2
  12. data/ext/ruby_whisper_context.c +641 -0
  13. data/ext/ruby_whisper_error.c +52 -0
  14. data/ext/ruby_whisper_model.c +232 -0
  15. data/ext/ruby_whisper_params.c +1301 -0
  16. data/ext/ruby_whisper_segment.c +143 -0
  17. data/ext/ruby_whisper_transcribe.cpp +87 -0
  18. data/ext/ruby_whisper_vad_params.c +288 -0
  19. data/ext/sources/.dockerignore +3 -0
  20. data/ext/sources/.github/workflows/bindings-ruby.yml +21 -0
  21. data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
  22. data/ext/sources/CMakeLists.txt +251 -0
  23. data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
  24. data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
  25. data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
  26. data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
  27. data/ext/sources/bindings/javascript/package.json +26 -0
  28. data/ext/sources/bindings/javascript/whisper.js +19 -0
  29. data/ext/sources/build-xcframework.sh +547 -0
  30. data/ext/sources/ci/run.sh +336 -0
  31. data/ext/sources/close-issue.yml +28 -0
  32. data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
  33. data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
  34. data/ext/sources/cmake/build-info.cmake +60 -0
  35. data/ext/sources/cmake/git-vars.cmake +22 -0
  36. data/ext/sources/cmake/whisper-config.cmake.in +65 -0
  37. data/ext/sources/cmake/whisper.pc.in +10 -0
  38. data/ext/sources/examples/CMakeLists.txt +124 -0
  39. data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
  40. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +37 -0
  41. data/ext/sources/examples/addon.node/addon.cpp +438 -0
  42. data/ext/sources/examples/addon.node/index.js +54 -0
  43. data/ext/sources/examples/addon.node/package.json +16 -0
  44. data/ext/sources/examples/bench/CMakeLists.txt +8 -0
  45. data/ext/sources/examples/bench/bench.cpp +175 -0
  46. data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
  47. data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
  48. data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
  49. data/ext/sources/examples/cli/CMakeLists.txt +8 -0
  50. data/ext/sources/examples/cli/cli.cpp +1294 -0
  51. data/ext/sources/examples/coi-serviceworker.js +146 -0
  52. data/ext/sources/examples/command/CMakeLists.txt +10 -0
  53. data/ext/sources/examples/command/command.cpp +776 -0
  54. data/ext/sources/examples/command/commands.txt +9 -0
  55. data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
  56. data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
  57. data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
  58. data/ext/sources/examples/common-ggml.cpp +238 -0
  59. data/ext/sources/examples/common-ggml.h +18 -0
  60. data/ext/sources/examples/common-sdl.cpp +227 -0
  61. data/ext/sources/examples/common-sdl.h +49 -0
  62. data/ext/sources/examples/common-whisper.cpp +168 -0
  63. data/ext/sources/examples/common-whisper.h +24 -0
  64. data/ext/sources/examples/common.cpp +675 -0
  65. data/ext/sources/examples/common.h +322 -0
  66. data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
  67. data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
  68. data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
  69. data/ext/sources/examples/generate-karaoke.sh +57 -0
  70. data/ext/sources/examples/grammar-parser.cpp +423 -0
  71. data/ext/sources/examples/grammar-parser.h +29 -0
  72. data/ext/sources/examples/helpers.js +191 -0
  73. data/ext/sources/examples/json.hpp +24596 -0
  74. data/ext/sources/examples/livestream.sh +112 -0
  75. data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
  76. data/ext/sources/examples/lsp/lsp.cpp +467 -0
  77. data/ext/sources/examples/lsp/whisper.vim +362 -0
  78. data/ext/sources/examples/miniaudio.h +93468 -0
  79. data/ext/sources/examples/python/test_whisper_processor.py +7 -0
  80. data/ext/sources/examples/python/whisper_processor.py +54 -0
  81. data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
  82. data/ext/sources/examples/quantize/quantize.cpp +223 -0
  83. data/ext/sources/examples/server/CMakeLists.txt +12 -0
  84. data/ext/sources/examples/server/bench.js +29 -0
  85. data/ext/sources/examples/server/httplib.h +10497 -0
  86. data/ext/sources/examples/server/server.cpp +1091 -0
  87. data/ext/sources/examples/server.py +115 -0
  88. data/ext/sources/examples/stb_vorbis.c +5584 -0
  89. data/ext/sources/examples/stream/CMakeLists.txt +10 -0
  90. data/ext/sources/examples/stream/stream.cpp +429 -0
  91. data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
  92. data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
  93. data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
  94. data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
  95. data/ext/sources/examples/sycl/build.sh +22 -0
  96. data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
  97. data/ext/sources/examples/sycl/run-whisper.sh +17 -0
  98. data/ext/sources/examples/talk-llama/CMakeLists.txt +40 -0
  99. data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
  100. data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
  101. data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
  102. data/ext/sources/examples/talk-llama/llama-arch.cpp +1746 -0
  103. data/ext/sources/examples/talk-llama/llama-arch.h +437 -0
  104. data/ext/sources/examples/talk-llama/llama-batch.cpp +374 -0
  105. data/ext/sources/examples/talk-llama/llama-batch.h +89 -0
  106. data/ext/sources/examples/talk-llama/llama-chat.cpp +663 -0
  107. data/ext/sources/examples/talk-llama/llama-chat.h +58 -0
  108. data/ext/sources/examples/talk-llama/llama-context.cpp +2676 -0
  109. data/ext/sources/examples/talk-llama/llama-context.h +276 -0
  110. data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
  111. data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
  112. data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
  113. data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
  114. data/ext/sources/examples/talk-llama/llama-graph.cpp +1618 -0
  115. data/ext/sources/examples/talk-llama/llama-graph.h +640 -0
  116. data/ext/sources/examples/talk-llama/llama-hparams.cpp +95 -0
  117. data/ext/sources/examples/talk-llama/llama-hparams.h +190 -0
  118. data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
  119. data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
  120. data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
  121. data/ext/sources/examples/talk-llama/llama-io.h +35 -0
  122. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2739 -0
  123. data/ext/sources/examples/talk-llama/llama-kv-cache.h +502 -0
  124. data/ext/sources/examples/talk-llama/llama-kv-cells.h +379 -0
  125. data/ext/sources/examples/talk-llama/llama-memory.cpp +1 -0
  126. data/ext/sources/examples/talk-llama/llama-memory.h +32 -0
  127. data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
  128. data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
  129. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1138 -0
  130. data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
  131. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +281 -0
  132. data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
  133. data/ext/sources/examples/talk-llama/llama-model.cpp +13814 -0
  134. data/ext/sources/examples/talk-llama/llama-model.h +425 -0
  135. data/ext/sources/examples/talk-llama/llama-quant.cpp +966 -0
  136. data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
  137. data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
  138. data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
  139. data/ext/sources/examples/talk-llama/llama-vocab.cpp +3340 -0
  140. data/ext/sources/examples/talk-llama/llama-vocab.h +131 -0
  141. data/ext/sources/examples/talk-llama/llama.cpp +354 -0
  142. data/ext/sources/examples/talk-llama/llama.h +1377 -0
  143. data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
  144. data/ext/sources/examples/talk-llama/speak +40 -0
  145. data/ext/sources/examples/talk-llama/speak.bat +1 -0
  146. data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
  147. data/ext/sources/examples/talk-llama/talk-llama.cpp +808 -0
  148. data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
  149. data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
  150. data/ext/sources/examples/talk-llama/unicode.cpp +849 -0
  151. data/ext/sources/examples/talk-llama/unicode.h +66 -0
  152. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
  153. data/ext/sources/examples/vad-speech-segments/speech.cpp +143 -0
  154. data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
  155. data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
  156. data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
  157. data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
  158. data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
  159. data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
  160. data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
  161. data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
  162. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +249 -0
  163. data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
  164. data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
  165. data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
  166. data/ext/sources/ggml/CMakeLists.txt +390 -0
  167. data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
  168. data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
  169. data/ext/sources/ggml/cmake/common.cmake +26 -0
  170. data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
  171. data/ext/sources/ggml/include/ggml-alloc.h +76 -0
  172. data/ext/sources/ggml/include/ggml-backend.h +354 -0
  173. data/ext/sources/ggml/include/ggml-blas.h +25 -0
  174. data/ext/sources/ggml/include/ggml-cann.h +123 -0
  175. data/ext/sources/ggml/include/ggml-cpp.h +39 -0
  176. data/ext/sources/ggml/include/ggml-cpu.h +143 -0
  177. data/ext/sources/ggml/include/ggml-cuda.h +47 -0
  178. data/ext/sources/ggml/include/ggml-kompute.h +50 -0
  179. data/ext/sources/ggml/include/ggml-metal.h +66 -0
  180. data/ext/sources/ggml/include/ggml-opencl.h +26 -0
  181. data/ext/sources/ggml/include/ggml-opt.h +237 -0
  182. data/ext/sources/ggml/include/ggml-rpc.h +33 -0
  183. data/ext/sources/ggml/include/ggml-sycl.h +49 -0
  184. data/ext/sources/ggml/include/ggml-vulkan.h +29 -0
  185. data/ext/{ggml.h → sources/ggml/include/ggml.h} +621 -821
  186. data/ext/sources/ggml/include/gguf.h +202 -0
  187. data/ext/sources/ggml/src/CMakeLists.txt +346 -0
  188. data/ext/sources/ggml/src/ggml-alloc.c +1042 -0
  189. data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  190. data/ext/sources/ggml/src/ggml-amx/common.h +94 -0
  191. data/ext/sources/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  192. data/ext/sources/ggml/src/ggml-amx/mmq.cpp +2510 -0
  193. data/ext/sources/ggml/src/ggml-amx/mmq.h +17 -0
  194. data/ext/sources/ggml/src/ggml-backend-impl.h +255 -0
  195. data/ext/sources/ggml/src/ggml-backend-reg.cpp +586 -0
  196. data/ext/sources/ggml/src/ggml-backend.cpp +2011 -0
  197. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  198. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  199. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  200. data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
  201. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +181 -0
  202. data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +258 -0
  203. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +3193 -0
  204. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  205. data/ext/sources/ggml/src/ggml-cann/common.h +420 -0
  206. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +2606 -0
  207. data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  208. data/ext/sources/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  209. data/ext/sources/ggml/src/ggml-cann/kernels/dup.cpp +234 -0
  210. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  211. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  212. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  213. data/ext/sources/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  214. data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  215. data/ext/sources/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  216. data/ext/sources/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  217. data/ext/sources/ggml/src/ggml-common.h +1857 -0
  218. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +504 -0
  219. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  220. data/ext/sources/ggml/src/ggml-cpu/amx/amx.h +8 -0
  221. data/ext/sources/ggml/src/ggml-cpu/amx/common.h +91 -0
  222. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  223. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  224. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  225. data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
  226. data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  227. data/ext/sources/ggml/src/ggml-cpu/common.h +72 -0
  228. data/ext/sources/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
  229. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  230. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  231. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  232. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  233. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +508 -0
  234. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.c +13747 -0
  235. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  236. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  237. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  238. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3510 -0
  239. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  240. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  241. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  242. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  243. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  244. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  245. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  246. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +8903 -0
  247. data/ext/sources/ggml/src/ggml-cpu/ops.h +110 -0
  248. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  249. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  250. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
  251. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +252 -0
  252. data/ext/sources/ggml/src/ggml-cpu/vec.h +818 -0
  253. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  254. data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
  255. data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
  256. data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
  257. data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
  258. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
  259. data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
  260. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
  261. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
  262. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
  263. data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  264. data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
  265. data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
  266. data/ext/sources/ggml/src/ggml-cuda/common.cuh +828 -0
  267. data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
  268. data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
  269. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  270. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  271. data/ext/sources/ggml/src/ggml-cuda/convert.cu +730 -0
  272. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +26 -0
  273. data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
  274. data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  275. data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  276. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
  277. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
  278. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  279. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  280. data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  281. data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
  282. data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  283. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  284. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1471 -0
  285. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  286. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  287. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  288. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  289. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  290. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  291. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  292. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  293. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
  294. data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
  295. data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
  296. data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
  297. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3505 -0
  298. data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
  299. data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
  300. data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
  301. data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
  302. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
  303. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
  304. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  305. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +336 -0
  306. data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +12 -0
  307. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
  308. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  309. data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
  310. data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
  311. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  312. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  313. data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
  314. data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  315. data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
  316. data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
  317. data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
  318. data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  319. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
  320. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
  321. data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
  322. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
  323. data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
  324. data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
  325. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
  326. data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
  327. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  328. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  329. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  330. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  331. data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
  332. data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
  333. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +39 -0
  334. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  335. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  336. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  337. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  338. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  339. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  340. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  341. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  342. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  343. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  344. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  345. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  346. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  347. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  348. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  349. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  350. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  351. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  352. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  353. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  354. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  355. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  356. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  357. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  358. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  359. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  360. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  361. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  362. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  363. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  364. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  365. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  366. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  367. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  368. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  369. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  370. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  371. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  372. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  373. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  374. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  375. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  376. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  377. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  378. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  379. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  380. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  381. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  382. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  383. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  384. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  385. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  386. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  387. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  388. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  389. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  390. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  391. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  392. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  393. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  394. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  395. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  396. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  397. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  398. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  399. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  400. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  401. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  402. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  403. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  404. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  405. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  406. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  407. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  408. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  409. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  410. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  411. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  412. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  413. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  414. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  415. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  416. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  417. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  418. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  419. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  420. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  421. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  422. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  423. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  424. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  425. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  426. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  427. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  428. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  429. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  430. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  431. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  432. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  433. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  434. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  435. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  436. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  437. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  438. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  439. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  440. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  441. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  442. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  443. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  444. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  445. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  446. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  447. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  448. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  449. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  450. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  451. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  452. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  453. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  454. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  455. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  456. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  457. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  458. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  459. data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
  460. data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  461. data/ext/sources/ggml/src/ggml-cuda/unary.cu +289 -0
  462. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +59 -0
  463. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
  464. data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
  465. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  466. data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  467. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  468. data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  469. data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
  470. data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
  471. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  472. data/ext/sources/ggml/src/ggml-impl.h +601 -0
  473. data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  474. data/ext/sources/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  475. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  476. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  477. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  478. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  479. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  480. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  481. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  482. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  483. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  484. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  485. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  486. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  487. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  488. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  489. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  490. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  491. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  492. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  493. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  494. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  495. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  496. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  497. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  498. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  499. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  500. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  501. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  502. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  503. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  504. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  505. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  506. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  507. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  508. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  509. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  510. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  511. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  512. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  513. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
  514. data/ext/sources/ggml/src/ggml-metal/ggml-metal.m +5998 -0
  515. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +7089 -0
  516. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  517. data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
  518. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
  519. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  520. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +5124 -0
  521. data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  522. data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  523. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  524. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  525. data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  526. data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  527. data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  528. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  529. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  530. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  531. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  532. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  533. data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  534. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  535. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  536. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  537. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  538. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  539. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  540. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  541. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  542. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  543. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  544. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  545. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  546. data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  547. data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  548. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  549. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  550. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  551. data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  552. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  553. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  554. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  555. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  556. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  557. data/ext/sources/ggml/src/ggml-opt.cpp +1037 -0
  558. data/ext/sources/ggml/src/ggml-quants.c +5232 -0
  559. data/ext/sources/ggml/src/ggml-quants.h +100 -0
  560. data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  561. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
  562. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  563. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
  564. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +345 -0
  565. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  566. data/ext/sources/ggml/src/ggml-sycl/common.cpp +83 -0
  567. data/ext/sources/ggml/src/ggml-sycl/common.hpp +589 -0
  568. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +195 -0
  569. data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
  570. data/ext/sources/ggml/src/ggml-sycl/conv.cpp +101 -0
  571. data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
  572. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +623 -0
  573. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
  574. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +700 -0
  575. data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
  576. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +791 -0
  577. data/ext/sources/ggml/src/ggml-sycl/dmmv.cpp +1162 -0
  578. data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  579. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  580. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
  581. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  582. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +99 -0
  583. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +309 -0
  584. data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
  585. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +4493 -0
  586. data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
  587. data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
  588. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
  589. data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
  590. data/ext/sources/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  591. data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
  592. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1110 -0
  593. data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  594. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +501 -0
  595. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
  596. data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +47 -0
  597. data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
  598. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
  599. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +83 -0
  600. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +361 -0
  601. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
  602. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +261 -0
  603. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
  604. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  605. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  606. data/ext/sources/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  607. data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  608. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1215 -0
  609. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +293 -0
  610. data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
  611. data/ext/sources/ggml/src/ggml-threading.cpp +12 -0
  612. data/ext/sources/ggml/src/ggml-threading.h +14 -0
  613. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +196 -0
  614. data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  615. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10700 -0
  616. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +39 -0
  617. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  618. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  619. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  620. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  621. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  622. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  623. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  624. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  625. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  626. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  627. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  628. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  629. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  630. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  631. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  632. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  633. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  634. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  635. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  636. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  637. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  638. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  639. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  640. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  641. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  642. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  643. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  644. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  645. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  646. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  647. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  648. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  649. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  650. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  651. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  652. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  653. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  654. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  655. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  656. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  657. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  658. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  659. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  660. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  661. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  662. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  663. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  664. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  665. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  666. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  667. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  668. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  669. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  670. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  671. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  672. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  673. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  674. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  675. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  676. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  677. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  678. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  679. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  680. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  681. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  682. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  683. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  684. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  685. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  686. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  687. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  688. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  689. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  690. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  691. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  692. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  693. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  694. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  695. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  696. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  697. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  698. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  699. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  700. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  701. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  702. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  703. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  704. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  705. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  706. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  707. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  708. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  709. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  710. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  711. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  712. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  713. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  714. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  715. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  716. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  717. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  718. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  719. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  720. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  721. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  722. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  723. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  724. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  725. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  726. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +751 -0
  727. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  728. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  729. data/ext/sources/ggml/src/ggml.c +6550 -0
  730. data/ext/sources/ggml/src/gguf.cpp +1330 -0
  731. data/ext/{whisper.h → sources/include/whisper.h} +91 -24
  732. data/ext/sources/src/CMakeLists.txt +143 -0
  733. data/ext/sources/src/coreml/whisper-decoder-impl.h +158 -0
  734. data/ext/sources/src/coreml/whisper-decoder-impl.m +226 -0
  735. data/ext/sources/src/coreml/whisper-encoder-impl.h +154 -0
  736. data/ext/sources/src/coreml/whisper-encoder-impl.m +222 -0
  737. data/ext/sources/src/coreml/whisper-encoder.h +26 -0
  738. data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
  739. data/ext/sources/src/openvino/whisper-openvino-encoder.cpp +108 -0
  740. data/ext/sources/src/openvino/whisper-openvino-encoder.h +31 -0
  741. data/ext/sources/src/whisper-arch.h +197 -0
  742. data/ext/{whisper.cpp → sources/src/whisper.cpp} +2535 -835
  743. data/ext/sources/tests/CMakeLists.txt +105 -0
  744. data/ext/sources/tests/earnings21/eval.mk +58 -0
  745. data/ext/sources/tests/earnings21/eval.py +68 -0
  746. data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
  747. data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
  748. data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
  749. data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
  750. data/ext/sources/tests/earnings21/requirements.txt +6 -0
  751. data/ext/sources/tests/en-0-ref.txt +1 -0
  752. data/ext/sources/tests/en-1-ref.txt +1 -0
  753. data/ext/sources/tests/en-2-ref.txt +1 -0
  754. data/ext/sources/tests/es-0-ref.txt +1 -0
  755. data/ext/sources/tests/librispeech/eval.mk +39 -0
  756. data/ext/sources/tests/librispeech/eval.py +47 -0
  757. data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
  758. data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
  759. data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
  760. data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
  761. data/ext/sources/tests/librispeech/requirements.txt +6 -0
  762. data/ext/sources/tests/run-tests.sh +130 -0
  763. data/ext/sources/tests/test-c.c +3 -0
  764. data/ext/sources/tests/test-vad-full.cpp +54 -0
  765. data/ext/sources/tests/test-vad.cpp +83 -0
  766. data/ext/sources/tests/test-whisper.js +58 -0
  767. data/extsources.rb +34 -0
  768. data/lib/whisper/model/uri.rb +178 -0
  769. data/sig/whisper.rbs +480 -0
  770. data/tests/helper.rb +35 -0
  771. data/tests/jfk_reader/.gitignore +5 -0
  772. data/tests/jfk_reader/extconf.rb +3 -0
  773. data/tests/jfk_reader/jfk_reader.c +68 -0
  774. data/tests/test_callback.rb +202 -0
  775. data/tests/test_error.rb +20 -0
  776. data/tests/test_model.rb +109 -0
  777. data/tests/test_package.rb +46 -0
  778. data/tests/test_params.rb +297 -0
  779. data/tests/test_segment.rb +74 -0
  780. data/tests/test_vad.rb +19 -0
  781. data/tests/test_vad_params.rb +103 -0
  782. data/tests/test_whisper.rb +212 -124
  783. data/whispercpp.gemspec +37 -0
  784. metadata +794 -13
  785. data/ext/dr_wav.h +0 -6434
  786. data/ext/ggml.c +0 -21755
  787. data/ext/ruby_whisper.cpp +0 -426
@@ -176,25 +176,15 @@
176
176
  #ifdef GGML_SHARED
177
177
  # if defined(_WIN32) && !defined(__MINGW32__)
178
178
  # ifdef GGML_BUILD
179
- # define GGML_API __declspec(dllexport)
179
+ # define GGML_API __declspec(dllexport) extern
180
180
  # else
181
- # define GGML_API __declspec(dllimport)
181
+ # define GGML_API __declspec(dllimport) extern
182
182
  # endif
183
183
  # else
184
- # define GGML_API __attribute__ ((visibility ("default")))
184
+ # define GGML_API __attribute__ ((visibility ("default"))) extern
185
185
  # endif
186
186
  #else
187
- # define GGML_API
188
- #endif
189
-
190
- #ifdef GGML_MULTIPLATFORM
191
- # if defined(_WIN32)
192
- # define GGML_CALL
193
- # else
194
- # define GGML_CALL __attribute__((__ms_abi__))
195
- # endif
196
- #else
197
- # define GGML_CALL
187
+ # define GGML_API extern
198
188
  #endif
199
189
 
200
190
  // TODO: support for clang
@@ -208,7 +198,7 @@
208
198
 
209
199
  #ifndef __GNUC__
210
200
  # define GGML_ATTRIBUTE_FORMAT(...)
211
- #elif defined(__MINGW32__)
201
+ #elif defined(__MINGW32__) && !defined(__clang__)
212
202
  # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
213
203
  #else
214
204
  # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@@ -220,21 +210,24 @@
220
210
  #include <stdio.h>
221
211
 
222
212
  #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
223
- #define GGML_FILE_VERSION 1
213
+ #define GGML_FILE_VERSION 2
224
214
 
225
215
  #define GGML_QNT_VERSION 2 // bump this on quantization format changes
226
216
  #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
227
217
 
228
218
  #define GGML_MAX_DIMS 4
229
219
  #define GGML_MAX_PARAMS 2048
230
- #define GGML_MAX_CONTEXTS 64
231
220
  #define GGML_MAX_SRC 10
221
+ #define GGML_MAX_N_THREADS 512
222
+ #define GGML_MAX_OP_PARAMS 64
223
+
232
224
  #ifndef GGML_MAX_NAME
233
- #define GGML_MAX_NAME 64
225
+ # define GGML_MAX_NAME 64
234
226
  #endif
235
- #define GGML_MAX_OP_PARAMS 64
227
+
236
228
  #define GGML_DEFAULT_N_THREADS 4
237
229
  #define GGML_DEFAULT_GRAPH_SIZE 2048
230
+
238
231
  #if UINTPTR_MAX == 0xFFFFFFFF
239
232
  #define GGML_MEM_ALIGN 4
240
233
  #else
@@ -244,36 +237,35 @@
244
237
  #define GGML_EXIT_SUCCESS 0
245
238
  #define GGML_EXIT_ABORTED 1
246
239
 
247
- #define GGUF_MAGIC "GGUF"
248
-
249
- #define GGUF_VERSION 3
250
-
251
- #define GGUF_DEFAULT_ALIGNMENT 32
240
+ #define GGML_ROPE_TYPE_NEOX 2
241
+ #define GGML_ROPE_TYPE_MROPE 8
242
+ #define GGML_ROPE_TYPE_VISION 24
252
243
 
253
244
  #define GGML_UNUSED(x) (void)(x)
254
245
 
255
246
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
256
247
 
257
- #define GGML_ASSERT(x) \
258
- do { \
259
- if (!(x)) { \
260
- fflush(stdout); \
261
- fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
262
- ggml_print_backtrace(); \
263
- abort(); \
264
- } \
265
- } while (0)
266
-
267
248
  #ifndef NDEBUG
268
- #define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
249
+ # define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
269
250
  #elif defined(__GNUC__)
270
- #define GGML_UNREACHABLE() __builtin_unreachable()
251
+ # define GGML_UNREACHABLE() __builtin_unreachable()
252
+ #elif defined(_MSC_VER)
253
+ # define GGML_UNREACHABLE() __assume(0)
254
+ #else
255
+ # define GGML_UNREACHABLE() ((void) 0)
256
+ #endif
257
+
258
+ #ifdef __cplusplus
259
+ # define GGML_NORETURN [[noreturn]]
271
260
  #elif defined(_MSC_VER)
272
- #define GGML_UNREACHABLE() __assume(0)
261
+ # define GGML_NORETURN __declspec(noreturn)
273
262
  #else
274
- #define GGML_UNREACHABLE() ((void) 0)
263
+ # define GGML_NORETURN _Noreturn
275
264
  #endif
276
265
 
266
+ #define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
267
+ #define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)
268
+
277
269
  // used to copy the number of elements and stride in bytes of tensors into local variables.
278
270
  // main purpose is to reduce code duplication and improve readability.
279
271
  //
@@ -312,10 +304,19 @@
312
304
  GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
313
305
  GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
314
306
 
307
+ #define GGML_TENSOR_BINARY_OP_LOCALS01 \
308
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
309
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
310
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
311
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
312
+
315
313
  #ifdef __cplusplus
316
314
  extern "C" {
317
315
  #endif
318
316
 
317
+ GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
318
+ GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
319
+
319
320
  enum ggml_status {
320
321
  GGML_STATUS_ALLOC_FAILED = -2,
321
322
  GGML_STATUS_FAILED = -1,
@@ -324,19 +325,27 @@ extern "C" {
324
325
  };
325
326
 
326
327
  // get ggml_status name string
327
- GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
328
+ GGML_API const char * ggml_status_to_string(enum ggml_status status);
328
329
 
330
+ // ieee 754-2008 half-precision float16
331
+ // todo: make this not an integral type
329
332
  typedef uint16_t ggml_fp16_t;
330
-
331
- // convert FP16 <-> FP32
332
- GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
333
- GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
334
-
335
- GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
336
- GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
333
+ GGML_API float ggml_fp16_to_fp32(ggml_fp16_t);
334
+ GGML_API ggml_fp16_t ggml_fp32_to_fp16(float);
335
+ GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t *, float *, int64_t);
336
+ GGML_API void ggml_fp32_to_fp16_row(const float *, ggml_fp16_t *, int64_t);
337
+
338
+ // google brain half-precision bfloat16
339
+ typedef struct { uint16_t bits; } ggml_bf16_t;
340
+ GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
341
+ GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
342
+ GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
343
+ GGML_API void ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
344
+ GGML_API void ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
337
345
 
338
346
  struct ggml_object;
339
347
  struct ggml_context;
348
+ struct ggml_cgraph;
340
349
 
341
350
  // NOTE: always add types at the end of the enum to keep backward compatibility
342
351
  enum ggml_type {
@@ -370,19 +379,22 @@ extern "C" {
370
379
  GGML_TYPE_I64 = 27,
371
380
  GGML_TYPE_F64 = 28,
372
381
  GGML_TYPE_IQ1_M = 29,
373
- GGML_TYPE_COUNT,
382
+ GGML_TYPE_BF16 = 30,
383
+ // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
384
+ // GGML_TYPE_Q4_0_4_8 = 32,
385
+ // GGML_TYPE_Q4_0_8_8 = 33,
386
+ GGML_TYPE_TQ1_0 = 34,
387
+ GGML_TYPE_TQ2_0 = 35,
388
+ // GGML_TYPE_IQ4_NL_4_4 = 36,
389
+ // GGML_TYPE_IQ4_NL_4_8 = 37,
390
+ // GGML_TYPE_IQ4_NL_8_8 = 38,
391
+ GGML_TYPE_COUNT = 39,
374
392
  };
375
393
 
376
394
  // precision
377
395
  enum ggml_prec {
378
- GGML_PREC_DEFAULT,
379
- GGML_PREC_F32,
380
- };
381
-
382
- enum ggml_backend_type {
383
- GGML_BACKEND_TYPE_CPU = 0,
384
- GGML_BACKEND_TYPE_GPU = 10,
385
- GGML_BACKEND_TYPE_GPU_SPLIT = 20,
396
+ GGML_PREC_DEFAULT = 0, // stored as ggml_tensor.op_params, 0 by default
397
+ GGML_PREC_F32 = 10,
386
398
  };
387
399
 
388
400
  // model file types
@@ -410,6 +422,7 @@ extern "C" {
410
422
  GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
411
423
  GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
412
424
  GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
425
+ GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
413
426
  };
414
427
 
415
428
  // available tensor operations:
@@ -426,10 +439,13 @@ extern "C" {
426
439
  GGML_OP_SQR,
427
440
  GGML_OP_SQRT,
428
441
  GGML_OP_LOG,
442
+ GGML_OP_SIN,
443
+ GGML_OP_COS,
429
444
  GGML_OP_SUM,
430
445
  GGML_OP_SUM_ROWS,
431
446
  GGML_OP_MEAN,
432
447
  GGML_OP_ARGMAX,
448
+ GGML_OP_COUNT_EQUAL,
433
449
  GGML_OP_REPEAT,
434
450
  GGML_OP_REPEAT_BACK,
435
451
  GGML_OP_CONCAT,
@@ -438,6 +454,7 @@ extern "C" {
438
454
  GGML_OP_RMS_NORM,
439
455
  GGML_OP_RMS_NORM_BACK,
440
456
  GGML_OP_GROUP_NORM,
457
+ GGML_OP_L2_NORM,
441
458
 
442
459
  GGML_OP_MUL_MAT,
443
460
  GGML_OP_MUL_MAT_ID,
@@ -460,22 +477,24 @@ extern "C" {
460
477
  GGML_OP_SOFT_MAX_BACK,
461
478
  GGML_OP_ROPE,
462
479
  GGML_OP_ROPE_BACK,
463
- GGML_OP_ALIBI,
464
480
  GGML_OP_CLAMP,
465
481
  GGML_OP_CONV_TRANSPOSE_1D,
466
482
  GGML_OP_IM2COL,
483
+ GGML_OP_IM2COL_BACK,
484
+ GGML_OP_CONV_2D_DW,
467
485
  GGML_OP_CONV_TRANSPOSE_2D,
468
486
  GGML_OP_POOL_1D,
469
487
  GGML_OP_POOL_2D,
488
+ GGML_OP_POOL_2D_BACK,
470
489
  GGML_OP_UPSCALE, // nearest interpolate
471
490
  GGML_OP_PAD,
491
+ GGML_OP_PAD_REFLECT_1D,
472
492
  GGML_OP_ARANGE,
473
493
  GGML_OP_TIMESTEP_EMBEDDING,
474
494
  GGML_OP_ARGSORT,
475
495
  GGML_OP_LEAKY_RELU,
476
496
 
477
- GGML_OP_FLASH_ATTN,
478
- GGML_OP_FLASH_FF,
497
+ GGML_OP_FLASH_ATTN_EXT,
479
498
  GGML_OP_FLASH_ATTN_BACK,
480
499
  GGML_OP_SSM_CONV,
481
500
  GGML_OP_SSM_SCAN,
@@ -483,22 +502,21 @@ extern "C" {
483
502
  GGML_OP_WIN_UNPART,
484
503
  GGML_OP_GET_REL_POS,
485
504
  GGML_OP_ADD_REL_POS,
505
+ GGML_OP_RWKV_WKV6,
506
+ GGML_OP_GATED_LINEAR_ATTN,
507
+ GGML_OP_RWKV_WKV7,
486
508
 
487
509
  GGML_OP_UNARY,
488
510
 
489
- GGML_OP_MAP_UNARY,
490
- GGML_OP_MAP_BINARY,
491
-
492
- GGML_OP_MAP_CUSTOM1_F32,
493
- GGML_OP_MAP_CUSTOM2_F32,
494
- GGML_OP_MAP_CUSTOM3_F32,
495
-
496
511
  GGML_OP_MAP_CUSTOM1,
497
512
  GGML_OP_MAP_CUSTOM2,
498
513
  GGML_OP_MAP_CUSTOM3,
499
514
 
515
+ GGML_OP_CUSTOM,
516
+
500
517
  GGML_OP_CROSS_ENTROPY_LOSS,
501
518
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
519
+ GGML_OP_OPT_STEP_ADAMW,
502
520
 
503
521
  GGML_OP_COUNT,
504
522
  };
@@ -511,11 +529,14 @@ extern "C" {
511
529
  GGML_UNARY_OP_TANH,
512
530
  GGML_UNARY_OP_ELU,
513
531
  GGML_UNARY_OP_RELU,
532
+ GGML_UNARY_OP_SIGMOID,
514
533
  GGML_UNARY_OP_GELU,
515
534
  GGML_UNARY_OP_GELU_QUICK,
516
535
  GGML_UNARY_OP_SILU,
517
536
  GGML_UNARY_OP_HARDSWISH,
518
537
  GGML_UNARY_OP_HARDSIGMOID,
538
+ GGML_UNARY_OP_EXP,
539
+ GGML_UNARY_OP_GELU_ERF,
519
540
 
520
541
  GGML_UNARY_OP_COUNT,
521
542
  };
@@ -527,36 +548,32 @@ extern "C" {
527
548
  };
528
549
 
529
550
  enum ggml_log_level {
530
- GGML_LOG_LEVEL_ERROR = 2,
551
+ GGML_LOG_LEVEL_NONE = 0,
552
+ GGML_LOG_LEVEL_DEBUG = 1,
553
+ GGML_LOG_LEVEL_INFO = 2,
531
554
  GGML_LOG_LEVEL_WARN = 3,
532
- GGML_LOG_LEVEL_INFO = 4,
533
- GGML_LOG_LEVEL_DEBUG = 5
555
+ GGML_LOG_LEVEL_ERROR = 4,
556
+ GGML_LOG_LEVEL_CONT = 5, // continue previous log
534
557
  };
535
558
 
559
+ // this tensor...
536
560
  enum ggml_tensor_flag {
537
- GGML_TENSOR_FLAG_INPUT = 1,
538
- GGML_TENSOR_FLAG_OUTPUT = 2,
539
- GGML_TENSOR_FLAG_PARAM = 4,
561
+ GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
562
+ GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
563
+ GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
564
+ GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
540
565
  };
541
566
 
542
- // ggml object
543
- struct ggml_object {
544
- size_t offs;
545
- size_t size;
546
-
547
- struct ggml_object * next;
548
-
549
- enum ggml_object_type type;
550
-
551
- char padding[4];
567
+ struct ggml_init_params {
568
+ // memory pool
569
+ size_t mem_size; // bytes
570
+ void * mem_buffer; // if NULL, memory will be allocated internally
571
+ bool no_alloc; // don't allocate memory for the tensor data
552
572
  };
553
573
 
554
- static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
555
-
556
574
  // n-dimensional tensor
557
575
  struct ggml_tensor {
558
- enum ggml_type type;
559
- enum ggml_backend_type backend;
576
+ enum ggml_type type;
560
577
 
561
578
  struct ggml_backend_buffer * buffer;
562
579
 
@@ -574,14 +591,9 @@ extern "C" {
574
591
 
575
592
  int32_t flags;
576
593
 
577
- struct ggml_tensor * grad;
578
594
  struct ggml_tensor * src[GGML_MAX_SRC];
579
595
 
580
- // performance
581
- int perf_runs;
582
- int64_t perf_cycles;
583
- int64_t perf_time_us;
584
-
596
+ // source tensor and offset for views
585
597
  struct ggml_tensor * view_src;
586
598
  size_t view_offs;
587
599
 
@@ -601,95 +613,6 @@ extern "C" {
601
613
  // If it returns true, the computation is aborted
602
614
  typedef bool (*ggml_abort_callback)(void * data);
603
615
 
604
- // the compute plan that needs to be prepared for ggml_graph_compute()
605
- // since https://github.com/ggerganov/ggml/issues/287
606
- struct ggml_cplan {
607
- size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
608
- uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
609
-
610
- int n_threads;
611
-
612
- // abort ggml_graph_compute when true
613
- ggml_abort_callback abort_callback;
614
- void * abort_callback_data;
615
- };
616
-
617
- enum ggml_cgraph_eval_order {
618
- GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
619
- GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
620
- GGML_CGRAPH_EVAL_ORDER_COUNT
621
- };
622
-
623
- struct ggml_hash_set {
624
- size_t size;
625
- struct ggml_tensor ** keys;
626
- };
627
-
628
- // computation graph
629
- struct ggml_cgraph {
630
- int size;
631
- int n_nodes;
632
- int n_leafs;
633
-
634
- struct ggml_tensor ** nodes;
635
- struct ggml_tensor ** grads;
636
- struct ggml_tensor ** leafs;
637
-
638
- struct ggml_hash_set visited_hash_table;
639
-
640
- enum ggml_cgraph_eval_order order;
641
-
642
- // performance
643
- int perf_runs;
644
- int64_t perf_cycles;
645
- int64_t perf_time_us;
646
- };
647
-
648
- // scratch buffer
649
- struct ggml_scratch {
650
- size_t offs;
651
- size_t size;
652
- void * data;
653
- };
654
-
655
- struct ggml_init_params {
656
- // memory pool
657
- size_t mem_size; // bytes
658
- void * mem_buffer; // if NULL, memory will be allocated internally
659
- bool no_alloc; // don't allocate memory for the tensor data
660
- };
661
-
662
-
663
- // compute types
664
-
665
- // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
666
- // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
667
- enum ggml_task_type {
668
- GGML_TASK_TYPE_INIT = 0,
669
- GGML_TASK_TYPE_COMPUTE,
670
- GGML_TASK_TYPE_FINALIZE,
671
- };
672
-
673
- struct ggml_compute_params {
674
- enum ggml_task_type type;
675
-
676
- // ith = thread index, nth = number of threads
677
- int ith, nth;
678
-
679
- // work buffer for all threads
680
- size_t wsize;
681
- void * wdata;
682
- };
683
-
684
- // numa strategies
685
- enum ggml_numa_strategy {
686
- GGML_NUMA_STRATEGY_DISABLED = 0,
687
- GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
688
- GGML_NUMA_STRATEGY_ISOLATE = 2,
689
- GGML_NUMA_STRATEGY_NUMACTL = 3,
690
- GGML_NUMA_STRATEGY_MIRROR = 4,
691
- GGML_NUMA_STRATEGY_COUNT
692
- };
693
616
 
694
617
  //
695
618
  // GUID
@@ -709,67 +632,78 @@ extern "C" {
709
632
  GGML_API int64_t ggml_cycles(void);
710
633
  GGML_API int64_t ggml_cycles_per_ms(void);
711
634
 
712
- GGML_API void ggml_print_backtrace(void);
713
-
714
635
  // accepts a UTF-8 path, even on Windows
715
636
  GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
716
637
 
717
- GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
718
- GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
719
-
720
638
  GGML_API void ggml_print_object (const struct ggml_object * obj);
721
639
  GGML_API void ggml_print_objects(const struct ggml_context * ctx);
722
640
 
723
- GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor);
724
- GGML_API GGML_CALL int64_t ggml_nrows (const struct ggml_tensor * tensor);
725
- GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
726
- GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
641
+ GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
642
+ GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
643
+ GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
644
+ GGML_API size_t ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
727
645
 
728
- GGML_API GGML_CALL int ggml_blck_size(enum ggml_type type);
729
- GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
730
- GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
646
+ GGML_API int64_t ggml_blck_size(enum ggml_type type);
647
+ GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
648
+ GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
731
649
 
732
650
  GGML_DEPRECATED(
733
651
  GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
734
652
  "use ggml_row_size() instead");
735
653
 
736
- GGML_API GGML_CALL const char * ggml_type_name(enum ggml_type type);
737
- GGML_API GGML_CALL const char * ggml_op_name (enum ggml_op op);
738
- GGML_API const char * ggml_op_symbol(enum ggml_op op);
654
+ GGML_API const char * ggml_type_name(enum ggml_type type);
655
+ GGML_API const char * ggml_op_name (enum ggml_op op);
656
+ GGML_API const char * ggml_op_symbol(enum ggml_op op);
739
657
 
740
- GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
741
- GGML_API GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
658
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
659
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
742
660
 
743
- GGML_API GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor);
661
+ GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
744
662
 
745
- GGML_API GGML_CALL bool ggml_is_quantized(enum ggml_type type);
663
+ GGML_API bool ggml_is_quantized(enum ggml_type type);
746
664
 
747
665
  // TODO: temporary until model loading of ggml examples is refactored
748
666
  GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
749
667
 
750
- GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
751
- GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
752
- GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
753
- GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
754
- GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
755
- GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
756
- GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
757
- GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
758
- GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
668
+ GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
669
+ GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
670
+ GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor);
671
+ GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
672
+ GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
673
+ GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
674
+ GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
675
+ GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
676
+
677
+ // returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
678
+ GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
679
+ GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
680
+ GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
681
+ GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
682
+
683
+ // returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
684
+ GGML_API bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor);
685
+
686
+ // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
687
+ GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
759
688
 
760
- GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
689
+ GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
690
+ GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
691
+
692
+ GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
761
693
 
762
694
  // use this to compute the memory overhead of a tensor
763
695
  GGML_API size_t ggml_tensor_overhead(void);
764
696
 
697
+ GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);
698
+
765
699
  // main
766
700
 
767
- GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
768
- GGML_API void ggml_free(struct ggml_context * ctx);
701
+ GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
702
+ GGML_API void ggml_reset(struct ggml_context * ctx);
703
+ GGML_API void ggml_free (struct ggml_context * ctx);
769
704
 
770
705
  GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
771
706
 
772
- GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
773
707
  GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
774
708
  GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
775
709
 
@@ -809,8 +743,7 @@ extern "C" {
809
743
  int64_t ne2,
810
744
  int64_t ne3);
811
745
 
812
- GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
813
- GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
746
+ GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);
814
747
 
815
748
  GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
816
749
  GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
@@ -820,35 +753,25 @@ extern "C" {
820
753
  GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
821
754
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
822
755
 
823
- GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
824
- GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
825
- GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
826
-
827
756
  // Converts a flat index into coordinates
828
- GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
829
-
830
- GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
831
- GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
832
-
833
- GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
834
- GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
757
+ GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
835
758
 
836
- GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
837
- GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
838
-
839
- GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
840
- GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
759
+ GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
841
760
 
842
761
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
843
762
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
844
763
 
845
- GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
846
-
847
764
  GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
848
765
  GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
849
766
  GGML_ATTRIBUTE_FORMAT(2, 3)
850
767
  GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
851
768
 
769
+ // Tensor flags
770
+ GGML_API void ggml_set_input(struct ggml_tensor * tensor);
771
+ GGML_API void ggml_set_output(struct ggml_tensor * tensor);
772
+ GGML_API void ggml_set_param(struct ggml_tensor * tensor);
773
+ GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
774
+
852
775
  //
853
776
  // operations on tensors with backpropagation
854
777
  //
@@ -963,6 +886,22 @@ extern "C" {
963
886
  struct ggml_context * ctx,
964
887
  struct ggml_tensor * a);
965
888
 
889
+ GGML_API struct ggml_tensor * ggml_sin(
890
+ struct ggml_context * ctx,
891
+ struct ggml_tensor * a);
892
+
893
+ GGML_API struct ggml_tensor * ggml_sin_inplace(
894
+ struct ggml_context * ctx,
895
+ struct ggml_tensor * a);
896
+
897
+ GGML_API struct ggml_tensor * ggml_cos(
898
+ struct ggml_context * ctx,
899
+ struct ggml_tensor * a);
900
+
901
+ GGML_API struct ggml_tensor * ggml_cos_inplace(
902
+ struct ggml_context * ctx,
903
+ struct ggml_tensor * a);
904
+
966
905
  // return scalar
967
906
  GGML_API struct ggml_tensor * ggml_sum(
968
907
  struct ggml_context * ctx,
@@ -983,6 +922,12 @@ extern "C" {
983
922
  struct ggml_context * ctx,
984
923
  struct ggml_tensor * a);
985
924
 
925
+ // count number of equal elements in a and b
926
+ GGML_API struct ggml_tensor * ggml_count_equal(
927
+ struct ggml_context * ctx,
928
+ struct ggml_tensor * a,
929
+ struct ggml_tensor * b);
930
+
986
931
  // if a is the same shape as b, and a is not parameter, return a
987
932
  // otherwise, return a new tensor: repeat(a) to fit in b
988
933
  GGML_API struct ggml_tensor * ggml_repeat(
@@ -994,14 +939,15 @@ extern "C" {
994
939
  GGML_API struct ggml_tensor * ggml_repeat_back(
995
940
  struct ggml_context * ctx,
996
941
  struct ggml_tensor * a,
997
- struct ggml_tensor * b);
942
+ struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
998
943
 
999
- // concat a and b on dim 2
944
+ // concat a and b along dim
1000
945
  // used in stable-diffusion
1001
946
  GGML_API struct ggml_tensor * ggml_concat(
1002
947
  struct ggml_context * ctx,
1003
948
  struct ggml_tensor * a,
1004
- struct ggml_tensor * b);
949
+ struct ggml_tensor * b,
950
+ int dim);
1005
951
 
1006
952
  GGML_API struct ggml_tensor * ggml_abs(
1007
953
  struct ggml_context * ctx,
@@ -1063,6 +1009,14 @@ extern "C" {
1063
1009
  struct ggml_context * ctx,
1064
1010
  struct ggml_tensor * a);
1065
1011
 
1012
+ GGML_API struct ggml_tensor * ggml_sigmoid(
1013
+ struct ggml_context * ctx,
1014
+ struct ggml_tensor * a);
1015
+
1016
+ GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
1017
+ struct ggml_context * ctx,
1018
+ struct ggml_tensor * a);
1019
+
1066
1020
  GGML_API struct ggml_tensor * ggml_gelu(
1067
1021
  struct ggml_context * ctx,
1068
1022
  struct ggml_tensor * a);
@@ -1071,6 +1025,16 @@ extern "C" {
1071
1025
  struct ggml_context * ctx,
1072
1026
  struct ggml_tensor * a);
1073
1027
 
1028
+ // GELU using erf (error function) when possible
1029
+ // some backends may fallback to approximation based on Abramowitz and Stegun formula
1030
+ GGML_API struct ggml_tensor * ggml_gelu_erf(
1031
+ struct ggml_context * ctx,
1032
+ struct ggml_tensor * a);
1033
+
1034
+ GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
1035
+ struct ggml_context * ctx,
1036
+ struct ggml_tensor * a);
1037
+
1074
1038
  GGML_API struct ggml_tensor * ggml_gelu_quick(
1075
1039
  struct ggml_context * ctx,
1076
1040
  struct ggml_tensor * a);
@@ -1104,6 +1068,14 @@ extern "C" {
1104
1068
  struct ggml_context * ctx,
1105
1069
  struct ggml_tensor * a);
1106
1070
 
1071
+ GGML_API struct ggml_tensor * ggml_exp(
1072
+ struct ggml_context * ctx,
1073
+ struct ggml_tensor * a);
1074
+
1075
+ GGML_API struct ggml_tensor * ggml_exp_inplace(
1076
+ struct ggml_context * ctx,
1077
+ struct ggml_tensor * a);
1078
+
1107
1079
  // normalize along rows
1108
1080
  GGML_API struct ggml_tensor * ggml_norm(
1109
1081
  struct ggml_context * ctx,
@@ -1127,16 +1099,29 @@ extern "C" {
1127
1099
 
1128
1100
  // group normalize along ne0*ne1*n_groups
1129
1101
  // used in stable-diffusion
1130
- // TODO: eps is hardcoded to 1e-6 for now
1131
1102
  GGML_API struct ggml_tensor * ggml_group_norm(
1132
1103
  struct ggml_context * ctx,
1133
1104
  struct ggml_tensor * a,
1134
- int n_groups);
1105
+ int n_groups,
1106
+ float eps);
1135
1107
 
1136
1108
  GGML_API struct ggml_tensor * ggml_group_norm_inplace(
1137
1109
  struct ggml_context * ctx,
1138
1110
  struct ggml_tensor * a,
1139
- int n_groups);
1111
+ int n_groups,
1112
+ float eps);
1113
+
1114
+ // l2 normalize along rows
1115
+ // used in rwkv v7
1116
+ GGML_API struct ggml_tensor * ggml_l2_norm(
1117
+ struct ggml_context * ctx,
1118
+ struct ggml_tensor * a,
1119
+ float eps);
1120
+
1121
+ GGML_API struct ggml_tensor * ggml_l2_norm_inplace(
1122
+ struct ggml_context * ctx,
1123
+ struct ggml_tensor * a,
1124
+ float eps);
1140
1125
 
1141
1126
  // a - x
1142
1127
  // b - dy
@@ -1161,13 +1146,11 @@ extern "C" {
1161
1146
  enum ggml_prec prec);
1162
1147
 
1163
1148
  // indirect matrix multiplication
1164
- // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1165
1149
  GGML_API struct ggml_tensor * ggml_mul_mat_id(
1166
1150
  struct ggml_context * ctx,
1167
1151
  struct ggml_tensor * as,
1168
- struct ggml_tensor * ids,
1169
- int id,
1170
- struct ggml_tensor * b);
1152
+ struct ggml_tensor * b,
1153
+ struct ggml_tensor * ids);
1171
1154
 
1172
1155
  // A: m columns, n rows,
1173
1156
  // B: p columns, n rows,
@@ -1200,7 +1183,7 @@ extern "C" {
1200
1183
  size_t nb1,
1201
1184
  size_t nb2,
1202
1185
  size_t nb3,
1203
- size_t offset);
1186
+ size_t offset); // in bytes
1204
1187
 
1205
1188
  // b -> view(a,offset,nb1,nb2,3), return view(a)
1206
1189
  GGML_API struct ggml_tensor * ggml_set_inplace(
@@ -1210,19 +1193,19 @@ extern "C" {
1210
1193
  size_t nb1,
1211
1194
  size_t nb2,
1212
1195
  size_t nb3,
1213
- size_t offset);
1196
+ size_t offset); // in bytes
1214
1197
 
1215
1198
  GGML_API struct ggml_tensor * ggml_set_1d(
1216
1199
  struct ggml_context * ctx,
1217
1200
  struct ggml_tensor * a,
1218
1201
  struct ggml_tensor * b,
1219
- size_t offset);
1202
+ size_t offset); // in bytes
1220
1203
 
1221
1204
  GGML_API struct ggml_tensor * ggml_set_1d_inplace(
1222
1205
  struct ggml_context * ctx,
1223
1206
  struct ggml_tensor * a,
1224
1207
  struct ggml_tensor * b,
1225
- size_t offset);
1208
+ size_t offset); // in bytes
1226
1209
 
1227
1210
  // b -> view(a,offset,nb1,nb2,3), return modified a
1228
1211
  GGML_API struct ggml_tensor * ggml_set_2d(
@@ -1230,7 +1213,7 @@ extern "C" {
1230
1213
  struct ggml_tensor * a,
1231
1214
  struct ggml_tensor * b,
1232
1215
  size_t nb1,
1233
- size_t offset);
1216
+ size_t offset); // in bytes
1234
1217
 
1235
1218
  // b -> view(a,offset,nb1,nb2,3), return view(a)
1236
1219
  GGML_API struct ggml_tensor * ggml_set_2d_inplace(
@@ -1238,7 +1221,7 @@ extern "C" {
1238
1221
  struct ggml_tensor * a,
1239
1222
  struct ggml_tensor * b,
1240
1223
  size_t nb1,
1241
- size_t offset);
1224
+ size_t offset); // in bytes
1242
1225
 
1243
1226
  // a -> b, return view(b)
1244
1227
  GGML_API struct ggml_tensor * ggml_cpy(
@@ -1373,14 +1356,14 @@ extern "C" {
1373
1356
  // supports 3D: a->ne[2] == b->ne[1]
1374
1357
  GGML_API struct ggml_tensor * ggml_get_rows(
1375
1358
  struct ggml_context * ctx,
1376
- struct ggml_tensor * a,
1377
- struct ggml_tensor * b);
1359
+ struct ggml_tensor * a, // data
1360
+ struct ggml_tensor * b); // row indices
1378
1361
 
1379
1362
  GGML_API struct ggml_tensor * ggml_get_rows_back(
1380
1363
  struct ggml_context * ctx,
1381
- struct ggml_tensor * a,
1382
- struct ggml_tensor * b,
1383
- struct ggml_tensor * c);
1364
+ struct ggml_tensor * a, // gradients of ggml_get_rows result
1365
+ struct ggml_tensor * b, // row indices
1366
+ struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
1384
1367
 
1385
1368
  GGML_API struct ggml_tensor * ggml_diag(
1386
1369
  struct ggml_context * ctx,
@@ -1419,33 +1402,34 @@ extern "C" {
1419
1402
  struct ggml_context * ctx,
1420
1403
  struct ggml_tensor * a);
1421
1404
 
1422
- // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
1405
+ // fused soft_max(a*scale + mask*(ALiBi slope))
1423
1406
  // mask is optional
1424
- // pos is required when max_bias > 0.0f
1425
1407
  // max_bias = 0.0f for no ALiBi
1426
1408
  GGML_API struct ggml_tensor * ggml_soft_max_ext(
1427
1409
  struct ggml_context * ctx,
1428
1410
  struct ggml_tensor * a,
1429
1411
  struct ggml_tensor * mask,
1430
- struct ggml_tensor * pos,
1431
1412
  float scale,
1432
1413
  float max_bias);
1433
1414
 
1434
- GGML_API struct ggml_tensor * ggml_soft_max_back(
1415
+ GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
1435
1416
  struct ggml_context * ctx,
1436
1417
  struct ggml_tensor * a,
1437
- struct ggml_tensor * b);
1418
+ struct ggml_tensor * b,
1419
+ float scale,
1420
+ float max_bias);
1438
1421
 
1439
1422
  // in-place, returns view(a)
1440
- GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
1423
+ GGML_API struct ggml_tensor * ggml_soft_max_ext_back_inplace(
1441
1424
  struct ggml_context * ctx,
1442
1425
  struct ggml_tensor * a,
1443
- struct ggml_tensor * b);
1426
+ struct ggml_tensor * b,
1427
+ float scale,
1428
+ float max_bias);
1444
1429
 
1445
1430
  // rotary position embedding
1446
- // if mode & 1 == 1, skip n_past elements (DEPRECATED)
1447
- // if mode & 2 == 1, GPT-NeoX style
1448
- // if mode & 4 == 1, ChatGLM style
1431
+ // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
1432
+ // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
1449
1433
  //
1450
1434
  // b is an int32 vector with size a->ne[2], it contains the positions
1451
1435
  GGML_API struct ggml_tensor * ggml_rope(
@@ -1453,8 +1437,7 @@ extern "C" {
1453
1437
  struct ggml_tensor * a,
1454
1438
  struct ggml_tensor * b,
1455
1439
  int n_dims,
1456
- int mode,
1457
- int n_ctx);
1440
+ int mode);
1458
1441
 
1459
1442
  // in-place, returns view(a)
1460
1443
  GGML_API struct ggml_tensor * ggml_rope_inplace(
@@ -1462,18 +1445,34 @@ extern "C" {
1462
1445
  struct ggml_tensor * a,
1463
1446
  struct ggml_tensor * b,
1464
1447
  int n_dims,
1465
- int mode,
1466
- int n_ctx);
1448
+ int mode);
1467
1449
 
1468
1450
  // custom RoPE
1469
- GGML_API struct ggml_tensor * ggml_rope_custom(
1451
+ // c is freq factors (e.g. phi3-128k), (optional)
1452
+ GGML_API struct ggml_tensor * ggml_rope_ext(
1470
1453
  struct ggml_context * ctx,
1471
1454
  struct ggml_tensor * a,
1472
1455
  struct ggml_tensor * b,
1456
+ struct ggml_tensor * c,
1473
1457
  int n_dims,
1474
1458
  int mode,
1475
- int n_ctx,
1476
- int n_orig_ctx,
1459
+ int n_ctx_orig,
1460
+ float freq_base,
1461
+ float freq_scale,
1462
+ float ext_factor,
1463
+ float attn_factor,
1464
+ float beta_fast,
1465
+ float beta_slow);
1466
+
1467
+ GGML_API struct ggml_tensor * ggml_rope_multi(
1468
+ struct ggml_context * ctx,
1469
+ struct ggml_tensor * a,
1470
+ struct ggml_tensor * b,
1471
+ struct ggml_tensor * c,
1472
+ int n_dims,
1473
+ int sections[4],
1474
+ int mode,
1475
+ int n_ctx_orig,
1477
1476
  float freq_base,
1478
1477
  float freq_scale,
1479
1478
  float ext_factor,
@@ -1482,14 +1481,14 @@ extern "C" {
1482
1481
  float beta_slow);
1483
1482
 
1484
1483
  // in-place, returns view(a)
1485
- GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
1484
+ GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
1486
1485
  struct ggml_context * ctx,
1487
1486
  struct ggml_tensor * a,
1488
1487
  struct ggml_tensor * b,
1488
+ struct ggml_tensor * c,
1489
1489
  int n_dims,
1490
1490
  int mode,
1491
- int n_ctx,
1492
- int n_orig_ctx,
1491
+ int n_ctx_orig,
1493
1492
  float freq_base,
1494
1493
  float freq_scale,
1495
1494
  float ext_factor,
@@ -1497,47 +1496,73 @@ extern "C" {
1497
1496
  float beta_fast,
1498
1497
  float beta_slow);
1499
1498
 
1500
- // compute correction dims for YaRN RoPE scaling
1501
- GGML_CALL void ggml_rope_yarn_corr_dims(
1502
- int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
1499
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
1500
+ struct ggml_context * ctx,
1501
+ struct ggml_tensor * a,
1502
+ struct ggml_tensor * b,
1503
+ int n_dims,
1504
+ int mode,
1505
+ int n_ctx_orig,
1506
+ float freq_base,
1507
+ float freq_scale,
1508
+ float ext_factor,
1509
+ float attn_factor,
1510
+ float beta_fast,
1511
+ float beta_slow),
1512
+ "use ggml_rope_ext instead");
1503
1513
 
1504
- // xPos RoPE, in-place, returns view(a)
1505
- GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
1514
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
1506
1515
  struct ggml_context * ctx,
1507
1516
  struct ggml_tensor * a,
1508
1517
  struct ggml_tensor * b,
1509
1518
  int n_dims,
1510
- float base,
1511
- bool down);
1519
+ int mode,
1520
+ int n_ctx_orig,
1521
+ float freq_base,
1522
+ float freq_scale,
1523
+ float ext_factor,
1524
+ float attn_factor,
1525
+ float beta_fast,
1526
+ float beta_slow),
1527
+ "use ggml_rope_ext_inplace instead");
1528
+
1529
+ // compute correction dims for YaRN RoPE scaling
1530
+ GGML_API void ggml_rope_yarn_corr_dims(
1531
+ int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
1512
1532
 
1513
1533
  // rotary position embedding backward, i.e compute dx from dy
1514
1534
  // a - dy
1515
- GGML_API struct ggml_tensor * ggml_rope_back(
1535
+ GGML_API struct ggml_tensor * ggml_rope_ext_back(
1516
1536
  struct ggml_context * ctx,
1517
- struct ggml_tensor * a,
1518
- struct ggml_tensor * b,
1537
+ struct ggml_tensor * a, // gradients of ggml_rope result
1538
+ struct ggml_tensor * b, // positions
1539
+ struct ggml_tensor * c, // freq factors
1519
1540
  int n_dims,
1520
1541
  int mode,
1521
- int n_ctx,
1522
- int n_orig_ctx,
1542
+ int n_ctx_orig,
1523
1543
  float freq_base,
1524
1544
  float freq_scale,
1525
1545
  float ext_factor,
1526
1546
  float attn_factor,
1527
1547
  float beta_fast,
1528
- float beta_slow,
1529
- float xpos_base,
1530
- bool xpos_down);
1548
+ float beta_slow);
1531
1549
 
1532
- // alibi position embedding
1533
- // in-place, returns view(a)
1534
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
1550
+ GGML_API struct ggml_tensor * ggml_rope_multi_back(
1535
1551
  struct ggml_context * ctx,
1536
1552
  struct ggml_tensor * a,
1537
- int n_past,
1538
- int n_head,
1539
- float bias_max),
1540
- "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
1553
+ struct ggml_tensor * b,
1554
+ struct ggml_tensor * c,
1555
+ int n_dims,
1556
+ int sections[4],
1557
+ int mode,
1558
+ int n_ctx_orig,
1559
+ float freq_base,
1560
+ float freq_scale,
1561
+ float ext_factor,
1562
+ float attn_factor,
1563
+ float beta_fast,
1564
+ float beta_slow);
1565
+
1541
1566
 
1542
1567
  // clamp
1543
1568
  // in-place, returns view(a)
@@ -1547,34 +1572,38 @@ extern "C" {
1547
1572
  float min,
1548
1573
  float max);
1549
1574
 
1575
+ // im2col
1576
+ // converts data into a format that effectively results in a convolution when combined with matrix multiplication
1550
1577
  GGML_API struct ggml_tensor * ggml_im2col(
1551
1578
  struct ggml_context * ctx,
1552
- struct ggml_tensor * a,
1553
- struct ggml_tensor * b,
1554
- int s0,
1555
- int s1,
1556
- int p0,
1557
- int p1,
1558
- int d0,
1559
- int d1,
1560
- bool is_2D,
1561
- enum ggml_type dst_type);
1562
-
1563
- GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
1564
- struct ggml_context * ctx,
1565
- struct ggml_tensor * a,
1566
- struct ggml_tensor * b,
1567
- int s0,
1568
- int s1,
1569
- int p0,
1570
- int p1,
1571
- int d0,
1572
- int d1);
1579
+ struct ggml_tensor * a, // convolution kernel
1580
+ struct ggml_tensor * b, // data
1581
+ int s0, // stride dimension 0
1582
+ int s1, // stride dimension 1
1583
+ int p0, // padding dimension 0
1584
+ int p1, // padding dimension 1
1585
+ int d0, // dilation dimension 0
1586
+ int d1, // dilation dimension 1
1587
+ bool is_2D,
1588
+ enum ggml_type dst_type);
1589
+
1590
+ GGML_API struct ggml_tensor * ggml_im2col_back(
1591
+ struct ggml_context * ctx,
1592
+ struct ggml_tensor * a, // convolution kernel
1593
+ struct ggml_tensor * b, // gradient of im2col output
1594
+ int64_t * ne, // shape of im2col input
1595
+ int s0, // stride dimension 0
1596
+ int s1, // stride dimension 1
1597
+ int p0, // padding dimension 0
1598
+ int p1, // padding dimension 1
1599
+ int d0, // dilation dimension 0
1600
+ int d1, // dilation dimension 1
1601
+ bool is_2D);
1573
1602
 
1574
1603
  GGML_API struct ggml_tensor * ggml_conv_1d(
1575
1604
  struct ggml_context * ctx,
1576
- struct ggml_tensor * a,
1577
- struct ggml_tensor * b,
1605
+ struct ggml_tensor * a, // convolution kernel
1606
+ struct ggml_tensor * b, // data
1578
1607
  int s0, // stride
1579
1608
  int p0, // padding
1580
1609
  int d0); // dilation
@@ -1583,30 +1612,46 @@ extern "C" {
1583
1612
  // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1584
1613
  GGML_API struct ggml_tensor* ggml_conv_1d_ph(
1585
1614
  struct ggml_context * ctx,
1586
- struct ggml_tensor * a,
1587
- struct ggml_tensor * b,
1588
- int s,
1589
- int d);
1615
+ struct ggml_tensor * a, // convolution kernel
1616
+ struct ggml_tensor * b, // data
1617
+ int s, // stride
1618
+ int d); // dilation
1619
+
1620
+ // depthwise
1621
+ // TODO: this is very likely wrong for some cases! - needs more testing
1622
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw(
1623
+ struct ggml_context * ctx,
1624
+ struct ggml_tensor * a, // convolution kernel
1625
+ struct ggml_tensor * b, // data
1626
+ int s0, // stride
1627
+ int p0, // padding
1628
+ int d0); // dilation
1629
+
1630
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph(
1631
+ struct ggml_context * ctx,
1632
+ struct ggml_tensor * a, // convolution kernel
1633
+ struct ggml_tensor * b, // data
1634
+ int s0, // stride
1635
+ int d0); // dilation
1590
1636
 
1591
1637
  GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
1592
1638
  struct ggml_context * ctx,
1593
- struct ggml_tensor * a,
1594
- struct ggml_tensor * b,
1595
- int s0,
1596
- int p0,
1597
- int d0);
1639
+ struct ggml_tensor * a, // convolution kernel
1640
+ struct ggml_tensor * b, // data
1641
+ int s0, // stride
1642
+ int p0, // padding
1643
+ int d0); // dilation
1598
1644
 
1599
1645
  GGML_API struct ggml_tensor * ggml_conv_2d(
1600
1646
  struct ggml_context * ctx,
1601
- struct ggml_tensor * a,
1602
- struct ggml_tensor * b,
1603
- int s0,
1604
- int s1,
1605
- int p0,
1606
- int p1,
1607
- int d0,
1608
- int d1);
1609
-
1647
+ struct ggml_tensor * a, // convolution kernel
1648
+ struct ggml_tensor * b, // data
1649
+ int s0, // stride dimension 0
1650
+ int s1, // stride dimension 1
1651
+ int p0, // padding dimension 0
1652
+ int p1, // padding dimension 1
1653
+ int d0, // dilation dimension 0
1654
+ int d1); // dilation dimension 1
1610
1655
 
1611
1656
  // kernel size is a->ne[0] x a->ne[1]
1612
1657
  // stride is equal to kernel size
@@ -1634,6 +1679,34 @@ extern "C" {
1634
1679
  struct ggml_tensor * a,
1635
1680
  struct ggml_tensor * b);
1636
1681
 
1682
+ // depthwise (via im2col and mul_mat)
1683
+ GGML_API struct ggml_tensor * ggml_conv_2d_dw(
1684
+ struct ggml_context * ctx,
1685
+ struct ggml_tensor * a, // convolution kernel
1686
+ struct ggml_tensor * b, // data
1687
+ int s0, // stride dimension 0
1688
+ int s1, // stride dimension 1
1689
+ int p0, // padding dimension 0
1690
+ int p1, // padding dimension 1
1691
+ int d0, // dilation dimension 0
1692
+ int d1); // dilation dimension 1
1693
+
1694
+ // Depthwise 2D convolution
1695
+ // may be faster than ggml_conv_2d_dw, but not available in all backends
1696
+ // a: KW KH 1 C convolution kernel
1697
+ // b: W H C N input data
1698
+ // res: W_out H_out C N
1699
+ GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
1700
+ struct ggml_context * ctx,
1701
+ struct ggml_tensor * a,
1702
+ struct ggml_tensor * b,
1703
+ int stride0,
1704
+ int stride1,
1705
+ int pad0,
1706
+ int pad1,
1707
+ int dilation0,
1708
+ int dilation1);
1709
+
1637
1710
  GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
1638
1711
  struct ggml_context * ctx,
1639
1712
  struct ggml_tensor * a,
@@ -1667,12 +1740,41 @@ extern "C" {
1667
1740
  float p0,
1668
1741
  float p1);
1669
1742
 
1670
- // nearest interpolate
1671
- // used in stable-diffusion
1743
+ GGML_API struct ggml_tensor * ggml_pool_2d_back(
1744
+ struct ggml_context * ctx,
1745
+ struct ggml_tensor * a,
1746
+ struct ggml_tensor * af, // "a"/input used in forward pass
1747
+ enum ggml_op_pool op,
1748
+ int k0,
1749
+ int k1,
1750
+ int s0,
1751
+ int s1,
1752
+ float p0,
1753
+ float p1);
1754
+
1755
+ enum ggml_scale_mode {
1756
+ GGML_SCALE_MODE_NEAREST = 0,
1757
+ GGML_SCALE_MODE_BILINEAR = 1,
1758
+ };
1759
+
1760
+ // interpolate
1761
+ // multiplies ne0 and ne1 by scale factor
1672
1762
  GGML_API struct ggml_tensor * ggml_upscale(
1673
1763
  struct ggml_context * ctx,
1674
1764
  struct ggml_tensor * a,
1675
- int scale_factor);
1765
+ int scale_factor,
1766
+ enum ggml_scale_mode mode);
1767
+
1768
+ // interpolate
1769
+ // interpolate scale to specified dimensions
1770
+ GGML_API struct ggml_tensor * ggml_upscale_ext(
1771
+ struct ggml_context * ctx,
1772
+ struct ggml_tensor * a,
1773
+ int ne0,
1774
+ int ne1,
1775
+ int ne2,
1776
+ int ne3,
1777
+ enum ggml_scale_mode mode);
1676
1778
 
1677
1779
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1678
1780
  GGML_API struct ggml_tensor * ggml_pad(
@@ -1683,6 +1785,13 @@ extern "C" {
1683
1785
  int p2,
1684
1786
  int p3);
1685
1787
 
1788
+ // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
1789
+ GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
1790
+ struct ggml_context * ctx,
1791
+ struct ggml_tensor * a,
1792
+ int p0,
1793
+ int p1);
1794
+
1686
1795
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1687
1796
  // timesteps: [N,]
1688
1797
  // return: [N, dim]
@@ -1715,13 +1824,31 @@ extern "C" {
1715
1824
  struct ggml_tensor * a,
1716
1825
  int k);
1717
1826
 
1718
- GGML_API struct ggml_tensor * ggml_flash_attn(
1827
+ #define GGML_KQ_MASK_PAD 64
1828
+
1829
+ // q: [n_embd_k, n_batch, n_head, 1]
1830
+ // k: [n_embd_k, n_kv, n_head_kv, 1]
1831
+ // v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
1832
+ // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1833
+ // res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
1834
+ GGML_API struct ggml_tensor * ggml_flash_attn_ext(
1719
1835
  struct ggml_context * ctx,
1720
1836
  struct ggml_tensor * q,
1721
1837
  struct ggml_tensor * k,
1722
1838
  struct ggml_tensor * v,
1723
- bool masked);
1839
+ struct ggml_tensor * mask,
1840
+ float scale,
1841
+ float max_bias,
1842
+ float logit_softcap);
1843
+
1844
+ GGML_API void ggml_flash_attn_ext_set_prec(
1845
+ struct ggml_tensor * a,
1846
+ enum ggml_prec prec);
1847
+
1848
+ GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
1849
+ const struct ggml_tensor * a);
1724
1850
 
1851
+ // TODO: needs to be adapted to ggml_flash_attn_ext
1725
1852
  GGML_API struct ggml_tensor * ggml_flash_attn_back(
1726
1853
  struct ggml_context * ctx,
1727
1854
  struct ggml_tensor * q,
@@ -1730,20 +1857,10 @@ extern "C" {
1730
1857
  struct ggml_tensor * d,
1731
1858
  bool masked);
1732
1859
 
1733
- GGML_API struct ggml_tensor * ggml_flash_ff(
1734
- struct ggml_context * ctx,
1735
- struct ggml_tensor * a,
1736
- struct ggml_tensor * b0,
1737
- struct ggml_tensor * b1,
1738
- struct ggml_tensor * c0,
1739
- struct ggml_tensor * c1);
1740
-
1741
1860
  GGML_API struct ggml_tensor * ggml_ssm_conv(
1742
1861
  struct ggml_context * ctx,
1743
- struct ggml_tensor * s,
1744
- struct ggml_tensor * x,
1745
- struct ggml_tensor * c,
1746
- struct ggml_tensor * sq);
1862
+ struct ggml_tensor * sx,
1863
+ struct ggml_tensor * c);
1747
1864
 
1748
1865
  GGML_API struct ggml_tensor * ggml_ssm_scan(
1749
1866
  struct ggml_context * ctx,
@@ -1752,8 +1869,7 @@ extern "C" {
1752
1869
  struct ggml_tensor * dt,
1753
1870
  struct ggml_tensor * A,
1754
1871
  struct ggml_tensor * B,
1755
- struct ggml_tensor * C,
1756
- struct ggml_tensor * sq);
1872
+ struct ggml_tensor * C);
1757
1873
 
1758
1874
  // partition into non-overlapping windows with padding if needed
1759
1875
  // example:
@@ -1805,90 +1921,42 @@ extern "C" {
1805
1921
  struct ggml_tensor * pw,
1806
1922
  struct ggml_tensor * ph);
1807
1923
 
1808
- // custom operators
1924
+ GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
1925
+ struct ggml_context * ctx,
1926
+ struct ggml_tensor * k,
1927
+ struct ggml_tensor * v,
1928
+ struct ggml_tensor * r,
1929
+ struct ggml_tensor * tf,
1930
+ struct ggml_tensor * td,
1931
+ struct ggml_tensor * state);
1932
+
1933
+ GGML_API struct ggml_tensor * ggml_gated_linear_attn(
1934
+ struct ggml_context * ctx,
1935
+ struct ggml_tensor * k,
1936
+ struct ggml_tensor * v,
1937
+ struct ggml_tensor * q,
1938
+ struct ggml_tensor * g,
1939
+ struct ggml_tensor * state,
1940
+ float scale);
1941
+
1942
+ GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
1943
+ struct ggml_context * ctx,
1944
+ struct ggml_tensor * r,
1945
+ struct ggml_tensor * w,
1946
+ struct ggml_tensor * k,
1947
+ struct ggml_tensor * v,
1948
+ struct ggml_tensor * a,
1949
+ struct ggml_tensor * b,
1950
+ struct ggml_tensor * state);
1809
1951
 
1810
- typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1811
- typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1812
-
1813
- typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1814
- typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1815
- typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1816
-
1817
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
1818
- struct ggml_context * ctx,
1819
- struct ggml_tensor * a,
1820
- ggml_unary_op_f32_t fun),
1821
- "use ggml_map_custom1 instead");
1822
-
1823
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1824
- struct ggml_context * ctx,
1825
- struct ggml_tensor * a,
1826
- ggml_unary_op_f32_t fun),
1827
- "use ggml_map_custom1_inplace instead");
1828
-
1829
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
1830
- struct ggml_context * ctx,
1831
- struct ggml_tensor * a,
1832
- struct ggml_tensor * b,
1833
- ggml_binary_op_f32_t fun),
1834
- "use ggml_map_custom2 instead");
1835
-
1836
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1837
- struct ggml_context * ctx,
1838
- struct ggml_tensor * a,
1839
- struct ggml_tensor * b,
1840
- ggml_binary_op_f32_t fun),
1841
- "use ggml_map_custom2_inplace instead");
1842
-
1843
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1844
- struct ggml_context * ctx,
1845
- struct ggml_tensor * a,
1846
- ggml_custom1_op_f32_t fun),
1847
- "use ggml_map_custom1 instead");
1848
-
1849
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1850
- struct ggml_context * ctx,
1851
- struct ggml_tensor * a,
1852
- ggml_custom1_op_f32_t fun),
1853
- "use ggml_map_custom1_inplace instead");
1854
-
1855
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1856
- struct ggml_context * ctx,
1857
- struct ggml_tensor * a,
1858
- struct ggml_tensor * b,
1859
- ggml_custom2_op_f32_t fun),
1860
- "use ggml_map_custom2 instead");
1861
-
1862
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1863
- struct ggml_context * ctx,
1864
- struct ggml_tensor * a,
1865
- struct ggml_tensor * b,
1866
- ggml_custom2_op_f32_t fun),
1867
- "use ggml_map_custom2_inplace instead");
1868
-
1869
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1870
- struct ggml_context * ctx,
1871
- struct ggml_tensor * a,
1872
- struct ggml_tensor * b,
1873
- struct ggml_tensor * c,
1874
- ggml_custom3_op_f32_t fun),
1875
- "use ggml_map_custom3 instead");
1876
-
1877
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1878
- struct ggml_context * ctx,
1879
- struct ggml_tensor * a,
1880
- struct ggml_tensor * b,
1881
- struct ggml_tensor * c,
1882
- ggml_custom3_op_f32_t fun),
1883
- "use ggml_map_custom3_inplace instead");
1884
-
1885
- // custom operators v2
1952
+ // custom operators
1886
1953
 
1887
1954
  typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
1888
1955
  typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
1889
1956
  typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
1890
1957
 
1891
- #define GGML_N_TASKS_MAX -1
1958
+ #define GGML_N_TASKS_MAX (-1)
1959
+ // n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
1892
1960
 
1893
1961
  GGML_API struct ggml_tensor * ggml_map_custom1(
1894
1962
  struct ggml_context * ctx,
@@ -1938,52 +2006,85 @@ extern "C" {
1938
2006
  int n_tasks,
1939
2007
  void * userdata);
1940
2008
 
2009
+ typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata);
2010
+
2011
+ GGML_API struct ggml_tensor * ggml_custom_4d(
2012
+ struct ggml_context * ctx,
2013
+ enum ggml_type type,
2014
+ int64_t ne0,
2015
+ int64_t ne1,
2016
+ int64_t ne2,
2017
+ int64_t ne3,
2018
+ struct ggml_tensor ** args,
2019
+ int n_args,
2020
+ ggml_custom_op_t fun,
2021
+ int n_tasks,
2022
+ void * userdata);
2023
+
2024
+ GGML_API struct ggml_tensor * ggml_custom_inplace(
2025
+ struct ggml_context * ctx,
2026
+ struct ggml_tensor * a,
2027
+ struct ggml_tensor ** args,
2028
+ int n_args,
2029
+ ggml_custom_op_t fun,
2030
+ int n_tasks,
2031
+ void * userdata);
2032
+
1941
2033
  // loss function
1942
2034
 
1943
2035
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
1944
- struct ggml_context * ctx,
1945
- struct ggml_tensor * a,
1946
- struct ggml_tensor * b);
2036
+ struct ggml_context * ctx,
2037
+ struct ggml_tensor * a, // logits
2038
+ struct ggml_tensor * b); // labels
1947
2039
 
1948
2040
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
1949
- struct ggml_context * ctx,
1950
- struct ggml_tensor * a,
1951
- struct ggml_tensor * b,
1952
- struct ggml_tensor * c);
2041
+ struct ggml_context * ctx,
2042
+ struct ggml_tensor * a, // logits
2043
+ struct ggml_tensor * b, // labels
2044
+ struct ggml_tensor * c); // gradients of cross_entropy_loss result
2045
+
2046
+ // AdamW optimizer step
2047
+ // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
2048
+ // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
2049
+ GGML_API struct ggml_tensor * ggml_opt_step_adamw(
2050
+ struct ggml_context * ctx,
2051
+ struct ggml_tensor * a,
2052
+ struct ggml_tensor * grad,
2053
+ struct ggml_tensor * m,
2054
+ struct ggml_tensor * v,
2055
+ struct ggml_tensor * adamw_params); // parameters such a the learning rate
1953
2056
 
1954
2057
  //
1955
2058
  // automatic differentiation
1956
2059
  //
1957
2060
 
1958
- GGML_API void ggml_set_param(
1959
- struct ggml_context * ctx,
1960
- struct ggml_tensor * tensor);
2061
+ GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
2062
+ GGML_API void ggml_build_backward_expand(
2063
+ struct ggml_context * ctx, // context for gradient computation
2064
+ struct ggml_cgraph * cgraph,
2065
+ struct ggml_tensor ** grad_accs);
1961
2066
 
2067
+ // graph allocation in a context
2068
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
2069
+ GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
2070
+ GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
2071
+ GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
2072
+ GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
2073
+ GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
1962
2074
 
1963
- GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1964
- GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
2075
+ GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
2076
+ GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
2077
+ GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
2078
+ GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
1965
2079
 
1966
- // graph allocation in a context
1967
- GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1968
- GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1969
- GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1970
- GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
1971
- GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1972
- GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1973
- GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
2080
+ GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1974
2081
 
1975
2082
  GGML_API size_t ggml_graph_overhead(void);
1976
2083
  GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
1977
2084
 
1978
- // ggml_graph_plan() has to be called before ggml_graph_compute()
1979
- // when plan.work_size > 0, caller must allocate memory for plan.work_data
1980
- GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
1981
- GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
1982
- // same as ggml_graph_compute() but the work data is allocated as a part of the context
1983
- // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
1984
- GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
1985
-
1986
- GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
2085
+ GGML_API struct ggml_tensor * ggml_graph_get_tensor (const struct ggml_cgraph * cgraph, const char * name);
2086
+ GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2087
+ GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
1987
2088
 
1988
2089
  GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
1989
2090
  GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
@@ -1994,197 +2095,14 @@ extern "C" {
1994
2095
  // dump the graph into a file using the dot format
1995
2096
  GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
1996
2097
 
1997
- // build gradient checkpointing backward graph gb for gf using provided checkpoints
1998
- // gb_tmp will contain original backward graph with rewritten backward process nodes,
1999
- // but without the second forward pass nodes.
2000
- GGML_API void ggml_build_backward_gradient_checkpointing(
2001
- struct ggml_context * ctx,
2002
- struct ggml_cgraph * gf,
2003
- struct ggml_cgraph * gb,
2004
- struct ggml_cgraph * gb_tmp,
2005
- struct ggml_tensor * * checkpoints,
2006
- int n_checkpoints);
2007
- //
2008
- // optimization
2009
- //
2010
-
2011
- // optimization methods
2012
- enum ggml_opt_type {
2013
- GGML_OPT_TYPE_ADAM,
2014
- GGML_OPT_TYPE_LBFGS,
2015
- };
2016
-
2017
- // linesearch methods
2018
- enum ggml_linesearch {
2019
- GGML_LINESEARCH_DEFAULT = 1,
2020
-
2021
- GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
2022
- GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
2023
- GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
2024
- };
2025
-
2026
- // optimization return values
2027
- enum ggml_opt_result {
2028
- GGML_OPT_RESULT_OK = 0,
2029
- GGML_OPT_RESULT_DID_NOT_CONVERGE,
2030
- GGML_OPT_RESULT_NO_CONTEXT,
2031
- GGML_OPT_RESULT_INVALID_WOLFE,
2032
- GGML_OPT_RESULT_FAIL,
2033
- GGML_OPT_RESULT_CANCEL,
2034
-
2035
- GGML_LINESEARCH_FAIL = -128,
2036
- GGML_LINESEARCH_MINIMUM_STEP,
2037
- GGML_LINESEARCH_MAXIMUM_STEP,
2038
- GGML_LINESEARCH_MAXIMUM_ITERATIONS,
2039
- GGML_LINESEARCH_INVALID_PARAMETERS,
2040
- };
2041
-
2042
- typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
2098
+ // TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
2043
2099
  typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
2044
2100
 
2045
- // optimization parameters
2046
- //
2047
- // see ggml.c (ggml_opt_default_params) for default values
2048
- //
2049
- struct ggml_opt_params {
2050
- enum ggml_opt_type type;
2051
-
2052
- size_t graph_size;
2053
-
2054
- int n_threads;
2055
-
2056
- // delta-based convergence test
2057
- //
2058
- // if past == 0 - disabled
2059
- // if past > 0:
2060
- // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
2061
- //
2062
- int past;
2063
- float delta;
2064
-
2065
- // maximum number of iterations without improvement
2066
- //
2067
- // if 0 - disabled
2068
- // if > 0:
2069
- // assume convergence if no cost improvement in this number of iterations
2070
- //
2071
- int max_no_improvement;
2072
-
2073
- bool print_forward_graph;
2074
- bool print_backward_graph;
2075
-
2076
- int n_gradient_accumulation;
2077
-
2078
- // ADAM parameters
2079
- struct {
2080
- int n_iter;
2081
-
2082
- float sched; // schedule multiplier (fixed, decay or warmup)
2083
- float decay; // weight decay for AdamW, use 0.0f to disable
2084
- int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
2085
- float alpha; // learning rate
2086
- float beta1;
2087
- float beta2;
2088
- float eps; // epsilon for numerical stability
2089
- float eps_f; // epsilon for convergence test
2090
- float eps_g; // epsilon for convergence test
2091
- float gclip; // gradient clipping
2092
- } adam;
2093
-
2094
- // LBFGS parameters
2095
- struct {
2096
- int m; // number of corrections to approximate the inv. Hessian
2097
- int n_iter;
2098
- int max_linesearch;
2099
-
2100
- float eps; // convergence tolerance
2101
- float ftol; // line search tolerance
2102
- float wolfe;
2103
- float min_step;
2104
- float max_step;
2105
-
2106
- enum ggml_linesearch linesearch;
2107
- } lbfgs;
2108
- };
2109
-
2110
- struct ggml_opt_context {
2111
- struct ggml_context * ctx;
2112
- struct ggml_opt_params params;
2113
-
2114
- int iter;
2115
- int64_t nx; // number of parameter elements
2116
-
2117
- bool just_initialized;
2118
-
2119
- float loss_before;
2120
- float loss_after;
2121
-
2122
- struct {
2123
- struct ggml_tensor * g; // current gradient
2124
- struct ggml_tensor * m; // first moment
2125
- struct ggml_tensor * v; // second moment
2126
- struct ggml_tensor * pf; // past function values
2127
- float fx_best;
2128
- float fx_prev;
2129
- int n_no_improvement;
2130
- } adam;
2131
-
2132
- struct {
2133
- struct ggml_tensor * x; // current parameters
2134
- struct ggml_tensor * xp; // previous parameters
2135
- struct ggml_tensor * g; // current gradient
2136
- struct ggml_tensor * gp; // previous gradient
2137
- struct ggml_tensor * d; // search direction
2138
- struct ggml_tensor * pf; // past function values
2139
- struct ggml_tensor * lmal; // the L-BFGS memory alpha
2140
- struct ggml_tensor * lmys; // the L-BFGS memory ys
2141
- struct ggml_tensor * lms; // the L-BFGS memory s
2142
- struct ggml_tensor * lmy; // the L-BFGS memory y
2143
- float fx_best;
2144
- float step;
2145
- int j;
2146
- int k;
2147
- int end;
2148
- int n_no_improvement;
2149
- } lbfgs;
2150
- };
2151
-
2152
- GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
2153
-
2154
- // optimize the function defined by the tensor f
2155
- GGML_API enum ggml_opt_result ggml_opt(
2156
- struct ggml_context * ctx,
2157
- struct ggml_opt_params params,
2158
- struct ggml_tensor * f);
2159
-
2160
- // initialize optimizer context
2161
- GGML_API void ggml_opt_init(
2162
- struct ggml_context * ctx,
2163
- struct ggml_opt_context * opt,
2164
- struct ggml_opt_params params,
2165
- int64_t nx);
2166
-
2167
- // continue optimizing the function defined by the tensor f
2168
- GGML_API enum ggml_opt_result ggml_opt_resume(
2169
- struct ggml_context * ctx,
2170
- struct ggml_opt_context * opt,
2171
- struct ggml_tensor * f);
2172
-
2173
- // continue optimizing the function defined by the tensor f
2174
- GGML_API enum ggml_opt_result ggml_opt_resume_g(
2175
- struct ggml_context * ctx,
2176
- struct ggml_opt_context * opt,
2177
- struct ggml_tensor * f,
2178
- struct ggml_cgraph * gf,
2179
- struct ggml_cgraph * gb,
2180
- ggml_opt_callback callback,
2181
- void * callback_data);
2101
+ // Set callback for all future logging events.
2102
+ // If this is not called, or NULL is supplied, everything is output on stderr.
2103
+ GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
2182
2104
 
2183
- //
2184
- // tensor flags
2185
- //
2186
- GGML_API void ggml_set_input(struct ggml_tensor * tensor);
2187
- GGML_API void ggml_set_output(struct ggml_tensor * tensor);
2105
+ GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
2188
2106
 
2189
2107
  //
2190
2108
  // quantization
@@ -2215,187 +2133,69 @@ extern "C" {
2215
2133
  int64_t n_per_row,
2216
2134
  const float * imatrix);
2217
2135
 
2218
- //
2219
- // gguf
2220
- //
2136
+ #ifdef __cplusplus
2137
+ // restrict not standard in C++
2138
+ # if defined(__GNUC__)
2139
+ # define GGML_RESTRICT __restrict__
2140
+ # elif defined(__clang__)
2141
+ # define GGML_RESTRICT __restrict
2142
+ # elif defined(_MSC_VER)
2143
+ # define GGML_RESTRICT __restrict
2144
+ # else
2145
+ # define GGML_RESTRICT
2146
+ # endif
2147
+ #else
2148
+ # if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
2149
+ # define GGML_RESTRICT __restrict
2150
+ # else
2151
+ # define GGML_RESTRICT restrict
2152
+ # endif
2153
+ #endif
2154
+ typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2155
+ typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2221
2156
 
2222
- enum gguf_type {
2223
- GGUF_TYPE_UINT8 = 0,
2224
- GGUF_TYPE_INT8 = 1,
2225
- GGUF_TYPE_UINT16 = 2,
2226
- GGUF_TYPE_INT16 = 3,
2227
- GGUF_TYPE_UINT32 = 4,
2228
- GGUF_TYPE_INT32 = 5,
2229
- GGUF_TYPE_FLOAT32 = 6,
2230
- GGUF_TYPE_BOOL = 7,
2231
- GGUF_TYPE_STRING = 8,
2232
- GGUF_TYPE_ARRAY = 9,
2233
- GGUF_TYPE_UINT64 = 10,
2234
- GGUF_TYPE_INT64 = 11,
2235
- GGUF_TYPE_FLOAT64 = 12,
2236
- GGUF_TYPE_COUNT, // marks the end of the enum
2157
+ struct ggml_type_traits {
2158
+ const char * type_name;
2159
+ int64_t blck_size;
2160
+ int64_t blck_size_interleave; // interleave elements in blocks
2161
+ size_t type_size;
2162
+ bool is_quantized;
2163
+ ggml_to_float_t to_float;
2164
+ ggml_from_float_t from_float_ref;
2237
2165
  };
2238
2166
 
2239
- struct gguf_context;
2167
+ GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
2240
2168
 
2241
- struct gguf_init_params {
2242
- bool no_alloc;
2169
+ // ggml threadpool
2170
+ // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
2171
+ // the goal should be to create an API that other backends can use move everything to the ggml base
2243
2172
 
2244
- // if not NULL, create a ggml_context and allocate the tensor data in it
2245
- struct ggml_context ** ctx;
2173
+ // scheduling priorities
2174
+ enum ggml_sched_priority {
2175
+ GGML_SCHED_PRIO_NORMAL,
2176
+ GGML_SCHED_PRIO_MEDIUM,
2177
+ GGML_SCHED_PRIO_HIGH,
2178
+ GGML_SCHED_PRIO_REALTIME
2246
2179
  };
2247
2180
 
2248
- GGML_API struct gguf_context * gguf_init_empty(void);
2249
- GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
2250
- //GGML_API struct gguf_context * gguf_init_from_buffer(..);
2251
-
2252
- GGML_API void gguf_free(struct gguf_context * ctx);
2253
-
2254
- GGML_API const char * gguf_type_name(enum gguf_type type);
2255
-
2256
- GGML_API int gguf_get_version (const struct gguf_context * ctx);
2257
- GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
2258
- GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
2259
- GGML_API void * gguf_get_data (const struct gguf_context * ctx);
2260
-
2261
- GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
2262
- GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
2263
- GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
2264
-
2265
- GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
2266
- GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
2267
-
2268
- // will abort if the wrong type is used for the key
2269
- GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
2270
- GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
2271
- GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
2272
- GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
2273
- GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
2274
- GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
2275
- GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
2276
- GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
2277
- GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
2278
- GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
2279
- GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
2280
- GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
2281
- GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
2282
- GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
2283
- GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2284
- GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
2285
-
2286
- GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2287
- GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2288
- GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2289
- GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2290
- GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
2291
-
2292
- // overrides existing values or adds a new one
2293
- GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
2294
- GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
2295
- GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
2296
- GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
2297
- GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
2298
- GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
2299
- GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
2300
- GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
2301
- GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
2302
- GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
2303
- GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
2304
- GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
2305
- GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
2306
- GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
2307
-
2308
- // set or add KV pairs from another context
2309
- GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
2310
-
2311
- // manage tensor info
2312
- GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
2313
- GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
2314
- GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
2315
-
2316
- // writing gguf files can be done in 2 ways:
2317
- //
2318
- // - write the entire gguf_context to a binary file in a single pass:
2319
- //
2320
- // gguf_write_to_file(ctx, fname);
2321
- //
2322
- // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
2323
- //
2324
- // FILE * f = fopen(fname, "wb");
2325
- // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
2326
- // fwrite(f, ...);
2327
- // void * data = gguf_meta_get_meta_data(ctx);
2328
- // fseek(f, 0, SEEK_SET);
2329
- // fwrite(f, data, gguf_get_meta_size(ctx));
2330
- // free(data);
2331
- // fclose(f);
2332
- //
2333
-
2334
- // write the entire context to a binary file
2335
- GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
2336
-
2337
- // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
2338
- GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
2339
- GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
2340
-
2341
- //
2342
- // system info
2343
- //
2181
+ // threadpool params
2182
+ // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
2183
+ struct ggml_threadpool_params {
2184
+ bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
2185
+ int n_threads; // number of threads
2186
+ enum ggml_sched_priority prio; // thread priority
2187
+ uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
2188
+ bool strict_cpu; // strict cpu placement
2189
+ bool paused; // start in paused state
2190
+ };
2344
2191
 
2345
- GGML_API int ggml_cpu_has_avx (void);
2346
- GGML_API int ggml_cpu_has_avx_vnni (void);
2347
- GGML_API int ggml_cpu_has_avx2 (void);
2348
- GGML_API int ggml_cpu_has_avx512 (void);
2349
- GGML_API int ggml_cpu_has_avx512_vbmi(void);
2350
- GGML_API int ggml_cpu_has_avx512_vnni(void);
2351
- GGML_API int ggml_cpu_has_fma (void);
2352
- GGML_API int ggml_cpu_has_neon (void);
2353
- GGML_API int ggml_cpu_has_arm_fma (void);
2354
- GGML_API int ggml_cpu_has_metal (void);
2355
- GGML_API int ggml_cpu_has_f16c (void);
2356
- GGML_API int ggml_cpu_has_fp16_va (void);
2357
- GGML_API int ggml_cpu_has_wasm_simd (void);
2358
- GGML_API int ggml_cpu_has_blas (void);
2359
- GGML_API int ggml_cpu_has_cuda (void);
2360
- GGML_API int ggml_cpu_has_clblast (void);
2361
- GGML_API int ggml_cpu_has_vulkan (void);
2362
- GGML_API int ggml_cpu_has_kompute (void);
2363
- GGML_API int ggml_cpu_has_gpublas (void);
2364
- GGML_API int ggml_cpu_has_sse3 (void);
2365
- GGML_API int ggml_cpu_has_ssse3 (void);
2366
- GGML_API int ggml_cpu_has_sycl (void);
2367
- GGML_API int ggml_cpu_has_vsx (void);
2368
- GGML_API int ggml_cpu_has_matmul_int8(void);
2192
+ struct ggml_threadpool; // forward declaration, see ggml.c
2369
2193
 
2370
- //
2371
- // Internal types and functions exposed for tests and benchmarks
2372
- //
2194
+ typedef struct ggml_threadpool * ggml_threadpool_t;
2373
2195
 
2374
- #ifdef __cplusplus
2375
- // restrict not standard in C++
2376
- #define GGML_RESTRICT
2377
- #else
2378
- #define GGML_RESTRICT restrict
2379
- #endif
2380
- typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2381
- typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2382
- typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
2383
- const void * GGML_RESTRICT y, size_t by, int nrc);
2384
-
2385
- typedef struct {
2386
- const char * type_name;
2387
- int blck_size;
2388
- size_t type_size;
2389
- bool is_quantized;
2390
- ggml_to_float_t to_float;
2391
- ggml_from_float_t from_float;
2392
- ggml_from_float_t from_float_reference;
2393
- ggml_vec_dot_t vec_dot;
2394
- enum ggml_type vec_dot_type;
2395
- int64_t nrows; // number of rows to process simultaneously;
2396
- } ggml_type_traits_t;
2397
-
2398
- GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
2196
+ GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
2197
+ GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
2198
+ GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
2399
2199
 
2400
2200
  #ifdef __cplusplus
2401
2201
  }