whispercpp 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (797)
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -3
  3. data/README.md +92 -31
  4. data/Rakefile +26 -7
  5. data/ext/.gitignore +5 -7
  6. data/ext/dependencies.rb +61 -0
  7. data/ext/extconf.rb +21 -198
  8. data/ext/options.rb +221 -0
  9. data/ext/ruby_whisper.c +159 -0
  10. data/ext/ruby_whisper.h +17 -2
  11. data/ext/ruby_whisper_context.c +641 -0
  12. data/ext/ruby_whisper_error.c +52 -0
  13. data/ext/ruby_whisper_model.c +232 -0
  14. data/ext/ruby_whisper_params.c +1301 -0
  15. data/ext/ruby_whisper_segment.c +143 -0
  16. data/ext/ruby_whisper_transcribe.cpp +87 -0
  17. data/ext/ruby_whisper_vad_params.c +288 -0
  18. data/ext/sources/.dockerignore +3 -0
  19. data/ext/sources/.github/workflows/bindings-ruby.yml +21 -0
  20. data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
  21. data/ext/sources/CMakeLists.txt +251 -0
  22. data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
  23. data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
  24. data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
  25. data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
  26. data/ext/sources/bindings/javascript/package.json +26 -0
  27. data/ext/sources/bindings/javascript/whisper.js +19 -0
  28. data/ext/sources/build-xcframework.sh +547 -0
  29. data/ext/sources/ci/run.sh +336 -0
  30. data/ext/sources/close-issue.yml +28 -0
  31. data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
  32. data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
  33. data/ext/sources/cmake/build-info.cmake +60 -0
  34. data/ext/sources/cmake/git-vars.cmake +22 -0
  35. data/ext/sources/cmake/whisper-config.cmake.in +65 -0
  36. data/ext/sources/cmake/whisper.pc.in +10 -0
  37. data/ext/sources/examples/CMakeLists.txt +124 -0
  38. data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
  39. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +37 -0
  40. data/ext/sources/examples/addon.node/addon.cpp +438 -0
  41. data/ext/sources/examples/addon.node/index.js +54 -0
  42. data/ext/sources/examples/addon.node/package.json +16 -0
  43. data/ext/sources/examples/bench/CMakeLists.txt +8 -0
  44. data/ext/sources/examples/bench/bench.cpp +175 -0
  45. data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
  46. data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
  47. data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
  48. data/ext/sources/examples/cli/CMakeLists.txt +8 -0
  49. data/ext/sources/examples/cli/cli.cpp +1294 -0
  50. data/ext/sources/examples/coi-serviceworker.js +146 -0
  51. data/ext/sources/examples/command/CMakeLists.txt +10 -0
  52. data/ext/sources/examples/command/command.cpp +776 -0
  53. data/ext/sources/examples/command/commands.txt +9 -0
  54. data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
  55. data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
  56. data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
  57. data/ext/sources/examples/common-ggml.cpp +238 -0
  58. data/ext/sources/examples/common-ggml.h +18 -0
  59. data/ext/sources/examples/common-sdl.cpp +227 -0
  60. data/ext/sources/examples/common-sdl.h +49 -0
  61. data/ext/sources/examples/common-whisper.cpp +168 -0
  62. data/ext/sources/examples/common-whisper.h +24 -0
  63. data/ext/sources/examples/common.cpp +675 -0
  64. data/ext/sources/examples/common.h +322 -0
  65. data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
  66. data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
  67. data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
  68. data/ext/sources/examples/generate-karaoke.sh +57 -0
  69. data/ext/sources/examples/grammar-parser.cpp +423 -0
  70. data/ext/sources/examples/grammar-parser.h +29 -0
  71. data/ext/sources/examples/helpers.js +191 -0
  72. data/ext/sources/examples/json.hpp +24596 -0
  73. data/ext/sources/examples/livestream.sh +112 -0
  74. data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
  75. data/ext/sources/examples/lsp/lsp.cpp +467 -0
  76. data/ext/sources/examples/lsp/whisper.vim +362 -0
  77. data/ext/sources/examples/miniaudio.h +93468 -0
  78. data/ext/sources/examples/python/test_whisper_processor.py +7 -0
  79. data/ext/sources/examples/python/whisper_processor.py +54 -0
  80. data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
  81. data/ext/sources/examples/quantize/quantize.cpp +223 -0
  82. data/ext/sources/examples/server/CMakeLists.txt +12 -0
  83. data/ext/sources/examples/server/bench.js +29 -0
  84. data/ext/sources/examples/server/httplib.h +10497 -0
  85. data/ext/sources/examples/server/server.cpp +1091 -0
  86. data/ext/sources/examples/server.py +115 -0
  87. data/ext/sources/examples/stb_vorbis.c +5584 -0
  88. data/ext/sources/examples/stream/CMakeLists.txt +10 -0
  89. data/ext/sources/examples/stream/stream.cpp +429 -0
  90. data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
  91. data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
  92. data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
  93. data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
  94. data/ext/sources/examples/sycl/build.sh +22 -0
  95. data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
  96. data/ext/sources/examples/sycl/run-whisper.sh +17 -0
  97. data/ext/sources/examples/talk-llama/CMakeLists.txt +40 -0
  98. data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
  99. data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
  100. data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
  101. data/ext/sources/examples/talk-llama/llama-arch.cpp +1746 -0
  102. data/ext/sources/examples/talk-llama/llama-arch.h +437 -0
  103. data/ext/sources/examples/talk-llama/llama-batch.cpp +374 -0
  104. data/ext/sources/examples/talk-llama/llama-batch.h +89 -0
  105. data/ext/sources/examples/talk-llama/llama-chat.cpp +663 -0
  106. data/ext/sources/examples/talk-llama/llama-chat.h +58 -0
  107. data/ext/sources/examples/talk-llama/llama-context.cpp +2676 -0
  108. data/ext/sources/examples/talk-llama/llama-context.h +276 -0
  109. data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
  110. data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
  111. data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
  112. data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
  113. data/ext/sources/examples/talk-llama/llama-graph.cpp +1618 -0
  114. data/ext/sources/examples/talk-llama/llama-graph.h +640 -0
  115. data/ext/sources/examples/talk-llama/llama-hparams.cpp +95 -0
  116. data/ext/sources/examples/talk-llama/llama-hparams.h +190 -0
  117. data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
  118. data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
  119. data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
  120. data/ext/sources/examples/talk-llama/llama-io.h +35 -0
  121. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +2739 -0
  122. data/ext/sources/examples/talk-llama/llama-kv-cache.h +502 -0
  123. data/ext/sources/examples/talk-llama/llama-kv-cells.h +379 -0
  124. data/ext/sources/examples/talk-llama/llama-memory.cpp +1 -0
  125. data/ext/sources/examples/talk-llama/llama-memory.h +32 -0
  126. data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
  127. data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
  128. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1138 -0
  129. data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
  130. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +281 -0
  131. data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
  132. data/ext/sources/examples/talk-llama/llama-model.cpp +13814 -0
  133. data/ext/sources/examples/talk-llama/llama-model.h +425 -0
  134. data/ext/sources/examples/talk-llama/llama-quant.cpp +966 -0
  135. data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
  136. data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
  137. data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
  138. data/ext/sources/examples/talk-llama/llama-vocab.cpp +3340 -0
  139. data/ext/sources/examples/talk-llama/llama-vocab.h +131 -0
  140. data/ext/sources/examples/talk-llama/llama.cpp +354 -0
  141. data/ext/sources/examples/talk-llama/llama.h +1377 -0
  142. data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
  143. data/ext/sources/examples/talk-llama/speak +40 -0
  144. data/ext/sources/examples/talk-llama/speak.bat +1 -0
  145. data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
  146. data/ext/sources/examples/talk-llama/talk-llama.cpp +808 -0
  147. data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
  148. data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
  149. data/ext/sources/examples/talk-llama/unicode.cpp +849 -0
  150. data/ext/sources/examples/talk-llama/unicode.h +66 -0
  151. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
  152. data/ext/sources/examples/vad-speech-segments/speech.cpp +143 -0
  153. data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
  154. data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
  155. data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
  156. data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
  157. data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
  158. data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
  159. data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
  160. data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
  161. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +249 -0
  162. data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
  163. data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
  164. data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
  165. data/ext/sources/ggml/CMakeLists.txt +390 -0
  166. data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
  167. data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
  168. data/ext/sources/ggml/cmake/common.cmake +26 -0
  169. data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
  170. data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
  171. data/ext/{ggml → sources/ggml}/include/ggml-backend.h +9 -7
  172. data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
  173. data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +9 -1
  174. data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
  175. data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
  176. data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
  177. data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
  178. data/ext/{ggml → sources/ggml}/include/ggml.h +182 -265
  179. data/ext/sources/ggml/include/gguf.h +202 -0
  180. data/ext/sources/ggml/src/CMakeLists.txt +346 -0
  181. data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
  182. data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  183. data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
  184. data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +87 -53
  185. data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +26 -14
  186. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  187. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  188. data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
  189. data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
  190. data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
  191. data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
  192. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  193. data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +135 -1
  194. data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +564 -146
  195. data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  196. data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
  197. data/ext/{ggml → sources/ggml}/src/ggml-common.h +12 -8
  198. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +504 -0
  199. data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +2 -1
  200. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  201. data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
  202. data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  203. data/ext/sources/ggml/src/ggml-cpu/common.h +72 -0
  204. data/ext/{ggml → sources/ggml}/src/ggml-cpu/cpu-feats-x86.cpp +5 -1
  205. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  206. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +163 -41
  207. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.c +4029 -1117
  208. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3510 -0
  209. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +67 -18
  210. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  211. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  212. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  213. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  214. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  215. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  216. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +8903 -0
  217. data/ext/sources/ggml/src/ggml-cpu/ops.h +110 -0
  218. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  219. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  220. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
  221. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +252 -0
  222. data/ext/sources/ggml/src/ggml-cpu/vec.h +818 -0
  223. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  224. data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
  225. data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
  226. data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
  227. data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
  228. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
  229. data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
  230. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
  231. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
  232. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
  233. data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  234. data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
  235. data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
  236. data/ext/sources/ggml/src/ggml-cuda/common.cuh +828 -0
  237. data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
  238. data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
  239. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  240. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  241. data/ext/sources/ggml/src/ggml-cuda/convert.cu +730 -0
  242. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +26 -0
  243. data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
  244. data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  245. data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  246. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
  247. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
  248. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  249. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  250. data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  251. data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
  252. data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  253. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  254. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1471 -0
  255. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  256. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  257. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  258. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  259. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  260. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  261. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  262. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  263. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
  264. data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
  265. data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
  266. data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
  267. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3505 -0
  268. data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
  269. data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
  270. data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
  271. data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
  272. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
  273. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
  274. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  275. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +336 -0
  276. data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +12 -0
  277. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
  278. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  279. data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
  280. data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
  281. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  282. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  283. data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
  284. data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  285. data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
  286. data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
  287. data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
  288. data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  289. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
  290. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
  291. data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
  292. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
  293. data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
  294. data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
  295. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
  296. data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
  297. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  298. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  299. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  300. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  301. data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
  302. data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
  303. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +39 -0
  304. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  305. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  306. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  307. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  308. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  309. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  310. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  311. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  312. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  313. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  314. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  315. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  316. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  317. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  318. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  319. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  320. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  321. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  322. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  323. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  324. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  325. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  326. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  327. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  328. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  329. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  330. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  331. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  332. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  333. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  334. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  335. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  336. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  337. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  338. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  339. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  340. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  341. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  342. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  343. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  344. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  345. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  346. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  347. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  348. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  349. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  350. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  351. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  352. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  353. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  354. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  355. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  356. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  357. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  358. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  359. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  360. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  361. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  362. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  363. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  364. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  365. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  366. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  367. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  368. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  369. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  370. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  371. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  372. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  373. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  374. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  375. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  376. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  377. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  378. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  379. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  380. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  381. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  382. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  383. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  384. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  385. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  386. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  387. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  388. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  389. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  390. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  391. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  392. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  393. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  394. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  395. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  396. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  397. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  398. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  399. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  400. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  401. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  402. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  403. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  404. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  405. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  406. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  407. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  408. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  409. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  410. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  411. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  412. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  413. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  414. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  415. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  416. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  417. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  418. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  419. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  420. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  421. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  422. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  423. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  424. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  425. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  426. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  427. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  428. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  429. data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
  430. data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  431. data/ext/sources/ggml/src/ggml-cuda/unary.cu +289 -0
  432. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +59 -0
  433. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
  434. data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
  435. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  436. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
  437. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
  438. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
  439. data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
  440. data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
  441. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  442. data/ext/{ggml → sources/ggml}/src/ggml-impl.h +64 -19
  443. data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  444. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  445. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  446. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  447. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  448. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  449. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  450. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  451. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  452. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  453. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  454. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  455. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  456. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  457. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  458. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  459. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  460. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  461. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  462. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  463. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  464. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  465. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  466. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  467. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  468. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  469. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  470. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  471. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  472. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  473. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  474. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  475. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  476. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  477. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  478. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  479. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  480. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  481. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  482. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +622 -0
  483. data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2178 -1064
  484. data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +1575 -1218
  485. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  486. data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
  487. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
  488. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  489. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +5124 -0
  490. data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  491. data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  492. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  493. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  494. data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  495. data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  496. data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  497. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  498. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  499. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  500. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  501. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  502. data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  503. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  504. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  505. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  506. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  507. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  508. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  509. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  510. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  511. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  512. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  513. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  514. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  515. data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  516. data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  517. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  518. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  519. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  520. data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  521. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  522. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  523. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  524. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  525. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  526. data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
  527. data/ext/{ggml → sources/ggml}/src/ggml-quants.c +114 -120
  528. data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  529. data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +480 -73
  530. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  531. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
  532. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +345 -0
  533. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  534. data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
  535. data/ext/sources/ggml/src/ggml-sycl/common.hpp +589 -0
  536. data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +32 -33
  537. data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
  538. data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +4 -2
  539. data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
  540. data/ext/{ggml → sources/ggml}/src/ggml-sycl/convert.cpp +104 -28
  541. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
  542. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +700 -0
  543. data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
  544. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +791 -0
  545. data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +156 -17
  546. data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  547. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  548. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1511 -0
  549. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  550. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +99 -0
  551. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +309 -0
  552. data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
  553. data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1004 -1240
  554. data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
  555. data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
  556. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
  557. data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
  558. data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +0 -1
  559. data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
  560. data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmvq.cpp +261 -166
  561. data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  562. data/ext/{ggml → sources/ggml}/src/ggml-sycl/norm.cpp +204 -81
  563. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
  564. data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
  565. data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
  566. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
  567. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +83 -0
  568. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +361 -0
  569. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
  570. data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +35 -25
  571. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
  572. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  573. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  574. data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +3 -3
  575. data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  576. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1215 -0
  577. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +293 -0
  578. data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
  579. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +196 -0
  580. data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  581. data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3130 -1087
  582. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +39 -0
  583. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  584. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  585. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  586. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  587. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  588. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  589. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  590. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  591. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  592. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  593. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  594. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  595. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  596. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  597. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  598. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  599. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  600. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  601. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  602. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  603. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  604. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  605. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  606. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  607. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  608. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  609. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  610. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  611. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  612. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  613. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  614. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  615. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  616. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  617. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  618. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  619. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  620. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  621. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  622. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  623. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  624. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  625. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  626. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  627. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  628. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  629. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  630. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  631. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  632. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  633. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  634. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  635. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  636. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  637. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  638. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  639. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  640. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  641. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  642. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  643. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  644. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  645. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  646. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  647. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  648. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  649. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  650. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  651. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  652. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  653. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  654. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  655. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  656. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  657. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  658. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  659. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  660. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  661. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  662. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  663. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  664. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  665. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  666. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  667. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  668. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  669. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  670. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  671. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  672. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  673. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  674. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  675. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  676. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  677. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  678. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  679. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  680. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  681. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  682. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  683. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  684. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  685. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  686. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  687. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  688. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  689. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  690. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  691. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  692. data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +193 -35
  693. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  694. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  695. data/ext/{ggml → sources/ggml}/src/ggml.c +676 -1820
  696. data/ext/sources/ggml/src/gguf.cpp +1330 -0
  697. data/ext/{include → sources/include}/whisper.h +68 -2
  698. data/ext/sources/src/CMakeLists.txt +143 -0
  699. data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
  700. data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +35 -10
  701. data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
  702. data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +28 -3
  703. data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
  704. data/ext/sources/src/whisper-arch.h +197 -0
  705. data/ext/{src → sources/src}/whisper.cpp +1905 -374
  706. data/ext/sources/tests/CMakeLists.txt +105 -0
  707. data/ext/sources/tests/earnings21/eval.mk +58 -0
  708. data/ext/sources/tests/earnings21/eval.py +68 -0
  709. data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
  710. data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
  711. data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
  712. data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
  713. data/ext/sources/tests/earnings21/requirements.txt +6 -0
  714. data/ext/sources/tests/en-0-ref.txt +1 -0
  715. data/ext/sources/tests/en-1-ref.txt +1 -0
  716. data/ext/sources/tests/en-2-ref.txt +1 -0
  717. data/ext/sources/tests/es-0-ref.txt +1 -0
  718. data/ext/sources/tests/librispeech/eval.mk +39 -0
  719. data/ext/sources/tests/librispeech/eval.py +47 -0
  720. data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
  721. data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
  722. data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
  723. data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
  724. data/ext/sources/tests/librispeech/requirements.txt +6 -0
  725. data/ext/sources/tests/run-tests.sh +130 -0
  726. data/ext/sources/tests/test-c.c +3 -0
  727. data/ext/sources/tests/test-vad-full.cpp +54 -0
  728. data/ext/sources/tests/test-vad.cpp +83 -0
  729. data/ext/sources/tests/test-whisper.js +58 -0
  730. data/extsources.rb +33 -5
  731. data/lib/whisper/model/uri.rb +149 -128
  732. data/sig/whisper.rbs +480 -0
  733. data/tests/helper.rb +28 -0
  734. data/tests/test_callback.rb +45 -3
  735. data/tests/test_error.rb +2 -2
  736. data/tests/test_model.rb +38 -0
  737. data/tests/test_package.rb +18 -3
  738. data/tests/test_params.rb +145 -8
  739. data/tests/test_segment.rb +10 -19
  740. data/tests/test_vad.rb +19 -0
  741. data/tests/test_vad_params.rb +103 -0
  742. data/tests/test_whisper.rb +37 -37
  743. data/whispercpp.gemspec +5 -4
  744. metadata +766 -111
  745. data/ext/cpu.mk +0 -9
  746. data/ext/examples/dr_wav.h +0 -8815
  747. data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
  748. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
  749. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
  750. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
  751. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
  752. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
  753. data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
  754. data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
  755. data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
  756. data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
  757. data/ext/metal-embed.mk +0 -17
  758. data/ext/metal.mk +0 -6
  759. data/ext/ruby_whisper.cpp +0 -1909
  760. data/ext/scripts/get-flags.mk +0 -38
  761. data/lib/whisper.rb +0 -2
  762. /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
  763. /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
  764. /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
  765. /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
  766. /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
  767. /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
  768. /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
  769. /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
  770. /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
  771. /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
  772. /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
  773. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
  774. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
  775. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
  776. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
  777. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
  778. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
  779. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
  780. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
  781. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
  782. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
  783. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +0 -0
  784. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
  785. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-aarch64.h +0 -0
  786. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.cpp +0 -0
  787. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-hbm.h +0 -0
  788. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-quants.h +0 -0
  789. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.cpp +0 -0
  790. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-traits.h +0 -0
  791. /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
  792. /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
  793. /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
  794. /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
  795. /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
  796. /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
  797. /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
@@ -0,0 +1,1229 @@
1
+ #include "llama-grammar.h"
2
+
3
+ #include "llama-impl.h"
4
+ #include "llama-vocab.h"
5
+ #include "llama-sampling.h"
6
+
7
+ #include <cmath>
8
+ #include <algorithm>
9
+ #include <stdexcept>
10
+
11
+ //
12
+ // helpers
13
+ //
14
+
15
// Decodes a single UTF-8 sequence starting at `src`.
//
// Returns the decoded code point and a pointer to the first byte after the
// consumed sequence.
//
// NOTE: assumes valid UTF-8. Continuation bytes are not validated, and a lead
// byte in the 0x80..0xBF range is treated as a one-byte sequence. If the
// sequence is cut short by the terminating NUL, decoding stops at the NUL and
// the partially accumulated value is returned.
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
    // total sequence length indexed by the high nibble of the lead byte
    static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };

    const uint8_t first_byte = static_cast<uint8_t>(*src);
    const int     len        = lookup[first_byte >> 4];

    // keep only the payload bits of the lead byte
    const uint8_t mask  = (1 << (8 - len)) - 1;
    uint32_t      value = first_byte & mask;

    // Count the remaining continuation bytes instead of comparing against
    // `src + len`: that pointer can lie beyond the end of the buffer when the
    // sequence is truncated, and merely forming it is undefined behavior.
    const char * pos = src + 1;
    for (int n_cont = len - 1; n_cont > 0 && *pos; --n_cont, ++pos) {
        value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
    }
    return std::make_pair(value, pos);
}
30
+
31
+ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32
+ const std::string & src,
33
+ llama_partial_utf8 partial_start) {
34
+ static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
35
+ const char * pos = src.c_str();
36
+ std::vector<uint32_t> code_points;
37
+
38
+ // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39
+ code_points.reserve(src.size() + 1);
40
+ uint32_t value = partial_start.value;
41
+ int n_remain = partial_start.n_remain;
42
+
43
+ // continue previous decode, if applicable
44
+ while (*pos != 0 && n_remain > 0) {
45
+ uint8_t next_byte = static_cast<uint8_t>(*pos);
46
+ if ((next_byte >> 6) != 2) {
47
+ // invalid sequence, abort
48
+ code_points.push_back(0);
49
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
50
+ }
51
+ value = (value << 6) + (next_byte & 0x3F);
52
+ ++pos;
53
+ --n_remain;
54
+ }
55
+
56
+ if (partial_start.n_remain > 0 && n_remain == 0) {
57
+ code_points.push_back(value);
58
+ }
59
+
60
+ // decode any subsequent utf-8 sequences, which may end in an incomplete one
61
+ while (*pos != 0) {
62
+ uint8_t first_byte = static_cast<uint8_t>(*pos);
63
+ uint8_t highbits = first_byte >> 4;
64
+ n_remain = lookup[highbits] - 1;
65
+
66
+ if (n_remain < 0) {
67
+ // invalid sequence, abort
68
+ code_points.clear();
69
+ code_points.push_back(0);
70
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
71
+ }
72
+
73
+ uint8_t mask = (1 << (7 - n_remain)) - 1;
74
+ value = first_byte & mask;
75
+
76
+ ++pos;
77
+ while (*pos != 0 && n_remain > 0) {
78
+ value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
79
+ ++pos;
80
+ --n_remain;
81
+ }
82
+ if (n_remain == 0) {
83
+ code_points.push_back(value);
84
+ }
85
+ }
86
+ code_points.push_back(0);
87
+
88
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
89
+ }
90
+
91
// Returns true when `c` is an ASCII decimal digit ('0'..'9').
static bool is_digit_char(char c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
94
+
95
+ static bool is_word_char(char c) {
96
+ return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
97
+ }
98
+
99
// Parses exactly `size` hexadecimal digits (upper or lower case) starting at
// `src`.
//
// Returns the parsed value and a pointer to the first character after the
// digits. Throws std::runtime_error when fewer than `size` hex digits are
// available before a non-hex character or the terminating NUL.
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
    const char * pos    = src;
    uint32_t     value  = 0;
    int          n_read = 0;

    // Count consumed digits instead of comparing against `src + size`: that
    // pointer can lie beyond the end of the buffer when the input is shorter
    // than `size`, and merely forming it is undefined behavior.
    for ( ; n_read < size && *pos; ++n_read, ++pos) {
        const char c = *pos;
        uint32_t digit;
        if ('a' <= c && c <= 'f') {
            digit = c - 'a' + 10;
        } else if ('A' <= c && c <= 'F') {
            digit = c - 'A' + 10;
        } else if ('0' <= c && c <= '9') {
            digit = c - '0';
        } else {
            break;
        }
        value = (value << 4) + digit;
    }
    if (n_read != size) {
        throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
    }
    return std::make_pair(value, pos);
}
121
+
122
// Skips spaces, tabs and '#' comments starting at `src`; line terminators
// ('\r', '\n') are skipped as well when `newline_ok` is true.
//
// A comment extends from '#' up to, but not including, the next line
// terminator or the terminating NUL. Returns a pointer to the first character
// that was not skipped.
static const char * parse_space(const char * src, bool newline_ok) {
    const char * cursor = src;
    for (;;) {
        const char ch = *cursor;

        if (ch == '#') {
            // consume the comment body, leaving the line terminator in place
            do {
                ++cursor;
            } while (*cursor != '\0' && *cursor != '\r' && *cursor != '\n');
            continue;
        }

        const bool is_blank      = ch == ' ' || ch == '\t';
        const bool is_line_break = newline_ok && (ch == '\r' || ch == '\n');
        if (!is_blank && !is_line_break) {
            break;
        }
        ++cursor;
    }
    return cursor;
}
136
+
137
+ static const char * parse_name(const char * src) {
138
+ const char * pos = src;
139
+ while (is_word_char(*pos)) {
140
+ pos++;
141
+ }
142
+ if (pos == src) {
143
+ throw std::runtime_error(std::string("expecting name at ") + src);
144
+ }
145
+ return pos;
146
+ }
147
+
148
+ static const char * parse_int(const char * src) {
149
+ const char * pos = src;
150
+ while (is_digit_char(*pos)) {
151
+ pos++;
152
+ }
153
+ if (pos == src) {
154
+ throw std::runtime_error(std::string("expecting integer at ") + src);
155
+ }
156
+ return pos;
157
+ }
158
+
159
+ static std::pair<uint32_t, const char *> parse_char(const char * src) {
160
+ if (*src == '\\') {
161
+ switch (src[1]) {
162
+ case 'x': return parse_hex(src + 2, 2);
163
+ case 'u': return parse_hex(src + 2, 4);
164
+ case 'U': return parse_hex(src + 2, 8);
165
+ case 't': return std::make_pair('\t', src + 2);
166
+ case 'r': return std::make_pair('\r', src + 2);
167
+ case 'n': return std::make_pair('\n', src + 2);
168
+ case '\\':
169
+ case '"':
170
+ case '[':
171
+ case ']':
172
+ return std::make_pair(src[1], src + 2);
173
+ default:
174
+ throw std::runtime_error(std::string("unknown escape at ") + src);
175
+ }
176
+ } else if (*src) {
177
+ return decode_utf8(src);
178
+ }
179
+ throw std::runtime_error("unexpected end of input");
180
+ }
181
+
182
// Writes one grammar code point to `file` in human-readable form.
//
// Printable ASCII (0x20..0x7E) is written verbatim. Every other code point is
// written as <U+XXXX> rather than being UTF-8 encoded. DEL (0x7F) is a control
// character, so it takes the escaped form as well.
static void print_grammar_char(FILE * file, uint32_t c) {
    const bool printable_ascii = c >= 0x20 && c < 0x7f;
    if (printable_ascii) {
        fprintf(file, "%c", static_cast<char>(c));
    } else {
        // cop out of encoding UTF-8
        fprintf(file, "<U+%04X>", c);
    }
}
190
+
191
+ static bool is_char_element(llama_grammar_element elem) {
192
+ switch (elem.type) {
193
+ case LLAMA_GRETYPE_CHAR: return true;
194
+ case LLAMA_GRETYPE_CHAR_NOT: return true;
195
+ case LLAMA_GRETYPE_CHAR_ALT: return true;
196
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197
+ case LLAMA_GRETYPE_CHAR_ANY: return true;
198
+ default: return false;
199
+ }
200
+ }
201
+
202
+ static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203
+ for (auto elem : rule) {
204
+ switch (elem.type) {
205
+ case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
206
+ case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
207
+ case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
208
+ case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
209
+ case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
210
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
211
+ case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
212
+ case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
213
+ }
214
+ switch (elem.type) {
215
+ case LLAMA_GRETYPE_END:
216
+ case LLAMA_GRETYPE_ALT:
217
+ case LLAMA_GRETYPE_RULE_REF:
218
+ fprintf(file, "(%u) ", elem.value);
219
+ break;
220
+ case LLAMA_GRETYPE_CHAR:
221
+ case LLAMA_GRETYPE_CHAR_NOT:
222
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223
+ case LLAMA_GRETYPE_CHAR_ALT:
224
+ case LLAMA_GRETYPE_CHAR_ANY:
225
+ fprintf(file, "(\"");
226
+ print_grammar_char(file, elem.value);
227
+ fprintf(file, "\") ");
228
+ break;
229
+ }
230
+ }
231
+ fprintf(file, "\n");
232
+ }
233
+
234
+ static void print_rule(
235
+ FILE * file,
236
+ uint32_t rule_id,
237
+ const llama_grammar_rule & rule,
238
+ const std::map<uint32_t, std::string> & symbol_id_names) {
239
+ if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
240
+ throw std::runtime_error(
241
+ "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
242
+ }
243
+ fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
244
+ for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
245
+ llama_grammar_element elem = rule[i];
246
+ switch (elem.type) {
247
+ case LLAMA_GRETYPE_END:
248
+ throw std::runtime_error(
249
+ "unexpected end of rule: " + std::to_string(rule_id) + "," +
250
+ std::to_string(i));
251
+ case LLAMA_GRETYPE_ALT:
252
+ fprintf(file, "| ");
253
+ break;
254
+ case LLAMA_GRETYPE_RULE_REF:
255
+ fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
256
+ break;
257
+ case LLAMA_GRETYPE_CHAR:
258
+ fprintf(file, "[");
259
+ print_grammar_char(file, elem.value);
260
+ break;
261
+ case LLAMA_GRETYPE_CHAR_NOT:
262
+ fprintf(file, "[^");
263
+ print_grammar_char(file, elem.value);
264
+ break;
265
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266
+ if (i == 0 || !is_char_element(rule[i - 1])) {
267
+ throw std::runtime_error(
268
+ "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269
+ std::to_string(rule_id) + "," + std::to_string(i));
270
+ }
271
+ fprintf(file, "-");
272
+ print_grammar_char(file, elem.value);
273
+ break;
274
+ case LLAMA_GRETYPE_CHAR_ALT:
275
+ if (i == 0 || !is_char_element(rule[i - 1])) {
276
+ throw std::runtime_error(
277
+ "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278
+ std::to_string(rule_id) + "," + std::to_string(i));
279
+ }
280
+ print_grammar_char(file, elem.value);
281
+ break;
282
+ case LLAMA_GRETYPE_CHAR_ANY:
283
+ fprintf(file, ".");
284
+ break;
285
+ }
286
+ if (is_char_element(elem)) {
287
+ switch (rule[i + 1].type) {
288
+ case LLAMA_GRETYPE_CHAR_ALT:
289
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290
+ case LLAMA_GRETYPE_CHAR_ANY:
291
+ break;
292
+ default:
293
+ fprintf(file, "] ");
294
+ }
295
+ }
296
+ }
297
+ fprintf(file, "\n");
298
+ }
299
+
300
+ //
301
+ // implementation
302
+ //
303
+
304
+ uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306
+ auto result = symbol_ids.emplace(std::string(src, len), next_id);
307
+ return result.first->second;
308
+ }
309
+
310
+ uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312
+ symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
313
+ return next_id;
314
+ }
315
+
316
+ void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317
+ if (rules.size() <= rule_id) {
318
+ rules.resize(rule_id + 1);
319
+ }
320
+ rules[rule_id] = rule;
321
+ }
322
+
323
+ const char * llama_grammar_parser::parse_alternates(
324
+ const char * src,
325
+ const std::string & rule_name,
326
+ uint32_t rule_id,
327
+ bool is_nested) {
328
+ llama_grammar_rule rule;
329
+ const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330
+ while (*pos == '|') {
331
+ rule.push_back({LLAMA_GRETYPE_ALT, 0});
332
+ pos = parse_space(pos + 1, true);
333
+ pos = parse_sequence(pos, rule_name, rule, is_nested);
334
+ }
335
+ rule.push_back({LLAMA_GRETYPE_END, 0});
336
+ add_rule(rule_id, rule);
337
+ return pos;
338
+ }
339
+
340
+ const char * llama_grammar_parser::parse_sequence(
341
+ const char * src,
342
+ const std::string & rule_name,
343
+ llama_grammar_rule & rule,
344
+ bool is_nested) {
345
+ size_t last_sym_start = rule.size();
346
+ const char * pos = src;
347
+
348
+ auto handle_repetitions = [&](int min_times, int max_times) {
349
+
350
+ if (last_sym_start == rule.size()) {
351
+ throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
352
+ }
353
+
354
+ // apply transformation to previous symbol (last_sym_start to end) according to
355
+ // the following rewrite rules:
356
+ // S{m,n} --> S S S (m times) S'(n-m)
357
+ // S'(x) ::= S S'(x-1) |
358
+ // (... n-m definitions of these S' rules ...)
359
+ // S'(1) ::= S |
360
+ // S{m,} --> S S S (m times) S'
361
+ // S' ::= S S' |
362
+ // S* --> S{0,}
363
+ // --> S' ::= S S' |
364
+ // S+ --> S{1,}
365
+ // --> S S'
366
+ // S' ::= S S' |
367
+ // S? --> S{0,1}
368
+ // --> S'
369
+ // S' ::= S |
370
+
371
+ llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372
+ if (min_times == 0) {
373
+ rule.resize(last_sym_start);
374
+ } else {
375
+ // Repeat the previous elements (min_times - 1) times
376
+ for (int i = 1; i < min_times; i++) {
377
+ rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
378
+ }
379
+ }
380
+
381
+ uint32_t last_rec_rule_id = 0;
382
+ auto n_opt = max_times < 0 ? 1 : max_times - min_times;
383
+
384
+ llama_grammar_rule rec_rule(prev_rule);
385
+ for (int i = 0; i < n_opt; i++) {
386
+ rec_rule.resize(prev_rule.size());
387
+ uint32_t rec_rule_id = generate_symbol_id( rule_name);
388
+ if (i > 0 || max_times < 0) {
389
+ rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
390
+ }
391
+ rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
392
+ rec_rule.push_back({LLAMA_GRETYPE_END, 0});
393
+ add_rule( rec_rule_id, rec_rule);
394
+ last_rec_rule_id = rec_rule_id;
395
+ }
396
+ if (n_opt > 0) {
397
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
398
+ }
399
+ };
400
+
401
+ while (*pos) {
402
+ if (*pos == '"') { // literal string
403
+ pos++;
404
+ last_sym_start = rule.size();
405
+ while (*pos != '"') {
406
+ if (!*pos) {
407
+ throw std::runtime_error("unexpected end of input");
408
+ }
409
+ auto char_pair = parse_char(pos);
410
+ pos = char_pair.second;
411
+ rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
412
+ }
413
+ pos = parse_space(pos + 1, is_nested);
414
+ } else if (*pos == '[') { // char range(s)
415
+ pos++;
416
+ enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417
+ if (*pos == '^') {
418
+ pos++;
419
+ start_type = LLAMA_GRETYPE_CHAR_NOT;
420
+ }
421
+ last_sym_start = rule.size();
422
+ while (*pos != ']') {
423
+ if (!*pos) {
424
+ throw std::runtime_error("unexpected end of input");
425
+ }
426
+ auto char_pair = parse_char(pos);
427
+ pos = char_pair.second;
428
+ enum llama_gretype type = last_sym_start < rule.size()
429
+ ? LLAMA_GRETYPE_CHAR_ALT
430
+ : start_type;
431
+
432
+ rule.push_back({type, char_pair.first});
433
+ if (pos[0] == '-' && pos[1] != ']') {
434
+ if (!pos[1]) {
435
+ throw std::runtime_error("unexpected end of input");
436
+ }
437
+ auto endchar_pair = parse_char(pos + 1);
438
+ pos = endchar_pair.second;
439
+ rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
440
+ }
441
+ }
442
+ pos = parse_space(pos + 1, is_nested);
443
+ } else if (is_word_char(*pos)) { // rule reference
444
+ const char * name_end = parse_name(pos);
445
+ uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
446
+ pos = parse_space(name_end, is_nested);
447
+ last_sym_start = rule.size();
448
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
449
+ } else if (*pos == '(') { // grouping
450
+ // parse nested alternates into synthesized rule
451
+ pos = parse_space(pos + 1, true);
452
+ uint32_t sub_rule_id = generate_symbol_id(rule_name);
453
+ pos = parse_alternates(pos, rule_name, sub_rule_id, true);
454
+ last_sym_start = rule.size();
455
+ // output reference to synthesized rule
456
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
457
+ if (*pos != ')') {
458
+ throw std::runtime_error(std::string("expecting ')' at ") + pos);
459
+ }
460
+ pos = parse_space(pos + 1, is_nested);
461
+ } else if (*pos == '.') { // any char
462
+ last_sym_start = rule.size();
463
+ rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
464
+ pos = parse_space(pos + 1, is_nested);
465
+ } else if (*pos == '*') {
466
+ pos = parse_space(pos + 1, is_nested);
467
+ handle_repetitions(0, -1);
468
+ } else if (*pos == '+') {
469
+ pos = parse_space(pos + 1, is_nested);
470
+ handle_repetitions(1, -1);
471
+ } else if (*pos == '?') {
472
+ pos = parse_space(pos + 1, is_nested);
473
+ handle_repetitions(0, 1);
474
+ } else if (*pos == '{') {
475
+ pos = parse_space(pos + 1, is_nested);
476
+
477
+ if (!is_digit_char(*pos)) {
478
+ throw std::runtime_error(std::string("expecting an int at ") + pos);
479
+ }
480
+ const char * int_end = parse_int(pos);
481
+ int min_times = std::stoul(std::string(pos, int_end - pos));
482
+ pos = parse_space(int_end, is_nested);
483
+
484
+ int max_times = -1;
485
+
486
+ if (*pos == '}') {
487
+ max_times = min_times;
488
+ pos = parse_space(pos + 1, is_nested);
489
+ } else if (*pos == ',') {
490
+ pos = parse_space(pos + 1, is_nested);
491
+
492
+ if (is_digit_char(*pos)) {
493
+ const char * int_end = parse_int(pos);
494
+ max_times = std::stoul(std::string(pos, int_end - pos));
495
+ pos = parse_space(int_end, is_nested);
496
+ }
497
+
498
+ if (*pos != '}') {
499
+ throw std::runtime_error(std::string("expecting '}' at ") + pos);
500
+ }
501
+ pos = parse_space(pos + 1, is_nested);
502
+ } else {
503
+ throw std::runtime_error(std::string("expecting ',' at ") + pos);
504
+ }
505
+ handle_repetitions(min_times, max_times);
506
+ } else {
507
+ break;
508
+ }
509
+ }
510
+ return pos;
511
+ }
512
+
513
+ const char * llama_grammar_parser::parse_rule(const char * src) {
514
+ const char * name_end = parse_name(src);
515
+ const char * pos = parse_space(name_end, false);
516
+ size_t name_len = name_end - src;
517
+ uint32_t rule_id = get_symbol_id(src, name_len);
518
+ const std::string name(src, name_len);
519
+
520
+ if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
521
+ throw std::runtime_error(std::string("expecting ::= at ") + pos);
522
+ }
523
+ pos = parse_space(pos + 3, true);
524
+
525
+ pos = parse_alternates(pos, name, rule_id, false);
526
+
527
+ if (*pos == '\r') {
528
+ pos += pos[1] == '\n' ? 2 : 1;
529
+ } else if (*pos == '\n') {
530
+ pos++;
531
+ } else if (*pos) {
532
+ throw std::runtime_error(std::string("expecting newline or end at ") + pos);
533
+ }
534
+ return parse_space(pos, true);
535
+ }
536
+
537
+ bool llama_grammar_parser::parse(const char * src) {
538
+ try {
539
+ const char * pos = parse_space(src, true);
540
+ while (*pos) {
541
+ pos = parse_rule(pos);
542
+ }
543
+ // Validate the state to ensure that all rules are defined
544
+ for (const auto & rule : rules) {
545
+ if (rule.empty()) {
546
+ throw std::runtime_error("Undefined rule");
547
+ }
548
+ for (const auto & elem : rule) {
549
+ if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550
+ // Ensure that the rule at that location exists
551
+ if (elem.value >= rules.size() || rules[elem.value].empty()) {
552
+ // Get the name of the rule that is missing
553
+ for (const auto & kv : symbol_ids) {
554
+ if (kv.second == elem.value) {
555
+ throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
556
+ }
557
+ }
558
+ }
559
+ }
560
+ }
561
+ }
562
+ } catch (const std::exception & err) {
563
+ fprintf(stderr, "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src);
564
+ rules.clear();
565
+ return false;
566
+ }
567
+
568
+ return true;
569
+ }
570
+
571
+ void llama_grammar_parser::print(FILE * file) {
572
+ try {
573
+ std::map<uint32_t, std::string> symbol_id_names;
574
+ for (const auto & kv : symbol_ids) {
575
+ symbol_id_names[kv.second] = kv.first;
576
+ }
577
+ for (size_t i = 0, end = rules.size(); i < end; i++) {
578
+ // fprintf(file, "%zu: ", i);
579
+ // print_rule_binary(file, rules[i]);
580
+ print_rule(file, uint32_t(i), rules[i], symbol_id_names);
581
+ // fprintf(file, "\n");
582
+ }
583
+ } catch (const std::exception & err) {
584
+ fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
585
+ }
586
+ }
587
+
588
+ llama_grammar_stack llama_grammar_parser::c_rules() const {
589
+ llama_grammar_stack ret;
590
+ ret.reserve(rules.size());
591
+ for (const auto & rule : rules) {
592
+ ret.push_back(rule.data());
593
+ }
594
+ return ret;
595
+ }
596
+
597
+ // returns true iff pos points to the end of one of the definitions of a rule
598
+ static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599
+ switch (pos->type) {
600
+ case LLAMA_GRETYPE_END: return true; // NOLINT
601
+ case LLAMA_GRETYPE_ALT: return true; // NOLINT
602
+ default: return false;
603
+ }
604
+ }
605
+
606
+ // returns true iff chr satisfies the char range at pos (regular or inverse range)
607
+ // asserts that pos is pointing to a char range element
608
+ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609
+ const llama_grammar_element * pos,
610
+ const uint32_t chr) {
611
+ bool found = false;
612
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
+
614
+ GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
+
616
+ do {
617
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618
+ // inclusive range, e.g. [a-z]
619
+ found = found || (pos->value <= chr && chr <= pos[1].value);
620
+ pos += 2;
621
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622
+ // Any character matches "."
623
+ found = true;
624
+ pos += 1;
625
+ } else {
626
+ // exact char match, e.g. [a] or "a"
627
+ found = found || pos->value == chr;
628
+ pos += 1;
629
+ }
630
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
+
632
+ return std::make_pair(found == is_positive_char, pos);
633
+ }
634
+
635
+ // returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636
+ // range at pos (regular or inverse range)
637
+ // asserts that pos is pointing to a char range element
638
+ static bool llama_grammar_match_partial_char(
639
+ const llama_grammar_element * pos,
640
+ const llama_partial_utf8 partial_utf8) {
641
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
642
+ GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
+
644
+ uint32_t partial_value = partial_utf8.value;
645
+ int n_remain = partial_utf8.n_remain;
646
+
647
+ // invalid sequence or 7-bit char split across 2 bytes (overlong)
648
+ if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
649
+ return false;
650
+ }
651
+
652
+ // range of possible code points this partial UTF-8 sequence could complete to
653
+ uint32_t low = partial_value << (n_remain * 6);
654
+ uint32_t high = low | ((1 << (n_remain * 6)) - 1);
655
+
656
+ if (low == 0) {
657
+ if (n_remain == 2) {
658
+ low = 1 << 11;
659
+ } else if (n_remain == 3) {
660
+ low = 1 << 16;
661
+ }
662
+ }
663
+
664
+ do {
665
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666
+ // inclusive range, e.g. [a-z]
667
+ if (pos->value <= high && low <= pos[1].value) {
668
+ return is_positive_char;
669
+ }
670
+ pos += 2;
671
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672
+ // Any character matches "."
673
+ return true;
674
+ } else {
675
+ // exact char match, e.g. [a] or "a"
676
+ if (low <= pos->value && pos->value <= high) {
677
+ return is_positive_char;
678
+ }
679
+ pos += 1;
680
+ }
681
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
+
683
+ return !is_positive_char;
684
+ }
685
+
686
+ // transforms a grammar pushdown stack into N possible stacks, all ending
687
+ // at a character range (terminal element)
688
+ static void llama_grammar_advance_stack(
689
+ const llama_grammar_rules & rules,
690
+ const llama_grammar_stack & stack,
691
+ llama_grammar_stacks & new_stacks) {
692
+ if (stack.empty()) {
693
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
694
+ new_stacks.emplace_back(stack);
695
+ }
696
+ return;
697
+ }
698
+
699
+ const llama_grammar_element * pos = stack.back();
700
+
701
+ switch (pos->type) {
702
+ case LLAMA_GRETYPE_RULE_REF: {
703
+ const size_t rule_id = static_cast<size_t>(pos->value);
704
+ const llama_grammar_element * subpos = rules[rule_id].data();
705
+ do {
706
+ // init new stack without the top (pos)
707
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
708
+ if (!llama_grammar_is_end_of_sequence(pos + 1)) {
709
+ // if this rule ref is followed by another element, add that to stack
710
+ new_stack.push_back(pos + 1);
711
+ }
712
+ if (!llama_grammar_is_end_of_sequence(subpos)) {
713
+ // if alternate is nonempty, add to stack
714
+ new_stack.push_back(subpos);
715
+ }
716
+ llama_grammar_advance_stack(rules, new_stack, new_stacks);
717
+ while (!llama_grammar_is_end_of_sequence(subpos)) {
718
+ // scan to end of alternate def
719
+ subpos++;
720
+ }
721
+ if (subpos->type == LLAMA_GRETYPE_ALT) {
722
+ // there's another alternate def of this rule to process
723
+ subpos++;
724
+ } else {
725
+ break;
726
+ }
727
+ } while (true);
728
+ break;
729
+ }
730
+ case LLAMA_GRETYPE_CHAR:
731
+ case LLAMA_GRETYPE_CHAR_NOT:
732
+ case LLAMA_GRETYPE_CHAR_ANY:
733
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
734
+ // only add the stack if it's not a duplicate of one we already have
735
+ new_stacks.emplace_back(stack);
736
+ }
737
+ break;
738
+ default:
739
+ // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740
+ // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741
+ // those
742
+ GGML_ABORT("fatal error");
743
+ }
744
+ }
745
+
746
+ static llama_grammar_candidates llama_grammar_reject_candidates(
747
+ const llama_grammar_rules & rules,
748
+ const llama_grammar_stacks & stacks,
749
+ const llama_grammar_candidates & candidates) {
750
+ GGML_ASSERT(!stacks.empty()); // REVIEW
751
+
752
+ if (candidates.empty()) {
753
+ return {};
754
+ }
755
+
756
+ auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
757
+
758
+ for (size_t i = 1, size = stacks.size(); i < size; ++i) {
759
+ rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
760
+ }
761
+
762
+ return rejects;
763
+ }
764
+
765
+ static bool llama_grammar_detect_left_recursion(
766
+ const llama_grammar_rules & rules,
767
+ size_t rule_index,
768
+ std::vector<bool> * rules_visited,
769
+ std::vector<bool> * rules_in_progress,
770
+ std::vector<bool> * rules_may_be_empty) {
771
+ if ((*rules_in_progress)[rule_index]) {
772
+ return true;
773
+ }
774
+
775
+ (*rules_in_progress)[rule_index] = true;
776
+
777
+ const llama_grammar_rule & rule = rules[rule_index];
778
+
779
+ // First check if the rule might produce the empty string. This could be done combined with the second
780
+ // step but it's more readable as two steps.
781
+ bool at_rule_start = true;
782
+ for (size_t i = 0; i < rule.size(); i++) {
783
+ if (llama_grammar_is_end_of_sequence(&rule[i])) {
784
+ if (at_rule_start) {
785
+ (*rules_may_be_empty)[rule_index] = true;
786
+ break;
787
+ }
788
+ at_rule_start = true;
789
+ } else {
790
+ at_rule_start = false;
791
+ }
792
+ }
793
+
794
+ // Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795
+ // be empty)
796
+ bool recurse_into_nonterminal = true;
797
+ for (size_t i = 0; i < rule.size(); i++) {
798
+ if (rule[i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799
+ if (llama_grammar_detect_left_recursion(rules, (size_t)rule[i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800
+ return true;
801
+ }
802
+ if (!((*rules_may_be_empty)[(size_t)rule[i].value])) {
803
+ recurse_into_nonterminal = false;
804
+ }
805
+ } else if (llama_grammar_is_end_of_sequence(&rule[i])) {
806
+ recurse_into_nonterminal = true;
807
+ } else {
808
+ recurse_into_nonterminal = false;
809
+ }
810
+ }
811
+
812
+ (*rules_in_progress)[rule_index] = false;
813
+ (*rules_visited)[rule_index] = true;
814
+
815
+ return false;
816
+ }
817
+
818
+ const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819
+ return grammar->rules;
820
+ }
821
+
822
+ llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823
+ return grammar->stacks;
824
+ }
825
+
826
+ void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827
+ llama_grammar_stacks stacks_new;
828
+ stacks_new.reserve(grammar->stacks.size());
829
+
830
+ for (const auto & stack : grammar->stacks) {
831
+ if (stack.empty()) {
832
+ continue;
833
+ }
834
+
835
+ auto match = llama_grammar_match_char(stack.back(), chr);
836
+ if (match.first) {
837
+ const llama_grammar_element * pos = match.second;
838
+
839
+ // update top of stack to next element, if any
840
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
841
+ if (!llama_grammar_is_end_of_sequence(pos)) {
842
+ new_stack.push_back(pos);
843
+ }
844
+ llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
845
+ }
846
+ }
847
+
848
+ grammar->stacks = std::move(stacks_new);
849
+ }
850
+
851
+ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852
+ const llama_grammar_rules & rules,
853
+ const llama_grammar_stack & stack,
854
+ const llama_grammar_candidates & candidates) {
855
+
856
+ llama_grammar_candidates rejects;
857
+ rejects.reserve(candidates.size());
858
+
859
+ if (stack.empty()) {
860
+ for (const auto & tok : candidates) {
861
+ if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
862
+ rejects.push_back(tok);
863
+ }
864
+ }
865
+ return rejects;
866
+ }
867
+
868
+ const llama_grammar_element * stack_pos = stack.back();
869
+
870
+ llama_grammar_candidates next_candidates;
871
+ next_candidates.reserve(candidates.size());
872
+
873
+ for (const auto & tok : candidates) {
874
+ if (*tok.code_points == 0) {
875
+ // reached end of full codepoints in token, reject iff it ended in a partial sequence
876
+ // that cannot satisfy this position in grammar
877
+ if (tok.partial_utf8.n_remain != 0 &&
878
+ !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
879
+ rejects.push_back(tok);
880
+ }
881
+ } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
882
+ next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
883
+ } else {
884
+ rejects.push_back(tok);
885
+ }
886
+ }
887
+
888
+ const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
889
+
890
+ // update top of stack to next element, if any
891
+ llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
892
+ if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
893
+ stack_after.push_back(stack_pos_after);
894
+ }
895
+ llama_grammar_stacks next_stacks;
896
+ llama_grammar_advance_stack(rules, stack_after, next_stacks);
897
+
898
+ auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
899
+ for (const auto & tok : next_rejects) {
900
+ rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
901
+ }
902
+
903
+ return rejects;
904
+ }
905
+
906
+ ////////////////////
907
+
908
+ struct llama_grammar * llama_grammar_init_impl(
909
+ const struct llama_vocab * vocab,
910
+ const llama_grammar_element ** rules,
911
+ size_t n_rules,
912
+ size_t start_rule_index) {
913
+ const llama_grammar_element * pos;
914
+
915
+ // copy rule definitions into vectors
916
+ llama_grammar_rules vec_rules(n_rules);
917
+ for (size_t i = 0; i < n_rules; i++) {
918
+ for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919
+ vec_rules[i].push_back(*pos);
920
+ }
921
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
922
+ }
923
+
924
+ // Check for left recursion
925
+ std::vector<bool> rules_visited(n_rules);
926
+ std::vector<bool> rules_in_progress(n_rules);
927
+ std::vector<bool> rules_may_be_empty(n_rules);
928
+ for (size_t i = 0; i < n_rules; i++) {
929
+ if (rules_visited[i]) {
930
+ continue;
931
+ }
932
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
933
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934
+ return nullptr;
935
+ }
936
+ }
937
+
938
+ // loop over alternates of start rule to build initial stacks
939
+ llama_grammar_stacks stacks;
940
+ pos = vec_rules[start_rule_index].data();
941
+ do {
942
+ llama_grammar_stack stack;
943
+ if (!llama_grammar_is_end_of_sequence(pos)) {
944
+ // if alternate is nonempty, add to stack
945
+ stack.push_back(pos);
946
+ }
947
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
948
+ while (!llama_grammar_is_end_of_sequence(pos)) {
949
+ // scan to end of alternate def
950
+ pos++;
951
+ }
952
+ if (pos->type == LLAMA_GRETYPE_ALT) {
953
+ // there's another alternate def of this rule to process
954
+ pos++;
955
+ } else {
956
+ break;
957
+ }
958
+ } while (true);
959
+
960
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
961
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
963
+ return new llama_grammar {
964
+ vocab,
965
+ std::move(vec_rules),
966
+ std::move(stacks),
967
+ /* .partial_utf8 = */ {},
968
+ /* .lazy =*/ false,
969
+ /* .awaiting_trigger = */ false,
970
+ /* .trigger_buffer = */ "",
971
+ /* .trigger_tokens = */ {},
972
+ /* .trigger_patterns = */ {},
973
+ };
974
+ }
975
+
976
+ struct llama_grammar * llama_grammar_init_impl(
977
+ const struct llama_vocab * vocab,
978
+ const char * grammar_str,
979
+ const char * grammar_root,
980
+ bool lazy,
981
+ const char ** trigger_patterns,
982
+ size_t num_trigger_patterns,
983
+ const llama_token * trigger_tokens,
984
+ size_t num_trigger_tokens) {
985
+ llama_grammar_parser parser;
986
+
987
+ // if there is a grammar, parse it
988
+ // rules will be empty (default) if there are parse errors
989
+ if (!parser.parse(grammar_str) || parser.rules.empty()) {
990
+ fprintf(stderr, "%s: failed to parse grammar\n", __func__);
991
+ return nullptr;
992
+ }
993
+
994
+ // Ensure that there is a "root" node.
995
+ if (parser.symbol_ids.find("root") == parser.symbol_ids.end()) {
996
+ fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
997
+ return nullptr;
998
+ }
999
+
1000
+ std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
1001
+
1002
+ const size_t n_rules = grammar_rules.size();
1003
+ const size_t start_rule_index = parser.symbol_ids.at(grammar_root);
1004
+
1005
+ const llama_grammar_element * pos;
1006
+
1007
+ // copy rule definitions into vectors
1008
+ llama_grammar_rules vec_rules(n_rules);
1009
+ for (size_t i = 0; i < n_rules; i++) {
1010
+ for (pos = grammar_rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
1011
+ vec_rules[i].push_back(*pos);
1012
+ }
1013
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
1014
+ }
1015
+
1016
+ // Check for left recursion
1017
+ std::vector<bool> rules_visited(n_rules);
1018
+ std::vector<bool> rules_in_progress(n_rules);
1019
+ std::vector<bool> rules_may_be_empty(n_rules);
1020
+ for (size_t i = 0; i < n_rules; i++) {
1021
+ if (rules_visited[i]) {
1022
+ continue;
1023
+ }
1024
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
1025
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1026
+ return nullptr;
1027
+ }
1028
+ }
1029
+
1030
+ // loop over alternates of start rule to build initial stacks
1031
+ llama_grammar_stacks stacks;
1032
+ pos = vec_rules[start_rule_index].data();
1033
+ do {
1034
+ llama_grammar_stack stack;
1035
+ if (!llama_grammar_is_end_of_sequence(pos)) {
1036
+ // if alternate is nonempty, add to stack
1037
+ stack.push_back(pos);
1038
+ }
1039
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
1040
+ while (!llama_grammar_is_end_of_sequence(pos)) {
1041
+ // scan to end of alternate def
1042
+ pos++;
1043
+ }
1044
+ if (pos->type == LLAMA_GRETYPE_ALT) {
1045
+ // there's another alternate def of this rule to process
1046
+ pos++;
1047
+ } else {
1048
+ break;
1049
+ }
1050
+ } while (true);
1051
+
1052
+ std::vector<llama_token> vec_trigger_tokens;
1053
+ std::vector<llama_grammar_trigger_pattern> vec_trigger_patterns;
1054
+ for (size_t i = 0; i < num_trigger_tokens; i++) {
1055
+ GGML_ASSERT(trigger_tokens != nullptr);
1056
+ vec_trigger_tokens.push_back(trigger_tokens[i]);
1057
+ }
1058
+ for (size_t i = 0; i < num_trigger_patterns; i++) {
1059
+ GGML_ASSERT(trigger_patterns != nullptr);
1060
+ auto & trigger = vec_trigger_patterns.emplace_back();
1061
+ trigger.pattern = trigger_patterns[i];
1062
+ trigger.regex = std::regex(trigger.pattern);
1063
+ }
1064
+
1065
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
1066
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1067
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
1068
+ return new llama_grammar {
1069
+ vocab,
1070
+ std::move(vec_rules),
1071
+ std::move(stacks),
1072
+ /* .partial_utf8 = */ {},
1073
+ /* .lazy = */ lazy,
1074
+ /* .awaiting_trigger = */ lazy,
1075
+ /* .trigger_buffer = */ "",
1076
+ std::move(vec_trigger_tokens),
1077
+ std::move(vec_trigger_patterns),
1078
+ };
1079
+ }
1080
+
1081
// Releases a grammar with `delete`.
// Passing nullptr is allowed and does nothing.
void llama_grammar_free_impl(struct llama_grammar * grammar) {
    if (grammar != nullptr) {
        delete grammar;
    }
}
1088
+
1089
+ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1090
+ auto * result = new llama_grammar {
1091
+ grammar.vocab,
1092
+ grammar.rules,
1093
+ grammar.stacks,
1094
+ grammar.partial_utf8,
1095
+ grammar.lazy,
1096
+ grammar.awaiting_trigger,
1097
+ grammar.trigger_buffer,
1098
+ grammar.trigger_tokens,
1099
+ grammar.trigger_patterns,
1100
+ };
1101
+
1102
+ // redirect elements in stacks to point to new rules
1103
+ for (size_t is = 0; is < result->stacks.size(); is++) {
1104
+ for (size_t ie = 0; ie < result->stacks[is].size(); ie++) {
1105
+ for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
1106
+ for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
1107
+ if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
1108
+ result->stacks[is][ie] = &result->rules[ir0][ir1];
1109
+ }
1110
+ }
1111
+ }
1112
+ }
1113
+ }
1114
+
1115
+ return result;
1116
+ }
1117
+
1118
+ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1119
+ GGML_ASSERT(grammar.vocab != nullptr);
1120
+
1121
+ if (grammar.awaiting_trigger) {
1122
+ return;
1123
+ }
1124
+
1125
+ bool allow_eog = false;
1126
+ for (const auto & stack : grammar.stacks) {
1127
+ if (stack.empty()) {
1128
+ allow_eog = true;
1129
+ break;
1130
+ }
1131
+ }
1132
+
1133
+ std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1134
+ candidates_decoded.reserve(cur_p->size);
1135
+
1136
+ llama_grammar_candidates candidates_grammar;
1137
+ candidates_grammar.reserve(cur_p->size);
1138
+
1139
+ for (size_t i = 0; i < cur_p->size; ++i) {
1140
+ const llama_token id = cur_p->data[i].id;
1141
+ const std::string & piece = grammar.vocab->token_to_piece(id);
1142
+
1143
+ if (grammar.vocab->is_eog(id)) {
1144
+ if (!allow_eog) {
1145
+ cur_p->data[i].logit = -INFINITY;
1146
+ }
1147
+ } else if (piece.empty() || piece[0] == 0) {
1148
+ cur_p->data[i].logit = -INFINITY;
1149
+ } else {
1150
+ candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
1151
+ candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
1152
+ }
1153
+ }
1154
+
1155
+ const auto rejects = llama_grammar_reject_candidates(grammar.rules, grammar.stacks, candidates_grammar);
1156
+ for (const auto & reject : rejects) {
1157
+ cur_p->data[reject.index].logit = -INFINITY;
1158
+ }
1159
+ }
1160
+
1161
+ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1162
+ GGML_ASSERT(grammar.vocab != nullptr);
1163
+
1164
+ const auto & piece = grammar.vocab->token_to_piece(token);
1165
+
1166
+ if (grammar.awaiting_trigger) {
1167
+ if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) {
1168
+ grammar.awaiting_trigger = false;
1169
+ grammar.trigger_buffer.clear();
1170
+ llama_grammar_accept_str(grammar, piece);
1171
+ LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
1172
+ return;
1173
+ } else {
1174
+ grammar.trigger_buffer += piece;
1175
+
1176
+ std::smatch match;
1177
+ for (const auto & trigger_pattern : grammar.trigger_patterns) {
1178
+ if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) {
1179
+ grammar.awaiting_trigger = false;
1180
+ // get from the first matched capturing group to the end of the string
1181
+ size_t start = std::string::npos;
1182
+ for (auto i = 1u; i < match.size(); i++) {
1183
+ if (match.length(i) > 0) {
1184
+ start = match.position(i);
1185
+ break;
1186
+ }
1187
+ }
1188
+ if (start == std::string::npos) {
1189
+ start = match.position(0);
1190
+ }
1191
+ auto constrained_str = grammar.trigger_buffer.substr(start);
1192
+ // std::string constrained_str(match[1].first, grammar.trigger_buffer.end());
1193
+ grammar.trigger_buffer.clear();
1194
+ llama_grammar_accept_str(grammar, constrained_str);
1195
+ LLAMA_LOG_DEBUG("Grammar triggered on regex: '%s'\n", constrained_str.c_str());
1196
+ return;
1197
+ }
1198
+ }
1199
+ LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
1200
+ return;
1201
+ }
1202
+ }
1203
+
1204
+ if (grammar.vocab->is_eog(token)) {
1205
+ for (const auto & stack : grammar.stacks) {
1206
+ if (stack.empty()) {
1207
+ return;
1208
+ }
1209
+ }
1210
+ GGML_ABORT("fatal error");
1211
+ }
1212
+
1213
+ llama_grammar_accept_str(grammar, piece);
1214
+ }
1215
+
1216
+ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
1217
+ // Note terminating 0 in decoded string
1218
+ const auto decoded = decode_utf8(piece, grammar.partial_utf8);
1219
+ const auto & code_points = decoded.first;
1220
+
1221
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
1222
+ llama_grammar_accept(&grammar, *it);
1223
+ }
1224
+
1225
+ grammar.partial_utf8 = decoded.second;
1226
+ if (grammar.stacks.empty()) {
1227
+ throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
1228
+ }
1229
+ }