whispercpp 1.3.1 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (857):
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -3
  3. data/README.md +161 -43
  4. data/Rakefile +45 -13
  5. data/ext/.gitignore +4 -8
  6. data/ext/dependencies.rb +73 -0
  7. data/ext/extconf.rb +21 -198
  8. data/ext/options.rb +85 -0
  9. data/ext/ruby_whisper.c +177 -0
  10. data/ext/ruby_whisper.h +17 -2
  11. data/ext/ruby_whisper_context.c +672 -0
  12. data/ext/ruby_whisper_error.c +52 -0
  13. data/ext/ruby_whisper_model.c +232 -0
  14. data/ext/ruby_whisper_params.c +1303 -0
  15. data/ext/ruby_whisper_segment.c +220 -0
  16. data/ext/ruby_whisper_transcribe.cpp +93 -0
  17. data/ext/ruby_whisper_vad_params.c +288 -0
  18. data/ext/sources/CMakeGraphVizOptions.cmake +8 -0
  19. data/ext/sources/CMakeLists.txt +255 -0
  20. data/ext/sources/bindings/javascript/CMakeLists.txt +41 -0
  21. data/ext/sources/bindings/javascript/emscripten.cpp +93 -0
  22. data/ext/sources/bindings/javascript/libwhisper.worker.js +1 -0
  23. data/ext/sources/bindings/javascript/package-tmpl.json +26 -0
  24. data/ext/sources/bindings/javascript/package.json +26 -0
  25. data/ext/sources/bindings/javascript/whisper.js +19 -0
  26. data/ext/sources/build-xcframework.sh +547 -0
  27. data/ext/sources/cmake/DefaultTargetOptions.cmake +16 -0
  28. data/ext/sources/cmake/FindFFmpeg.cmake +163 -0
  29. data/ext/sources/cmake/build-info.cmake +60 -0
  30. data/ext/sources/cmake/git-vars.cmake +22 -0
  31. data/ext/sources/cmake/whisper-config.cmake.in +65 -0
  32. data/ext/sources/cmake/whisper.pc.in +10 -0
  33. data/ext/sources/examples/CMakeLists.txt +124 -0
  34. data/ext/sources/examples/addon.node/CMakeLists.txt +31 -0
  35. data/ext/sources/examples/addon.node/__test__/whisper.spec.js +133 -0
  36. data/ext/sources/examples/addon.node/addon.cpp +557 -0
  37. data/ext/sources/examples/addon.node/index.js +57 -0
  38. data/ext/sources/examples/addon.node/package.json +16 -0
  39. data/ext/sources/examples/addon.node/vad-example.js +132 -0
  40. data/ext/sources/examples/bench/CMakeLists.txt +8 -0
  41. data/ext/sources/examples/bench/bench.cpp +176 -0
  42. data/ext/sources/examples/bench.wasm/CMakeLists.txt +49 -0
  43. data/ext/sources/examples/bench.wasm/emscripten.cpp +87 -0
  44. data/ext/sources/examples/bench.wasm/index-tmpl.html +284 -0
  45. data/ext/sources/examples/cli/CMakeLists.txt +8 -0
  46. data/ext/sources/examples/cli/cli.cpp +1295 -0
  47. data/ext/sources/examples/coi-serviceworker.js +146 -0
  48. data/ext/sources/examples/command/CMakeLists.txt +10 -0
  49. data/ext/sources/examples/command/command.cpp +800 -0
  50. data/ext/sources/examples/command/commands.txt +9 -0
  51. data/ext/sources/examples/command.wasm/CMakeLists.txt +50 -0
  52. data/ext/sources/examples/command.wasm/emscripten.cpp +327 -0
  53. data/ext/sources/examples/command.wasm/index-tmpl.html +414 -0
  54. data/ext/sources/examples/common-ggml.cpp +238 -0
  55. data/ext/sources/examples/common-ggml.h +18 -0
  56. data/ext/sources/examples/common-sdl.cpp +227 -0
  57. data/ext/sources/examples/common-sdl.h +49 -0
  58. data/ext/sources/examples/common-whisper.cpp +175 -0
  59. data/ext/sources/examples/common-whisper.h +24 -0
  60. data/ext/sources/examples/common.cpp +675 -0
  61. data/ext/sources/examples/common.h +322 -0
  62. data/ext/sources/examples/deprecation-warning/CMakeLists.txt +6 -0
  63. data/ext/sources/examples/deprecation-warning/deprecation-warning.cpp +38 -0
  64. data/ext/sources/examples/ffmpeg-transcode.cpp +368 -0
  65. data/ext/sources/examples/generate-karaoke.sh +57 -0
  66. data/ext/sources/examples/grammar-parser.cpp +423 -0
  67. data/ext/sources/examples/grammar-parser.h +29 -0
  68. data/ext/sources/examples/helpers.js +191 -0
  69. data/ext/sources/examples/json.hpp +24596 -0
  70. data/ext/sources/examples/livestream.sh +112 -0
  71. data/ext/sources/examples/lsp/CMakeLists.txt +9 -0
  72. data/ext/sources/examples/lsp/lsp.cpp +469 -0
  73. data/ext/sources/examples/lsp/whisper.vim +362 -0
  74. data/ext/sources/examples/miniaudio.h +93468 -0
  75. data/ext/sources/examples/python/test_whisper_processor.py +7 -0
  76. data/ext/sources/examples/python/whisper_processor.py +54 -0
  77. data/ext/sources/examples/quantize/CMakeLists.txt +6 -0
  78. data/ext/sources/examples/quantize/quantize.cpp +226 -0
  79. data/ext/sources/examples/server/CMakeLists.txt +15 -0
  80. data/ext/sources/examples/server/bench.js +29 -0
  81. data/ext/sources/examples/server/httplib.h +10497 -0
  82. data/ext/sources/examples/server/server.cpp +1238 -0
  83. data/ext/sources/examples/server.py +115 -0
  84. data/ext/sources/examples/stb_vorbis.c +5584 -0
  85. data/ext/sources/examples/stream/CMakeLists.txt +10 -0
  86. data/ext/sources/examples/stream/stream.cpp +435 -0
  87. data/ext/sources/examples/stream.wasm/CMakeLists.txt +49 -0
  88. data/ext/sources/examples/stream.wasm/emscripten.cpp +216 -0
  89. data/ext/sources/examples/stream.wasm/index-tmpl.html +414 -0
  90. data/ext/sources/examples/sycl/CMakeLists.txt +9 -0
  91. data/ext/sources/examples/sycl/build.sh +22 -0
  92. data/ext/sources/examples/sycl/ls-sycl-device.cpp +11 -0
  93. data/ext/sources/examples/sycl/run-whisper.sh +17 -0
  94. data/ext/sources/examples/talk-llama/CMakeLists.txt +43 -0
  95. data/ext/sources/examples/talk-llama/eleven-labs.py +80 -0
  96. data/ext/sources/examples/talk-llama/llama-adapter.cpp +388 -0
  97. data/ext/sources/examples/talk-llama/llama-adapter.h +76 -0
  98. data/ext/sources/examples/talk-llama/llama-arch.cpp +1914 -0
  99. data/ext/sources/examples/talk-llama/llama-arch.h +464 -0
  100. data/ext/sources/examples/talk-llama/llama-batch.cpp +843 -0
  101. data/ext/sources/examples/talk-llama/llama-batch.h +147 -0
  102. data/ext/sources/examples/talk-llama/llama-chat.cpp +685 -0
  103. data/ext/sources/examples/talk-llama/llama-chat.h +59 -0
  104. data/ext/sources/examples/talk-llama/llama-context.cpp +2845 -0
  105. data/ext/sources/examples/talk-llama/llama-context.h +297 -0
  106. data/ext/sources/examples/talk-llama/llama-cparams.cpp +5 -0
  107. data/ext/sources/examples/talk-llama/llama-cparams.h +41 -0
  108. data/ext/sources/examples/talk-llama/llama-grammar.cpp +1229 -0
  109. data/ext/sources/examples/talk-llama/llama-grammar.h +173 -0
  110. data/ext/sources/examples/talk-llama/llama-graph.cpp +1693 -0
  111. data/ext/sources/examples/talk-llama/llama-graph.h +710 -0
  112. data/ext/sources/examples/talk-llama/llama-hparams.cpp +103 -0
  113. data/ext/sources/examples/talk-llama/llama-hparams.h +207 -0
  114. data/ext/sources/examples/talk-llama/llama-impl.cpp +167 -0
  115. data/ext/sources/examples/talk-llama/llama-impl.h +61 -0
  116. data/ext/sources/examples/talk-llama/llama-io.cpp +15 -0
  117. data/ext/sources/examples/talk-llama/llama-io.h +35 -0
  118. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.cpp +279 -0
  119. data/ext/sources/examples/talk-llama/llama-kv-cache-unified-iswa.h +128 -0
  120. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.cpp +1841 -0
  121. data/ext/sources/examples/talk-llama/llama-kv-cache-unified.h +303 -0
  122. data/ext/sources/examples/talk-llama/llama-kv-cache.h +44 -0
  123. data/ext/sources/examples/talk-llama/llama-kv-cells.h +439 -0
  124. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +246 -0
  125. data/ext/sources/examples/talk-llama/llama-memory-hybrid.h +138 -0
  126. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +1125 -0
  127. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +183 -0
  128. data/ext/sources/examples/talk-llama/llama-memory.cpp +59 -0
  129. data/ext/sources/examples/talk-llama/llama-memory.h +116 -0
  130. data/ext/sources/examples/talk-llama/llama-mmap.cpp +600 -0
  131. data/ext/sources/examples/talk-llama/llama-mmap.h +68 -0
  132. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +1163 -0
  133. data/ext/sources/examples/talk-llama/llama-model-loader.h +169 -0
  134. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +282 -0
  135. data/ext/sources/examples/talk-llama/llama-model-saver.h +37 -0
  136. data/ext/sources/examples/talk-llama/llama-model.cpp +15114 -0
  137. data/ext/sources/examples/talk-llama/llama-model.h +452 -0
  138. data/ext/sources/examples/talk-llama/llama-quant.cpp +1049 -0
  139. data/ext/sources/examples/talk-llama/llama-quant.h +1 -0
  140. data/ext/sources/examples/talk-llama/llama-sampling.cpp +2575 -0
  141. data/ext/sources/examples/talk-llama/llama-sampling.h +32 -0
  142. data/ext/sources/examples/talk-llama/llama-vocab.cpp +3377 -0
  143. data/ext/sources/examples/talk-llama/llama-vocab.h +132 -0
  144. data/ext/sources/examples/talk-llama/llama.cpp +358 -0
  145. data/ext/sources/examples/talk-llama/llama.h +1484 -0
  146. data/ext/sources/examples/talk-llama/prompts/talk-alpaca.txt +23 -0
  147. data/ext/sources/examples/talk-llama/speak +40 -0
  148. data/ext/sources/examples/talk-llama/speak.bat +1 -0
  149. data/ext/sources/examples/talk-llama/speak.ps1 +14 -0
  150. data/ext/sources/examples/talk-llama/talk-llama.cpp +810 -0
  151. data/ext/sources/examples/talk-llama/unicode-data.cpp +7034 -0
  152. data/ext/sources/examples/talk-llama/unicode-data.h +20 -0
  153. data/ext/sources/examples/talk-llama/unicode.cpp +854 -0
  154. data/ext/sources/examples/talk-llama/unicode.h +66 -0
  155. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +8 -0
  156. data/ext/sources/examples/vad-speech-segments/speech.cpp +149 -0
  157. data/ext/sources/examples/wchess/CMakeLists.txt +10 -0
  158. data/ext/sources/examples/wchess/libwchess/CMakeLists.txt +19 -0
  159. data/ext/sources/examples/wchess/libwchess/Chessboard.cpp +803 -0
  160. data/ext/sources/examples/wchess/libwchess/Chessboard.h +33 -0
  161. data/ext/sources/examples/wchess/libwchess/WChess.cpp +193 -0
  162. data/ext/sources/examples/wchess/libwchess/WChess.h +63 -0
  163. data/ext/sources/examples/wchess/libwchess/test-chessboard.cpp +117 -0
  164. data/ext/sources/examples/wchess/wchess.cmd/CMakeLists.txt +8 -0
  165. data/ext/sources/examples/wchess/wchess.cmd/wchess.cmd.cpp +251 -0
  166. data/ext/sources/examples/whisper.wasm/CMakeLists.txt +50 -0
  167. data/ext/sources/examples/whisper.wasm/emscripten.cpp +118 -0
  168. data/ext/sources/examples/whisper.wasm/index-tmpl.html +658 -0
  169. data/ext/sources/ggml/CMakeLists.txt +435 -0
  170. data/ext/sources/ggml/cmake/BuildTypes.cmake +54 -0
  171. data/ext/sources/ggml/cmake/GitVars.cmake +22 -0
  172. data/ext/sources/ggml/cmake/common.cmake +50 -0
  173. data/ext/sources/ggml/cmake/ggml-config.cmake.in +152 -0
  174. data/ext/{ggml → sources/ggml}/include/ggml-alloc.h +1 -1
  175. data/ext/{ggml → sources/ggml}/include/ggml-backend.h +10 -8
  176. data/ext/{ggml → sources/ggml}/include/ggml-cpp.h +2 -1
  177. data/ext/{ggml → sources/ggml}/include/ggml-cpu.h +11 -1
  178. data/ext/{ggml → sources/ggml}/include/ggml-metal.h +1 -1
  179. data/ext/{ggml → sources/ggml}/include/ggml-opt.h +49 -28
  180. data/ext/{ggml → sources/ggml}/include/ggml-rpc.h +6 -1
  181. data/ext/{ggml → sources/ggml}/include/ggml-vulkan.h +0 -2
  182. data/ext/{ggml → sources/ggml}/include/ggml.h +325 -269
  183. data/ext/sources/ggml/include/gguf.h +202 -0
  184. data/ext/sources/ggml/src/CMakeLists.txt +404 -0
  185. data/ext/{ggml → sources/ggml}/src/ggml-alloc.c +34 -29
  186. data/ext/sources/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  187. data/ext/{ggml → sources/ggml}/src/ggml-backend-impl.h +1 -2
  188. data/ext/{ggml → sources/ggml}/src/ggml-backend-reg.cpp +92 -53
  189. data/ext/{ggml → sources/ggml}/src/ggml-backend.cpp +69 -34
  190. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  191. data/ext/sources/ggml/src/ggml-cann/CMakeLists.txt +75 -0
  192. data/ext/sources/ggml/src/ggml-cann/Doxyfile +2579 -0
  193. data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.cpp +10 -4
  194. data/ext/{ggml → sources/ggml}/src/ggml-cann/acl_tensor.h +5 -5
  195. data/ext/{ggml → sources/ggml}/src/ggml-cann/aclnn_ops.cpp +1272 -1506
  196. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +1125 -0
  197. data/ext/{ggml → sources/ggml}/src/ggml-cann/common.h +140 -1
  198. data/ext/{ggml → sources/ggml}/src/ggml-cann/ggml-cann.cpp +588 -146
  199. data/ext/sources/ggml/src/ggml-cann/kernels/CMakeLists.txt +30 -0
  200. data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/dup.cpp +3 -5
  201. data/ext/{ggml → sources/ggml}/src/ggml-common.h +16 -8
  202. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +597 -0
  203. data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.cpp +3 -2
  204. data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.cpp +11 -10
  205. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  206. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  207. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  208. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  209. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  210. data/ext/sources/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  211. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  212. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  213. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  214. data/ext/sources/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  215. data/ext/{ggml/src/ggml-cpu/cpu-feats-x86.cpp → sources/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp} +5 -1
  216. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  217. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +3285 -0
  218. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  219. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  220. data/ext/sources/ggml/src/ggml-cpu/binary-ops.h +16 -0
  221. data/ext/sources/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  222. data/ext/sources/ggml/src/ggml-cpu/common.h +73 -0
  223. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu-impl.h +172 -41
  224. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +3551 -0
  225. data/ext/{ggml → sources/ggml}/src/ggml-cpu/ggml-cpu.cpp +78 -25
  226. data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.cpp → sources/ggml/src/ggml-cpu/hbm.cpp} +1 -1
  227. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +337 -0
  228. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +95 -0
  229. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +482 -0
  230. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  231. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3594 -0
  232. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +19 -0
  233. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +9786 -0
  234. data/ext/sources/ggml/src/ggml-cpu/ops.h +118 -0
  235. data/ext/sources/ggml/src/ggml-cpu/quants.c +1158 -0
  236. data/ext/{ggml/src/ggml-cpu/ggml-cpu-quants.h → sources/ggml/src/ggml-cpu/quants.h} +26 -0
  237. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +1571 -0
  238. data/ext/sources/ggml/src/ggml-cpu/repack.h +98 -0
  239. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +1184 -0
  240. data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.cpp → sources/ggml/src/ggml-cpu/traits.cpp} +1 -1
  241. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  242. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +28 -0
  243. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +345 -0
  244. data/ext/sources/ggml/src/ggml-cpu/vec.h +1027 -0
  245. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  246. data/ext/sources/ggml/src/ggml-cuda/acc.cu +61 -0
  247. data/ext/sources/ggml/src/ggml-cuda/acc.cuh +5 -0
  248. data/ext/sources/ggml/src/ggml-cuda/arange.cu +34 -0
  249. data/ext/sources/ggml/src/ggml-cuda/arange.cuh +5 -0
  250. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +91 -0
  251. data/ext/sources/ggml/src/ggml-cuda/argmax.cuh +3 -0
  252. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +104 -0
  253. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +3 -0
  254. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +363 -0
  255. data/ext/sources/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  256. data/ext/sources/ggml/src/ggml-cuda/clamp.cu +45 -0
  257. data/ext/sources/ggml/src/ggml-cuda/clamp.cuh +5 -0
  258. data/ext/sources/ggml/src/ggml-cuda/common.cuh +851 -0
  259. data/ext/sources/ggml/src/ggml-cuda/concat.cu +221 -0
  260. data/ext/sources/ggml/src/ggml-cuda/concat.cuh +5 -0
  261. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  262. data/ext/sources/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  263. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  264. data/ext/sources/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  265. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  266. data/ext/sources/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  267. data/ext/sources/ggml/src/ggml-cuda/convert.cu +752 -0
  268. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +31 -0
  269. data/ext/sources/ggml/src/ggml-cuda/count-equal.cu +64 -0
  270. data/ext/sources/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  271. data/ext/sources/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  272. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +705 -0
  273. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +11 -0
  274. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  275. data/ext/sources/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  276. data/ext/sources/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  277. data/ext/sources/ggml/src/ggml-cuda/diagmask.cu +40 -0
  278. data/ext/sources/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  279. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +881 -0
  280. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1474 -0
  281. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  282. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  283. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  284. data/ext/sources/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  285. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f16.cuh +482 -0
  286. data/ext/sources/ggml/src/ggml-cuda/fattn-vec-f32.cuh +472 -0
  287. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +638 -0
  288. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  289. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +346 -0
  290. data/ext/sources/ggml/src/ggml-cuda/fattn.cuh +3 -0
  291. data/ext/sources/ggml/src/ggml-cuda/getrows.cu +275 -0
  292. data/ext/sources/ggml/src/ggml-cuda/getrows.cuh +15 -0
  293. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +3647 -0
  294. data/ext/sources/ggml/src/ggml-cuda/gla.cu +93 -0
  295. data/ext/sources/ggml/src/ggml-cuda/gla.cuh +3 -0
  296. data/ext/sources/ggml/src/ggml-cuda/im2col.cu +103 -0
  297. data/ext/sources/ggml/src/ggml-cuda/im2col.cuh +5 -0
  298. data/ext/sources/ggml/src/ggml-cuda/mean.cu +19 -0
  299. data/ext/sources/ggml/src/ggml-cuda/mean.cuh +3 -0
  300. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +396 -0
  301. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +324 -0
  302. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  303. data/ext/sources/ggml/src/ggml-cuda/mmv.cu +506 -0
  304. data/ext/sources/ggml/src/ggml-cuda/mmv.cuh +11 -0
  305. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +595 -0
  306. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  307. data/ext/sources/ggml/src/ggml-cuda/norm.cu +458 -0
  308. data/ext/sources/ggml/src/ggml-cuda/norm.cuh +11 -0
  309. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  310. data/ext/sources/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  311. data/ext/sources/ggml/src/ggml-cuda/out-prod.cu +68 -0
  312. data/ext/sources/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  313. data/ext/sources/ggml/src/ggml-cuda/pad.cu +49 -0
  314. data/ext/sources/ggml/src/ggml-cuda/pad.cuh +5 -0
  315. data/ext/sources/ggml/src/ggml-cuda/pool2d.cu +94 -0
  316. data/ext/sources/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  317. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +190 -0
  318. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +27 -0
  319. data/ext/sources/ggml/src/ggml-cuda/rope.cu +456 -0
  320. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +7 -0
  321. data/ext/sources/ggml/src/ggml-cuda/scale.cu +31 -0
  322. data/ext/sources/ggml/src/ggml-cuda/scale.cuh +5 -0
  323. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +283 -0
  324. data/ext/sources/ggml/src/ggml-cuda/softmax.cuh +7 -0
  325. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  326. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  327. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +155 -0
  328. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  329. data/ext/sources/ggml/src/ggml-cuda/sum.cu +45 -0
  330. data/ext/sources/ggml/src/ggml-cuda/sum.cuh +5 -0
  331. data/ext/sources/ggml/src/ggml-cuda/sumrows.cu +26 -0
  332. data/ext/sources/ggml/src/ggml-cuda/sumrows.cuh +4 -0
  333. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  334. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  335. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  336. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  337. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  338. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  339. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  340. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  341. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  342. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  343. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  344. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  345. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  346. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  347. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  348. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  349. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  350. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  351. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  352. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  353. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  354. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  355. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  356. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  357. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  358. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  359. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  360. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  361. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  362. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  363. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  364. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  365. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  366. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  367. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  368. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  369. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  370. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  371. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  372. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  373. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  374. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  375. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  376. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  377. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  378. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  379. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  380. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  381. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  382. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  383. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  384. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  385. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  386. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  387. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  388. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  389. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  390. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  391. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  392. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  393. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  394. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  395. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  396. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  397. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  398. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  399. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  400. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  401. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  402. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  403. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  404. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  405. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  406. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  407. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  408. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  409. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  410. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  411. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  412. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  413. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  414. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  415. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  416. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  417. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  418. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  419. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  420. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  421. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  422. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  423. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  424. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  425. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  426. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  427. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  428. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  429. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  430. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  431. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  432. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  433. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  434. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  435. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  436. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  437. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  438. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  439. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  440. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  441. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  442. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  443. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  444. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  445. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  446. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  447. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  448. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  449. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  450. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  451. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  452. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  453. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  454. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  455. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  456. data/ext/sources/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  457. data/ext/sources/ggml/src/ggml-cuda/tsembd.cu +47 -0
  458. data/ext/sources/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  459. data/ext/sources/ggml/src/ggml-cuda/unary.cu +378 -0
  460. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +66 -0
  461. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +51 -0
  462. data/ext/sources/ggml/src/ggml-cuda/upscale.cuh +5 -0
  463. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  464. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/cuda.h +1 -0
  465. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/hip.h +57 -0
  466. data/ext/{ggml → sources/ggml}/src/ggml-cuda/vendors/musa.h +7 -1
  467. data/ext/sources/ggml/src/ggml-cuda/wkv.cu +199 -0
  468. data/ext/sources/ggml/src/ggml-cuda/wkv.cuh +7 -0
  469. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +135 -0
  470. data/ext/{ggml → sources/ggml}/src/ggml-impl.h +147 -158
  471. data/ext/sources/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  472. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  473. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  474. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  475. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  476. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  477. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  478. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  479. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  480. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  481. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  482. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  483. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  484. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  485. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  486. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  487. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  488. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  489. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  490. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  491. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  492. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  493. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  494. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  495. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  496. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  497. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  498. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  499. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  500. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  501. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  502. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  503. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  504. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  505. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  506. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  507. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  508. data/ext/sources/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  509. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +121 -0
  510. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +649 -0
  511. data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.m +2504 -1108
  512. data/ext/{ggml → sources/ggml}/src/ggml-metal/ggml-metal.metal +2102 -1463
  513. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +113 -0
  514. data/ext/sources/ggml/src/ggml-musa/mudnn.cu +112 -0
  515. data/ext/sources/ggml/src/ggml-musa/mudnn.cuh +12 -0
  516. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +110 -0
  517. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +6494 -0
  518. data/ext/sources/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  519. data/ext/sources/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  520. data/ext/sources/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  521. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  522. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  523. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  524. data/ext/sources/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  525. data/ext/sources/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  526. data/ext/sources/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  527. data/ext/sources/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  528. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  529. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  530. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  531. data/ext/sources/ggml/src/ggml-opencl/kernels/glu.cl +201 -0
  532. data/ext/sources/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  533. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  534. data/ext/sources/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  535. data/ext/sources/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  536. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  537. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  538. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  539. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  540. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  541. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  542. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  543. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  544. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  545. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  546. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  547. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  548. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  549. data/ext/sources/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  550. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  551. data/ext/sources/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  552. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  553. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  554. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  555. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  556. data/ext/sources/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  557. data/ext/sources/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  558. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  559. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  560. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  561. data/ext/sources/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  562. data/ext/sources/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  563. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  564. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  565. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  566. data/ext/sources/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  567. data/ext/sources/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  568. data/ext/{ggml → sources/ggml}/src/ggml-opt.cpp +373 -190
  569. data/ext/{ggml → sources/ggml}/src/ggml-quants.c +120 -128
  570. data/ext/sources/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  571. data/ext/{ggml → sources/ggml}/src/ggml-rpc/ggml-rpc.cpp +494 -84
  572. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +189 -0
  573. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +37 -0
  574. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +344 -0
  575. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  576. data/ext/{ggml → sources/ggml}/src/ggml-sycl/common.cpp +20 -32
  577. data/ext/sources/ggml/src/ggml-sycl/common.hpp +561 -0
  578. data/ext/{ggml → sources/ggml}/src/ggml-sycl/concat.cpp +56 -70
  579. data/ext/sources/ggml/src/ggml-sycl/concat.hpp +20 -0
  580. data/ext/{ggml → sources/ggml}/src/ggml-sycl/conv.cpp +8 -12
  581. data/ext/sources/ggml/src/ggml-sycl/conv.hpp +20 -0
  582. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +575 -0
  583. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +34 -0
  584. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +839 -0
  585. data/ext/sources/ggml/src/ggml-sycl/cpy.hpp +11 -0
  586. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +823 -0
  587. data/ext/{ggml → sources/ggml}/src/ggml-sycl/dmmv.cpp +188 -67
  588. data/ext/sources/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  589. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +2987 -0
  590. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +1120 -0
  591. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +84 -0
  592. data/ext/sources/ggml/src/ggml-sycl/gemm.hpp +102 -0
  593. data/ext/sources/ggml/src/ggml-sycl/getrows.cpp +212 -0
  594. data/ext/sources/ggml/src/ggml-sycl/getrows.hpp +20 -0
  595. data/ext/{ggml → sources/ggml}/src/ggml-sycl/ggml-sycl.cpp +1197 -1295
  596. data/ext/sources/ggml/src/ggml-sycl/gla.cpp +106 -0
  597. data/ext/sources/ggml/src/ggml-sycl/gla.hpp +8 -0
  598. data/ext/sources/ggml/src/ggml-sycl/im2col.cpp +136 -0
  599. data/ext/sources/ggml/src/ggml-sycl/im2col.hpp +21 -0
  600. data/ext/{ggml → sources/ggml}/src/ggml-sycl/mmq.cpp +60 -81
  601. data/ext/sources/ggml/src/ggml-sycl/mmq.hpp +33 -0
  602. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +1065 -0
  603. data/ext/sources/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  604. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +482 -0
  605. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +26 -0
  606. data/ext/{ggml → sources/ggml}/src/ggml-sycl/outprod.cpp +8 -17
  607. data/ext/sources/ggml/src/ggml-sycl/outprod.hpp +10 -0
  608. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +74 -0
  609. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +111 -0
  610. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +472 -0
  611. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +20 -0
  612. data/ext/{ggml → sources/ggml}/src/ggml-sycl/softmax.cpp +38 -28
  613. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +20 -0
  614. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.cpp +15 -0
  615. data/ext/sources/ggml/src/ggml-sycl/sycl_hw.hpp +26 -0
  616. data/ext/{ggml → sources/ggml}/src/ggml-sycl/tsembd.cpp +6 -11
  617. data/ext/sources/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  618. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +1307 -0
  619. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +289 -0
  620. data/ext/sources/ggml/src/ggml-sycl/wkv.hpp +10 -0
  621. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +200 -0
  622. data/ext/sources/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  623. data/ext/{ggml → sources/ggml}/src/ggml-vulkan/ggml-vulkan.cpp +3822 -1335
  624. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +31 -0
  625. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  626. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  627. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  628. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  629. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  630. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  631. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  632. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  633. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  634. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  635. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  636. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  637. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  638. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  639. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  640. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  641. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  642. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  643. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  644. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  645. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  646. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  647. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  648. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  649. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  650. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  651. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  652. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  653. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  654. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  655. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  656. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  657. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  658. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  659. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  660. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  661. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  662. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  663. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  664. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +337 -0
  665. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  666. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  667. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +267 -0
  668. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  669. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  670. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  671. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  672. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  673. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  674. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  675. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  676. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  677. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  678. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  679. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  680. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  681. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  682. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  683. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  684. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  685. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  686. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  687. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  688. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  689. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  690. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  691. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  692. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  693. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  694. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  695. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  696. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  697. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  698. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  699. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  700. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  701. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  702. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  703. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  704. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  705. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  706. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  707. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  708. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  709. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  710. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  711. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  712. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  713. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  714. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +61 -0
  715. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  716. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  717. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  718. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  719. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  720. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  721. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  722. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  723. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  724. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  725. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  726. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  727. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  728. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  729. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  730. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  731. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  732. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  733. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  734. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  735. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  736. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  737. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  738. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  739. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  740. data/ext/{ggml → sources/ggml}/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +203 -36
  741. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  742. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  743. data/ext/{ggml → sources/ggml}/src/ggml.c +918 -1782
  744. data/ext/sources/ggml/src/ggml.cpp +26 -0
  745. data/ext/sources/ggml/src/gguf.cpp +1351 -0
  746. data/ext/{include → sources/include}/whisper.h +70 -2
  747. data/ext/sources/src/CMakeLists.txt +145 -0
  748. data/ext/sources/src/coreml/whisper-compat.h +10 -0
  749. data/ext/sources/src/coreml/whisper-compat.m +35 -0
  750. data/ext/{src → sources/src}/coreml/whisper-decoder-impl.h +27 -15
  751. data/ext/{src → sources/src}/coreml/whisper-decoder-impl.m +36 -10
  752. data/ext/{src → sources/src}/coreml/whisper-encoder-impl.h +21 -9
  753. data/ext/{src → sources/src}/coreml/whisper-encoder-impl.m +29 -3
  754. data/ext/sources/src/coreml/whisper-encoder.mm +73 -0
  755. data/ext/sources/src/whisper-arch.h +197 -0
  756. data/ext/{src → sources/src}/whisper.cpp +1966 -386
  757. data/ext/sources/tests/CMakeLists.txt +105 -0
  758. data/ext/sources/tests/earnings21/eval.mk +58 -0
  759. data/ext/sources/tests/earnings21/eval.py +68 -0
  760. data/ext/sources/tests/earnings21/normalizers/__init__.py +2 -0
  761. data/ext/sources/tests/earnings21/normalizers/basic.py +80 -0
  762. data/ext/sources/tests/earnings21/normalizers/english.json +1741 -0
  763. data/ext/sources/tests/earnings21/normalizers/english.py +550 -0
  764. data/ext/sources/tests/earnings21/requirements.txt +6 -0
  765. data/ext/sources/tests/en-0-ref.txt +1 -0
  766. data/ext/sources/tests/en-1-ref.txt +1 -0
  767. data/ext/sources/tests/en-2-ref.txt +1 -0
  768. data/ext/sources/tests/es-0-ref.txt +1 -0
  769. data/ext/sources/tests/librispeech/eval.mk +39 -0
  770. data/ext/sources/tests/librispeech/eval.py +47 -0
  771. data/ext/sources/tests/librispeech/normalizers/__init__.py +2 -0
  772. data/ext/sources/tests/librispeech/normalizers/basic.py +80 -0
  773. data/ext/sources/tests/librispeech/normalizers/english.json +1741 -0
  774. data/ext/sources/tests/librispeech/normalizers/english.py +550 -0
  775. data/ext/sources/tests/librispeech/requirements.txt +6 -0
  776. data/ext/sources/tests/run-tests.sh +130 -0
  777. data/ext/sources/tests/test-c.c +3 -0
  778. data/ext/sources/tests/test-vad-full.cpp +54 -0
  779. data/ext/sources/tests/test-vad.cpp +83 -0
  780. data/ext/sources/tests/test-whisper.js +58 -0
  781. data/extsources.rb +39 -5
  782. data/lib/whisper/context.rb +15 -0
  783. data/lib/whisper/model/uri.rb +202 -126
  784. data/lib/whisper/segment.rb +58 -0
  785. data/sig/whisper.rbs +510 -0
  786. data/test/helper.rb +24 -0
  787. data/{tests → test}/test_callback.rb +45 -3
  788. data/{tests → test}/test_error.rb +2 -2
  789. data/{tests → test}/test_model.rb +47 -0
  790. data/test/test_package.rb +51 -0
  791. data/test/test_params.rb +297 -0
  792. data/test/test_segment.rb +146 -0
  793. data/test/test_vad.rb +19 -0
  794. data/test/test_vad_params.rb +103 -0
  795. data/{tests → test}/test_whisper.rb +106 -36
  796. data/whispercpp.gemspec +5 -5
  797. metadata +837 -134
  798. data/ext/cpu.mk +0 -9
  799. data/ext/examples/dr_wav.h +0 -8815
  800. data/ext/ggml/src/ggml-cann/aclnn_ops.h +0 -592
  801. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -4262
  802. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  803. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -10835
  804. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +0 -14123
  805. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +0 -1884
  806. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +0 -14
  807. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +0 -288
  808. data/ext/ggml/src/ggml-sycl/convert.cpp +0 -547
  809. data/ext/ggml/src/ggml-sycl/element_wise.cpp +0 -1030
  810. data/ext/ggml/src/ggml-sycl/im2col.cpp +0 -126
  811. data/ext/ggml/src/ggml-sycl/mmvq.cpp +0 -1015
  812. data/ext/ggml/src/ggml-sycl/norm.cpp +0 -378
  813. data/ext/ggml/src/ggml-sycl/rope.cpp +0 -276
  814. data/ext/ggml/src/ggml-sycl/wkv6.cpp +0 -141
  815. data/ext/metal-embed.mk +0 -17
  816. data/ext/metal.mk +0 -6
  817. data/ext/ruby_whisper.cpp +0 -1909
  818. data/ext/scripts/get-flags.mk +0 -38
  819. data/lib/whisper.rb +0 -2
  820. data/tests/helper.rb +0 -7
  821. data/tests/test_package.rb +0 -31
  822. data/tests/test_params.rb +0 -160
  823. data/tests/test_segment.rb +0 -83
  824. /data/ext/{ggml → sources/ggml}/include/ggml-blas.h +0 -0
  825. /data/ext/{ggml → sources/ggml}/include/ggml-cann.h +0 -0
  826. /data/ext/{ggml → sources/ggml}/include/ggml-cuda.h +0 -0
  827. /data/ext/{ggml → sources/ggml}/include/ggml-kompute.h +0 -0
  828. /data/ext/{ggml → sources/ggml}/include/ggml-opencl.h +0 -0
  829. /data/ext/{ggml → sources/ggml}/include/ggml-sycl.h +0 -0
  830. /data/ext/{ggml → sources/ggml}/src/ggml-amx/common.h +0 -0
  831. /data/ext/{ggml → sources/ggml}/src/ggml-amx/ggml-amx.cpp +0 -0
  832. /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.cpp +0 -0
  833. /data/ext/{ggml → sources/ggml}/src/ggml-amx/mmq.h +0 -0
  834. /data/ext/{ggml → sources/ggml}/src/ggml-blas/ggml-blas.cpp +0 -0
  835. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/ascendc_kernels.h +0 -0
  836. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f16.cpp +0 -0
  837. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_f32.cpp +0 -0
  838. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -0
  839. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -0
  840. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -0
  841. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -0
  842. /data/ext/{ggml → sources/ggml}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -0
  843. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/amx.h +0 -0
  844. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/common.h +0 -0
  845. /data/ext/{ggml → sources/ggml}/src/ggml-cpu/amx/mmq.h +0 -0
  846. /data/ext/{ggml/src/ggml-cpu/ggml-cpu-hbm.h → sources/ggml/src/ggml-cpu/hbm.h} +0 -0
  847. /data/ext/{ggml/src/ggml-cpu/ggml-cpu-traits.h → sources/ggml/src/ggml-cpu/traits.h} +0 -0
  848. /data/ext/{ggml → sources/ggml}/src/ggml-kompute/ggml-kompute.cpp +0 -0
  849. /data/ext/{ggml → sources/ggml}/src/ggml-quants.h +0 -0
  850. /data/ext/{ggml → sources/ggml}/src/ggml-threading.cpp +0 -0
  851. /data/ext/{ggml → sources/ggml}/src/ggml-threading.h +0 -0
  852. /data/ext/{src → sources/src}/coreml/whisper-encoder.h +0 -0
  853. /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.cpp +0 -0
  854. /data/ext/{src → sources/src}/openvino/whisper-openvino-encoder.h +0 -0
  855. /data/{tests → test}/jfk_reader/.gitignore +0 -0
  856. /data/{tests → test}/jfk_reader/extconf.rb +0 -0
  857. /data/{tests → test}/jfk_reader/jfk_reader.c +0 -0
@@ -0,0 +1,1238 @@
1
+ #include "common.h"
2
+ #include "common-whisper.h"
3
+
4
+ #include "whisper.h"
5
+ #include "httplib.h"
6
+ #include "json.hpp"
7
+
8
+ #include <cfloat>
9
+ #include <chrono>
10
+ #include <cmath>
11
+ #include <cstdio>
12
+ #include <fstream>
13
+ #include <sstream>
14
+ #include <string>
15
+ #include <thread>
16
+ #include <vector>
17
+ #include <memory>
18
+ #include <csignal>
19
+ #include <atomic>
20
+ #include <functional>
21
+ #include <cstdlib>
22
+ #if defined (_WIN32)
23
+ #include <windows.h>
24
+ #endif
25
+
26
+ using namespace httplib;
27
+ using json = nlohmann::ordered_json;
28
+
// Lifecycle phase of the server with respect to model loading.
enum server_state {
    // Server is starting up, model not fully loaded yet.
    SERVER_STATE_LOADING_MODEL = 0,
    // Server is ready and model is loaded.
    SERVER_STATE_READY = 1,
};
33
+
34
+ namespace {
35
+
// Names of the supported output formats (matched against the requested response format).
const std::string json_format{"json"};
const std::string text_format{"text"};
const std::string srt_format{"srt"};
const std::string vjson_format{"verbose_json"};
const std::string vtt_format{"vtt"};
42
+
// Callback invoked by signal_handler() on the first termination signal.
// NOTE: it must be assigned before a signal can arrive; calling an empty
// std::function throws std::bad_function_call.
std::function<void(int)> shutdown_handler;

// Set by the first termination signal so that a second one can be told apart.
std::atomic_flag is_terminating = ATOMIC_FLAG_INIT;

// Signal entry point: the first signal is forwarded to shutdown_handler, any
// later one terminates the process immediately with exit status 1.
inline void signal_handler(int signal) {
    const bool already_terminating = is_terminating.test_and_set();

    if (!already_terminating) {
        shutdown_handler(signal);
        return;
    }

    // in case the shutdown hangs, hitting Ctrl+C twice force-terminates the server;
    // this is a developer convenience that can go once the server is stable enough
    fprintf(stderr, "Received second interrupt, terminating immediately.\n");
    exit(1);
}
56
+
// HTTP-server settings, filled in from the command line (see the --host/--port/... options).
struct server_params
{
    std::string hostname{"127.0.0.1"};                 // --host: hostname/ip-address for the server
    std::string public_path{"examples/server/public"}; // --public: path to the public folder
    std::string request_path{};                        // --request-path: request path for all requests
    std::string inference_path{"/inference"};          // --inference-path: inference path for all requests

    int32_t port{8080};         // --port: port number for the server
    int32_t read_timeout{600};  // presumably seconds - confirm where the timeout is applied
    int32_t write_timeout{600}; // presumably seconds - confirm where the timeout is applied

    bool ffmpeg_converter{false}; // --convert: convert audio to WAV, requires ffmpeg on the server
};
70
+
71
+ struct whisper_params {
72
+ int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
73
+ int32_t n_processors = 1;
74
+ int32_t offset_t_ms = 0;
75
+ int32_t offset_n = 0;
76
+ int32_t duration_ms = 0;
77
+ int32_t progress_step = 5;
78
+ int32_t max_context = -1;
79
+ int32_t max_len = 0;
80
+ int32_t best_of = 2;
81
+ int32_t beam_size = -1;
82
+ int32_t audio_ctx = 0;
83
+
84
+ float word_thold = 0.01f;
85
+ float entropy_thold = 2.40f;
86
+ float logprob_thold = -1.00f;
87
+ float temperature = 0.00f;
88
+ float temperature_inc = 0.20f;
89
+ float no_speech_thold = 0.6f;
90
+
91
+ bool debug_mode = false;
92
+ bool translate = false;
93
+ bool detect_language = false;
94
+ bool diarize = false;
95
+ bool tinydiarize = false;
96
+ bool split_on_word = false;
97
+ bool no_fallback = false;
98
+ bool print_special = false;
99
+ bool print_colors = false;
100
+ bool print_realtime = false;
101
+ bool print_progress = false;
102
+ bool no_timestamps = false;
103
+ bool use_gpu = true;
104
+ bool flash_attn = false;
105
+ bool suppress_nst = false;
106
+ bool no_context = false;
107
+
108
+ std::string language = "en";
109
+ std::string prompt = "";
110
+ std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
111
+ std::string model = "models/ggml-base.en.bin";
112
+
113
+ std::string response_format = json_format;
114
+
115
+ // [TDRZ] speaker turn string
116
+ std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
117
+
118
+ std::string openvino_encode_device = "CPU";
119
+
120
+ std::string dtw = "";
121
+
122
+ // Voice Activity Detection (VAD) parameters
123
+ bool vad = false;
124
+ std::string vad_model = "";
125
+ float vad_threshold = 0.5f;
126
+ int vad_min_speech_duration_ms = 250;
127
+ int vad_min_silence_duration_ms = 100;
128
+ float vad_max_speech_duration_s = FLT_MAX;
129
+ int vad_speech_pad_ms = 30;
130
+ float vad_samples_overlap = 0.1f;
131
+ };
132
+
133
+ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params, const server_params& sparams) {
134
+ fprintf(stderr, "\n");
135
+ fprintf(stderr, "usage: %s [options] \n", argv[0]);
136
+ fprintf(stderr, "\n");
137
+ fprintf(stderr, "options:\n");
138
+ fprintf(stderr, " -h, --help [default] show this help message and exit\n");
139
+ fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
140
+ fprintf(stderr, " -p N, --processors N [%-7d] number of processors to use during computation\n", params.n_processors);
141
+ fprintf(stderr, " -ot N, --offset-t N [%-7d] time offset in milliseconds\n", params.offset_t_ms);
142
+ fprintf(stderr, " -on N, --offset-n N [%-7d] segment index offset\n", params.offset_n);
143
+ fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
144
+ fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
145
+ fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
146
+ fprintf(stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n", params.split_on_word ? "true" : "false");
147
+ fprintf(stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n", params.best_of);
148
+ fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
149
+ fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
150
+ fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
151
+ fprintf(stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n", params.entropy_thold);
152
+ fprintf(stderr, " -lpt N, --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n", params.logprob_thold);
153
+ fprintf(stderr, " -debug, --debug-mode [%-7s] enable debug mode (eg. dump log_mel)\n", params.debug_mode ? "true" : "false");
154
+ fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
155
+ fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
156
+ fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
157
+ fprintf(stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
158
+ fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
159
+ fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
160
+ fprintf(stderr, " -pr, --print-realtime [%-7s] print output in realtime\n", params.print_realtime ? "true" : "false");
161
+ fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
162
+ fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false");
163
+ fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str());
164
+ fprintf(stderr, " -dl, --detect-language [%-7s] exit after automatically detecting language\n", params.detect_language ? "true" : "false");
165
+ fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str());
166
+ fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
167
+ fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
168
+ // server params
169
+ fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
170
+ fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
171
+ fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
172
+ fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
173
+ fprintf(stderr, " --request-path PATH, [%-7s] Request path for all requests\n", sparams.request_path.c_str());
174
+ fprintf(stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n", sparams.inference_path.c_str());
175
+ fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server\n", sparams.ffmpeg_converter ? "true" : "false");
176
+ fprintf(stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n", params.suppress_nst ? "true" : "false");
177
+ fprintf(stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n", params.no_speech_thold);
178
+ fprintf(stderr, " -nc, --no-context [%-7s] do not use previous audio context\n", params.no_context ? "true" : "false");
179
+ fprintf(stderr, " -ng, --no-gpu [%-7s] do not use gpu\n", params.use_gpu ? "false" : "true");
180
+ fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention\n", params.flash_attn ? "true" : "false");
181
+ // Voice Activity Detection (VAD) parameters
182
+ fprintf(stderr, "\nVoice Activity Detection (VAD) options:\n");
183
+ fprintf(stderr, " --vad [%-7s] enable Voice Activity Detection (VAD)\n", params.vad ? "true" : "false");
184
+ fprintf(stderr, " -vm FNAME, --vad-model FNAME [%-7s] VAD model path\n", params.vad_model.c_str());
185
+ fprintf(stderr, " -vt N, --vad-threshold N [%-7.2f] VAD threshold for speech recognition\n", params.vad_threshold);
186
+ fprintf(stderr, " -vspd N, --vad-min-speech-duration-ms N [%-7d] VAD min speech duration (0.0-1.0)\n", params.vad_min_speech_duration_ms);
187
+ fprintf(stderr, " -vsd N, --vad-min-silence-duration-ms N [%-7d] VAD min silence duration (to split segments)\n", params.vad_min_silence_duration_ms);
188
+ fprintf(stderr, " -vmsd N, --vad-max-speech-duration-s N [%-7s] VAD max speech duration (auto-split longer)\n", params.vad_max_speech_duration_s == FLT_MAX ?
189
+ std::string("FLT_MAX").c_str() :
190
+ std::to_string(params.vad_max_speech_duration_s).c_str());
191
+ fprintf(stderr, " -vp N, --vad-speech-pad-ms N [%-7d] VAD speech padding (extend segments)\n", params.vad_speech_pad_ms);
192
+ fprintf(stderr, " -vo N, --vad-samples-overlap N [%-7.2f] VAD samples overlap (seconds between segments)\n", params.vad_samples_overlap);
193
+ fprintf(stderr, "\n");
194
+ }
195
+
196
+ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, server_params & sparams) {
197
+ for (int i = 1; i < argc; i++) {
198
+ std::string arg = argv[i];
199
+
200
+ if (arg == "-h" || arg == "--help") {
201
+ whisper_print_usage(argc, argv, params, sparams);
202
+ exit(0);
203
+ }
204
+ else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
205
+ else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(argv[++i]); }
206
+ else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(argv[++i]); }
207
+ else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(argv[++i]); }
208
+ else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
209
+ else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
210
+ else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
211
+ else if (arg == "-bo" || arg == "--best-of") { params.best_of = std::stoi(argv[++i]); }
212
+ else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
213
+ else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
214
+ else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
215
+ else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(argv[++i]); }
216
+ else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(argv[++i]); }
217
+ else if (arg == "-debug"|| arg == "--debug-mode") { params.debug_mode = true; }
218
+ else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
219
+ else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
220
+ else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
221
+ else if (arg == "-sow" || arg == "--split-on-word") { params.split_on_word = true; }
222
+ else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
223
+ else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
224
+ else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
225
+ else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
226
+ else if (arg == "-pr" || arg == "--print-realtime") { params.print_realtime = true; }
227
+ else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
228
+ else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
229
+ else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
230
+ else if (arg == "-dl" || arg == "--detect-language") { params.detect_language = true; }
231
+ else if ( arg == "--prompt") { params.prompt = argv[++i]; }
232
+ else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
233
+ else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
234
+ else if (arg == "-dtw" || arg == "--dtw") { params.dtw = argv[++i]; }
235
+ else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
236
+ else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
237
+ else if (arg == "-sns" || arg == "--suppress-nst") { params.suppress_nst = true; }
238
+ else if (arg == "-nth" || arg == "--no-speech-thold") { params.no_speech_thold = std::stof(argv[++i]); }
239
+ else if (arg == "-nc" || arg == "--no-context") { params.no_context = true; }
240
+
241
+ // server params
242
+ else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
243
+ else if ( arg == "--host") { sparams.hostname = argv[++i]; }
244
+ else if ( arg == "--public") { sparams.public_path = argv[++i]; }
245
+ else if ( arg == "--request-path") { sparams.request_path = argv[++i]; }
246
+ else if ( arg == "--inference-path") { sparams.inference_path = argv[++i]; }
247
+ else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
248
+
249
+ // Voice Activity Detection (VAD)
250
+ else if ( arg == "--vad") { params.vad = true; }
251
+ else if (arg == "-vm" || arg == "--vad-model") { params.vad_model = argv[++i]; }
252
+ else if (arg == "-vt" || arg == "--vad-threshold") { params.vad_threshold = std::stof(argv[++i]); }
253
+ else if (arg == "-vspd" || arg == "--vad-min-speech-duration-ms") { params.vad_min_speech_duration_ms = std::stoi(argv[++i]); }
254
+ else if (arg == "-vsd" || arg == "--vad-min-silence-duration-ms") { params.vad_min_speech_duration_ms = std::stoi(argv[++i]); }
255
+ else if (arg == "-vmsd" || arg == "--vad-max-speech-duration-s") { params.vad_max_speech_duration_s = std::stof(argv[++i]); }
256
+ else if (arg == "-vp" || arg == "--vad-speech-pad-ms") { params.vad_speech_pad_ms = std::stoi(argv[++i]); }
257
+ else if (arg == "-vo" || arg == "--vad-samples-overlap") { params.vad_samples_overlap = std::stof(argv[++i]); }
258
+ else {
259
+ fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
260
+ whisper_print_usage(argc, argv, params, sparams);
261
+ exit(0);
262
+ }
263
+ }
264
+
265
+ return true;
266
+ }
267
+
268
+ struct whisper_print_user_data {
269
+ const whisper_params * params;
270
+
271
+ const std::vector<std::vector<float>> * pcmf32s;
272
+ int progress_prev;
273
+ };
274
+
// Runs `ffmpeg -version` through the shell to find out whether ffmpeg can be
// started. Reports the outcome on stdout; when the command fails the process
// exits (with status 0).
void check_ffmpeg_availibility() {
    const int exit_status = system("ffmpeg -version");

    if (exit_status != 0) {
        // ffmpeg is not available
        std::cout << "ffmpeg is not found. Please ensure that ffmpeg is installed ";
        std::cout << "and that its executable is included in your system's PATH. ";
        exit(0);
    }

    std::cout << "ffmpeg is available." << std::endl;
}
287
+
// Builds a file name of the form "<prefix>-<YYYYmmdd>-<HHMMSS>-<random><extension>".
// The timestamp is the current local time and the random part is drawn from
// [0, 1e9] using a generator that is seeded once per process.
std::string generate_temp_filename(const std::string &prefix, const std::string &extension) {
    static std::mt19937 rng{std::random_device{}()};
    std::uniform_int_distribution<long long> dist(0, 1e9);

    const std::time_t now_time_t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

    std::ostringstream name;
    name << prefix << "-";
    name << std::put_time(std::localtime(&now_time_t), "%Y%m%d-%H%M%S") << "-";
    name << dist(rng) << extension;

    return name.str();
}
305
+
// Converts the audio file `temp_filename` in place to 16 kHz mono 16-bit PCM WAV
// by running ffmpeg through the shell.
//
// ffmpeg writes to "<temp_filename>_temp.wav"; on success the original file is
// removed and the converted file is renamed to `temp_filename`.
// Returns true on success. On failure returns false and stores a JSON error
// object in `error_resp`.
// NOTE(review): `temp_filename` is pasted into a shell command line, so it has
// to be a trusted, server-generated name - confirm at the call site.
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
    const std::string converted_filename_temp = temp_filename + "_temp.wav";
    const std::string cmd =
        "ffmpeg -i \"" + temp_filename + "\" -y -ar 16000 -ac 1 -c:a pcm_s16le \"" + converted_filename_temp + "\" 2>&1";

    if (std::system(cmd.c_str()) != 0) {
        error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
        return false;
    }

    // Remove the original file
    const bool removed = remove(temp_filename.c_str()) == 0;
    if (!removed) {
        error_resp = "{\"error\":\"Failed to remove the original file.\"}";
        return false;
    }

    // Rename the temporary file to match the original filename
    const bool renamed = rename(converted_filename_temp.c_str(), temp_filename.c_str()) == 0;
    if (!renamed) {
        error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
        return false;
    }

    return true;
}
331
+
332
+ std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
333
+ std::string speaker = "";
334
+ const int64_t n_samples = pcmf32s[0].size();
335
+
336
+ const int64_t is0 = timestamp_to_sample(t0, n_samples, WHISPER_SAMPLE_RATE);
337
+ const int64_t is1 = timestamp_to_sample(t1, n_samples, WHISPER_SAMPLE_RATE);
338
+
339
+ double energy0 = 0.0f;
340
+ double energy1 = 0.0f;
341
+
342
+ for (int64_t j = is0; j < is1; j++) {
343
+ energy0 += fabs(pcmf32s[0][j]);
344
+ energy1 += fabs(pcmf32s[1][j]);
345
+ }
346
+
347
+ if (energy0 > 1.1*energy1) {
348
+ speaker = "0";
349
+ } else if (energy1 > 1.1*energy0) {
350
+ speaker = "1";
351
+ } else {
352
+ speaker = "?";
353
+ }
354
+
355
+ //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, speaker = %s\n", is0, is1, energy0, energy1, speaker.c_str());
356
+
357
+ if (!id_only) {
358
+ speaker.insert(0, "(speaker ");
359
+ speaker.append(")");
360
+ }
361
+
362
+ return speaker;
363
+ }
364
+
365
+ void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
366
+ int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
367
+ int * progress_prev = &(((whisper_print_user_data *) user_data)->progress_prev);
368
+ if (progress >= *progress_prev + progress_step) {
369
+ *progress_prev += progress_step;
370
+ fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress);
371
+ }
372
+ }
373
+
374
+ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
375
+ const auto & params = *((whisper_print_user_data *) user_data)->params;
376
+ const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
377
+
378
+ const int n_segments = whisper_full_n_segments(ctx);
379
+
380
+ std::string speaker = "";
381
+
382
+ int64_t t0 = 0;
383
+ int64_t t1 = 0;
384
+
385
+ // print the last n_new segments
386
+ const int s0 = n_segments - n_new;
387
+
388
+ if (s0 == 0) {
389
+ printf("\n");
390
+ }
391
+
392
+ for (int i = s0; i < n_segments; i++) {
393
+ if (!params.no_timestamps || params.diarize) {
394
+ t0 = whisper_full_get_segment_t0(ctx, i);
395
+ t1 = whisper_full_get_segment_t1(ctx, i);
396
+ }
397
+
398
+ if (!params.no_timestamps) {
399
+ printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
400
+ }
401
+
402
+ if (params.diarize && pcmf32s.size() == 2) {
403
+ speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
404
+ }
405
+
406
+ if (params.print_colors) {
407
+ for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
408
+ if (params.print_special == false) {
409
+ const whisper_token id = whisper_full_get_token_id(ctx, i, j);
410
+ if (id >= whisper_token_eot(ctx)) {
411
+ continue;
412
+ }
413
+ }
414
+
415
+ const char * text = whisper_full_get_token_text(ctx, i, j);
416
+ const float p = whisper_full_get_token_p (ctx, i, j);
417
+
418
+ const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
419
+
420
+ printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
421
+ }
422
+ } else {
423
+ const char * text = whisper_full_get_segment_text(ctx, i);
424
+
425
+ printf("%s%s", speaker.c_str(), text);
426
+ }
427
+
428
+ if (params.tinydiarize) {
429
+ if (whisper_full_get_segment_speaker_turn_next(ctx, i)) {
430
+ printf("%s", params.tdrz_speaker_turn.c_str());
431
+ }
432
+ }
433
+
434
+ // with timestamps or speakers: each segment on new line
435
+ if (!params.no_timestamps || params.diarize) {
436
+ printf("\n");
437
+ }
438
+ fflush(stdout);
439
+ }
440
+ }
441
+
442
+ std::string output_str(struct whisper_context * ctx, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
443
+ std::stringstream result;
444
+ const int n_segments = whisper_full_n_segments(ctx);
445
+ for (int i = 0; i < n_segments; ++i) {
446
+ const char * text = whisper_full_get_segment_text(ctx, i);
447
+ std::string speaker = "";
448
+
449
+ if (params.diarize && pcmf32s.size() == 2)
450
+ {
451
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
452
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
453
+ speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
454
+ }
455
+
456
+ result << speaker << text << "\n";
457
+ }
458
+ return result.str();
459
+ }
460
+
// Interprets a textual flag: exactly "true", "1", "yes" and "y" mean true,
// every other string (including different capitalization) means false.
bool parse_str_to_bool(const std::string & s) {
    return s == "true" || s == "1" || s == "yes" || s == "y";
}
467
+
468
+ void get_req_parameters(const Request & req, whisper_params & params)
469
+ {
470
+ if (req.has_file("offset_t"))
471
+ {
472
+ params.offset_t_ms = std::stoi(req.get_file_value("offset_t").content);
473
+ }
474
+ if (req.has_file("offset_n"))
475
+ {
476
+ params.offset_n = std::stoi(req.get_file_value("offset_n").content);
477
+ }
478
+ if (req.has_file("duration"))
479
+ {
480
+ params.duration_ms = std::stoi(req.get_file_value("duration").content);
481
+ }
482
+ if (req.has_file("max_context"))
483
+ {
484
+ params.max_context = std::stoi(req.get_file_value("max_context").content);
485
+ }
486
+ if (req.has_file("max_len"))
487
+ {
488
+ params.max_len = std::stoi(req.get_file_value("max_len").content);
489
+ }
490
+ if (req.has_file("best_of"))
491
+ {
492
+ params.best_of = std::stoi(req.get_file_value("best_of").content);
493
+ }
494
+ if (req.has_file("beam_size"))
495
+ {
496
+ params.beam_size = std::stoi(req.get_file_value("beam_size").content);
497
+ }
498
+ if (req.has_file("audio_ctx"))
499
+ {
500
+ params.audio_ctx = std::stof(req.get_file_value("audio_ctx").content);
501
+ }
502
+ if (req.has_file("word_thold"))
503
+ {
504
+ params.word_thold = std::stof(req.get_file_value("word_thold").content);
505
+ }
506
+ if (req.has_file("entropy_thold"))
507
+ {
508
+ params.entropy_thold = std::stof(req.get_file_value("entropy_thold").content);
509
+ }
510
+ if (req.has_file("logprob_thold"))
511
+ {
512
+ params.logprob_thold = std::stof(req.get_file_value("logprob_thold").content);
513
+ }
514
+ if (req.has_file("debug_mode"))
515
+ {
516
+ params.debug_mode = parse_str_to_bool(req.get_file_value("debug_mode").content);
517
+ }
518
+ if (req.has_file("translate"))
519
+ {
520
+ params.translate = parse_str_to_bool(req.get_file_value("translate").content);
521
+ }
522
+ if (req.has_file("diarize"))
523
+ {
524
+ params.diarize = parse_str_to_bool(req.get_file_value("diarize").content);
525
+ }
526
+ if (req.has_file("tinydiarize"))
527
+ {
528
+ params.tinydiarize = parse_str_to_bool(req.get_file_value("tinydiarize").content);
529
+ }
530
+ if (req.has_file("split_on_word"))
531
+ {
532
+ params.split_on_word = parse_str_to_bool(req.get_file_value("split_on_word").content);
533
+ }
534
+ if (req.has_file("no_timestamps"))
535
+ {
536
+ params.no_timestamps = parse_str_to_bool(req.get_file_value("no_timestamps").content);
537
+ }
538
+ if (req.has_file("language"))
539
+ {
540
+ params.language = req.get_file_value("language").content;
541
+ }
542
+ if (req.has_file("detect_language"))
543
+ {
544
+ params.detect_language = parse_str_to_bool(req.get_file_value("detect_language").content);
545
+ }
546
+ if (req.has_file("prompt"))
547
+ {
548
+ params.prompt = req.get_file_value("prompt").content;
549
+ }
550
+ if (req.has_file("response_format"))
551
+ {
552
+ params.response_format = req.get_file_value("response_format").content;
553
+ }
554
+ if (req.has_file("temperature"))
555
+ {
556
+ params.temperature = std::stof(req.get_file_value("temperature").content);
557
+ }
558
+ if (req.has_file("temperature_inc"))
559
+ {
560
+ params.temperature_inc = std::stof(req.get_file_value("temperature_inc").content);
561
+ }
562
+ if (req.has_file("suppress_non_speech"))
563
+ {
564
+ params.suppress_nst = parse_str_to_bool(req.get_file_value("suppress_non_speech").content);
565
+ }
566
+ if (req.has_file("suppress_nst"))
567
+ {
568
+ params.suppress_nst = parse_str_to_bool(req.get_file_value("suppress_nst").content);
569
+ }
570
+ if (req.has_file("no_context"))
571
+ {
572
+ params.no_context = parse_str_to_bool(req.get_file_value("no_context").content);
573
+ }
574
+ if (req.has_file("vad"))
575
+ {
576
+ params.vad = parse_str_to_bool(req.get_file_value("vad").content);
577
+ }
578
+ if (req.has_file("vad_threshold"))
579
+ {
580
+ params.vad_threshold = std::stof(req.get_file_value("vad_threshold").content);
581
+ }
582
+ if (req.has_file("vad_min_speech_duration_ms"))
583
+ {
584
+ params.vad_min_speech_duration_ms = std::stof(req.get_file_value("vad_min_speech_duration_ms").content);
585
+ }
586
+ if (req.has_file("vad_min_silence_duration_ms"))
587
+ {
588
+ params.vad_min_silence_duration_ms = std::stof(req.get_file_value("vad_min_silence_duration_ms").content);
589
+ }
590
+ if (req.has_file("vad_max_speech_duration_s"))
591
+ {
592
+ params.vad_max_speech_duration_s = std::stof(req.get_file_value("vad_max_speech_duration_s").content);
593
+ }
594
+ if (req.has_file("vad_speech_pad_ms"))
595
+ {
596
+ params.vad_speech_pad_ms = std::stoi(req.get_file_value("vad_speech_pad_ms").content);
597
+ }
598
+ if (req.has_file("vad_samples_overlap"))
599
+ {
600
+ params.vad_samples_overlap = std::stof(req.get_file_value("vad_samples_overlap").content);
601
+ }
602
+ }
603
+
604
+ } // namespace
605
+
606
+ int main(int argc, char ** argv) {
607
+ ggml_backend_load_all();
608
+
609
+ whisper_params params;
610
+ server_params sparams;
611
+
612
+ std::mutex whisper_mutex;
613
+
614
+ if (whisper_params_parse(argc, argv, params, sparams) == false) {
615
+ whisper_print_usage(argc, argv, params, sparams);
616
+ return 1;
617
+ }
618
+
619
+ if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
620
+ fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
621
+ whisper_print_usage(argc, argv, params, sparams);
622
+ exit(0);
623
+ }
624
+
625
+ if (params.diarize && params.tinydiarize) {
626
+ fprintf(stderr, "error: cannot use both --diarize and --tinydiarize\n");
627
+ whisper_print_usage(argc, argv, params, sparams);
628
+ exit(0);
629
+ }
630
+
631
+ if (sparams.ffmpeg_converter) {
632
+ check_ffmpeg_availibility();
633
+ }
634
+ // whisper init
635
+ struct whisper_context_params cparams = whisper_context_default_params();
636
+
637
+ cparams.use_gpu = params.use_gpu;
638
+ cparams.flash_attn = params.flash_attn;
639
+
640
+ if (!params.dtw.empty()) {
641
+ cparams.dtw_token_timestamps = true;
642
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_NONE;
643
+
644
+ if (params.dtw == "tiny") {
645
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_TINY;
646
+ }
647
+ if (params.dtw == "tiny.en") {
648
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_TINY_EN;
649
+ }
650
+ if (params.dtw == "base") {
651
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE;
652
+ }
653
+ if (params.dtw == "base.en") {
654
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_BASE_EN;
655
+ }
656
+ if (params.dtw == "small") {
657
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_SMALL;
658
+ }
659
+ if (params.dtw == "small.en") {
660
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_SMALL_EN;
661
+ }
662
+ if (params.dtw == "medium") {
663
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_MEDIUM;
664
+ }
665
+ if (params.dtw == "medium.en") {
666
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_MEDIUM_EN;
667
+ }
668
+ if (params.dtw == "large.v1") {
669
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V1;
670
+ }
671
+ if (params.dtw == "large.v2") {
672
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V2;
673
+ }
674
+ if (params.dtw == "large.v3") {
675
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3;
676
+ }
677
+ if (params.dtw == "large.v3.turbo") {
678
+ cparams.dtw_aheads_preset = WHISPER_AHEADS_LARGE_V3_TURBO;
679
+ }
680
+
681
+ if (cparams.dtw_aheads_preset == WHISPER_AHEADS_NONE) {
682
+ fprintf(stderr, "error: unknown DTW preset '%s'\n", params.dtw.c_str());
683
+ return 3;
684
+ }
685
+ }
686
+
687
+ std::unique_ptr<httplib::Server> svr = std::make_unique<httplib::Server>();
688
+ std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
689
+
690
+ struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
691
+
692
+ if (ctx == nullptr) {
693
+ fprintf(stderr, "error: failed to initialize whisper context\n");
694
+ return 3;
695
+ }
696
+
697
+ // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
698
+ whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
699
+ state.store(SERVER_STATE_READY);
700
+
701
+
702
+ svr->set_default_headers({{"Server", "whisper.cpp"},
703
+ {"Access-Control-Allow-Origin", "*"},
704
+ {"Access-Control-Allow-Headers", "content-type, authorization"}});
705
+
706
+ std::string const default_content = R"(
707
+ <html>
708
+ <head>
709
+ <title>Whisper.cpp Server</title>
710
+ <meta charset="utf-8">
711
+ <meta name="viewport" content="width=device-width">
712
+ <style>
713
+ body {
714
+ font-family: sans-serif;
715
+ }
716
+ form {
717
+ display: flex;
718
+ flex-direction: column;
719
+ align-items: flex-start;
720
+ }
721
+ label {
722
+ margin-bottom: 0.5rem;
723
+ }
724
+ input, select {
725
+ margin-bottom: 1rem;
726
+ }
727
+ button {
728
+ margin-top: 1rem;
729
+ }
730
+ </style>
731
+ </head>
732
+ <body>
733
+ <h1>Whisper.cpp Server</h1>
734
+
735
+ <h2>/inference</h2>
736
+ <pre>
737
+ curl 127.0.0.1:)" + std::to_string(sparams.port) + R"(/inference \
738
+ -H "Content-Type: multipart/form-data" \
739
+ -F file="@&lt;file-path&gt;" \
740
+ -F temperature="0.0" \
741
+ -F temperature_inc="0.2" \
742
+ -F response_format="json"
743
+ </pre>
744
+
745
+ <h2>/load</h2>
746
+ <pre>
747
+ curl 127.0.0.1:)" + std::to_string(sparams.port) + R"(/load \
748
+ -H "Content-Type: multipart/form-data" \
749
+ -F model="&lt;path-to-model-file&gt;"
750
+ </pre>
751
+
752
+ <div>
753
+ <h2>Try it out</h2>
754
+ <form action="/inference" method="POST" enctype="multipart/form-data">
755
+ <label for="file">Choose an audio file:</label>
756
+ <input type="file" id="file" name="file" accept="audio/*" required><br>
757
+
758
+ <label for="temperature">Temperature:</label>
759
+ <input type="number" id="temperature" name="temperature" value="0.0" step="0.01" placeholder="e.g., 0.0"><br>
760
+
761
+ <label for="response_format">Response Format:</label>
762
+ <select id="response_format" name="response_format">
763
+ <option value="verbose_json">Verbose JSON</option>
764
+ <option value="json">JSON</option>
765
+ <option value="text">Text</option>
766
+ <option value="srt">SRT</option>
767
+ <option value="vtt">VTT</option>
768
+ </select><br>
769
+
770
+ <button type="submit">Submit</button>
771
+ </form>
772
+ </div>
773
+ </body>
774
+ </html>
775
+ )";
776
+
777
+ // store default params so we can reset after each inference request
778
+ whisper_params default_params = params;
779
+
780
+ // this is only called if no index.html is found in the public --path
781
+ svr->Get(sparams.request_path + "/", [&](const Request &, Response &res){
782
+ res.set_content(default_content, "text/html");
783
+ return false;
784
+ });
785
+
786
+ svr->Options(sparams.request_path + sparams.inference_path, [&](const Request &, Response &){
787
+ });
788
+
789
+ svr->Post(sparams.request_path + sparams.inference_path, [&](const Request &req, Response &res){
790
+ // acquire whisper model mutex lock
791
+ std::lock_guard<std::mutex> lock(whisper_mutex);
792
+
793
+ // first check user requested fields of the request
794
+ if (!req.has_file("file"))
795
+ {
796
+ fprintf(stderr, "error: no 'file' field in the request\n");
797
+ const std::string error_resp = "{\"error\":\"no 'file' field in the request\"}";
798
+ res.set_content(error_resp, "application/json");
799
+ return;
800
+ }
801
+ auto audio_file = req.get_file_value("file");
802
+
803
+ // check non-required fields
804
+ get_req_parameters(req, params);
805
+
806
+ std::string filename{audio_file.filename};
807
+ printf("Received request: %s\n", filename.c_str());
808
+
809
+ // audio arrays
810
+ std::vector<float> pcmf32; // mono-channel F32 PCM
811
+ std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
812
+
813
+ if (sparams.ffmpeg_converter) {
814
+ // if file is not wav, convert to wav
815
+ // write to temporary file
816
+ const std::string temp_filename = generate_temp_filename("whisper-server", ".wav");
817
+ std::ofstream temp_file{temp_filename, std::ios::binary};
818
+ temp_file << audio_file.content;
819
+ temp_file.close();
820
+
821
+ std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
822
+ const bool is_converted = convert_to_wav(temp_filename, error_resp);
823
+ if (!is_converted) {
824
+ res.set_content(error_resp, "application/json");
825
+ return;
826
+ }
827
+
828
+ // read audio content into pcmf32
829
+ if (!::read_audio_data(temp_filename, pcmf32, pcmf32s, params.diarize))
830
+ {
831
+ fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
832
+ const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
833
+ res.set_content(error_resp, "application/json");
834
+ std::remove(temp_filename.c_str());
835
+ return;
836
+ }
837
+ // remove temp file
838
+ std::remove(temp_filename.c_str());
839
+ } else {
840
+ if (!::read_audio_data(audio_file.content, pcmf32, pcmf32s, params.diarize))
841
+ {
842
+ fprintf(stderr, "error: failed to read audio data\n");
843
+ const std::string error_resp = "{\"error\":\"failed to read audio data\"}";
844
+ res.set_content(error_resp, "application/json");
845
+ return;
846
+ }
847
+ }
848
+
849
+ printf("Successfully loaded %s\n", filename.c_str());
850
+
851
+ // print system information
852
+ {
853
+ fprintf(stderr, "\n");
854
+ fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
855
+ params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
856
+ }
857
+
858
+ // print some info about the processing
859
+ {
860
+ fprintf(stderr, "\n");
861
+ if (!whisper_is_multilingual(ctx)) {
862
+ if (params.language != "en" || params.translate) {
863
+ params.language = "en";
864
+ params.translate = false;
865
+ fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
866
+ }
867
+ }
868
+ if (params.detect_language) {
869
+ params.language = "auto";
870
+ }
871
+ fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
872
+ __func__, filename.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
873
+ params.n_threads, params.n_processors,
874
+ params.language.c_str(),
875
+ params.translate ? "translate" : "transcribe",
876
+ params.tinydiarize ? "tdrz = 1, " : "",
877
+ params.no_timestamps ? 0 : 1);
878
+
879
+ fprintf(stderr, "\n");
880
+ }
881
+
882
+ // run the inference
883
+ {
884
+ printf("Running whisper.cpp inference on %s\n", filename.c_str());
885
+ whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
886
+
887
+ wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
888
+
889
+ wparams.print_realtime = false;
890
+ wparams.print_progress = params.print_progress;
891
+ wparams.print_timestamps = !params.no_timestamps;
892
+ wparams.print_special = params.print_special;
893
+ wparams.translate = params.translate;
894
+ wparams.language = params.language.c_str();
895
+ wparams.detect_language = params.detect_language;
896
+ wparams.n_threads = params.n_threads;
897
+ wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
898
+ wparams.offset_ms = params.offset_t_ms;
899
+ wparams.duration_ms = params.duration_ms;
900
+
901
+ wparams.thold_pt = params.word_thold;
902
+ wparams.max_len = params.max_len == 0 ? 60 : params.max_len;
903
+ wparams.split_on_word = params.split_on_word;
904
+ wparams.audio_ctx = params.audio_ctx;
905
+
906
+ wparams.debug_mode = params.debug_mode;
907
+
908
+ wparams.tdrz_enable = params.tinydiarize; // [TDRZ]
909
+
910
+ wparams.initial_prompt = params.prompt.c_str();
911
+
912
+ wparams.greedy.best_of = params.best_of;
913
+ wparams.beam_search.beam_size = params.beam_size;
914
+
915
+ wparams.temperature = params.temperature;
916
+ wparams.no_speech_thold = params.no_speech_thold;
917
+ wparams.temperature_inc = params.temperature_inc;
918
+ wparams.entropy_thold = params.entropy_thold;
919
+ wparams.logprob_thold = params.logprob_thold;
920
+
921
+ wparams.no_timestamps = params.no_timestamps;
922
+ wparams.token_timestamps = !params.no_timestamps && params.response_format == vjson_format;
923
+ wparams.no_context = params.no_context;
924
+
925
+ wparams.suppress_nst = params.suppress_nst;
926
+
927
+ wparams.vad = params.vad;
928
+ wparams.vad_model_path = params.vad_model.c_str();
929
+
930
+ wparams.vad_params.threshold = params.vad_threshold;
931
+ wparams.vad_params.min_speech_duration_ms = params.vad_min_speech_duration_ms;
932
+ wparams.vad_params.min_silence_duration_ms = params.vad_min_silence_duration_ms;
933
+ wparams.vad_params.max_speech_duration_s = params.vad_max_speech_duration_s;
934
+ wparams.vad_params.speech_pad_ms = params.vad_speech_pad_ms;
935
+ wparams.vad_params.samples_overlap = params.vad_samples_overlap;
936
+
937
+ whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
938
+
939
+ // this callback is called on each new segment
940
+ if (params.print_realtime) {
941
+ wparams.new_segment_callback = whisper_print_segment_callback;
942
+ wparams.new_segment_callback_user_data = &user_data;
943
+ }
944
+
945
+ if (wparams.print_progress) {
946
+ wparams.progress_callback = whisper_print_progress_callback;
947
+ wparams.progress_callback_user_data = &user_data;
948
+ }
949
+
950
+ // tell whisper to abort if the HTTP connection closed
951
+ wparams.abort_callback = [](void *user_data) {
952
+ // user_data is a pointer to our Request
953
+ auto req_ptr = static_cast<const httplib::Request*>(user_data);
954
+ return req_ptr->is_connection_closed();
955
+ };
956
+ wparams.abort_callback_user_data = (void*)&req;
957
+
958
+ if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
959
+ // handle failure or early abort
960
+ if (req.is_connection_closed()) {
961
+ // log client disconnect
962
+ fprintf(stderr, "client disconnected, aborted processing\n");
963
+ res.status = 499; // Client Closed Request (nginx convention)
964
+ res.set_content("{\"error\":\"client disconnected\"}", "application/json");
965
+ return;
966
+ }
967
+ fprintf(stderr, "%s: failed to process audio\n", argv[0]);
968
+ res.status = 500; // Internal Server Error
969
+ const std::string error_resp = "{\"error\":\"failed to process audio\"}";
970
+ res.set_content(error_resp, "application/json");
971
+ return;
972
+ }
973
+ }
974
+
975
+ // return results to user
976
+ if (params.response_format == text_format)
977
+ {
978
+ std::string results = output_str(ctx, params, pcmf32s);
979
+ res.set_content(results.c_str(), "text/html; charset=utf-8");
980
+ }
981
+ else if (params.response_format == srt_format)
982
+ {
983
+ std::stringstream ss;
984
+ const int n_segments = whisper_full_n_segments(ctx);
985
+ for (int i = 0; i < n_segments; ++i) {
986
+ const char * text = whisper_full_get_segment_text(ctx, i);
987
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
988
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
989
+ std::string speaker = "";
990
+
991
+ if (params.diarize && pcmf32s.size() == 2)
992
+ {
993
+ speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
994
+ }
995
+
996
+ ss << i + 1 + params.offset_n << "\n";
997
+ ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
998
+ ss << speaker << text << "\n\n";
999
+ }
1000
+ res.set_content(ss.str(), "application/x-subrip");
1001
+ } else if (params.response_format == vtt_format) {
1002
+ std::stringstream ss;
1003
+
1004
+ ss << "WEBVTT\n\n";
1005
+
1006
+ const int n_segments = whisper_full_n_segments(ctx);
1007
+ for (int i = 0; i < n_segments; ++i) {
1008
+ const char * text = whisper_full_get_segment_text(ctx, i);
1009
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
1010
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
1011
+ std::string speaker = "";
1012
+
1013
+ if (params.diarize && pcmf32s.size() == 2)
1014
+ {
1015
+ speaker = estimate_diarization_speaker(pcmf32s, t0, t1, true);
1016
+ speaker.insert(0, "<v Speaker");
1017
+ speaker.append(">");
1018
+ }
1019
+
1020
+ ss << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
1021
+ ss << speaker << text << "\n\n";
1022
+ }
1023
+ res.set_content(ss.str(), "text/vtt");
1024
+ } else if (params.response_format == vjson_format) {
1025
+ /* try to match openai/whisper's Python format */
1026
+ std::string results = output_str(ctx, params, pcmf32s);
1027
+ // Get language probabilities
1028
+ std::vector<float> lang_probs(whisper_lang_max_id() + 1, 0.0f);
1029
+ const auto detected_lang_id = whisper_lang_auto_detect(ctx, 0, params.n_threads, lang_probs.data());
1030
+ json jres = json{
1031
+ {"task", params.translate ? "translate" : "transcribe"},
1032
+ {"language", whisper_lang_str_full(whisper_full_lang_id(ctx))},
1033
+ {"duration", float(pcmf32.size())/WHISPER_SAMPLE_RATE},
1034
+ {"text", results},
1035
+ {"segments", json::array()},
1036
+ {"detected_language", whisper_lang_str_full(detected_lang_id)},
1037
+ {"detected_language_probability", lang_probs[detected_lang_id]},
1038
+ {"language_probabilities", json::object()}
1039
+ };
1040
+ // Add all language probabilities
1041
+ for (int i = 0; i <= whisper_lang_max_id(); ++i) {
1042
+ if (lang_probs[i] > 0.001f) { // Only include non-negligible probabilities
1043
+ jres["language_probabilities"][whisper_lang_str(i)] = lang_probs[i];
1044
+ }
1045
+ }
1046
+ const int n_segments = whisper_full_n_segments(ctx);
1047
+ for (int i = 0; i < n_segments; ++i)
1048
+ {
1049
+ json segment = json{
1050
+ {"id", i},
1051
+ {"text", whisper_full_get_segment_text(ctx, i)},
1052
+ };
1053
+
1054
+ if (!params.no_timestamps) {
1055
+ segment["start"] = whisper_full_get_segment_t0(ctx, i) * 0.01;
1056
+ segment["end"] = whisper_full_get_segment_t1(ctx, i) * 0.01;
1057
+ }
1058
+
1059
+ float total_logprob = 0;
1060
+ const int n_tokens = whisper_full_n_tokens(ctx, i);
1061
+ for (int j = 0; j < n_tokens; ++j) {
1062
+ whisper_token_data token = whisper_full_get_token_data(ctx, i, j);
1063
+ if (token.id >= whisper_token_eot(ctx)) {
1064
+ continue;
1065
+ }
1066
+
1067
+ segment["tokens"].push_back(token.id);
1068
+ json word = json{{"word", whisper_full_get_token_text(ctx, i, j)}};
1069
+ if (!params.no_timestamps) {
1070
+ word["start"] = token.t0 * 0.01;
1071
+ word["end"] = token.t1 * 0.01;
1072
+ word["t_dtw"] = token.t_dtw;
1073
+ }
1074
+ word["probability"] = token.p;
1075
+ total_logprob += token.plog;
1076
+ segment["words"].push_back(word);
1077
+ }
1078
+
1079
+ segment["temperature"] = params.temperature;
1080
+ segment["avg_logprob"] = total_logprob / n_tokens;
1081
+
1082
+ // TODO compression_ratio is not implemented yet (no_speech_prob is filled in below)
1083
+ // segment["compression_ratio"] = 0;
1084
+ segment["no_speech_prob"] = whisper_full_get_segment_no_speech_prob(ctx, i);
1085
+
1086
+ jres["segments"].push_back(segment);
1087
+ }
1088
+ res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace),
1089
+ "application/json");
1090
+ }
1091
+ // TODO add more output formats
1092
+ else
1093
+ {
1094
+ std::string results = output_str(ctx, params, pcmf32s);
1095
+ json jres = json{
1096
+ {"text", results}
1097
+ };
1098
+ res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace),
1099
+ "application/json");
1100
+ }
1101
+
1102
+ // reset params to their defaults
1103
+ params = default_params;
1104
+ });
1105
+ svr->Post(sparams.request_path + "/load", [&](const Request &req, Response &res){
1106
+ std::lock_guard<std::mutex> lock(whisper_mutex);
1107
+ state.store(SERVER_STATE_LOADING_MODEL);
1108
+ if (!req.has_file("model"))
1109
+ {
1110
+ fprintf(stderr, "error: no 'model' field in the request\n");
1111
+ const std::string error_resp = "{\"error\":\"no 'model' field in the request\"}";
1112
+ res.set_content(error_resp, "application/json");
1113
+ return;
1114
+ }
1115
+ std::string model = req.get_file_value("model").content;
1116
+ if (!is_file_exist(model.c_str()))
1117
+ {
1118
+ fprintf(stderr, "error: 'model': %s not found!\n", model.c_str());
1119
+ const std::string error_resp = "{\"error\":\"model not found!\"}";
1120
+ res.set_content(error_resp, "application/json");
1121
+ return;
1122
+ }
1123
+
1124
+ // clean up
1125
+ whisper_free(ctx);
1126
+
1127
+ // whisper init
1128
+ ctx = whisper_init_from_file_with_params(model.c_str(), cparams);
1129
+
1130
+ // TODO perhaps load prior model here instead of exit
1131
+ if (ctx == nullptr) {
1132
+ fprintf(stderr, "error: model init failed, no model loaded must exit\n");
1133
+ exit(1);
1134
+ }
1135
+
1136
+ // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
1137
+ whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
1138
+
1139
+ state.store(SERVER_STATE_READY);
1140
+ const std::string success = "Load was successful!";
1141
+ res.set_content(success, "application/text");
1142
+
1143
+ // note: the check that the model is in the file system is done above (is_file_exist), before the old context is freed
1144
+ });
1145
+
1146
+ svr->Get(sparams.request_path + "/health", [&](const Request &, Response &res){
1147
+ server_state current_state = state.load();
1148
+ if (current_state == SERVER_STATE_READY) {
1149
+ const std::string health_response = "{\"status\":\"ok\"}";
1150
+ res.set_content(health_response, "application/json");
1151
+ } else {
1152
+ res.set_content("{\"status\":\"loading model\"}", "application/json");
1153
+ res.status = 503;
1154
+ }
1155
+ });
1156
+
1157
+ svr->set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
1158
+ const char fmt[] = "500 Internal Server Error\n%s";
1159
+ char buf[BUFSIZ];
1160
+ try {
1161
+ std::rethrow_exception(std::move(ep));
1162
+ } catch (std::exception &e) {
1163
+ snprintf(buf, sizeof(buf), fmt, e.what());
1164
+ } catch (...) {
1165
+ snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
1166
+ }
1167
+ res.set_content(buf, "text/plain");
1168
+ res.status = 500;
1169
+ });
1170
+
1171
+ svr->set_error_handler([](const Request &req, Response &res) {
1172
+ if (res.status == 400) {
1173
+ res.set_content("Invalid request", "text/plain");
1174
+ } else if (res.status != 500) {
1175
+ res.set_content("File Not Found (" + req.path + ")", "text/plain");
1176
+ res.status = 404;
1177
+ }
1178
+ });
1179
+
1180
+ // set timeouts and change hostname and port
1181
+ svr->set_read_timeout(sparams.read_timeout);
1182
+ svr->set_write_timeout(sparams.write_timeout);
1183
+
1184
+ if (!svr->bind_to_port(sparams.hostname, sparams.port))
1185
+ {
1186
+ fprintf(stderr, "\ncouldn't bind to server socket: hostname=%s port=%d\n\n",
1187
+ sparams.hostname.c_str(), sparams.port);
1188
+ return 1;
1189
+ }
1190
+
1191
+ // Set the base directory for serving static files
1192
+ svr->set_base_dir(sparams.public_path);
1193
+
1194
+ // to make it ctrl+clickable:
1195
+ printf("\nwhisper server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port);
1196
+
1197
+ shutdown_handler = [&](int signal) {
1198
+ printf("\nCaught signal %d, shutting down gracefully...\n", signal);
1199
+ if (svr) {
1200
+ svr->stop();
1201
+ }
1202
+ };
1203
+
1204
+ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
1205
+ struct sigaction sigint_action;
1206
+ sigint_action.sa_handler = signal_handler;
1207
+ sigemptyset (&sigint_action.sa_mask);
1208
+ sigint_action.sa_flags = 0;
1209
+ sigaction(SIGINT, &sigint_action, NULL);
1210
+ sigaction(SIGTERM, &sigint_action, NULL);
1211
+ #elif defined (_WIN32)
1212
+ auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
1213
+ return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
1214
+ };
1215
+ SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
1216
+ #endif
1217
+
1218
+ // clean up function, to be called before exit
1219
+ auto clean_up = [&]() {
1220
+ whisper_print_timings(ctx);
1221
+ whisper_free(ctx);
1222
+ };
1223
+
1224
+ std::thread t([&] {
1225
+ if (!svr->listen_after_bind()) {
1226
+ fprintf(stderr, "error: server listen failed\n");
1227
+ }
1228
+ });
1229
+
1230
+ svr->wait_until_ready();
1231
+
1232
+ t.join();
1233
+
1234
+
1235
+ clean_up();
1236
+
1237
+ return 0;
1238
+ }