whispercpp 1.3.5 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (610)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/README.md +99 -2
  4. data/ext/extconf.rb +1 -0
  5. data/ext/ruby_whisper.c +20 -4
  6. data/ext/ruby_whisper.h +30 -2
  7. data/ext/ruby_whisper_context.c +216 -124
  8. data/ext/ruby_whisper_context_params.c +163 -0
  9. data/ext/ruby_whisper_model.c +0 -1
  10. data/ext/ruby_whisper_params.c +0 -1
  11. data/ext/ruby_whisper_segment.c +0 -1
  12. data/ext/ruby_whisper_token.c +29 -9
  13. data/ext/ruby_whisper_transcribe.cpp +4 -1
  14. data/ext/ruby_whisper_vad_context.c +48 -1
  15. data/ext/ruby_whisper_vad_context_detect.cpp +6 -5
  16. data/ext/ruby_whisper_vad_params.c +0 -1
  17. data/ext/ruby_whisper_vad_segment.c +0 -1
  18. data/ext/ruby_whisper_vad_segments.c +0 -1
  19. data/ext/sources/CMakeLists.txt +1 -1
  20. data/ext/sources/bindings/javascript/package.json +1 -1
  21. data/ext/sources/cmake/whisper-config.cmake.in +5 -40
  22. data/ext/sources/examples/bench/bench.cpp +23 -18
  23. data/ext/sources/examples/cli/cli.cpp +8 -0
  24. data/ext/sources/examples/common-ggml.cpp +2 -0
  25. data/ext/sources/examples/miniaudio.h +4507 -2131
  26. data/ext/sources/examples/server/server.cpp +18 -4
  27. data/ext/sources/examples/talk-llama/CMakeLists.txt +3 -2
  28. data/ext/sources/examples/talk-llama/llama-adapter.cpp +7 -13
  29. data/ext/sources/examples/talk-llama/llama-adapter.h +4 -3
  30. data/ext/sources/examples/talk-llama/llama-arch.cpp +335 -17
  31. data/ext/sources/examples/talk-llama/llama-arch.h +42 -0
  32. data/ext/sources/examples/talk-llama/llama-batch.cpp +3 -1
  33. data/ext/sources/examples/talk-llama/llama-chat.cpp +21 -1
  34. data/ext/sources/examples/talk-llama/llama-chat.h +1 -0
  35. data/ext/sources/examples/talk-llama/llama-context.cpp +508 -520
  36. data/ext/sources/examples/talk-llama/llama-context.h +27 -28
  37. data/ext/sources/examples/talk-llama/llama-cparams.h +5 -0
  38. data/ext/sources/examples/talk-llama/llama-ext.h +12 -0
  39. data/ext/sources/examples/talk-llama/llama-grammar.cpp +8 -8
  40. data/ext/sources/examples/talk-llama/llama-graph.cpp +583 -130
  41. data/ext/sources/examples/talk-llama/llama-graph.h +131 -10
  42. data/ext/sources/examples/talk-llama/llama-hparams.cpp +57 -40
  43. data/ext/sources/examples/talk-llama/llama-hparams.h +79 -10
  44. data/ext/sources/examples/talk-llama/llama-impl.cpp +4 -4
  45. data/ext/sources/examples/talk-llama/llama-impl.h +13 -1
  46. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +3 -1
  47. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +274 -89
  48. data/ext/sources/examples/talk-llama/llama-kv-cache.h +2 -3
  49. data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.cpp +275 -0
  50. data/ext/sources/examples/talk-llama/llama-memory-hybrid-iswa.h +140 -0
  51. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +11 -13
  52. data/ext/sources/examples/talk-llama/llama-mmap.cpp +28 -11
  53. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +527 -119
  54. data/ext/sources/examples/talk-llama/llama-model-loader.h +35 -5
  55. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +60 -46
  56. data/ext/sources/examples/talk-llama/llama-model-saver.h +5 -2
  57. data/ext/sources/examples/talk-llama/llama-model.cpp +1365 -647
  58. data/ext/sources/examples/talk-llama/llama-model.h +72 -19
  59. data/ext/sources/examples/talk-llama/llama-quant.cpp +578 -346
  60. data/ext/sources/examples/talk-llama/{llama-sampling.cpp → llama-sampler.cpp} +190 -76
  61. data/ext/sources/examples/talk-llama/{llama-sampling.h → llama-sampler.h} +0 -2
  62. data/ext/sources/examples/talk-llama/llama-vocab.cpp +118 -48
  63. data/ext/sources/examples/talk-llama/llama-vocab.h +5 -0
  64. data/ext/sources/examples/talk-llama/llama.cpp +76 -22
  65. data/ext/sources/examples/talk-llama/llama.h +63 -30
  66. data/ext/sources/examples/talk-llama/models/afmoe.cpp +2 -3
  67. data/ext/sources/examples/talk-llama/models/apertus.cpp +3 -3
  68. data/ext/sources/examples/talk-llama/models/arcee.cpp +3 -3
  69. data/ext/sources/examples/talk-llama/models/arctic.cpp +4 -5
  70. data/ext/sources/examples/talk-llama/models/baichuan.cpp +4 -3
  71. data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +1 -2
  72. data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +3 -5
  73. data/ext/sources/examples/talk-llama/models/bert.cpp +13 -7
  74. data/ext/sources/examples/talk-llama/models/bitnet.cpp +9 -24
  75. data/ext/sources/examples/talk-llama/models/bloom.cpp +2 -2
  76. data/ext/sources/examples/talk-llama/models/chameleon.cpp +3 -3
  77. data/ext/sources/examples/talk-llama/models/chatglm.cpp +2 -2
  78. data/ext/sources/examples/talk-llama/models/codeshell.cpp +3 -3
  79. data/ext/sources/examples/talk-llama/models/cogvlm.cpp +3 -3
  80. data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +2 -2
  81. data/ext/sources/examples/talk-llama/models/command-r.cpp +2 -2
  82. data/ext/sources/examples/talk-llama/models/dbrx.cpp +4 -5
  83. data/ext/sources/examples/talk-llama/models/deci.cpp +3 -3
  84. data/ext/sources/examples/talk-llama/models/deepseek.cpp +4 -6
  85. data/ext/sources/examples/talk-llama/models/deepseek2.cpp +24 -21
  86. data/ext/sources/examples/talk-llama/models/delta-net-base.cpp +445 -0
  87. data/ext/sources/examples/talk-llama/models/dots1.cpp +4 -6
  88. data/ext/sources/examples/talk-llama/models/dream.cpp +3 -3
  89. data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +4 -6
  90. data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +3 -3
  91. data/ext/sources/examples/talk-llama/models/eurobert.cpp +97 -0
  92. data/ext/sources/examples/talk-llama/models/exaone-moe.cpp +145 -0
  93. data/ext/sources/examples/talk-llama/models/exaone.cpp +3 -3
  94. data/ext/sources/examples/talk-llama/models/exaone4.cpp +3 -3
  95. data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +2 -4
  96. data/ext/sources/examples/talk-llama/models/falcon.cpp +3 -3
  97. data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +1 -1
  98. data/ext/sources/examples/talk-llama/models/gemma.cpp +1 -1
  99. data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +1 -1
  100. data/ext/sources/examples/talk-llama/models/gemma3.cpp +1 -1
  101. data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +7 -7
  102. data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +3 -3
  103. data/ext/sources/examples/talk-llama/models/glm4.cpp +14 -7
  104. data/ext/sources/examples/talk-llama/models/gpt2.cpp +2 -2
  105. data/ext/sources/examples/talk-llama/models/gptneox.cpp +2 -2
  106. data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +4 -5
  107. data/ext/sources/examples/talk-llama/models/granite.cpp +4 -5
  108. data/ext/sources/examples/talk-llama/models/grok.cpp +4 -4
  109. data/ext/sources/examples/talk-llama/models/grovemoe.cpp +5 -7
  110. data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +3 -3
  111. data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +4 -5
  112. data/ext/sources/examples/talk-llama/models/internlm2.cpp +3 -3
  113. data/ext/sources/examples/talk-llama/models/jais.cpp +2 -2
  114. data/ext/sources/examples/talk-llama/models/jais2.cpp +123 -0
  115. data/ext/sources/examples/talk-llama/models/jamba.cpp +3 -3
  116. data/ext/sources/examples/talk-llama/models/kimi-linear.cpp +381 -0
  117. data/ext/sources/examples/talk-llama/models/lfm2.cpp +145 -124
  118. data/ext/sources/examples/talk-llama/models/llada-moe.cpp +4 -4
  119. data/ext/sources/examples/talk-llama/models/llada.cpp +3 -3
  120. data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +4 -4
  121. data/ext/sources/examples/talk-llama/models/llama.cpp +18 -11
  122. data/ext/sources/examples/talk-llama/models/maincoder.cpp +3 -3
  123. data/ext/sources/examples/talk-llama/models/{graph-context-mamba.cpp → mamba-base.cpp} +9 -3
  124. data/ext/sources/examples/talk-llama/models/mamba.cpp +1 -2
  125. data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +11 -5
  126. data/ext/sources/examples/talk-llama/models/minicpm3.cpp +14 -13
  127. data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +4 -5
  128. data/ext/sources/examples/talk-llama/models/mistral3.cpp +4 -4
  129. data/ext/sources/examples/talk-llama/models/models.h +181 -46
  130. data/ext/sources/examples/talk-llama/models/modern-bert.cpp +2 -9
  131. data/ext/sources/examples/talk-llama/models/mpt.cpp +2 -2
  132. data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +26 -14
  133. data/ext/sources/examples/talk-llama/models/nemotron.cpp +3 -3
  134. data/ext/sources/examples/talk-llama/models/neo-bert.cpp +2 -2
  135. data/ext/sources/examples/talk-llama/models/olmo.cpp +3 -3
  136. data/ext/sources/examples/talk-llama/models/olmo2.cpp +3 -3
  137. data/ext/sources/examples/talk-llama/models/olmoe.cpp +4 -4
  138. data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +1 -1
  139. data/ext/sources/examples/talk-llama/models/openelm.cpp +3 -3
  140. data/ext/sources/examples/talk-llama/models/orion.cpp +3 -3
  141. data/ext/sources/examples/talk-llama/models/paddleocr.cpp +122 -0
  142. data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +3 -3
  143. data/ext/sources/examples/talk-llama/models/phi2.cpp +2 -2
  144. data/ext/sources/examples/talk-llama/models/phi3.cpp +3 -3
  145. data/ext/sources/examples/talk-llama/models/plamo.cpp +3 -3
  146. data/ext/sources/examples/talk-llama/models/plamo2.cpp +9 -5
  147. data/ext/sources/examples/talk-llama/models/plamo3.cpp +2 -2
  148. data/ext/sources/examples/talk-llama/models/plm.cpp +15 -14
  149. data/ext/sources/examples/talk-llama/models/qwen.cpp +2 -2
  150. data/ext/sources/examples/talk-llama/models/qwen2.cpp +3 -3
  151. data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +4 -4
  152. data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +3 -3
  153. data/ext/sources/examples/talk-llama/models/qwen3.cpp +12 -9
  154. data/ext/sources/examples/talk-llama/models/qwen35.cpp +381 -0
  155. data/ext/sources/examples/talk-llama/models/qwen35moe.cpp +422 -0
  156. data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +15 -8
  157. data/ext/sources/examples/talk-llama/models/qwen3next.cpp +84 -432
  158. data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +9 -18
  159. data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +8 -17
  160. data/ext/sources/examples/talk-llama/models/refact.cpp +2 -2
  161. data/ext/sources/examples/talk-llama/models/rnd1.cpp +4 -4
  162. data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +2 -0
  163. data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +2 -0
  164. data/ext/sources/examples/talk-llama/models/seed-oss.cpp +3 -3
  165. data/ext/sources/examples/talk-llama/models/smallthinker.cpp +4 -4
  166. data/ext/sources/examples/talk-llama/models/smollm3.cpp +3 -3
  167. data/ext/sources/examples/talk-llama/models/stablelm.cpp +2 -2
  168. data/ext/sources/examples/talk-llama/models/starcoder.cpp +2 -2
  169. data/ext/sources/examples/talk-llama/models/starcoder2.cpp +3 -3
  170. data/ext/sources/examples/talk-llama/models/step35-iswa.cpp +165 -0
  171. data/ext/sources/examples/talk-llama/models/t5-dec.cpp +2 -2
  172. data/ext/sources/examples/talk-llama/models/t5-enc.cpp +2 -2
  173. data/ext/sources/examples/talk-llama/models/xverse.cpp +3 -3
  174. data/ext/sources/examples/talk-llama/unicode.cpp +21 -65
  175. data/ext/sources/ggml/CMakeLists.txt +9 -3
  176. data/ext/sources/ggml/include/ggml-backend.h +1 -1
  177. data/ext/sources/ggml/include/ggml-cann.h +1 -1
  178. data/ext/sources/ggml/include/ggml-cpu.h +5 -0
  179. data/ext/sources/ggml/include/ggml-openvino.h +37 -0
  180. data/ext/sources/ggml/include/ggml-opt.h +1 -1
  181. data/ext/sources/ggml/include/ggml-rpc.h +6 -1
  182. data/ext/sources/ggml/include/ggml-virtgpu.h +14 -0
  183. data/ext/sources/ggml/include/ggml.h +56 -9
  184. data/ext/sources/ggml/src/CMakeLists.txt +3 -0
  185. data/ext/sources/ggml/src/ggml-alloc.c +4 -9
  186. data/ext/sources/ggml/src/ggml-backend-dl.cpp +48 -0
  187. data/ext/sources/ggml/src/ggml-backend-dl.h +45 -0
  188. data/ext/sources/ggml/src/ggml-backend-reg.cpp +28 -86
  189. data/ext/sources/ggml/src/ggml-backend.cpp +5 -2
  190. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +1 -1
  191. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +6 -2
  192. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +1 -1
  193. data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +1 -1
  194. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +348 -189
  195. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +40 -85
  196. data/ext/sources/ggml/src/ggml-cann/common.h +3 -4
  197. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +44 -62
  198. data/ext/sources/ggml/src/ggml-common.h +11 -0
  199. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +16 -11
  200. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +42 -19
  201. data/ext/sources/ggml/src/ggml-cpu/amx/common.h +34 -10
  202. data/ext/sources/ggml/src/ggml-cpu/amx/mmq.cpp +85 -85
  203. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +85 -1
  204. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +2744 -548
  205. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +1653 -0
  206. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/repack.cpp +1391 -0
  207. data/ext/sources/ggml/src/ggml-cpu/arch/s390/quants.c +8 -10
  208. data/ext/sources/ggml/src/ggml-cpu/arch/x86/quants.c +9 -9
  209. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +118 -18
  210. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +107 -26
  211. data/ext/sources/ggml/src/ggml-cpu/binary-ops.cpp +2 -6
  212. data/ext/sources/ggml/src/ggml-cpu/common.h +8 -0
  213. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -0
  214. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +59 -12
  215. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +15 -0
  216. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
  217. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
  218. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +584 -197
  219. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +903 -188
  220. data/ext/sources/ggml/src/ggml-cpu/ops.h +1 -0
  221. data/ext/sources/ggml/src/ggml-cpu/quants.c +40 -0
  222. data/ext/sources/ggml/src/ggml-cpu/quants.h +3 -0
  223. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +2890 -679
  224. data/ext/sources/ggml/src/ggml-cpu/repack.h +119 -8
  225. data/ext/sources/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  226. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +111 -3
  227. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +1 -1
  228. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +17 -0
  229. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
  230. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +19 -10
  231. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +32 -30
  232. data/ext/sources/ggml/src/ggml-cuda/common.cuh +134 -18
  233. data/ext/sources/ggml/src/ggml-cuda/convert.cu +41 -27
  234. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +6 -3
  235. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +78 -64
  236. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +384 -143
  237. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +36 -22
  238. data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +3 -3
  239. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +26 -5
  240. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +1 -1
  241. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +127 -12
  242. data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cu +263 -0
  243. data/ext/sources/ggml/src/ggml-cuda/gated_delta_net.cuh +4 -0
  244. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +595 -200
  245. data/ext/sources/ggml/src/ggml-cuda/mean.cu +9 -8
  246. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +173 -6
  247. data/ext/sources/ggml/src/ggml-cuda/mmf.cu +30 -10
  248. data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +158 -85
  249. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +34 -22
  250. data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +127 -67
  251. data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +2 -0
  252. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +157 -65
  253. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -0
  254. data/ext/sources/ggml/src/ggml-cuda/norm.cu +18 -76
  255. data/ext/sources/ggml/src/ggml-cuda/pad.cu +13 -10
  256. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +1 -1
  257. data/ext/sources/ggml/src/ggml-cuda/reduce_rows.cuh +2 -16
  258. data/ext/sources/ggml/src/ggml-cuda/rope.cu +233 -133
  259. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +8 -83
  260. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +1 -1
  261. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +56 -32
  262. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cuh +1 -1
  263. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_32.cu +5 -0
  264. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +1 -0
  265. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_32.cu +5 -0
  266. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +1 -0
  267. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +1 -0
  268. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +1 -0
  269. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +3 -3
  270. data/ext/sources/ggml/src/ggml-cuda/top-k.cu +0 -1
  271. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +199 -135
  272. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +20 -14
  273. data/ext/sources/ggml/src/ggml-cuda/unary.cu +55 -0
  274. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +2 -0
  275. data/ext/sources/ggml/src/ggml-cuda/vecdotq.cuh +31 -17
  276. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +10 -0
  277. data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +82 -45
  278. data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +334 -160
  279. data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +7 -5
  280. data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +328 -197
  281. data/ext/sources/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  282. data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +765 -234
  283. data/ext/sources/ggml/src/ggml-hexagon/htp/cpy-ops.c +252 -0
  284. data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +412 -265
  285. data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +23 -23
  286. data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.c → hex-dma.c} +1 -1
  287. data/ext/sources/ggml/src/ggml-hexagon/htp/{htp-dma.h → hex-dma.h} +28 -3
  288. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  289. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  290. data/ext/sources/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  291. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +1 -1
  292. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +27 -37
  293. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +6 -35
  294. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-arith.h +443 -0
  295. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-base.h +240 -0
  296. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  297. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-div.h +251 -0
  298. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  299. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  300. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  301. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.h +210 -0
  302. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-reduce.h +296 -0
  303. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  304. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  305. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  306. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  307. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +20 -1347
  308. data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +211 -13
  309. data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +1119 -952
  310. data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +254 -244
  311. data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +36 -36
  312. data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +155 -138
  313. data/ext/sources/ggml/src/ggml-hexagon/htp/ssm-conv.c +339 -0
  314. data/ext/sources/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +128 -0
  315. data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +209 -114
  316. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +1 -5
  317. data/ext/sources/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  318. data/ext/sources/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  319. data/ext/sources/ggml/src/ggml-hexagon/libdl.h +79 -0
  320. data/ext/sources/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  321. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +6 -0
  322. data/ext/sources/ggml/src/ggml-impl.h +62 -0
  323. data/ext/sources/ggml/src/ggml-metal/CMakeLists.txt +10 -10
  324. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +13 -2
  325. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.h +8 -0
  326. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +147 -17
  327. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +274 -73
  328. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +22 -4
  329. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +102 -36
  330. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +174 -23
  331. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +580 -280
  332. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +5 -4
  333. data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +320 -107
  334. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1068 -825
  335. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +19 -1
  336. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +3108 -636
  337. data/ext/sources/ggml/src/ggml-opencl/kernels/concat.cl +41 -99
  338. data/ext/sources/ggml/src/ggml-opencl/kernels/cpy.cl +45 -0
  339. data/ext/sources/ggml/src/ggml-opencl/kernels/cumsum.cl +139 -0
  340. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +204 -0
  341. data/ext/sources/ggml/src/ggml-opencl/kernels/diag.cl +27 -0
  342. data/ext/sources/ggml/src/ggml-opencl/kernels/exp.cl +125 -0
  343. data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +87 -56
  344. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_noshuffle_q4_1_f32.cl +132 -0
  345. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  346. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_noshuffle_q4_1_f32.cl +283 -0
  347. data/ext/sources/ggml/src/ggml-opencl/kernels/l2_norm.cl +71 -0
  348. data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +114 -13
  349. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  350. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  351. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  352. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  353. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  354. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  355. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  356. data/ext/sources/ggml/src/ggml-opencl/kernels/{mul_mv_q6_k.cl → mul_mv_q6_k_f32.cl} +4 -0
  357. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  358. data/ext/sources/ggml/src/ggml-opencl/kernels/neg.cl +125 -0
  359. data/ext/sources/ggml/src/ggml-opencl/kernels/repeat.cl +31 -32
  360. data/ext/sources/ggml/src/ggml-opencl/kernels/scale.cl +14 -4
  361. data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -60
  362. data/ext/sources/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  363. data/ext/sources/ggml/src/ggml-opencl/kernels/sum_rows.cl +114 -13
  364. data/ext/sources/ggml/src/ggml-opencl/kernels/tanh.cl +94 -48
  365. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +26 -0
  366. data/ext/sources/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  367. data/ext/sources/ggml/src/ggml-openvino/.clang-format +154 -0
  368. data/ext/sources/ggml/src/ggml-openvino/CMakeLists.txt +22 -0
  369. data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.cpp +975 -0
  370. data/ext/sources/ggml/src/ggml-openvino/ggml-decoder.h +294 -0
  371. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +373 -0
  372. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino-extra.h +182 -0
  373. data/ext/sources/ggml/src/ggml-openvino/ggml-openvino.cpp +1110 -0
  374. data/ext/sources/ggml/src/ggml-openvino/ggml-quants.cpp +884 -0
  375. data/ext/sources/ggml/src/ggml-openvino/ggml-quants.h +153 -0
  376. data/ext/sources/ggml/src/ggml-openvino/openvino/decoder.h +74 -0
  377. data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.cpp +27 -0
  378. data/ext/sources/ggml/src/ggml-openvino/openvino/frontend.h +23 -0
  379. data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.cpp +17 -0
  380. data/ext/sources/ggml/src/ggml-openvino/openvino/input_model.h +29 -0
  381. data/ext/sources/ggml/src/ggml-openvino/openvino/node_context.h +112 -0
  382. data/ext/sources/ggml/src/ggml-openvino/openvino/op/cont.cpp +48 -0
  383. data/ext/sources/ggml/src/ggml-openvino/openvino/op/cpy.cpp +21 -0
  384. data/ext/sources/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +90 -0
  385. data/ext/sources/ggml/src/ggml-openvino/openvino/op/get_rows.cpp +69 -0
  386. data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +61 -0
  387. data/ext/sources/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +62 -0
  388. data/ext/sources/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +90 -0
  389. data/ext/sources/ggml/src/ggml-openvino/openvino/op/permute.cpp +102 -0
  390. data/ext/sources/ggml/src/ggml-openvino/openvino/op/reshape.cpp +83 -0
  391. data/ext/sources/ggml/src/ggml-openvino/openvino/op/rms_norm.cpp +46 -0
  392. data/ext/sources/ggml/src/ggml-openvino/openvino/op/rope.cpp +123 -0
  393. data/ext/sources/ggml/src/ggml-openvino/openvino/op/scale.cpp +41 -0
  394. data/ext/sources/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +76 -0
  395. data/ext/sources/ggml/src/ggml-openvino/openvino/op/softmax.cpp +89 -0
  396. data/ext/sources/ggml/src/ggml-openvino/openvino/op/transpose.cpp +23 -0
  397. data/ext/sources/ggml/src/ggml-openvino/openvino/op/unary_silu.cpp +27 -0
  398. data/ext/sources/ggml/src/ggml-openvino/openvino/op/view.cpp +53 -0
  399. data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.cpp +46 -0
  400. data/ext/sources/ggml/src/ggml-openvino/openvino/op_table.h +39 -0
  401. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.cpp +123 -0
  402. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/eliminate_zp.h +17 -0
  403. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp +60 -0
  404. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.h +17 -0
  405. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/mark_decompression_convert_constant_folding.h +29 -0
  406. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.cpp +58 -0
  407. data/ext/sources/ggml/src/ggml-openvino/openvino/pass/squeeze_matmul.h +17 -0
  408. data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.cpp +293 -0
  409. data/ext/sources/ggml/src/ggml-openvino/openvino/translate_session.h +28 -0
  410. data/ext/sources/ggml/src/ggml-openvino/openvino/utils.cpp +226 -0
  411. data/ext/sources/ggml/src/ggml-openvino/openvino/utils.h +85 -0
  412. data/ext/sources/ggml/src/ggml-openvino/utils.cpp +823 -0
  413. data/ext/sources/ggml/src/ggml-openvino/utils.h +123 -0
  414. data/ext/sources/ggml/src/ggml-quants.c +96 -5
  415. data/ext/sources/ggml/src/ggml-quants.h +3 -0
  416. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +15 -88
  417. data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +5 -1
  418. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +1 -0
  419. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +21 -20
  420. data/ext/sources/ggml/src/ggml-sycl/common.hpp +315 -10
  421. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +69 -1
  422. data/ext/sources/ggml/src/ggml-sycl/convert.hpp +22 -1
  423. data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +1 -1
  424. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +791 -47
  425. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +78 -68
  426. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  427. data/ext/sources/ggml/src/ggml-sycl/fattn-common.hpp +1179 -0
  428. data/ext/sources/ggml/src/ggml-sycl/fattn-tile.cpp +55 -0
  429. data/ext/sources/ggml/src/ggml-sycl/fattn-tile.hpp +1338 -0
  430. data/ext/sources/ggml/src/ggml-sycl/fattn-vec.hpp +667 -0
  431. data/ext/sources/ggml/src/ggml-sycl/fattn.cpp +225 -0
  432. data/ext/sources/ggml/src/ggml-sycl/fattn.hpp +22 -0
  433. data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.cpp +309 -0
  434. data/ext/sources/ggml/src/ggml-sycl/gated_delta_net.hpp +8 -0
  435. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +316 -51
  436. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +65 -66
  437. data/ext/sources/ggml/src/ggml-sycl/outprod.cpp +3 -3
  438. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +3 -0
  439. data/ext/sources/ggml/src/ggml-sycl/quants.hpp +1 -1
  440. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +450 -287
  441. data/ext/sources/ggml/src/ggml-sycl/rope.hpp +6 -0
  442. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +6 -6
  443. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq112-dv112.cpp +5 -0
  444. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq128-dv128.cpp +5 -0
  445. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq256-dv256.cpp +5 -0
  446. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq40-dv40.cpp +5 -0
  447. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq576-dv512.cpp +5 -0
  448. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq64-dv64.cpp +5 -0
  449. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq72-dv72.cpp +5 -0
  450. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq80-dv80.cpp +5 -0
  451. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-tile-instance-dkq96-dv96.cpp +5 -0
  452. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-f16.cpp +7 -0
  453. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_0.cpp +7 -0
  454. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q4_1.cpp +7 -0
  455. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_0.cpp +7 -0
  456. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q5_1.cpp +7 -0
  457. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-f16-q8_0.cpp +7 -0
  458. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-f16.cpp +7 -0
  459. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_0.cpp +7 -0
  460. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q4_1.cpp +7 -0
  461. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_0.cpp +7 -0
  462. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q5_1.cpp +7 -0
  463. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_0-q8_0.cpp +7 -0
  464. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-f16.cpp +7 -0
  465. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_0.cpp +7 -0
  466. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q4_1.cpp +7 -0
  467. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_0.cpp +7 -0
  468. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q5_1.cpp +7 -0
  469. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q4_1-q8_0.cpp +7 -0
  470. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-f16.cpp +7 -0
  471. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_0.cpp +7 -0
  472. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q4_1.cpp +7 -0
  473. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_0.cpp +7 -0
  474. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q5_1.cpp +7 -0
  475. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_0-q8_0.cpp +7 -0
  476. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-f16.cpp +7 -0
  477. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_0.cpp +7 -0
  478. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q4_1.cpp +7 -0
  479. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_0.cpp +7 -0
  480. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q5_1.cpp +7 -0
  481. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q5_1-q8_0.cpp +7 -0
  482. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-f16.cpp +7 -0
  483. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_0.cpp +7 -0
  484. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q4_1.cpp +7 -0
  485. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_0.cpp +7 -0
  486. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q5_1.cpp +7 -0
  487. data/ext/sources/ggml/src/ggml-sycl/template-instances/fattn-vec-instance-q8_0-q8_0.cpp +7 -0
  488. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +13 -0
  489. data/ext/sources/ggml/src/ggml-sycl/wkv.cpp +1 -1
  490. data/ext/sources/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  491. data/ext/sources/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  492. data/ext/sources/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  493. data/ext/sources/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  494. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  495. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  496. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  497. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  498. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  499. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  500. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  501. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  502. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  503. data/ext/sources/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  504. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  505. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  506. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  507. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  508. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  509. data/ext/sources/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  510. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  511. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  512. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  513. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  514. data/ext/sources/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  515. data/ext/sources/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  516. data/ext/sources/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  517. data/ext/sources/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  518. data/ext/sources/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  519. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  520. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  521. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  522. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  523. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  524. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  525. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  526. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  527. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  528. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  529. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  530. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  531. data/ext/sources/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  532. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +1 -1
  533. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1250 -465
  534. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +16 -8
  535. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/elu.comp +27 -0
  536. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +374 -170
  537. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +66 -22
  538. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +389 -201
  539. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +106 -58
  540. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  541. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +9 -8
  542. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gated_delta_net.comp +128 -0
  543. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +12 -9
  544. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +20 -17
  545. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +11 -3
  546. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +8 -4
  547. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +3 -1
  548. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +5 -3
  549. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +3 -3
  550. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +2 -3
  551. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +36 -63
  552. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -4
  553. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -4
  554. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -4
  555. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +10 -5
  556. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +7 -4
  557. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  558. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +16 -10
  559. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +55 -35
  560. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1314 -109
  561. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1660 -1371
  562. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  563. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  564. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  565. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +141 -0
  566. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +65 -72
  567. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl +75 -0
  568. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +6 -0
  569. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  570. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +40 -5
  571. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +105 -60
  572. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{get_rows.tmpl.wgsl → get_rows.wgsl} +53 -259
  573. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat.tmpl.wgsl → mul_mat.wgsl} +68 -257
  574. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +692 -23
  575. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_reg_tile.tmpl.wgsl → mul_mat_reg_tile.wgsl} +28 -128
  576. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{mul_mat_subgroup_matrix.tmpl.wgsl → mul_mat_subgroup_matrix.wgsl} +31 -137
  577. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +480 -0
  578. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  579. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/repeat.wgsl +67 -0
  580. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/{scale.tmpl.wgsl → scale.wgsl} +9 -36
  581. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +40 -12
  582. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  583. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  584. data/ext/sources/ggml/src/ggml-zdnn/ggml-zdnn.cpp +6 -1
  585. data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +31 -32
  586. data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +9 -6
  587. data/ext/sources/ggml/src/ggml.c +167 -33
  588. data/ext/sources/ggml/src/gguf.cpp +229 -44
  589. data/ext/sources/src/whisper.cpp +6 -28
  590. data/sig/whisper.rbs +43 -2
  591. data/test/test_context_params.rb +82 -0
  592. data/test/test_token.rb +11 -0
  593. data/test/test_vad_context.rb +58 -8
  594. data/test/test_whisper.rb +20 -0
  595. data/whispercpp.gemspec +1 -1
  596. metadata +240 -28
  597. data/ext/sources/ggml/cmake/BuildTypes.cmake +0 -54
  598. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +0 -333
  599. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +0 -94
  600. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +0 -72
  601. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +0 -49
  602. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +0 -1020
  603. data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +0 -149
  604. data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +0 -454
  605. data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +0 -221
  606. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +0 -188
  607. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +0 -45
  608. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +0 -267
  609. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +0 -112
  610. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +0 -483
@@ -1,5 +1,3 @@
1
- #include <ruby.h>
2
- #include <ruby/memory_view.h>
3
1
  #include "ruby_whisper.h"
4
2
 
5
3
  extern ID id_to_s;
@@ -20,6 +18,7 @@ extern VALUE eError;
20
18
  extern VALUE cModel;
21
19
 
22
20
  extern const rb_data_type_t ruby_whisper_params_type;
21
+ extern const rb_data_type_t ruby_whisper_context_params_type;
23
22
  extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
24
23
  extern VALUE rb_whisper_model_s_new(VALUE context);
25
24
  extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
@@ -27,6 +26,27 @@ extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
27
26
 
28
27
  ID transcribe_option_names[1];
29
28
 
29
+ typedef struct fill_samples_args {
30
+ float *dest;
31
+ VALUE *src;
32
+ int n_samples;
33
+ } fill_samples_args;
34
+
35
+ typedef struct full_args {
36
+ VALUE *context;
37
+ VALUE *params;
38
+ float *samples;
39
+ int n_samples;
40
+ } full_args;
41
+
42
+ typedef struct full_parallel_args {
43
+ VALUE *context;
44
+ VALUE *params;
45
+ float *samples;
46
+ int n_samples;
47
+ int n_processors;
48
+ } full_parallel_args;
49
+
30
50
  static void
31
51
  ruby_whisper_free(ruby_whisper *rw)
32
52
  {
@@ -124,16 +144,25 @@ ruby_whisper_initialize(int argc, VALUE *argv, VALUE self)
124
144
  {
125
145
  ruby_whisper *rw;
126
146
  VALUE whisper_model_file_path;
147
+ VALUE context_params;
148
+ struct whisper_context_params params;
127
149
 
128
150
  // TODO: we can support init from buffer here too maybe another ruby object to expose
129
- rb_scan_args(argc, argv, "01", &whisper_model_file_path);
151
+ rb_scan_args(argc, argv, "11", &whisper_model_file_path, &context_params);
130
152
  TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
131
153
 
132
154
  whisper_model_file_path = ruby_whisper_normalize_model_path(whisper_model_file_path);
133
155
  if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
134
156
  rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
135
157
  }
136
- rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
158
+ if (NIL_P(context_params)) {
159
+ params = whisper_context_default_params();
160
+ } else {
161
+ ruby_whisper_context_params *rwcp;
162
+ GetContextParams(context_params, rwcp);
163
+ params = rwcp->params;
164
+ }
165
+ rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), params);
137
166
  if (rw->context == NULL) {
138
167
  rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
139
168
  }
@@ -272,82 +301,175 @@ VALUE ruby_whisper_model_type(VALUE self)
272
301
  return rb_str_new2(whisper_model_type_readable(rw->context));
273
302
  }
274
303
 
275
- /*
276
- * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
277
- * Not thread safe for same context
278
- * Uses the specified decoding strategy to obtain the text.
279
- *
280
- * call-seq:
281
- * full(params, samples, n_samples) -> nil
282
- * full(params, samples) -> nil
283
- *
284
- * The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
285
- */
286
- VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
304
+ static bool
305
+ check_memory_view(rb_memory_view_t *memview)
287
306
  {
288
- if (argc < 2 || argc > 3) {
289
- rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
307
+ if (memview->format != NULL && strcmp(memview->format, "f") != 0) {
308
+ rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format);
309
+ return false;
310
+ }
311
+ if (memview->format != NULL && memview->ndim != 1) {
312
+ rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim);
313
+ return false;
290
314
  }
291
315
 
292
- ruby_whisper *rw;
293
- ruby_whisper_params *rwp;
294
- GetContext(self, rw);
295
- VALUE params = argv[0];
296
- TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
297
- VALUE samples = argv[1];
298
- int n_samples;
299
- rb_memory_view_t view;
300
- const bool memory_view_available_p = rb_memory_view_available_p(samples);
301
- if (argc == 3) {
302
- n_samples = NUM2INT(argv[2]);
303
- if (TYPE(samples) == T_ARRAY) {
304
- if (RARRAY_LEN(samples) < n_samples) {
305
- rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
316
+ return true;
317
+ }
318
+
319
+ static VALUE
320
+ fill_samples(VALUE rb_args)
321
+ {
322
+ fill_samples_args *args = (fill_samples_args *)rb_args;
323
+
324
+ if (RB_TYPE_P(*args->src, T_ARRAY)) {
325
+ for (int i = 0; i < args->n_samples; i++) {
326
+ args->dest[i] = RFLOAT_VALUE(rb_ary_entry(*args->src, i));
327
+ }
328
+ } else {
329
+ // TODO: use rb_block_call
330
+ VALUE iter = rb_funcall(*args->src, id_to_enum, 1, rb_str_new2("each"));
331
+ for (int i = 0; i < args->n_samples; i++) {
332
+ // TODO: check if iter is exhausted and raise ArgumentError appropriately
333
+ VALUE sample = rb_funcall(iter, id_next, 0);
334
+ args->dest[i] = RFLOAT_VALUE(sample);
335
+ }
336
+ }
337
+
338
+ return Qnil;
339
+ }
340
+
341
+ struct parsed_samples_t
342
+ parse_samples(VALUE *samples, VALUE *n_samples)
343
+ {
344
+ bool memview_available = rb_memory_view_available_p(*samples);
345
+ struct parsed_samples_t parsed = {0};
346
+ parsed.memview_exported = false;
347
+ const bool is_array = RB_TYPE_P(*samples, T_ARRAY);
348
+
349
+ if (!NIL_P(*n_samples)) {
350
+ parsed.n_samples = NUM2INT(*n_samples);
351
+ if (is_array) {
352
+ if (RARRAY_LEN(*samples) < parsed.n_samples) {
353
+ rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(*samples), parsed.n_samples);
306
354
  }
307
355
  }
308
356
  // Should check when samples.respond_to?(:length)?
309
357
  } else {
310
- if (TYPE(samples) == T_ARRAY) {
311
- if (RARRAY_LEN(samples) > INT_MAX) {
358
+ if (is_array) {
359
+ if (RARRAY_LEN(*samples) > INT_MAX) {
312
360
  rb_raise(rb_eArgError, "samples are too long");
313
361
  }
314
- n_samples = (int)RARRAY_LEN(samples);
315
- } else if (memory_view_available_p) {
316
- if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
317
- view.obj = Qnil;
318
- rb_raise(rb_eArgError, "unable to get a memory view");
362
+ parsed.n_samples = (int)RARRAY_LEN(*samples);
363
+ } else if (memview_available) {
364
+ bool memview_got = rb_memory_view_get(*samples, &parsed.memview, RUBY_MEMORY_VIEW_SIMPLE);
365
+ if (memview_got) {
366
+ parsed.memview_exported = check_memory_view(&parsed.memview);
367
+ if (!parsed.memview_exported) {
368
+ rb_memory_view_release(&parsed.memview);
369
+ parsed.memview = (rb_memory_view_t){0};
370
+ }
319
371
  }
320
- ssize_t n_samples_size = view.byte_size / view.item_size;
321
- if (n_samples_size > INT_MAX) {
322
- rb_raise(rb_eArgError, "samples are too long");
372
+ if (parsed.memview_exported) {
373
+ ssize_t n_samples_size = parsed.memview.byte_size / parsed.memview.item_size;
374
+ if (n_samples_size > INT_MAX) {
375
+ rb_memory_view_release(&parsed.memview);
376
+ rb_raise(rb_eArgError, "samples are too long: %zd", n_samples_size);
377
+ }
378
+ parsed.n_samples = (int)n_samples_size;
379
+ } else {
380
+ rb_warn("unable to get a memory view. falls back to Ruby object");
381
+ if (rb_respond_to(*samples, id_length)) {
382
+ parsed.n_samples = NUM2INT(rb_funcall(*samples, id_length, 0));
383
+ } else {
384
+ rb_raise(rb_eArgError, "samples must respond to :length");
385
+ }
323
386
  }
324
- n_samples = (int)n_samples_size;
325
- } else if (rb_respond_to(samples, id_length)) {
326
- n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
387
+ } else if (rb_respond_to(*samples, id_length)) {
388
+ parsed.n_samples = NUM2INT(rb_funcall(*samples, id_length, 0));
327
389
  } else {
328
- rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
390
+ rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of float when n_samples is not given");
329
391
  }
330
392
  }
331
- float * c_samples = (float *)malloc(n_samples * sizeof(float));
332
- if (memory_view_available_p) {
333
- c_samples = (float *)view.data;
393
+
394
+ if (parsed.memview_exported) {
395
+ parsed.samples = (float *)parsed.memview.data;
334
396
  } else {
335
- if (TYPE(samples) == T_ARRAY) {
336
- for (int i = 0; i < n_samples; i++) {
337
- c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
338
- }
339
- } else {
340
- // TODO: use rb_block_call
341
- VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
342
- for (int i = 0; i < n_samples; i++) {
343
- // TODO: check if iter is exhausted and raise ArgumentError appropriately
344
- VALUE sample = rb_funcall(iter, id_next, 0);
345
- c_samples[i] = RFLOAT_VALUE(sample);
346
- }
397
+ parsed.samples = ALLOC_N(float, parsed.n_samples);
398
+ fill_samples_args args = {
399
+ parsed.samples,
400
+ samples,
401
+ parsed.n_samples,
402
+ };
403
+ int state;
404
+ rb_protect(fill_samples, (VALUE)&args, &state);
405
+ if (state) {
406
+ xfree(parsed.samples);
407
+ rb_jump_tag(state);
347
408
  }
348
409
  }
349
- prepare_transcription(rwp, &self);
350
- const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
410
+
411
+ return parsed;
412
+ }
413
+
414
+ VALUE
415
+ release_samples(VALUE rb_parsed_args)
416
+ {
417
+ parsed_samples_t *parsed_args = (parsed_samples_t *)rb_parsed_args;
418
+
419
+ if (parsed_args->memview_exported) {
420
+ rb_memory_view_release(&parsed_args->memview);
421
+ } else {
422
+ xfree(parsed_args->samples);
423
+ }
424
+ *parsed_args = (parsed_samples_t){0};
425
+
426
+ return Qnil;
427
+ }
428
+
429
+ static VALUE
430
+ full_body(VALUE rb_args)
431
+ {
432
+ full_args *args = (full_args *)rb_args;
433
+
434
+ ruby_whisper *rw;
435
+ ruby_whisper_params *rwp;
436
+ GetContext(*args->context, rw);
437
+ TypedData_Get_Struct(*args->params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
438
+
439
+ prepare_transcription(rwp, args->context);
440
+ int result = whisper_full(rw->context, rwp->params, args->samples, args->n_samples);
441
+
442
+ return INT2NUM(result);
443
+ }
444
+
445
+ /*
446
+ * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
447
+ * Not thread safe for same context
448
+ * Uses the specified decoding strategy to obtain the text.
449
+ *
450
+ * call-seq:
451
+ * full(params, samples, n_samples) -> nil
452
+ * full(params, samples) -> nil
453
+ *
454
+ * The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
455
+ */
456
+ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
457
+ {
458
+ if (argc < 2 || argc > 3) {
459
+ rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
460
+ }
461
+
462
+ VALUE n_samples = argc == 2 ? Qnil : argv[2];
463
+
464
+ struct parsed_samples_t parsed = parse_samples(&argv[1], &n_samples);
465
+ full_args args = {
466
+ &self,
467
+ &argv[0],
468
+ parsed.samples,
469
+ parsed.n_samples,
470
+ };
471
+ VALUE rb_result = rb_ensure(full_body, (VALUE)&args, release_samples, (VALUE)&parsed);
472
+ const int result = NUM2INT(rb_result);
351
473
  if (0 == result) {
352
474
  return self;
353
475
  } else {
@@ -355,6 +477,22 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
355
477
  }
356
478
  }
357
479
 
480
+ static VALUE
481
+ full_parallel_body(VALUE rb_args)
482
+ {
483
+ full_parallel_args *args = (full_parallel_args *)rb_args;
484
+
485
+ ruby_whisper *rw;
486
+ ruby_whisper_params *rwp;
487
+ GetContext(*args->context, rw);
488
+ TypedData_Get_Struct(*args->params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
489
+
490
+ prepare_transcription(rwp, args->context);
491
+ int result = whisper_full_parallel(rw->context, rwp->params, args->samples, args->n_samples, args->n_processors);
492
+
493
+ return INT2NUM(result);
494
+ }
495
+
358
496
  /*
359
497
  * Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
360
498
  * Result is stored in the default state of the context
@@ -372,19 +510,11 @@ static VALUE
372
510
  ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
373
511
  {
374
512
  if (argc < 2 || argc > 4) {
375
- rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
513
+ rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..4)", argc);
376
514
  }
377
515
 
378
- ruby_whisper *rw;
379
- ruby_whisper_params *rwp;
380
- GetContext(self, rw);
381
- VALUE params = argv[0];
382
- TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
383
- VALUE samples = argv[1];
384
- int n_samples;
516
+ VALUE n_samples = argc == 2 ? Qnil : argv[2];
385
517
  int n_processors;
386
- rb_memory_view_t view;
387
- const bool memory_view_available_p = rb_memory_view_available_p(samples);
388
518
  switch (argc) {
389
519
  case 2:
390
520
  n_processors = 1;
@@ -396,56 +526,16 @@ ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
396
526
  n_processors = NUM2INT(argv[3]);
397
527
  break;
398
528
  }
399
- if (argc >= 3 && !NIL_P(argv[2])) {
400
- n_samples = NUM2INT(argv[2]);
401
- if (TYPE(samples) == T_ARRAY) {
402
- if (RARRAY_LEN(samples) < n_samples) {
403
- rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
404
- }
405
- }
406
- // Should check when samples.respond_to?(:length)?
407
- } else if (memory_view_available_p) {
408
- if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
409
- view.obj = Qnil;
410
- rb_raise(rb_eArgError, "unable to get a memory view");
411
- }
412
- ssize_t n_samples_size = view.byte_size / view.item_size;
413
- if (n_samples_size > INT_MAX) {
414
- rb_raise(rb_eArgError, "samples are too long");
415
- }
416
- n_samples = (int)n_samples_size;
417
- } else {
418
- if (TYPE(samples) == T_ARRAY) {
419
- if (RARRAY_LEN(samples) > INT_MAX) {
420
- rb_raise(rb_eArgError, "samples are too long");
421
- }
422
- n_samples = (int)RARRAY_LEN(samples);
423
- } else if (rb_respond_to(samples, id_length)) {
424
- n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
425
- } else {
426
- rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
427
- }
428
- }
429
- float * c_samples = (float *)malloc(n_samples * sizeof(float));
430
- if (memory_view_available_p) {
431
- c_samples = (float *)view.data;
432
- } else {
433
- if (TYPE(samples) == T_ARRAY) {
434
- for (int i = 0; i < n_samples; i++) {
435
- c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
436
- }
437
- } else {
438
- // FIXME: use rb_block_call
439
- VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
440
- for (int i = 0; i < n_samples; i++) {
441
- // TODO: check if iter is exhausted and raise ArgumentError
442
- VALUE sample = rb_funcall(iter, id_next, 0);
443
- c_samples[i] = RFLOAT_VALUE(sample);
444
- }
445
- }
446
- }
447
- prepare_transcription(rwp, &self);
448
- const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
529
+ struct parsed_samples_t parsed = parse_samples(&argv[1], &n_samples);
530
+ const full_parallel_args args = {
531
+ &self,
532
+ &argv[0],
533
+ parsed.samples,
534
+ parsed.n_samples,
535
+ n_processors,
536
+ };
537
+ const VALUE rb_result = rb_ensure(full_parallel_body, (VALUE)&args, release_samples, (VALUE)&parsed);
538
+ const int result = NUM2INT(rb_result);
449
539
  if (0 == result) {
450
540
  return self;
451
541
  } else {
@@ -631,7 +721,7 @@ ruby_whisper_get_model(VALUE self)
631
721
  return rb_whisper_model_s_new(self);
632
722
  }
633
723
 
634
- void
724
+ VALUE
635
725
  init_ruby_whisper_context(VALUE *mWhisper)
636
726
  {
637
727
  cContext = rb_define_class_under(*mWhisper, "Context", rb_cObject);
@@ -669,4 +759,6 @@ init_ruby_whisper_context(VALUE *mWhisper)
669
759
  rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
670
760
 
671
761
  rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
762
+
763
+ return cContext;
672
764
  }
@@ -0,0 +1,163 @@
1
+ #include "ruby_whisper.h"
2
+
3
+ #define NUM_PARAMS 6
4
+
5
+ #define DEF_BOOLEAN_ATTR_METHOD(name) \
6
+ static VALUE \
7
+ ruby_whisper_context_params_get_ ## name(VALUE self) { \
8
+ ruby_whisper_context_params *rwcp; \
9
+ GetContextParams(self, rwcp); \
10
+ return rwcp->params.name ? Qtrue : Qfalse; \
11
+ } \
12
+ static VALUE \
13
+ ruby_whisper_context_params_set_ ## name(VALUE self, VALUE value) { \
14
+ ruby_whisper_context_params *rwcp; \
15
+ GetContextParams(self, rwcp); \
16
+ rwcp->params.name = RTEST(value); \
17
+ return value; \
18
+ }
19
+
20
+ #define DEF_INT_ATTR_METHOD(name) \
21
+ static VALUE \
22
+ ruby_whisper_context_params_get_ ## name(VALUE self) { \
23
+ ruby_whisper_context_params *rwcp; \
24
+ GetContextParams(self, rwcp); \
25
+ return INT2NUM(rwcp->params.name); \
26
+ } \
27
+ static VALUE \
28
+ ruby_whisper_context_params_set_ ## name(VALUE self, VALUE value) { \
29
+ ruby_whisper_context_params *rwcp; \
30
+ GetContextParams(self, rwcp); \
31
+ rwcp->params.name = NUM2INT(value); \
32
+ return value; \
33
+ }
34
+
35
+ #define DEFINE_PARAM(param_name, nth) \
36
+ id_ ## param_name = rb_intern(#param_name); \
37
+ param_names[nth] = id_ ## param_name; \
38
+ rb_define_method(cContextParams, #param_name, ruby_whisper_context_params_get_ ## param_name, 0); \
39
+ rb_define_method(cContextParams, #param_name "=", ruby_whisper_context_params_set_ ## param_name, 1);
40
+
41
+ VALUE cContextParams;
42
+
43
+ static ID param_names[NUM_PARAMS];
44
+ static ID id_use_gpu;
45
+ static ID id_flash_attn;
46
+ static ID id_gpu_device;
47
+ static ID id_dtw_token_timestamps;
48
+ static ID id_dtw_aheads_preset;
49
+ static ID id_dtw_n_top;
50
+
51
+ static size_t
52
+ ruby_whisper_context_params_memsize(const void *p)
53
+ {
54
+ const ruby_whisper_context_params *rwcp = (ruby_whisper_context_params *)p;
55
+ if (!rwcp) {
56
+ return 0;
57
+ }
58
+ return sizeof(ruby_whisper_context_params);
59
+ }
60
+
61
+ const rb_data_type_t ruby_whisper_context_params_type = {
62
+ "ruby_whisper_context_params",
63
+ {0, RUBY_DEFAULT_FREE, ruby_whisper_context_params_memsize,},
64
+ 0, 0,
65
+ 0
66
+ };
67
+
68
+ static VALUE
69
+ ruby_whisper_context_params_s_allocate(VALUE klass)
70
+ {
71
+ ruby_whisper_context_params *rwcp;
72
+ return TypedData_Make_Struct(klass, ruby_whisper_context_params, &ruby_whisper_context_params_type, rwcp);
73
+ }
74
+
75
+ DEF_BOOLEAN_ATTR_METHOD(use_gpu);
76
+ DEF_BOOLEAN_ATTR_METHOD(flash_attn);
77
+ DEF_INT_ATTR_METHOD(gpu_device);
78
+ DEF_BOOLEAN_ATTR_METHOD(dtw_token_timestamps);
79
+ DEF_INT_ATTR_METHOD(dtw_aheads_preset);
80
+
81
+ static VALUE
82
+ ruby_whisper_context_params_get_dtw_n_top(VALUE self) {
83
+ ruby_whisper_context_params *rwcp;
84
+ GetContextParams(self, rwcp);
85
+
86
+ int dtw_n_top = rwcp->params.dtw_n_top;
87
+
88
+ return dtw_n_top == -1 ? Qnil : INT2NUM(dtw_n_top);
89
+ }
90
+
91
+ static VALUE
92
+ ruby_whisper_context_params_set_dtw_n_top(VALUE self, VALUE value) {
93
+ ruby_whisper_context_params *rwcp;
94
+ GetContextParams(self, rwcp);
95
+
96
+ rwcp->params.dtw_n_top = NIL_P(value) ? -1 : NUM2INT(value);
97
+
98
+ return value;
99
+ }
100
+
101
+ #define SET_PARAM_IF_SAME(param_name) \
102
+ if (id == id_ ## param_name) { \
103
+ ruby_whisper_context_params_set_ ## param_name(self, value); \
104
+ continue; \
105
+ }
106
+
107
+ static VALUE
108
+ ruby_whisper_context_params_initialize(int argc, VALUE *argv, VALUE self)
109
+ {
110
+ ruby_whisper_context_params *rwcp;
111
+ TypedData_Get_Struct(self, ruby_whisper_context_params, &ruby_whisper_context_params_type, rwcp);
112
+ rwcp->params = whisper_context_default_params();
113
+
114
+ VALUE kw_hash;
115
+ rb_scan_args_kw(RB_SCAN_ARGS_KEYWORDS, argc, argv, ":", &kw_hash);
116
+ if (NIL_P(kw_hash)) {
117
+ return Qnil;
118
+ }
119
+
120
+ VALUE values[NUM_PARAMS] = {Qundef};
121
+ rb_get_kwargs(kw_hash, param_names, 0, NUM_PARAMS, values);
122
+
123
+ ID id;
124
+ VALUE value;
125
+ for (int i = 0; i < NUM_PARAMS; i++) {
126
+ id = param_names[i];
127
+ value = values[i];
128
+ if (value == Qundef) {
129
+ continue;
130
+ }
131
+ SET_PARAM_IF_SAME(use_gpu)
132
+ SET_PARAM_IF_SAME(flash_attn)
133
+ SET_PARAM_IF_SAME(gpu_device)
134
+ SET_PARAM_IF_SAME(dtw_token_timestamps)
135
+ SET_PARAM_IF_SAME(dtw_aheads_preset)
136
+ SET_PARAM_IF_SAME(dtw_n_top)
137
+ }
138
+
139
+ return Qnil;
140
+ }
141
+
142
+ #undef SET_PARAM_IF_SAME
143
+
144
+ void
145
+ init_ruby_whisper_context_params(VALUE *cContext)
146
+ {
147
+ cContextParams = rb_define_class_under(*cContext, "Params", rb_cObject);
148
+
149
+ rb_define_alloc_func(cContextParams, ruby_whisper_context_params_s_allocate);
150
+ rb_define_method(cContextParams, "initialize", ruby_whisper_context_params_initialize, -1);
151
+
152
+ DEFINE_PARAM(use_gpu, 0)
153
+ DEFINE_PARAM(flash_attn, 1)
154
+ DEFINE_PARAM(gpu_device, 2)
155
+ DEFINE_PARAM(dtw_token_timestamps, 3)
156
+ DEFINE_PARAM(dtw_aheads_preset, 4)
157
+ DEFINE_PARAM(dtw_n_top, 5)
158
+ }
159
+
160
+ #undef DEFINE_PARAM
161
+ #undef DEF_INT_ATTR_METHOD
162
+ #undef DEF_BOOLEAN_ATTR_METHOD
163
+ #undef NUM_PARAMS
@@ -1,4 +1,3 @@
1
- #include <ruby.h>
2
1
  #include "ruby_whisper.h"
3
2
 
4
3
  extern const rb_data_type_t ruby_whisper_type;
@@ -1,4 +1,3 @@
1
- #include <ruby.h>
2
1
  #include "ruby_whisper.h"
3
2
 
4
3
  #define BOOL_PARAMS_SETTER(self, prop, value) \
@@ -1,4 +1,3 @@
1
- #include <ruby.h>
2
1
  #include "ruby_whisper.h"
3
2
 
4
3
  #define N_KEY_NAMES 6
@@ -1,4 +1,3 @@
1
- #include <ruby.h>
2
1
  #include "ruby_whisper.h"
3
2
 
4
3
  #define N_KEY_NAMES 11
@@ -25,12 +24,34 @@ ruby_whisper_token_memsize(const void *p)
25
24
  if (!rwt) {
26
25
  return 0;
27
26
  }
28
- return sizeof(rwt);
27
+ size_t size = sizeof(*rwt);
28
+ if (rwt->token_data) {
29
+ size += sizeof(*rwt->token_data);
30
+ }
31
+ return size;
32
+ }
33
+
34
+ static void
35
+ ruby_whisper_token_mark(void *p)
36
+ {
37
+ ruby_whisper_token *rwt = (ruby_whisper_token *)p;
38
+ rb_gc_mark(rwt->text);
39
+ }
40
+
41
+ static void
42
+ ruby_whisper_token_free(void *p)
43
+ {
44
+ ruby_whisper_token *rwt = (ruby_whisper_token *)p;
45
+ if (rwt->token_data) {
46
+ xfree(rwt->token_data);
47
+ rwt->token_data = NULL;
48
+ }
49
+ xfree(rwt);
29
50
  }
30
51
 
31
52
  static const rb_data_type_t ruby_whisper_token_type = {
32
53
  "ruby_whisper_token",
33
- {0, RUBY_DEFAULT_FREE, ruby_whisper_token_memsize,},
54
+ {ruby_whisper_token_mark, ruby_whisper_token_free, ruby_whisper_token_memsize,},
34
55
  0, 0,
35
56
  0
36
57
  };
@@ -41,19 +62,19 @@ ruby_whisper_token_allocate(VALUE klass)
41
62
  ruby_whisper_token *rwt;
42
63
  VALUE token = TypedData_Make_Struct(klass, ruby_whisper_token, &ruby_whisper_token_type, rwt);
43
64
  rwt->token_data = NULL;
44
- rwt->text = NULL;
65
+ rwt->text = Qnil;
45
66
  return token;
46
67
  }
47
68
 
48
69
  VALUE
49
70
  ruby_whisper_token_s_init(struct whisper_context *context, int i_segment, int i_token)
50
71
  {
51
- whisper_token_data token_data = whisper_full_get_token_data(context, i_segment, i_token);
52
72
  const VALUE token = ruby_whisper_token_allocate(cToken);
53
73
  ruby_whisper_token *rwt;
54
74
  TypedData_Get_Struct(token, ruby_whisper_token, &ruby_whisper_token_type, rwt);
55
- rwt->token_data = &token_data;
56
- rwt->text = whisper_full_get_token_text(context, i_segment, i_token);
75
+ rwt->token_data = ALLOC(whisper_token_data);
76
+ *(rwt->token_data) = whisper_full_get_token_data(context, i_segment, i_token);
77
+ rwt->text = rb_str_new2(whisper_full_get_token_text(context, i_segment, i_token));
57
78
  return token;
58
79
  }
59
80
 
@@ -183,10 +204,9 @@ ruby_whisper_token_get_text(VALUE self)
183
204
  {
184
205
  ruby_whisper_token *rwt;
185
206
  GetToken(self, rwt);
186
- return rb_str_new2(rwt->text);
207
+ return rwt->text;
187
208
  }
188
209
 
189
-
190
210
  /*
191
211
  * Start time of the token.
192
212
  *