whispercpp 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (630):
  1. checksums.yaml +4 -4
  2. data/README.md +60 -43
  3. data/ext/extconf.rb +2 -2
  4. data/ext/ruby_whisper.c +14 -2
  5. data/ext/ruby_whisper.h +39 -0
  6. data/ext/ruby_whisper_context.c +22 -22
  7. data/ext/ruby_whisper_model.c +12 -12
  8. data/ext/ruby_whisper_params.c +47 -23
  9. data/ext/ruby_whisper_segment.c +84 -19
  10. data/ext/ruby_whisper_token.c +351 -0
  11. data/ext/ruby_whisper_transcribe.cpp +1 -1
  12. data/ext/ruby_whisper_vad_context.c +75 -0
  13. data/ext/ruby_whisper_vad_context_detect.cpp +50 -0
  14. data/ext/ruby_whisper_vad_segment.c +139 -0
  15. data/ext/ruby_whisper_vad_segments.c +106 -0
  16. data/ext/sources/CMakeLists.txt +4 -1
  17. data/ext/sources/bindings/javascript/package.json +1 -1
  18. data/ext/sources/cmake/arm64-apple-clang.cmake +16 -0
  19. data/ext/sources/cmake/arm64-windows-llvm.cmake +16 -0
  20. data/ext/sources/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  21. data/ext/sources/cmake/x64-windows-llvm.cmake +5 -0
  22. data/ext/sources/examples/addon.node/vad-example.js +2 -2
  23. data/ext/sources/examples/cli/cli.cpp +121 -112
  24. data/ext/sources/examples/lsp/CMakeLists.txt +2 -1
  25. data/ext/sources/examples/quantize/CMakeLists.txt +2 -1
  26. data/ext/sources/examples/server/server.cpp +10 -11
  27. data/ext/sources/examples/talk-llama/CMakeLists.txt +5 -1
  28. data/ext/sources/examples/talk-llama/llama-adapter.cpp +12 -3
  29. data/ext/sources/examples/talk-llama/llama-adapter.h +7 -1
  30. data/ext/sources/examples/talk-llama/llama-arch.cpp +2046 -1974
  31. data/ext/sources/examples/talk-llama/llama-arch.h +67 -2
  32. data/ext/sources/examples/talk-llama/llama-batch.cpp +75 -33
  33. data/ext/sources/examples/talk-llama/llama-batch.h +17 -4
  34. data/ext/sources/examples/talk-llama/llama-chat.cpp +79 -3
  35. data/ext/sources/examples/talk-llama/llama-chat.h +4 -0
  36. data/ext/sources/examples/talk-llama/llama-context.cpp +775 -78
  37. data/ext/sources/examples/talk-llama/llama-context.h +57 -9
  38. data/ext/sources/examples/talk-llama/llama-cparams.h +1 -0
  39. data/ext/sources/examples/talk-llama/llama-grammar.cpp +288 -53
  40. data/ext/sources/examples/talk-llama/llama-grammar.h +22 -1
  41. data/ext/sources/examples/talk-llama/llama-graph.cpp +381 -64
  42. data/ext/sources/examples/talk-llama/llama-graph.h +103 -13
  43. data/ext/sources/examples/talk-llama/llama-hparams.cpp +26 -2
  44. data/ext/sources/examples/talk-llama/llama-hparams.h +41 -10
  45. data/ext/sources/examples/talk-llama/llama-impl.cpp +7 -3
  46. data/ext/sources/examples/talk-llama/llama-impl.h +1 -1
  47. data/ext/sources/examples/talk-llama/llama-kv-cache-iswa.cpp +5 -3
  48. data/ext/sources/examples/talk-llama/llama-kv-cache.cpp +145 -65
  49. data/ext/sources/examples/talk-llama/llama-kv-cache.h +22 -7
  50. data/ext/sources/examples/talk-llama/llama-kv-cells.h +44 -2
  51. data/ext/sources/examples/talk-llama/llama-memory-hybrid.cpp +12 -10
  52. data/ext/sources/examples/talk-llama/llama-memory-recurrent.cpp +32 -19
  53. data/ext/sources/examples/talk-llama/llama-memory-recurrent.h +2 -2
  54. data/ext/sources/examples/talk-llama/llama-mmap.cpp +172 -37
  55. data/ext/sources/examples/talk-llama/llama-mmap.h +8 -3
  56. data/ext/sources/examples/talk-llama/llama-model-loader.cpp +91 -9
  57. data/ext/sources/examples/talk-llama/llama-model-loader.h +6 -0
  58. data/ext/sources/examples/talk-llama/llama-model-saver.cpp +3 -0
  59. data/ext/sources/examples/talk-llama/llama-model.cpp +1529 -13134
  60. data/ext/sources/examples/talk-llama/llama-model.h +44 -3
  61. data/ext/sources/examples/talk-llama/llama-quant.cpp +8 -23
  62. data/ext/sources/examples/talk-llama/llama-sampling.cpp +1294 -198
  63. data/ext/sources/examples/talk-llama/llama-sampling.h +19 -7
  64. data/ext/sources/examples/talk-llama/llama-vocab.cpp +133 -37
  65. data/ext/sources/examples/talk-llama/llama-vocab.h +45 -40
  66. data/ext/sources/examples/talk-llama/llama.cpp +729 -2
  67. data/ext/sources/examples/talk-llama/llama.h +152 -14
  68. data/ext/sources/examples/talk-llama/models/afmoe.cpp +191 -0
  69. data/ext/sources/examples/talk-llama/models/apertus.cpp +125 -0
  70. data/ext/sources/examples/talk-llama/models/arcee.cpp +135 -0
  71. data/ext/sources/examples/talk-llama/models/arctic.cpp +138 -0
  72. data/ext/sources/examples/talk-llama/models/arwkv7.cpp +86 -0
  73. data/ext/sources/examples/talk-llama/models/baichuan.cpp +122 -0
  74. data/ext/sources/examples/talk-llama/models/bailingmoe.cpp +144 -0
  75. data/ext/sources/examples/talk-llama/models/bailingmoe2.cpp +135 -0
  76. data/ext/sources/examples/talk-llama/models/bert.cpp +178 -0
  77. data/ext/sources/examples/talk-llama/models/bitnet.cpp +160 -0
  78. data/ext/sources/examples/talk-llama/models/bloom.cpp +101 -0
  79. data/ext/sources/examples/talk-llama/models/chameleon.cpp +178 -0
  80. data/ext/sources/examples/talk-llama/models/chatglm.cpp +132 -0
  81. data/ext/sources/examples/talk-llama/models/codeshell.cpp +111 -0
  82. data/ext/sources/examples/talk-llama/models/cogvlm.cpp +102 -0
  83. data/ext/sources/examples/talk-llama/models/cohere2-iswa.cpp +134 -0
  84. data/ext/sources/examples/talk-llama/models/command-r.cpp +122 -0
  85. data/ext/sources/examples/talk-llama/models/dbrx.cpp +123 -0
  86. data/ext/sources/examples/talk-llama/models/deci.cpp +135 -0
  87. data/ext/sources/examples/talk-llama/models/deepseek.cpp +144 -0
  88. data/ext/sources/examples/talk-llama/models/deepseek2.cpp +259 -0
  89. data/ext/sources/examples/talk-llama/models/dots1.cpp +134 -0
  90. data/ext/sources/examples/talk-llama/models/dream.cpp +105 -0
  91. data/ext/sources/examples/talk-llama/models/ernie4-5-moe.cpp +150 -0
  92. data/ext/sources/examples/talk-llama/models/ernie4-5.cpp +110 -0
  93. data/ext/sources/examples/talk-llama/models/exaone.cpp +114 -0
  94. data/ext/sources/examples/talk-llama/models/exaone4.cpp +123 -0
  95. data/ext/sources/examples/talk-llama/models/falcon-h1.cpp +113 -0
  96. data/ext/sources/examples/talk-llama/models/falcon.cpp +120 -0
  97. data/ext/sources/examples/talk-llama/models/gemma-embedding.cpp +116 -0
  98. data/ext/sources/examples/talk-llama/models/gemma.cpp +112 -0
  99. data/ext/sources/examples/talk-llama/models/gemma2-iswa.cpp +128 -0
  100. data/ext/sources/examples/talk-llama/models/gemma3.cpp +155 -0
  101. data/ext/sources/examples/talk-llama/models/gemma3n-iswa.cpp +384 -0
  102. data/ext/sources/examples/talk-llama/models/glm4-moe.cpp +170 -0
  103. data/ext/sources/examples/talk-llama/models/glm4.cpp +150 -0
  104. data/ext/sources/examples/talk-llama/models/gpt2.cpp +105 -0
  105. data/ext/sources/examples/talk-llama/models/gptneox.cpp +144 -0
  106. data/ext/sources/examples/talk-llama/models/granite-hybrid.cpp +196 -0
  107. data/ext/sources/examples/talk-llama/models/granite.cpp +211 -0
  108. data/ext/sources/examples/talk-llama/models/graph-context-mamba.cpp +283 -0
  109. data/ext/sources/examples/talk-llama/models/grok.cpp +159 -0
  110. data/ext/sources/examples/talk-llama/models/grovemoe.cpp +141 -0
  111. data/ext/sources/examples/talk-llama/models/hunyuan-dense.cpp +132 -0
  112. data/ext/sources/examples/talk-llama/models/hunyuan-moe.cpp +154 -0
  113. data/ext/sources/examples/talk-llama/models/internlm2.cpp +120 -0
  114. data/ext/sources/examples/talk-llama/models/jais.cpp +86 -0
  115. data/ext/sources/examples/talk-llama/models/jamba.cpp +106 -0
  116. data/ext/sources/examples/talk-llama/models/lfm2.cpp +175 -0
  117. data/ext/sources/examples/talk-llama/models/llada-moe.cpp +122 -0
  118. data/ext/sources/examples/talk-llama/models/llada.cpp +99 -0
  119. data/ext/sources/examples/talk-llama/models/llama-iswa.cpp +178 -0
  120. data/ext/sources/examples/talk-llama/models/llama.cpp +168 -0
  121. data/ext/sources/examples/talk-llama/models/maincoder.cpp +117 -0
  122. data/ext/sources/examples/talk-llama/models/mamba.cpp +55 -0
  123. data/ext/sources/examples/talk-llama/models/mimo2-iswa.cpp +123 -0
  124. data/ext/sources/examples/talk-llama/models/minicpm3.cpp +199 -0
  125. data/ext/sources/examples/talk-llama/models/minimax-m2.cpp +124 -0
  126. data/ext/sources/examples/talk-llama/models/mistral3.cpp +160 -0
  127. data/ext/sources/examples/talk-llama/models/models.h +569 -0
  128. data/ext/sources/examples/talk-llama/models/modern-bert.cpp +116 -0
  129. data/ext/sources/examples/talk-llama/models/mpt.cpp +126 -0
  130. data/ext/sources/examples/talk-llama/models/nemotron-h.cpp +150 -0
  131. data/ext/sources/examples/talk-llama/models/nemotron.cpp +122 -0
  132. data/ext/sources/examples/talk-llama/models/neo-bert.cpp +104 -0
  133. data/ext/sources/examples/talk-llama/models/olmo.cpp +121 -0
  134. data/ext/sources/examples/talk-llama/models/olmo2.cpp +150 -0
  135. data/ext/sources/examples/talk-llama/models/olmoe.cpp +124 -0
  136. data/ext/sources/examples/talk-llama/models/openai-moe-iswa.cpp +127 -0
  137. data/ext/sources/examples/talk-llama/models/openelm.cpp +124 -0
  138. data/ext/sources/examples/talk-llama/models/orion.cpp +123 -0
  139. data/ext/sources/examples/talk-llama/models/pangu-embedded.cpp +121 -0
  140. data/ext/sources/examples/talk-llama/models/phi2.cpp +121 -0
  141. data/ext/sources/examples/talk-llama/models/phi3.cpp +152 -0
  142. data/ext/sources/examples/talk-llama/models/plamo.cpp +110 -0
  143. data/ext/sources/examples/talk-llama/models/plamo2.cpp +316 -0
  144. data/ext/sources/examples/talk-llama/models/plamo3.cpp +128 -0
  145. data/ext/sources/examples/talk-llama/models/plm.cpp +168 -0
  146. data/ext/sources/examples/talk-llama/models/qwen.cpp +108 -0
  147. data/ext/sources/examples/talk-llama/models/qwen2.cpp +126 -0
  148. data/ext/sources/examples/talk-llama/models/qwen2moe.cpp +151 -0
  149. data/ext/sources/examples/talk-llama/models/qwen2vl.cpp +117 -0
  150. data/ext/sources/examples/talk-llama/models/qwen3.cpp +117 -0
  151. data/ext/sources/examples/talk-llama/models/qwen3moe.cpp +124 -0
  152. data/ext/sources/examples/talk-llama/models/qwen3next.cpp +873 -0
  153. data/ext/sources/examples/talk-llama/models/qwen3vl-moe.cpp +149 -0
  154. data/ext/sources/examples/talk-llama/models/qwen3vl.cpp +141 -0
  155. data/ext/sources/examples/talk-llama/models/refact.cpp +94 -0
  156. data/ext/sources/examples/talk-llama/models/rnd1.cpp +126 -0
  157. data/ext/sources/examples/talk-llama/models/rwkv6-base.cpp +162 -0
  158. data/ext/sources/examples/talk-llama/models/rwkv6.cpp +94 -0
  159. data/ext/sources/examples/talk-llama/models/rwkv6qwen2.cpp +86 -0
  160. data/ext/sources/examples/talk-llama/models/rwkv7-base.cpp +135 -0
  161. data/ext/sources/examples/talk-llama/models/rwkv7.cpp +90 -0
  162. data/ext/sources/examples/talk-llama/models/seed-oss.cpp +124 -0
  163. data/ext/sources/examples/talk-llama/models/smallthinker.cpp +126 -0
  164. data/ext/sources/examples/talk-llama/models/smollm3.cpp +128 -0
  165. data/ext/sources/examples/talk-llama/models/stablelm.cpp +146 -0
  166. data/ext/sources/examples/talk-llama/models/starcoder.cpp +100 -0
  167. data/ext/sources/examples/talk-llama/models/starcoder2.cpp +121 -0
  168. data/ext/sources/examples/talk-llama/models/t5-dec.cpp +166 -0
  169. data/ext/sources/examples/talk-llama/models/t5-enc.cpp +96 -0
  170. data/ext/sources/examples/talk-llama/models/wavtokenizer-dec.cpp +149 -0
  171. data/ext/sources/examples/talk-llama/models/xverse.cpp +108 -0
  172. data/ext/sources/examples/talk-llama/unicode.cpp +102 -16
  173. data/ext/sources/examples/vad-speech-segments/CMakeLists.txt +1 -1
  174. data/ext/sources/examples/whisper.wasm/index-tmpl.html +1 -1
  175. data/ext/sources/ggml/CMakeLists.txt +82 -54
  176. data/ext/sources/ggml/include/ggml-alloc.h +9 -0
  177. data/ext/sources/ggml/include/ggml-backend.h +4 -1
  178. data/ext/sources/ggml/include/ggml-cpu.h +1 -0
  179. data/ext/sources/ggml/include/ggml-hexagon.h +19 -0
  180. data/ext/sources/ggml/include/ggml-rpc.h +8 -11
  181. data/ext/sources/ggml/include/ggml-zendnn.h +22 -0
  182. data/ext/sources/ggml/include/ggml.h +190 -12
  183. data/ext/sources/ggml/src/CMakeLists.txt +82 -11
  184. data/ext/sources/ggml/src/ggml-alloc.c +124 -41
  185. data/ext/sources/ggml/src/ggml-backend-impl.h +1 -4
  186. data/ext/sources/ggml/src/ggml-backend-reg.cpp +27 -3
  187. data/ext/sources/ggml/src/ggml-backend.cpp +71 -21
  188. data/ext/sources/ggml/src/ggml-blas/CMakeLists.txt +17 -3
  189. data/ext/sources/ggml/src/ggml-blas/ggml-blas.cpp +5 -9
  190. data/ext/sources/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
  191. data/ext/sources/ggml/src/ggml-cann/acl_tensor.h +138 -47
  192. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.cpp +2179 -1696
  193. data/ext/sources/ggml/src/ggml-cann/aclnn_ops.h +238 -317
  194. data/ext/sources/ggml/src/ggml-cann/common.h +283 -208
  195. data/ext/sources/ggml/src/ggml-cann/ggml-cann.cpp +626 -776
  196. data/ext/sources/ggml/src/ggml-cpu/CMakeLists.txt +156 -86
  197. data/ext/sources/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
  198. data/ext/sources/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  199. data/ext/sources/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  200. data/ext/sources/ggml/src/ggml-cpu/arch/arm/repack.cpp +1004 -0
  201. data/ext/sources/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
  202. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  203. data/ext/sources/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
  204. data/ext/sources/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  205. data/ext/sources/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
  206. data/ext/sources/ggml/src/ggml-cpu/arch-fallback.h +50 -2
  207. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -3
  208. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.c +195 -71
  209. data/ext/sources/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  210. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.cpp +573 -106
  211. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kernels.h +33 -44
  212. data/ext/sources/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +298 -112
  213. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
  214. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.cpp +819 -125
  215. data/ext/sources/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
  216. data/ext/sources/ggml/src/ggml-cpu/ops.cpp +708 -431
  217. data/ext/sources/ggml/src/ggml-cpu/ops.h +5 -4
  218. data/ext/sources/ggml/src/ggml-cpu/repack.cpp +671 -31
  219. data/ext/sources/ggml/src/ggml-cpu/repack.h +14 -0
  220. data/ext/sources/ggml/src/ggml-cpu/simd-mappings.h +41 -43
  221. data/ext/sources/ggml/src/ggml-cpu/spacemit/ime.cpp +3 -2
  222. data/ext/sources/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
  223. data/ext/sources/ggml/src/ggml-cpu/unary-ops.h +7 -0
  224. data/ext/sources/ggml/src/ggml-cpu/vec.cpp +124 -1
  225. data/ext/sources/ggml/src/ggml-cpu/vec.h +261 -146
  226. data/ext/sources/ggml/src/ggml-cuda/CMakeLists.txt +72 -1
  227. data/ext/sources/ggml/src/ggml-cuda/argmax.cu +2 -2
  228. data/ext/sources/ggml/src/ggml-cuda/argsort.cu +123 -6
  229. data/ext/sources/ggml/src/ggml-cuda/argsort.cuh +16 -0
  230. data/ext/sources/ggml/src/ggml-cuda/binbcast.cu +1 -1
  231. data/ext/sources/ggml/src/ggml-cuda/common.cuh +353 -80
  232. data/ext/sources/ggml/src/ggml-cuda/convert.cuh +10 -0
  233. data/ext/sources/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  234. data/ext/sources/ggml/src/ggml-cuda/cpy.cu +339 -246
  235. data/ext/sources/ggml/src/ggml-cuda/cpy.cuh +1 -5
  236. data/ext/sources/ggml/src/ggml-cuda/cumsum.cu +307 -0
  237. data/ext/sources/ggml/src/ggml-cuda/cumsum.cuh +5 -0
  238. data/ext/sources/ggml/src/ggml-cuda/diag.cu +77 -0
  239. data/ext/sources/ggml/src/ggml-cuda/diag.cuh +5 -0
  240. data/ext/sources/ggml/src/ggml-cuda/fattn-common.cuh +31 -21
  241. data/ext/sources/ggml/src/ggml-cuda/fattn-mma-f16.cuh +663 -596
  242. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cu +35 -741
  243. data/ext/sources/ggml/src/ggml-cuda/fattn-tile.cuh +1241 -0
  244. data/ext/sources/ggml/src/ggml-cuda/fattn-vec.cuh +30 -37
  245. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cu +14 -13
  246. data/ext/sources/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  247. data/ext/sources/ggml/src/ggml-cuda/fattn.cu +83 -37
  248. data/ext/sources/ggml/src/ggml-cuda/fill.cu +37 -0
  249. data/ext/sources/ggml/src/ggml-cuda/fill.cuh +3 -0
  250. data/ext/sources/ggml/src/ggml-cuda/ggml-cuda.cu +1155 -164
  251. data/ext/sources/ggml/src/ggml-cuda/mean.cu +5 -4
  252. data/ext/sources/ggml/src/ggml-cuda/mma.cuh +741 -48
  253. data/ext/sources/ggml/src/ggml-cuda/mmf.cu +60 -12
  254. data/ext/sources/ggml/src/ggml-cuda/mmf.cuh +381 -42
  255. data/ext/sources/ggml/src/ggml-cuda/mmid.cu +164 -0
  256. data/ext/sources/ggml/src/ggml-cuda/mmid.cuh +5 -0
  257. data/ext/sources/ggml/src/ggml-cuda/mmq.cu +69 -176
  258. data/ext/sources/ggml/src/ggml-cuda/mmq.cuh +498 -171
  259. data/ext/sources/ggml/src/ggml-cuda/mmvf.cu +375 -79
  260. data/ext/sources/ggml/src/ggml-cuda/mmvf.cuh +3 -2
  261. data/ext/sources/ggml/src/ggml-cuda/mmvq.cu +241 -95
  262. data/ext/sources/ggml/src/ggml-cuda/mmvq.cuh +1 -1
  263. data/ext/sources/ggml/src/ggml-cuda/pad.cu +64 -33
  264. data/ext/sources/ggml/src/ggml-cuda/quantize.cu +151 -0
  265. data/ext/sources/ggml/src/ggml-cuda/quantize.cuh +14 -0
  266. data/ext/sources/ggml/src/ggml-cuda/rope.cu +192 -77
  267. data/ext/sources/ggml/src/ggml-cuda/rope.cuh +2 -0
  268. data/ext/sources/ggml/src/ggml-cuda/set-rows.cu +101 -47
  269. data/ext/sources/ggml/src/ggml-cuda/set.cu +39 -0
  270. data/ext/sources/ggml/src/ggml-cuda/set.cuh +7 -0
  271. data/ext/sources/ggml/src/ggml-cuda/softmax.cu +203 -6
  272. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cu +275 -0
  273. data/ext/sources/ggml/src/ggml-cuda/solve_tri.cuh +3 -0
  274. data/ext/sources/ggml/src/ggml-cuda/ssm-conv.cu +14 -20
  275. data/ext/sources/ggml/src/ggml-cuda/ssm-scan.cu +49 -84
  276. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  277. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  278. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  279. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  280. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  281. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  282. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  283. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  284. data/ext/sources/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  285. data/ext/sources/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +19 -1
  286. data/ext/sources/ggml/src/ggml-cuda/top-k.cu +96 -0
  287. data/ext/sources/ggml/src/ggml-cuda/top-k.cuh +3 -0
  288. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cu +168 -76
  289. data/ext/sources/ggml/src/ggml-cuda/topk-moe.cuh +11 -4
  290. data/ext/sources/ggml/src/ggml-cuda/tri.cu +136 -0
  291. data/ext/sources/ggml/src/ggml-cuda/tri.cuh +5 -0
  292. data/ext/sources/ggml/src/ggml-cuda/unary.cu +105 -11
  293. data/ext/sources/ggml/src/ggml-cuda/unary.cuh +36 -0
  294. data/ext/sources/ggml/src/ggml-cuda/upscale.cu +163 -7
  295. data/ext/sources/ggml/src/ggml-cuda/vendors/cuda.h +4 -0
  296. data/ext/sources/ggml/src/ggml-cuda/vendors/hip.h +12 -1
  297. data/ext/sources/ggml/src/ggml-cuda/vendors/musa.h +6 -0
  298. data/ext/sources/ggml/src/ggml-hexagon/CMakeLists.txt +80 -0
  299. data/ext/sources/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3151 -0
  300. data/ext/sources/ggml/src/ggml-hexagon/htp/CMakeLists.txt +44 -0
  301. data/ext/sources/ggml/src/ggml-hexagon/htp/act-ops.c +682 -0
  302. data/ext/sources/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
  303. data/ext/sources/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  304. data/ext/sources/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +566 -0
  305. data/ext/sources/ggml/src/ggml-hexagon/htp/get-rows-ops.c +112 -0
  306. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  307. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.c +63 -0
  308. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-dma.h +157 -0
  309. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-msg.h +165 -0
  310. data/ext/sources/ggml/src/ggml-hexagon/htp/htp-ops.h +92 -0
  311. data/ext/sources/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  312. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-exp.c +94 -0
  313. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-inverse.c +72 -0
  314. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
  315. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.c +1020 -0
  316. data/ext/sources/ggml/src/ggml-hexagon/htp/hvx-utils.h +1353 -0
  317. data/ext/sources/ggml/src/ggml-hexagon/htp/main.c +1001 -0
  318. data/ext/sources/ggml/src/ggml-hexagon/htp/matmul-ops.c +2503 -0
  319. data/ext/sources/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
  320. data/ext/sources/ggml/src/ggml-hexagon/htp/rope-ops.c +487 -0
  321. data/ext/sources/ggml/src/ggml-hexagon/htp/set-rows-ops.c +168 -0
  322. data/ext/sources/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
  323. data/ext/sources/ggml/src/ggml-hexagon/htp/unary-ops.c +287 -0
  324. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
  325. data/ext/sources/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  326. data/ext/sources/ggml/src/ggml-hexagon/htp-utils.c +454 -0
  327. data/ext/sources/ggml/src/ggml-hexagon/htp-utils.h +221 -0
  328. data/ext/sources/ggml/src/ggml-hexagon/op-desc.h +153 -0
  329. data/ext/sources/ggml/src/ggml-hip/CMakeLists.txt +8 -13
  330. data/ext/sources/ggml/src/ggml-impl.h +67 -6
  331. data/ext/sources/ggml/src/ggml-metal/ggml-metal-common.cpp +2 -2
  332. data/ext/sources/ggml/src/ggml-metal/ggml-metal-context.m +29 -20
  333. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.cpp +652 -285
  334. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.h +103 -56
  335. data/ext/sources/ggml/src/ggml-metal/ggml-metal-device.m +496 -118
  336. data/ext/sources/ggml/src/ggml-metal/ggml-metal-impl.h +231 -9
  337. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.cpp +1227 -224
  338. data/ext/sources/ggml/src/ggml-metal/ggml-metal-ops.h +12 -0
  339. data/ext/sources/ggml/src/ggml-metal/ggml-metal.cpp +14 -8
  340. data/ext/sources/ggml/src/ggml-metal/ggml-metal.metal +1972 -704
  341. data/ext/sources/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  342. data/ext/sources/ggml/src/ggml-opencl/CMakeLists.txt +11 -0
  343. data/ext/sources/ggml/src/ggml-opencl/ggml-opencl.cpp +1430 -120
  344. data/ext/sources/ggml/src/ggml-opencl/kernels/cvt.cl +63 -0
  345. data/ext/sources/ggml/src/ggml-opencl/kernels/expm1.cl +82 -0
  346. data/ext/sources/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  347. data/ext/sources/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +4 -3
  348. data/ext/sources/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  349. data/ext/sources/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  350. data/ext/sources/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  351. data/ext/sources/ggml/src/ggml-opencl/kernels/mean.cl +39 -0
  352. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  353. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  354. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  355. data/ext/sources/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  356. data/ext/sources/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  357. data/ext/sources/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  358. data/ext/sources/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  359. data/ext/sources/ggml/src/ggml-opencl/kernels/set_rows.cl +35 -16
  360. data/ext/sources/ggml/src/ggml-opencl/kernels/softplus.cl +88 -0
  361. data/ext/sources/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  362. data/ext/sources/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  363. data/ext/sources/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  364. data/ext/sources/ggml/src/ggml-opencl/kernels/transpose.cl +13 -0
  365. data/ext/sources/ggml/src/ggml-rpc/ggml-rpc.cpp +438 -156
  366. data/ext/sources/ggml/src/ggml-sycl/CMakeLists.txt +48 -3
  367. data/ext/sources/ggml/src/ggml-sycl/add-id.cpp +77 -0
  368. data/ext/sources/ggml/src/ggml-sycl/add-id.hpp +8 -0
  369. data/ext/sources/ggml/src/ggml-sycl/backend.hpp +6 -0
  370. data/ext/sources/ggml/src/ggml-sycl/binbcast.cpp +0 -9
  371. data/ext/sources/ggml/src/ggml-sycl/binbcast.hpp +0 -6
  372. data/ext/sources/ggml/src/ggml-sycl/common.hpp +117 -15
  373. data/ext/sources/ggml/src/ggml-sycl/concat.cpp +55 -44
  374. data/ext/sources/ggml/src/ggml-sycl/convert.cpp +34 -0
  375. data/ext/sources/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  376. data/ext/sources/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  377. data/ext/sources/ggml/src/ggml-sycl/cpy.cpp +0 -3
  378. data/ext/sources/ggml/src/ggml-sycl/dequantize.hpp +18 -0
  379. data/ext/sources/ggml/src/ggml-sycl/dpct/helper.hpp +76 -3
  380. data/ext/sources/ggml/src/ggml-sycl/element_wise.cpp +333 -300
  381. data/ext/sources/ggml/src/ggml-sycl/element_wise.hpp +10 -2
  382. data/ext/sources/ggml/src/ggml-sycl/ggml-sycl.cpp +335 -110
  383. data/ext/sources/ggml/src/ggml-sycl/mmvq.cpp +22 -0
  384. data/ext/sources/ggml/src/ggml-sycl/norm.cpp +156 -0
  385. data/ext/sources/ggml/src/ggml-sycl/norm.hpp +2 -0
  386. data/ext/sources/ggml/src/ggml-sycl/pad.cpp +97 -0
  387. data/ext/sources/ggml/src/ggml-sycl/pad.hpp +24 -0
  388. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.cpp +100 -0
  389. data/ext/sources/ggml/src/ggml-sycl/pad_reflect_1d.hpp +10 -0
  390. data/ext/sources/ggml/src/ggml-sycl/presets.hpp +2 -0
  391. data/ext/sources/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  392. data/ext/sources/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  393. data/ext/sources/ggml/src/ggml-sycl/roll.cpp +122 -0
  394. data/ext/sources/ggml/src/ggml-sycl/roll.hpp +20 -0
  395. data/ext/sources/ggml/src/ggml-sycl/rope.cpp +30 -17
  396. data/ext/sources/ggml/src/ggml-sycl/set.cpp +73 -0
  397. data/ext/sources/ggml/src/ggml-sycl/set.hpp +5 -0
  398. data/ext/sources/ggml/src/ggml-sycl/softmax.cpp +327 -162
  399. data/ext/sources/ggml/src/ggml-sycl/softmax.hpp +4 -0
  400. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  401. data/ext/sources/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  402. data/ext/sources/ggml/src/ggml-sycl/vecdotq.hpp +58 -0
  403. data/ext/sources/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
  404. data/ext/sources/ggml/src/ggml-vulkan/ggml-vulkan.cpp +5013 -2859
  405. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  406. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
  407. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  408. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  409. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  410. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  411. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  412. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  413. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  414. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  415. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  416. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  417. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  418. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  419. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +47 -49
  420. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  421. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  422. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  423. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +4 -4
  424. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  425. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  426. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  427. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  428. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  429. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  430. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  431. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  432. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +9 -21
  433. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  434. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  435. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  436. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  437. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +1 -1
  438. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  439. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +1 -1
  440. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +1 -1
  441. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +1 -1
  442. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  443. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  444. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  445. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  446. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  447. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  448. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  449. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  450. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  451. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  452. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  453. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  454. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  455. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
  456. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  457. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  458. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -3
  459. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  460. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +39 -17
  461. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +19 -1
  462. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +45 -7
  463. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +50 -12
  464. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +1 -1
  465. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  466. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  467. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  468. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  469. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  470. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  471. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  472. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +17 -2
  473. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +2 -0
  474. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +7 -0
  475. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +4 -4
  476. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +3 -3
  477. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  478. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  479. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +2 -2
  480. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +2 -2
  481. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +19 -7
  482. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +2 -3
  483. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
  484. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  485. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  486. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  487. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +2 -2
  488. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp → mul_mat_vec_base.glsl} +70 -25
  489. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  490. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +71 -21
  491. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +41 -25
  492. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +2 -2
  493. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +44 -26
  494. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +2 -2
  495. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +2 -2
  496. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +2 -2
  497. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  498. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  499. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +4 -6
  500. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +2 -2
  501. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +4 -6
  502. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +4 -6
  503. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +2 -2
  504. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +39 -36
  505. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
  506. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +78 -103
  507. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +34 -23
  508. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp → mul_mm_funcs.glsl} +69 -59
  509. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +72 -0
  510. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +88 -228
  511. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  512. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  513. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  514. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  515. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  516. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  517. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  518. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +21 -6
  519. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  520. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +10 -10
  521. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  522. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  523. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  524. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  525. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +50 -4
  526. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  527. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  528. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  529. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +234 -0
  530. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  531. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +6 -50
  532. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +6 -33
  533. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +6 -33
  534. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +28 -0
  535. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +6 -39
  536. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  537. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  538. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  539. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  540. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  541. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  542. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  543. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +2 -2
  544. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  545. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  546. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  547. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  548. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  549. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  550. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  551. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  552. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
  553. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  554. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  555. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  556. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +2 -25
  557. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  558. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  559. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  560. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  561. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +1 -1
  562. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  563. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  564. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  565. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
  566. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  567. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +345 -26
  568. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +90 -12
  569. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +335 -151
  570. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  571. data/ext/sources/ggml/src/ggml-webgpu/CMakeLists.txt +28 -2
  572. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +169 -0
  573. data/ext/sources/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1964 -435
  574. data/ext/sources/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  575. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
  576. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
  577. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +33 -10
  578. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +591 -0
  579. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +1 -1
  580. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  581. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +6 -6
  582. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
  583. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
  584. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
  585. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
  586. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +83 -17
  587. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  588. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
  589. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.tmpl.wgsl +112 -0
  590. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  591. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +483 -0
  592. data/ext/sources/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  593. data/ext/sources/ggml/src/ggml-zendnn/ggml-zendnn.cpp +466 -0
  594. data/ext/sources/ggml/src/ggml.c +425 -33
  595. data/ext/sources/include/whisper.h +1 -0
  596. data/ext/sources/src/CMakeLists.txt +3 -1
  597. data/ext/sources/src/whisper.cpp +101 -35
  598. data/ext/sources/tests/CMakeLists.txt +2 -2
  599. data/ext/sources/tests/test-vad-full.cpp +4 -2
  600. data/ext/sources/tests/test-vad.cpp +1 -1
  601. data/extsources.rb +1 -0
  602. data/lib/whisper/model/uri.rb +17 -18
  603. data/sig/whisper.rbs +119 -2
  604. data/test/test_params.rb +16 -8
  605. data/test/test_segment.rb +0 -1
  606. data/test/test_token.rb +70 -0
  607. data/test/test_vad.rb +1 -1
  608. data/test/test_vad_context.rb +50 -0
  609. data/test/test_vad_segment.rb +19 -0
  610. data/test/test_vad_segments.rb +16 -0
  611. data/test/test_whisper.rb +7 -0
  612. data/whispercpp.gemspec +1 -1
  613. metadata +287 -34
  614. data/ext/sources/build-xcframework.sh +0 -571
  615. data/ext/sources/ggml/src/ggml-cann/Doxyfile +0 -2579
  616. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  617. data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  618. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl +0 -44
  619. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl +0 -41
  620. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  621. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul.tmpl.wgsl +0 -44
  622. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/mul_in_place.tmpl.wgsl +0 -41
  623. data/ext/sources/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm_in_place.wgsl +0 -48
  624. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  625. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  626. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  627. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  628. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  629. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  630. /data/ext/sources/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -3,12 +3,14 @@
3
3
  #include "ggml.h" // ggml_op
4
4
 
5
5
  #include <string>
6
+ #include <set>
6
7
 
7
8
  //
8
9
  // gguf constants (sync with gguf.py)
9
10
  //
10
11
 
11
12
  enum llm_arch {
13
+ LLM_ARCH_CLIP,
12
14
  LLM_ARCH_LLAMA,
13
15
  LLM_ARCH_LLAMA4,
14
16
  LLM_ARCH_DECI,
@@ -22,6 +24,7 @@ enum llm_arch {
22
24
  LLM_ARCH_STARCODER,
23
25
  LLM_ARCH_REFACT,
24
26
  LLM_ARCH_BERT,
27
+ LLM_ARCH_MODERN_BERT,
25
28
  LLM_ARCH_NOMIC_BERT,
26
29
  LLM_ARCH_NOMIC_BERT_MOE,
27
30
  LLM_ARCH_NEO_BERT,
@@ -35,11 +38,15 @@ enum llm_arch {
35
38
  LLM_ARCH_QWEN2VL,
36
39
  LLM_ARCH_QWEN3,
37
40
  LLM_ARCH_QWEN3MOE,
41
+ LLM_ARCH_QWEN3NEXT,
42
+ LLM_ARCH_QWEN3VL,
43
+ LLM_ARCH_QWEN3VLMOE,
38
44
  LLM_ARCH_PHI2,
39
45
  LLM_ARCH_PHI3,
40
46
  LLM_ARCH_PHIMOE,
41
47
  LLM_ARCH_PLAMO,
42
48
  LLM_ARCH_PLAMO2,
49
+ LLM_ARCH_PLAMO3,
43
50
  LLM_ARCH_CODESHELL,
44
51
  LLM_ARCH_ORION,
45
52
  LLM_ARCH_INTERNLM2,
@@ -75,6 +82,7 @@ enum llm_arch {
75
82
  LLM_ARCH_JAIS,
76
83
  LLM_ARCH_NEMOTRON,
77
84
  LLM_ARCH_NEMOTRON_H,
85
+ LLM_ARCH_NEMOTRON_H_MOE,
78
86
  LLM_ARCH_EXAONE,
79
87
  LLM_ARCH_EXAONE4,
80
88
  LLM_ARCH_RWKV6,
@@ -88,8 +96,10 @@ enum llm_arch {
88
96
  LLM_ARCH_WAVTOKENIZER_DEC,
89
97
  LLM_ARCH_PLM,
90
98
  LLM_ARCH_BAILINGMOE,
99
+ LLM_ARCH_BAILINGMOE2,
91
100
  LLM_ARCH_DOTS1,
92
101
  LLM_ARCH_ARCEE,
102
+ LLM_ARCH_AFMOE,
93
103
  LLM_ARCH_ERNIE4_5,
94
104
  LLM_ARCH_ERNIE4_5_MOE,
95
105
  LLM_ARCH_HUNYUAN_MOE,
@@ -97,12 +107,22 @@ enum llm_arch {
97
107
  LLM_ARCH_SMOLLM3,
98
108
  LLM_ARCH_OPENAI_MOE,
99
109
  LLM_ARCH_LFM2,
110
+ LLM_ARCH_LFM2MOE,
100
111
  LLM_ARCH_DREAM,
101
112
  LLM_ARCH_SMALLTHINKER,
102
113
  LLM_ARCH_LLADA,
103
114
  LLM_ARCH_LLADA_MOE,
104
115
  LLM_ARCH_SEED_OSS,
105
116
  LLM_ARCH_GROVEMOE,
117
+ LLM_ARCH_APERTUS,
118
+ LLM_ARCH_MINIMAX_M2,
119
+ LLM_ARCH_COGVLM,
120
+ LLM_ARCH_RND1,
121
+ LLM_ARCH_PANGU_EMBED,
122
+ LLM_ARCH_MISTRAL3,
123
+ LLM_ARCH_MIMO2,
124
+ LLM_ARCH_LLAMA_EMBED,
125
+ LLM_ARCH_MAINCODER,
106
126
  LLM_ARCH_UNKNOWN,
107
127
  };
108
128
 
@@ -112,6 +132,18 @@ enum llm_kv {
112
132
  LLM_KV_GENERAL_QUANTIZATION_VERSION,
113
133
  LLM_KV_GENERAL_ALIGNMENT,
114
134
  LLM_KV_GENERAL_FILE_TYPE,
135
+ LLM_KV_GENERAL_SAMPLING_SEQUENCE,
136
+ LLM_KV_GENERAL_SAMPLING_TOP_K,
137
+ LLM_KV_GENERAL_SAMPLING_TOP_P,
138
+ LLM_KV_GENERAL_SAMPLING_MIN_P,
139
+ LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY,
140
+ LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,
141
+ LLM_KV_GENERAL_SAMPLING_TEMP,
142
+ LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,
143
+ LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,
144
+ LLM_KV_GENERAL_SAMPLING_MIROSTAT,
145
+ LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,
146
+ LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,
115
147
  LLM_KV_GENERAL_NAME,
116
148
  LLM_KV_GENERAL_AUTHOR,
117
149
  LLM_KV_GENERAL_VERSION,
@@ -124,6 +156,7 @@ enum llm_kv {
124
156
  LLM_KV_VOCAB_SIZE,
125
157
  LLM_KV_CONTEXT_LENGTH,
126
158
  LLM_KV_EMBEDDING_LENGTH,
159
+ LLM_KV_EMBEDDING_LENGTH_OUT,
127
160
  LLM_KV_FEATURES_LENGTH,
128
161
  LLM_KV_BLOCK_COUNT,
129
162
  LLM_KV_LEADING_DENSE_BLOCK_COUNT,
@@ -136,6 +169,8 @@ enum llm_kv {
136
169
  LLM_KV_EXPERT_COUNT,
137
170
  LLM_KV_EXPERT_USED_COUNT,
138
171
  LLM_KV_EXPERT_SHARED_COUNT,
172
+ LLM_KV_EXPERT_GROUP_COUNT,
173
+ LLM_KV_EXPERT_GROUP_USED_COUNT,
139
174
  LLM_KV_EXPERT_WEIGHTS_SCALE,
140
175
  LLM_KV_EXPERT_WEIGHTS_NORM,
141
176
  LLM_KV_EXPERT_GATING_FUNC,
@@ -143,6 +178,7 @@ enum llm_kv {
143
178
  LLM_KV_EXPERTS_PER_GROUP,
144
179
  LLM_KV_MOE_EVERY_N_LAYERS,
145
180
  LLM_KV_NEXTN_PREDICT_LAYERS,
181
+ LLM_KV_NUM_DEEPSTACK_LAYERS,
146
182
  LLM_KV_POOLING_TYPE,
147
183
  LLM_KV_LOGIT_SCALE,
148
184
  LLM_KV_DECODER_START_TOKEN_ID,
@@ -178,15 +214,18 @@ enum llm_kv {
178
214
  LLM_KV_ATTENTION_GATE_LORA_RANK,
179
215
  LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
180
216
  LLM_KV_ATTENTION_SLIDING_WINDOW,
217
+ LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,
181
218
  LLM_KV_ATTENTION_SCALE,
182
219
  LLM_KV_ATTENTION_OUTPUT_SCALE,
183
220
  LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
221
+ LLM_KV_ATTENTION_TEMPERATURE_SCALE,
184
222
  LLM_KV_ATTENTION_KEY_LENGTH_MLA,
185
223
  LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
186
224
 
187
225
  LLM_KV_ROPE_DIMENSION_COUNT,
188
226
  LLM_KV_ROPE_DIMENSION_SECTIONS,
189
227
  LLM_KV_ROPE_FREQ_BASE,
228
+ LLM_KV_ROPE_FREQ_BASE_SWA,
190
229
  LLM_KV_ROPE_SCALE_LINEAR,
191
230
  LLM_KV_ROPE_SCALING_TYPE,
192
231
  LLM_KV_ROPE_SCALING_FACTOR,
@@ -260,10 +299,21 @@ enum llm_kv {
260
299
 
261
300
  LLM_KV_SHORTCONV_L_CACHE,
262
301
 
302
+ LLM_KV_XIELU_ALPHA_N,
303
+ LLM_KV_XIELU_ALPHA_P,
304
+ LLM_KV_XIELU_BETA,
305
+ LLM_KV_XIELU_EPS,
306
+
263
307
  // deprecated:
264
308
  LLM_KV_TOKENIZER_PREFIX_ID,
265
309
  LLM_KV_TOKENIZER_SUFFIX_ID,
266
310
  LLM_KV_TOKENIZER_MIDDLE_ID,
311
+
312
+ // sentence-transformers dense layers in and out features
313
+ LLM_KV_DENSE_2_FEAT_IN,
314
+ LLM_KV_DENSE_2_FEAT_OUT,
315
+ LLM_KV_DENSE_3_FEAT_IN,
316
+ LLM_KV_DENSE_3_FEAT_OUT,
267
317
  };
268
318
 
269
319
  enum llm_tensor {
@@ -271,8 +321,11 @@ enum llm_tensor {
271
321
  LLM_TENSOR_TOKEN_EMBD_NORM,
272
322
  LLM_TENSOR_TOKEN_TYPES,
273
323
  LLM_TENSOR_POS_EMBD,
324
+ LLM_TENSOR_DENSE_2_OUT,
325
+ LLM_TENSOR_DENSE_3_OUT,
274
326
  LLM_TENSOR_OUTPUT,
275
327
  LLM_TENSOR_OUTPUT_NORM,
328
+ LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
276
329
  LLM_TENSOR_ROPE_FREQS,
277
330
  LLM_TENSOR_ROPE_FACTORS_LONG,
278
331
  LLM_TENSOR_ROPE_FACTORS_SHORT,
@@ -287,6 +340,7 @@ enum llm_tensor {
287
340
  LLM_TENSOR_ATTN_POST_NORM,
288
341
  LLM_TENSOR_ATTN_ROT_EMBD,
289
342
  LLM_TENSOR_ATTN_SINKS,
343
+ LLM_TENSOR_ATTN_GATE,
290
344
  LLM_TENSOR_FFN_GATE_INP,
291
345
  LLM_TENSOR_FFN_GATE_INP_SHEXP,
292
346
  LLM_TENSOR_FFN_NORM,
@@ -336,11 +390,13 @@ enum llm_tensor {
336
390
  LLM_TENSOR_SSM_DT,
337
391
  LLM_TENSOR_SSM_DT_NORM,
338
392
  LLM_TENSOR_SSM_A,
393
+ LLM_TENSOR_SSM_A_NOSCAN, // qwen3next special case with MUL instead of SSM_SCAN
339
394
  LLM_TENSOR_SSM_B_NORM,
340
395
  LLM_TENSOR_SSM_C_NORM,
341
396
  LLM_TENSOR_SSM_D,
342
397
  LLM_TENSOR_SSM_NORM,
343
398
  LLM_TENSOR_SSM_OUT,
399
+ LLM_TENSOR_SSM_BETA_ALPHA, // qwen3next
344
400
  LLM_TENSOR_TIME_MIX_W0,
345
401
  LLM_TENSOR_TIME_MIX_W1,
346
402
  LLM_TENSOR_TIME_MIX_W2,
@@ -436,6 +492,11 @@ enum llm_tensor {
436
492
  LLM_TENSOR_SHORTCONV_CONV,
437
493
  LLM_TENSOR_SHORTCONV_INPROJ,
438
494
  LLM_TENSOR_SHORTCONV_OUTPROJ,
495
+ LLM_TENSOR_VISEXP_ATTN_QKV,
496
+ LLM_TENSOR_VISEXP_ATTN_OUT,
497
+ LLM_TENSOR_VISEXP_FFN_GATE,
498
+ LLM_TENSOR_VISEXP_FFN_DOWN,
499
+ LLM_TENSOR_VISEXP_FFN_UP,
439
500
  LLM_TENSOR_NEXTN_EH_PROJ,
440
501
  LLM_TENSOR_NEXTN_EMBED_TOKENS,
441
502
  LLM_TENSOR_NEXTN_ENORM,
@@ -475,6 +536,10 @@ struct LLM_TN_IMPL {
475
536
  const int bid;
476
537
  const int xid;
477
538
 
539
+ const std::set<llm_tensor> model_tensors;
540
+
541
+ LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
542
+
478
543
  std::string str() const;
479
544
 
480
545
  operator std::string() const {
@@ -496,11 +561,11 @@ struct LLM_TN {
496
561
  llm_arch arch;
497
562
 
498
563
  LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
499
- return { arch, tensor, suffix, bid, xid };
564
+ return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
500
565
  }
501
566
 
502
567
  LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
503
- return { arch, tensor, nullptr, bid, xid };
568
+ return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
504
569
  }
505
570
  };
506
571
 
@@ -215,6 +215,7 @@ bool llama_batch_allocr::init(
215
215
  /*.n_seq_tokens =*/ (uint32_t) 1,
216
216
  /*.n_seqs =*/ (uint32_t) batch.n_tokens,
217
217
  /*.n_seqs_unq =*/ (uint32_t) this->seq_id_unq.size(),
218
+ /*.n_pos =*/ n_pos_per_embd,
218
219
  /*.token =*/ batch.token,
219
220
  /*.embd =*/ batch.embd,
220
221
  /*.pos =*/ batch.pos,
@@ -251,46 +252,72 @@ bool llama_batch_allocr::init(
251
252
  // consistency checks
252
253
  //
253
254
 
254
- for (uint32_t s = 0; s < n_seq_max; ++s) {
255
- if (seq_pos[s].empty()) {
256
- continue;
255
+ if (n_pos_per_embd > 1) {
256
+ // M-RoPE case: allow position to "jump" forward only (non-continuous positions are allowed)
257
+ for (uint32_t s = 0; s < n_seq_max; ++s) {
258
+ if (seq_pos[s].empty()) {
259
+ continue;
260
+ }
261
+
262
+ const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
263
+
264
+ if (batch.token) {
265
+ if (p0 >= 0 && p0 >= seq_pos_min(s)) {
266
+ LLAMA_LOG_ERROR(
267
+ "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
268
+ " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
269
+ " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
270
+ " for M-RoPE, it is required that the position satisfies: X < Y\n",
271
+ __func__, s, s, p0, s, seq_pos_min(s));
272
+
273
+ return false;
274
+ }
275
+ } else {
276
+ // embedding inputs can have overlapping positions
277
+ if (p0 >= 0 && p0 > seq_pos_min(s)) {
278
+ LLAMA_LOG_ERROR(
279
+ "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
280
+ " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
281
+ " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
282
+ " for M-RoPE, it is required that the position satisfies: X <= Y\n",
283
+ __func__, s, s, p0, s, seq_pos_min(s));
284
+
285
+ return false;
286
+ }
287
+ }
257
288
  }
289
+ } else {
290
+ for (uint32_t s = 0; s < n_seq_max; ++s) {
291
+ if (seq_pos[s].empty()) {
292
+ continue;
293
+ }
258
294
 
259
- const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
295
+ const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
260
296
 
261
- if (p0 >= 0) {
262
- bool ok = true;
297
+ if (p0 >= 0) {
298
+ bool ok = true;
263
299
 
264
- if (batch.token) {
265
300
  if (seq_pos_min(s) != p0 + 1) {
266
301
  ok = false;
267
302
  }
268
- } else {
269
- assert(batch.embd);
270
303
 
271
- // for embeddings (typically used as vision input), we allow them to have repeating positions
272
- // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
273
- if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
274
- ok = false;
304
+ if (!ok) {
305
+ LLAMA_LOG_ERROR(
306
+ "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
307
+ " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
308
+ " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
309
+ " it is required that the sequence positions remain consecutive: Y = X + 1\n",
310
+ __func__, s, s, p0, s, seq_pos_min(s));
311
+
312
+ return false;
275
313
  }
276
314
  }
277
315
 
278
- if (!ok) {
279
- LLAMA_LOG_ERROR(
280
- "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
281
- " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
282
- " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
283
- " it is required that the sequence positions remain consecutive: Y = X + 1\n",
284
- __func__, s, s, p0, s, seq_pos_min(s));
285
-
316
+ if (seq_pos_max(s) - seq_pos_min(s) + 1 > (int) seq_pos[s].size()) {
317
+ LLAMA_LOG_ERROR("%s: sequence %d positions are not continuous\n", __func__, s);
286
318
  return false;
287
319
  }
288
320
  }
289
-
290
- if (seq_pos_max(s) - seq_pos_min(s) + 1 > (int) seq_pos[s].size()) {
291
- LLAMA_LOG_ERROR("%s: sequence %d positions are not continuous\n", __func__, s);
292
- return false;
293
- }
294
321
  }
295
322
 
296
323
  if (memory) {
@@ -389,6 +416,7 @@ llama_ubatch llama_batch_allocr::ubatch_reserve(uint32_t n_seq_tokens, uint32_t
389
416
  /*.n_seq_tokens =*/ n_seq_tokens,
390
417
  /*.n_seqs =*/ n_seqs,
391
418
  /*.n_seqs_unq =*/ n_seqs,
419
+ /*.n_pos =*/ n_pos_per_embd,
392
420
 
393
421
  /*.token =*/ udata->token.data(),
394
422
  /*.embd =*/ nullptr,
@@ -655,10 +683,8 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
655
683
 
656
684
  auto udata = std::make_shared<llama_ubatch::data_t>();
657
685
 
658
- const int32_t n_pos_cur = batch.embd ? n_pos_per_embd : 1;
659
-
660
686
  const int64_t n_embd_all = batch.embd ? (int64_t) n_tokens*n_embd : 0;
661
- const int64_t n_pos_all = (int64_t) n_tokens*n_pos_cur;
687
+ const int64_t n_pos_all = (int64_t) n_tokens*n_pos_per_embd;
662
688
 
663
689
  udata->token .resize(n_tokens);
664
690
  udata->embd .resize(n_embd_all);
@@ -669,6 +695,8 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
669
695
  udata->seq_idx .resize(LLAMA_MAX_SEQ, -1);
670
696
  udata->output .resize(n_tokens);
671
697
 
698
+ udata->seq_id_data.reserve(n_tokens);
699
+
672
700
  seq_set_t seq_set_unq;
673
701
 
674
702
  for (size_t i = 0; i < idxs.size(); ++i) {
@@ -680,16 +708,23 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
680
708
  memcpy(udata->embd.data() + i*n_embd, batch.embd + (int64_t) idxs[i]*n_embd, n_embd*sizeof(float));
681
709
  }
682
710
 
683
- for (int j = 0; j < n_pos_cur; ++j) {
684
- udata->pos[j*n_tokens + i] = batch.pos[j*batch.n_tokens + idxs[i]];
711
+ for (size_t j = 0; j < (size_t)n_pos_per_embd; ++j) {
712
+ // if we are using M-RoPE
713
+ // if the current batch is text, we need to broadcast the same position across all RoPE sections
714
+ // otherwise, the input batch is image embeddings, we copy the positions as-is
715
+ // if we are not using M-RoPE, there is only one position per token (this loop runs only once)
716
+ size_t src_off = batch.token ? 0 : j*batch.n_tokens;
717
+ udata->pos[j*n_tokens + i] = batch.pos[src_off + idxs[i]];
685
718
  }
686
719
 
687
720
  udata->n_seq_id[i] = batch.n_seq_id[idxs[i]];
688
- udata->seq_id[i] = batch.seq_id[idxs[i]];
689
721
  udata->output[i] = batch.logits[idxs[i]];
690
722
 
691
723
  for (int s = 0; s < udata->n_seq_id[i]; ++s) {
692
- seq_set_unq.set(udata->seq_id[i][s]);
724
+ const llama_seq_id seq_id = batch.seq_id[idxs[i]][s];
725
+
726
+ udata->seq_id_data.push_back(seq_id);
727
+ seq_set_unq.set(seq_id);
693
728
  }
694
729
 
695
730
  if (udata->output[i]) {
@@ -697,6 +732,12 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
697
732
  }
698
733
  }
699
734
 
735
+ llama_seq_id * seq_id_ptr = udata->seq_id_data.data();
736
+ for (size_t i = 0; i < idxs.size(); ++i) {
737
+ udata->seq_id[i] = seq_id_ptr;
738
+ seq_id_ptr += udata->n_seq_id[i];
739
+ }
740
+
700
741
  for (uint32_t s = 0; s < n_seq_max; ++s) {
701
742
  if (seq_set_unq.test(s)) {
702
743
  udata->seq_idx[s] = udata->seq_id_unq.size();
@@ -710,6 +751,7 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
710
751
  /*.n_seq_tokens =*/ n_tokens/n_seqs,
711
752
  /*.n_seqs =*/ n_seqs,
712
753
  /*.n_seqs_unq =*/ (uint32_t) udata->seq_id_unq.size(),
754
+ /*.n_pos =*/ n_pos_per_embd,
713
755
 
714
756
  /*.token =*/ batch.token ? udata->token.data() : nullptr,
715
757
  /*.embd =*/ batch.embd ? udata->embd.data() : nullptr,
@@ -17,6 +17,16 @@ struct llama_ubatch {
17
17
  return b_equal_seqs != 0;
18
18
  }
19
19
 
20
+ // typical for M-RoPE cases:
21
+ // 0 - sequential position of the tokens/embeddings in the sequence
22
+ // 1 - y position in the image
23
+ // 2 - x position in the image
24
+ // 3 - other
25
+ bool is_pos_2d() const {
26
+ // TODO @ngxson : we may need to check for model arch when more models use >1 positions
27
+ return n_pos >= 3;
28
+ }
29
+
20
30
  uint32_t b_equal_seqs; // note: this is a boolean, but we use an int32_t for alignment
21
31
  // otherwise address sanitizer complains
22
32
  // TODO: whole_seqs for embeddings?
@@ -25,6 +35,7 @@ struct llama_ubatch {
25
35
  uint32_t n_seq_tokens; // tokens per sequence set
26
36
  uint32_t n_seqs; // sequence sets in the ubatch
27
37
  uint32_t n_seqs_unq; // unique sequence ids in the ubatch
38
+ uint32_t n_pos; // number of position inputs for each token/embedding
28
39
 
29
40
  // seq_id_unq: unique sequence ids in the ubatch
30
41
  // seq_idx: indices of the unique sequence ids in the ubatch in [0, n_seqs_unq)
@@ -33,7 +44,7 @@ struct llama_ubatch {
33
44
  // // size | idx | val
34
45
  llama_token * token; // [n_tokens] | i | id, token
35
46
  float * embd; // [n_embd, n_tokens] | i | embd
36
- llama_pos * pos; // [n_tokens] | i | pos
47
+ llama_pos * pos; // [n_tokens*n_pos] | i | pos
37
48
  int32_t * n_seq_id; // [n_tokens] | i | -
38
49
  llama_seq_id ** seq_id; // [n_tokens] | s | s0, s1, seq_id
39
50
  llama_seq_id * seq_id_unq; // [n_seqs_unq] | s | seq_id
@@ -45,13 +56,15 @@ struct llama_ubatch {
45
56
  std::vector<float> embd;
46
57
  std::vector<llama_pos> pos;
47
58
  std::vector<int32_t> n_seq_id;
48
- std::vector<llama_seq_id *> seq_id;
59
+ std::vector<llama_seq_id *> seq_id; // these point into the seq_id_data below
49
60
  std::vector<llama_seq_id> seq_id_unq;
50
61
  std::vector<int32_t> seq_idx;
51
62
  std::vector<int8_t> output;
63
+
64
+ std::vector<llama_seq_id> seq_id_data;
52
65
  };
53
66
 
54
- // the llama_ubatch pointers above point to this data if set. otherwise - points to non-owning data
67
+ // the llama_ubatch pointers above point to this data if set. otherwise - point to external non-owning data
55
68
  std::shared_ptr<data_t> data;
56
69
  };
57
70
 
@@ -123,7 +136,7 @@ private:
123
136
  uint32_t n_seq_max;
124
137
  uint32_t n_outputs;
125
138
 
126
- std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
139
+ std::array<llama_seq_id, 1> seq_id_0 = {{ 0 }}; // default sequence id
127
140
 
128
141
  std::vector<llama_pos> pos;
129
142
  std::vector<int32_t> n_seq_id;
@@ -63,6 +63,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
63
63
  { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
64
64
  { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
65
65
  { "bailing", LLM_CHAT_TEMPLATE_BAILING },
66
+ { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK },
67
+ { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 },
66
68
  { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
67
69
  { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
68
70
  { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
@@ -71,6 +73,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
71
73
  { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
72
74
  { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
73
75
  { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
76
+ { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
77
+ { "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN },
74
78
  };
75
79
 
76
80
  llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -191,6 +195,10 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
191
195
  return LLM_CHAT_TEMPLATE_YANDEX;
192
196
  } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
193
197
  return LLM_CHAT_TEMPLATE_BAILING;
198
+ } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
199
+ return LLM_CHAT_TEMPLATE_BAILING_THINK;
200
+ } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
201
+ return LLM_CHAT_TEMPLATE_BAILING2;
194
202
  } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
195
203
  return LLM_CHAT_TEMPLATE_LLAMA4;
196
204
  } else if (tmpl_contains("<|endofuserprompt|>")) {
@@ -207,6 +215,10 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
207
215
  return LLM_CHAT_TEMPLATE_SEED_OSS;
208
216
  } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
209
217
  return LLM_CHAT_TEMPLATE_GROK_2;
218
+ } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
219
+ return LLM_CHAT_TEMPLATE_PANGU_EMBED;
220
+ } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
221
+ return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
210
222
  }
211
223
  return LLM_CHAT_TEMPLATE_UNKNOWN;
212
224
  }
@@ -590,7 +602,7 @@ int32_t llm_chat_apply_template(
590
602
  ss << message->content << "<|end_of_text|>\n";
591
603
  }
592
604
  if (add_ass) {
593
- ss << "<|start_of_role|>assistant<|end_of_role|>\n";
605
+ ss << "<|start_of_role|>assistant<|end_of_role|>";
594
606
  }
595
607
  } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
596
608
  // GigaChat template
@@ -644,8 +656,8 @@ int32_t llm_chat_apply_template(
644
656
  if (add_ass) {
645
657
  ss << " Ассистент:[SEP]";
646
658
  }
647
- } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
648
- // Bailing (Ling) template
659
+ } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
660
+ // Bailing (Ling/Ring) template
649
661
  for (auto message : chat) {
650
662
  std::string role(message->role);
651
663
 
@@ -658,6 +670,33 @@ int32_t llm_chat_apply_template(
658
670
  ss << "<role>" << role << "</role>" << message->content;
659
671
  }
660
672
 
673
+ if (add_ass) {
674
+ ss << "<role>ASSISTANT</role>";
675
+
676
+ if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
677
+ ss << "<think>";
678
+ }
679
+ }
680
+ } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
681
+ // Bailing2 (Ling 2.0) template
682
+ bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
683
+
684
+ if (!has_system) {
685
+ ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
686
+ }
687
+
688
+ for (auto message : chat) {
689
+ std::string role(message->role);
690
+
691
+ if (role == "user") {
692
+ role = "HUMAN";
693
+ } else {
694
+ std::transform(role.begin(), role.end(), role.begin(), ::toupper);
695
+ }
696
+
697
+ ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
698
+ }
699
+
661
700
  if (add_ass) {
662
701
  ss << "<role>ASSISTANT</role>";
663
702
  }
@@ -780,6 +819,43 @@ int32_t llm_chat_apply_template(
780
819
  if (add_ass) {
781
820
  ss << "Assistant:";
782
821
  }
822
+ }else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
823
+ // [unused9]系统:xxx[unused10]
824
+ // [unused9]用户:xxx[unused10]
825
+ // [unused9]助手:xxx[unused10]
826
+ // ...
827
+ for (size_t i = 0; i < chat.size(); ++i) {
828
+ const auto & msg = chat[i];
829
+ const std::string & role = msg->role;
830
+ const std::string & content = msg->content;
831
+
832
+ if (i == 0 && role != "system") {
833
+ ss << "[unused9]系统:[unused10]";
834
+ }
835
+
836
+ if (role == "system") {
837
+ ss << "[unused9]系统:" << content << "[unused10]";
838
+ } else if (role == "user") {
839
+ ss << "[unused9]用户:" << content << "[unused10]";
840
+ } else if (role == "assistant") {
841
+ ss << "[unused9]助手:" << content << "[unused10]";
842
+ } else if (role == "tool") {
843
+ ss << "[unused9]工具:" << content << "[unused10]";
844
+ } else if (role == "function") {
845
+ ss << "[unused9]方法:" << content << "[unused10]";
846
+ }
847
+ }
848
+ if (add_ass) {
849
+ ss << "[unused9]助手:";
850
+ }
851
+ } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
852
+ for (auto message : chat) {
853
+ std::string role(message->role);
854
+ ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
855
+ }
856
+ if (add_ass) {
857
+ ss << "<|begin|>assistant";
858
+ }
783
859
  } else {
784
860
  // template not supported
785
861
  return -1;
@@ -42,6 +42,8 @@ enum llm_chat_template {
42
42
  LLM_CHAT_TEMPLATE_MEGREZ,
43
43
  LLM_CHAT_TEMPLATE_YANDEX,
44
44
  LLM_CHAT_TEMPLATE_BAILING,
45
+ LLM_CHAT_TEMPLATE_BAILING_THINK,
46
+ LLM_CHAT_TEMPLATE_BAILING2,
45
47
  LLM_CHAT_TEMPLATE_LLAMA4,
46
48
  LLM_CHAT_TEMPLATE_SMOLVLM,
47
49
  LLM_CHAT_TEMPLATE_DOTS1,
@@ -51,6 +53,8 @@ enum llm_chat_template {
51
53
  LLM_CHAT_TEMPLATE_KIMI_K2,
52
54
  LLM_CHAT_TEMPLATE_SEED_OSS,
53
55
  LLM_CHAT_TEMPLATE_GROK_2,
56
+ LLM_CHAT_TEMPLATE_PANGU_EMBED,
57
+ LLM_CHAT_TEMPLATE_SOLAR_OPEN,
54
58
  LLM_CHAT_TEMPLATE_UNKNOWN,
55
59
  };
56
60