@novastera-oss/llamarn 0.4.1 → 0.4.3-beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (976) hide show
  1. package/RNLlamaCpp.podspec +3 -0
  2. package/android/CMakeLists.txt +2 -0
  3. package/android/src/main/cpp/include/llama.h +44 -21
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/build-info.cpp +2 -2
  21. package/cpp/llama.cpp/CMakeLists.txt +12 -0
  22. package/cpp/llama.cpp/CODEOWNERS +116 -10
  23. package/cpp/llama.cpp/CONTRIBUTING.md +30 -3
  24. package/cpp/llama.cpp/README.md +13 -5
  25. package/cpp/llama.cpp/build-xcframework.sh +5 -0
  26. package/cpp/llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  27. package/cpp/llama.cpp/common/CMakeLists.txt +12 -2
  28. package/cpp/llama.cpp/common/arg.cpp +303 -795
  29. package/cpp/llama.cpp/common/arg.h +2 -3
  30. package/cpp/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
  31. package/cpp/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
  32. package/cpp/llama.cpp/common/chat-parser.cpp +156 -15
  33. package/cpp/llama.cpp/common/chat-parser.h +13 -0
  34. package/cpp/llama.cpp/common/chat.cpp +1147 -88
  35. package/cpp/llama.cpp/common/chat.h +16 -3
  36. package/cpp/llama.cpp/common/common.cpp +70 -15
  37. package/cpp/llama.cpp/common/common.h +57 -19
  38. package/cpp/llama.cpp/common/download.cpp +1072 -0
  39. package/cpp/llama.cpp/common/download.h +55 -0
  40. package/cpp/llama.cpp/common/http.h +73 -0
  41. package/cpp/llama.cpp/common/json-partial.cpp +70 -2
  42. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +61 -22
  43. package/cpp/llama.cpp/common/json-schema-to-grammar.h +2 -0
  44. package/cpp/llama.cpp/common/log.cpp +59 -2
  45. package/cpp/llama.cpp/common/log.h +12 -4
  46. package/cpp/llama.cpp/common/sampling.cpp +84 -8
  47. package/cpp/llama.cpp/common/sampling.h +3 -1
  48. package/cpp/llama.cpp/common/speculative.cpp +1 -1
  49. package/cpp/llama.cpp/convert_hf_to_gguf.py +1608 -233
  50. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +6 -1
  51. package/cpp/llama.cpp/convert_lora_to_gguf.py +37 -5
  52. package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -28
  53. package/cpp/llama.cpp/ggml/include/ggml-backend.h +19 -1
  54. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +1 -1
  55. package/cpp/llama.cpp/ggml/include/ggml-hexagon.h +19 -0
  56. package/cpp/llama.cpp/ggml/include/ggml-metal.h +1 -6
  57. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +7 -9
  58. package/cpp/llama.cpp/ggml/include/ggml-zdnn.h +2 -1
  59. package/cpp/llama.cpp/ggml/include/ggml.h +199 -6
  60. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +38 -0
  61. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +299 -130
  62. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +4 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +21 -5
  64. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +99 -2
  65. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +1 -1
  66. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +1 -0
  67. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +57 -45
  68. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +138 -47
  69. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +1584 -1773
  70. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +201 -317
  71. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +146 -187
  72. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +771 -713
  73. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +135 -77
  74. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +5 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  76. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +16 -17
  77. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +318 -145
  78. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  79. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +155 -60
  80. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +8 -8
  81. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
  82. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +10 -9
  84. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +108 -64
  85. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +14 -4
  86. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +530 -87
  87. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +37 -45
  88. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +349 -127
  89. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +947 -1218
  90. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -4
  91. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +143 -29
  92. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +82 -76
  93. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  94. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  95. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +151 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +7 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +233 -28
  100. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +326 -66
  101. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +12 -3
  102. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +102 -6
  103. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +110 -76
  104. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +167 -38
  105. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d.cu +6 -11
  106. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +12 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +1 -1
  108. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +245 -151
  109. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +1 -5
  110. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +341 -289
  111. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cu +49 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile.cuh +1233 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec.cuh +586 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +6 -6
  115. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +48 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +123 -220
  117. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +41 -39
  118. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +715 -45
  119. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +150 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +1 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +321 -24
  122. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cu +93 -351
  123. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmf.cuh +828 -1
  124. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cu +164 -0
  125. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmid.cuh +5 -0
  126. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +3 -166
  127. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1 -1
  128. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cu +371 -78
  129. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvf.cuh +3 -2
  130. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +279 -147
  131. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +1 -1
  132. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +97 -85
  133. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +46 -23
  134. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad_reflect_1d.cu +63 -54
  135. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +12 -10
  136. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +192 -77
  137. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +2 -0
  138. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +10 -9
  139. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +137 -75
  140. package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cu +39 -0
  141. package/cpp/llama.cpp/ggml/src/ggml-cuda/set.cuh +7 -0
  142. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq112-dv112.cu +5 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq128-dv128.cu +5 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq256-dv256.cu +5 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq40-dv40.cu +5 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq576-dv512.cu +5 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq64-dv64.cu +5 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu +5 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq80-dv80.cu +5 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq96-dv96.cu +5 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-f16.cu +7 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_0.cu +7 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q4_1.cu +7 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_0.cu +7 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q5_1.cu +7 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-f16-q8_0.cu +7 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-f16.cu +7 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_0.cu +7 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q4_1.cu +7 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_0.cu +7 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q5_1.cu +7 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_0-q8_0.cu +7 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-f16.cu +7 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_0.cu +7 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q4_1.cu +7 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_0.cu +7 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q5_1.cu +7 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q4_1-q8_0.cu +7 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-f16.cu +7 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_0.cu +7 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q4_1.cu +7 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_0.cu +7 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q5_1.cu +7 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_0-q8_0.cu +7 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-f16.cu +7 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_0.cu +7 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q4_1.cu +7 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_0.cu +7 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q5_1.cu +7 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q5_1-q8_0.cu +7 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-f16.cu +7 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_0.cu +7 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q4_1.cu +7 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_0.cu +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q5_1.cu +7 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-instance-q8_0-q8_0.cu +7 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +40 -19
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_1.cu +5 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_10.cu +5 -0
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_11.cu +5 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_12.cu +5 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_13.cu +5 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_14.cu +5 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_15.cu +5 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_16.cu +5 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_2.cu +5 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_3.cu +5 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_4.cu +5 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_5.cu +5 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_6.cu +5 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_7.cu +5 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_8.cu +5 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmf-instance-ncols_9.cu +5 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cu +336 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/topk-moe.cuh +16 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +3 -3
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +105 -11
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +36 -0
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +87 -6
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +28 -12
  211. package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +68 -0
  212. package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3807 -0
  213. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +40 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/act-ops.c +442 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +360 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +40 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.c +69 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +119 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +156 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +64 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +93 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +60 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.c +49 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.c +960 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +1032 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +829 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2223 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/ops-utils.h +149 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +418 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +402 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +255 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +297 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +448 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.h +220 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +8 -13
  239. package/cpp/llama.cpp/ggml/src/ggml-impl.h +110 -12
  240. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +6 -5
  241. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +446 -0
  242. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  243. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h +33 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m +599 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1662 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h +251 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m +1527 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +244 -39
  249. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +3844 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h +90 -0
  251. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp +723 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +3453 -1907
  253. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +3 -1
  254. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +10 -0
  255. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1331 -109
  256. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +126 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +31 -4
  258. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +35 -7
  259. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +31 -4
  260. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  261. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  262. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +36 -12
  263. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +24 -10
  265. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +24 -10
  266. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  267. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  268. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +29 -20
  274. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +25 -10
  275. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +50 -24
  276. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +123 -10
  277. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +2 -2
  278. package/cpp/llama.cpp/ggml/src/ggml-quants.c +1 -0
  279. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +341 -161
  280. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +6 -5
  282. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +74 -15
  283. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +50 -30
  284. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +10 -4
  285. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +166 -99
  286. package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.cpp +79 -0
  287. package/cpp/llama.cpp/ggml/src/ggml-sycl/count-equal.hpp +9 -0
  288. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +72 -94
  289. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +67 -49
  290. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +21 -31
  291. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +252 -316
  292. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +6 -2
  293. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +9 -6
  294. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +359 -142
  295. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  296. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  297. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +80 -60
  298. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +201 -132
  299. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +230 -55
  300. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +2 -0
  301. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.cpp +97 -0
  302. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad.hpp +24 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.cpp +72 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-sycl/pad_reflect_1d.hpp +8 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.cpp +76 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-sycl/repeat_back.hpp +8 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.cpp +122 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-sycl/roll.hpp +20 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +50 -41
  311. package/cpp/llama.cpp/ggml/src/ggml-sycl/set.cpp +73 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-sycl/set.hpp +5 -0
  313. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +45 -36
  314. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +330 -165
  315. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +4 -0
  316. package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.cpp +127 -0
  317. package/cpp/llama.cpp/ggml/src/ggml-sycl/ssm_conv.hpp +5 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +12 -6
  319. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +16 -12
  320. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +38 -18
  321. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +4184 -2159
  322. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  323. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +2 -2
  324. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +2 -2
  325. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  326. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +1 -1
  327. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +2 -2
  329. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +33 -26
  330. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  331. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  332. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +2 -2
  333. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +2 -2
  334. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +2 -2
  335. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +1 -1
  336. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +53 -30
  337. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +1 -1
  338. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +2 -2
  339. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +3 -3
  340. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +13 -6
  341. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  342. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +2 -2
  343. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +2 -2
  344. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +1 -1
  345. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp → dequant_funcs.glsl} +138 -2
  346. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp → dequant_funcs_cm2.glsl} +18 -4
  347. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp → dequant_head.glsl} +1 -1
  348. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  349. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +1 -1
  350. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +2 -2
  351. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +1 -1
  352. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +3 -2
  353. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +7 -6
  354. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +5 -3
  355. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +1 -1
  356. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +1 -1
  357. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +3 -3
  358. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +3 -3
  359. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  360. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +1 -1
  361. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +1 -1
  362. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +3 -3
  363. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +1 -1
  364. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +1 -1
  365. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +3 -3
  366. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  367. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +1 -1
  368. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +1 -1
  369. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +2 -2
  370. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +3 -2
  371. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  372. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +52 -14
  373. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp → flash_attn_base.glsl} +50 -12
  374. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +61 -12
  375. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +54 -12
  376. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +5 -1
  377. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  378. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +2 -2
  379. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +2 -2
  380. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +2 -2
  381. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +2 -2
  382. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +2 -2
  383. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +2 -2
  384. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp → generic_binary_head.glsl} +10 -2
  385. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +21 -12
  386. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +28 -18
  387. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp → glu_head.glsl} +1 -1
  388. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +2 -2
  389. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  390. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  391. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +15 -7
  392. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +2 -2
  394. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +2 -2
  395. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  396. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +2 -2
  397. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +1 -1
  398. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +229 -0
  399. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +33 -0
  400. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +1 -1
  401. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +1 -1
  402. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +1 -1
  403. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +1 -1
  404. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +1 -1
  405. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +1 -1
  406. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +1 -1
  407. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +9 -7
  408. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +9 -7
  409. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +3 -5
  410. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +1 -1
  411. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +3 -5
  412. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +3 -5
  413. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +1 -1
  414. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +140 -0
  415. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +106 -634
  416. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +118 -9
  417. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +556 -0
  418. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +70 -0
  419. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +77 -214
  420. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +589 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  422. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +97 -13
  423. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +2 -2
  425. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +2 -2
  426. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +1 -1
  427. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +25 -4
  428. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +1 -1
  429. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +55 -5
  430. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +2 -2
  431. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +2 -2
  432. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +2 -2
  433. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +2 -2
  434. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +45 -3
  435. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +2 -2
  436. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +2 -2
  437. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +2 -2
  438. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +227 -0
  439. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  440. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +5 -52
  441. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +5 -35
  442. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +5 -35
  443. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +27 -0
  444. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +5 -41
  445. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +2 -2
  447. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +2 -2
  448. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +2 -2
  449. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +2 -2
  450. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +2 -2
  451. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +1 -1
  452. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +6 -2
  453. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  454. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +2 -2
  455. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +2 -2
  456. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +44 -0
  457. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +140 -0
  458. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +2 -2
  460. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +1 -1
  461. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +2 -2
  462. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +2 -2
  463. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +2 -2
  464. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +5 -4
  465. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +171 -0
  466. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  467. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{types.comp → types.glsl} +79 -29
  468. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -12
  469. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +471 -196
  470. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +8 -0
  471. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +1690 -383
  472. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/bin_op.tmpl.wgsl +188 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl +45 -0
  474. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +930 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +101 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +57 -10
  477. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.tmpl.wgsl +874 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  479. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl +25 -912
  480. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +97 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.tmpl.wgsl +247 -0
  482. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.tmpl.wgsl +302 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.tmpl.wgsl +267 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.tmpl.wgsl +90 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/{set_rows.wgsl → set_rows.tmpl.wgsl} +38 -8
  488. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  490. package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +96 -314
  491. package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  493. package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  495. package/cpp/llama.cpp/ggml/src/ggml.c +440 -17
  496. package/cpp/llama.cpp/ggml/src/gguf.cpp +104 -29
  497. package/cpp/llama.cpp/gguf-py/gguf/constants.py +363 -13
  498. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +64 -0
  499. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +8 -3
  500. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +6 -0
  501. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +156 -18
  502. package/cpp/llama.cpp/gguf-py/gguf/utility.py +80 -0
  503. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +4 -4
  504. package/cpp/llama.cpp/include/llama.h +44 -21
  505. package/cpp/llama.cpp/media/llama1-icon-transparent.png +0 -0
  506. package/cpp/llama.cpp/media/llama1-icon-transparent.svg +77 -0
  507. package/cpp/llama.cpp/media/llama1-icon.png +0 -0
  508. package/cpp/llama.cpp/media/llama1-icon.svg +87 -0
  509. package/cpp/llama.cpp/requirements/requirements-all.txt +2 -0
  510. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -3
  511. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +3 -1
  512. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +1 -1
  513. package/cpp/llama.cpp/src/CMakeLists.txt +101 -0
  514. package/cpp/llama.cpp/src/llama-adapter.cpp +33 -0
  515. package/cpp/llama.cpp/src/llama-adapter.h +3 -0
  516. package/cpp/llama.cpp/src/llama-arch.cpp +344 -14
  517. package/cpp/llama.cpp/src/llama-arch.h +50 -0
  518. package/cpp/llama.cpp/src/llama-batch.cpp +63 -31
  519. package/cpp/llama.cpp/src/llama-batch.h +13 -2
  520. package/cpp/llama.cpp/src/llama-chat.cpp +85 -3
  521. package/cpp/llama.cpp/src/llama-chat.h +4 -0
  522. package/cpp/llama.cpp/src/llama-context.cpp +300 -45
  523. package/cpp/llama.cpp/src/llama-context.h +16 -6
  524. package/cpp/llama.cpp/src/llama-cparams.h +2 -1
  525. package/cpp/llama.cpp/src/llama-grammar.cpp +17 -9
  526. package/cpp/llama.cpp/src/llama-graph.cpp +226 -64
  527. package/cpp/llama.cpp/src/llama-graph.h +27 -5
  528. package/cpp/llama.cpp/src/llama-hparams.cpp +53 -2
  529. package/cpp/llama.cpp/src/llama-hparams.h +48 -8
  530. package/cpp/llama.cpp/src/llama-impl.cpp +3 -3
  531. package/cpp/llama.cpp/src/llama-impl.h +2 -0
  532. package/cpp/llama.cpp/src/llama-kv-cache-iswa.cpp +13 -3
  533. package/cpp/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
  534. package/cpp/llama.cpp/src/llama-kv-cache.cpp +120 -62
  535. package/cpp/llama.cpp/src/llama-kv-cache.h +13 -4
  536. package/cpp/llama.cpp/src/llama-kv-cells.h +44 -2
  537. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +19 -9
  538. package/cpp/llama.cpp/src/llama-memory-hybrid.h +2 -0
  539. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +38 -17
  540. package/cpp/llama.cpp/src/llama-memory-recurrent.h +5 -2
  541. package/cpp/llama.cpp/src/llama-memory.h +3 -0
  542. package/cpp/llama.cpp/src/llama-model-loader.cpp +2 -0
  543. package/cpp/llama.cpp/src/llama-model.cpp +1070 -12614
  544. package/cpp/llama.cpp/src/llama-model.h +40 -4
  545. package/cpp/llama.cpp/src/llama-quant.cpp +14 -6
  546. package/cpp/llama.cpp/src/llama-sampling.cpp +243 -136
  547. package/cpp/llama.cpp/src/llama-vocab.cpp +43 -3
  548. package/cpp/llama.cpp/src/llama-vocab.h +43 -39
  549. package/cpp/llama.cpp/src/llama.cpp +69 -10
  550. package/cpp/llama.cpp/src/models/afmoe.cpp +187 -0
  551. package/cpp/llama.cpp/src/models/apertus.cpp +125 -0
  552. package/cpp/llama.cpp/src/models/arcee.cpp +135 -0
  553. package/cpp/llama.cpp/src/models/arctic.cpp +138 -0
  554. package/cpp/llama.cpp/src/models/arwkv7.cpp +86 -0
  555. package/cpp/llama.cpp/src/models/baichuan.cpp +122 -0
  556. package/cpp/llama.cpp/src/models/bailingmoe.cpp +144 -0
  557. package/cpp/llama.cpp/src/models/bailingmoe2.cpp +135 -0
  558. package/cpp/llama.cpp/src/models/bert.cpp +176 -0
  559. package/cpp/llama.cpp/src/models/bitnet.cpp +160 -0
  560. package/cpp/llama.cpp/src/models/bloom.cpp +101 -0
  561. package/cpp/llama.cpp/src/models/chameleon.cpp +178 -0
  562. package/cpp/llama.cpp/src/models/chatglm.cpp +132 -0
  563. package/cpp/llama.cpp/src/models/codeshell.cpp +111 -0
  564. package/cpp/llama.cpp/src/models/cogvlm.cpp +100 -0
  565. package/cpp/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
  566. package/cpp/llama.cpp/src/models/command-r.cpp +122 -0
  567. package/cpp/llama.cpp/src/models/dbrx.cpp +123 -0
  568. package/cpp/llama.cpp/src/models/deci.cpp +135 -0
  569. package/cpp/llama.cpp/src/models/deepseek.cpp +144 -0
  570. package/cpp/llama.cpp/src/models/deepseek2.cpp +237 -0
  571. package/cpp/llama.cpp/src/models/dots1.cpp +134 -0
  572. package/cpp/llama.cpp/src/models/dream.cpp +105 -0
  573. package/cpp/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
  574. package/cpp/llama.cpp/src/models/ernie4-5.cpp +110 -0
  575. package/cpp/llama.cpp/src/models/exaone.cpp +114 -0
  576. package/cpp/llama.cpp/src/models/exaone4.cpp +123 -0
  577. package/cpp/llama.cpp/src/models/falcon-h1.cpp +113 -0
  578. package/cpp/llama.cpp/src/models/falcon.cpp +120 -0
  579. package/cpp/llama.cpp/src/models/gemma-embedding.cpp +120 -0
  580. package/cpp/llama.cpp/src/models/gemma.cpp +112 -0
  581. package/cpp/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
  582. package/cpp/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
  583. package/cpp/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
  584. package/cpp/llama.cpp/src/models/glm4-moe.cpp +153 -0
  585. package/cpp/llama.cpp/src/models/glm4.cpp +127 -0
  586. package/cpp/llama.cpp/src/models/gpt2.cpp +105 -0
  587. package/cpp/llama.cpp/src/models/gptneox.cpp +144 -0
  588. package/cpp/llama.cpp/src/models/granite-hybrid.cpp +196 -0
  589. package/cpp/llama.cpp/src/models/granite.cpp +211 -0
  590. package/cpp/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
  591. package/cpp/llama.cpp/src/models/grok.cpp +159 -0
  592. package/cpp/llama.cpp/src/models/grovemoe.cpp +141 -0
  593. package/cpp/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
  594. package/cpp/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
  595. package/cpp/llama.cpp/src/models/internlm2.cpp +120 -0
  596. package/cpp/llama.cpp/src/models/jais.cpp +86 -0
  597. package/cpp/llama.cpp/src/models/jamba.cpp +106 -0
  598. package/cpp/llama.cpp/src/models/lfm2.cpp +173 -0
  599. package/cpp/llama.cpp/src/models/llada-moe.cpp +122 -0
  600. package/cpp/llama.cpp/src/models/llada.cpp +99 -0
  601. package/cpp/llama.cpp/src/models/llama-iswa.cpp +174 -0
  602. package/cpp/llama.cpp/src/models/llama.cpp +155 -0
  603. package/cpp/llama.cpp/src/models/mamba.cpp +55 -0
  604. package/cpp/llama.cpp/src/models/minicpm3.cpp +199 -0
  605. package/cpp/llama.cpp/src/models/minimax-m2.cpp +124 -0
  606. package/cpp/llama.cpp/src/models/models.h +485 -0
  607. package/cpp/llama.cpp/src/models/mpt.cpp +126 -0
  608. package/cpp/llama.cpp/src/models/nemotron-h.cpp +121 -0
  609. package/cpp/llama.cpp/src/models/nemotron.cpp +122 -0
  610. package/cpp/llama.cpp/src/models/neo-bert.cpp +104 -0
  611. package/cpp/llama.cpp/src/models/olmo.cpp +121 -0
  612. package/cpp/llama.cpp/src/models/olmo2.cpp +150 -0
  613. package/cpp/llama.cpp/src/models/olmoe.cpp +124 -0
  614. package/cpp/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
  615. package/cpp/llama.cpp/src/models/openelm.cpp +124 -0
  616. package/cpp/llama.cpp/src/models/orion.cpp +123 -0
  617. package/cpp/llama.cpp/src/models/pangu-embedded.cpp +121 -0
  618. package/cpp/llama.cpp/src/models/phi2.cpp +121 -0
  619. package/cpp/llama.cpp/src/models/phi3.cpp +152 -0
  620. package/cpp/llama.cpp/src/models/plamo.cpp +110 -0
  621. package/cpp/llama.cpp/src/models/plamo2.cpp +316 -0
  622. package/cpp/llama.cpp/src/models/plm.cpp +168 -0
  623. package/cpp/llama.cpp/src/models/qwen.cpp +108 -0
  624. package/cpp/llama.cpp/src/models/qwen2.cpp +117 -0
  625. package/cpp/llama.cpp/src/models/qwen2moe.cpp +151 -0
  626. package/cpp/llama.cpp/src/models/qwen2vl.cpp +117 -0
  627. package/cpp/llama.cpp/src/models/qwen3.cpp +117 -0
  628. package/cpp/llama.cpp/src/models/qwen3moe.cpp +124 -0
  629. package/cpp/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
  630. package/cpp/llama.cpp/src/models/qwen3vl.cpp +141 -0
  631. package/cpp/llama.cpp/src/models/refact.cpp +94 -0
  632. package/cpp/llama.cpp/src/models/rwkv6-base.cpp +162 -0
  633. package/cpp/llama.cpp/src/models/rwkv6.cpp +94 -0
  634. package/cpp/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
  635. package/cpp/llama.cpp/src/models/rwkv7-base.cpp +135 -0
  636. package/cpp/llama.cpp/src/models/rwkv7.cpp +90 -0
  637. package/cpp/llama.cpp/src/models/seed-oss.cpp +124 -0
  638. package/cpp/llama.cpp/src/models/smallthinker.cpp +120 -0
  639. package/cpp/llama.cpp/src/models/smollm3.cpp +128 -0
  640. package/cpp/llama.cpp/src/models/stablelm.cpp +146 -0
  641. package/cpp/llama.cpp/src/models/starcoder.cpp +100 -0
  642. package/cpp/llama.cpp/src/models/starcoder2.cpp +121 -0
  643. package/cpp/llama.cpp/src/models/t5-dec.cpp +166 -0
  644. package/cpp/llama.cpp/src/models/t5-enc.cpp +96 -0
  645. package/cpp/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
  646. package/cpp/llama.cpp/src/models/xverse.cpp +108 -0
  647. package/cpp/llama.cpp/src/unicode.cpp +77 -0
  648. package/cpp/llama.cpp/src/unicode.h +43 -0
  649. package/cpp/llama.cpp/vendor/cpp-httplib/CMakeLists.txt +94 -0
  650. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.cpp +9339 -0
  651. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +433 -8222
  652. package/cpp/llama.cpp/vendor/cpp-httplib/patch-boringssl.cmake +6 -0
  653. package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +4179 -1900
  654. package/cpp/llama.cpp/vendor/minja/chat-template.hpp +9 -2
  655. package/cpp/llama.cpp/vendor/minja/minja.hpp +101 -22
  656. package/ios/include/chat.h +16 -3
  657. package/ios/include/common/minja/chat-template.hpp +9 -2
  658. package/ios/include/common/minja/minja.hpp +101 -22
  659. package/ios/include/common.h +57 -19
  660. package/ios/include/json-schema-to-grammar.h +2 -0
  661. package/ios/include/llama.h +44 -21
  662. package/ios/include/log.h +12 -4
  663. package/ios/include/sampling.h +3 -1
  664. package/ios/libs/llama.xcframework/Info.plist +20 -20
  665. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  666. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6399 -5557
  667. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +19 -1
  668. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +1 -1
  669. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +1 -6
  670. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +199 -6
  671. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +44 -21
  672. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  673. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  674. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6362 -5520
  675. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4813 -4241
  676. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +19 -1
  677. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +1 -1
  678. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +1 -6
  679. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +199 -6
  680. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +44 -21
  681. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  682. package/package.json +10 -4
  683. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +0 -2579
  684. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +0 -371
  685. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +0 -3
  686. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +0 -379
  687. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +0 -3
  688. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +0 -495
  689. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +0 -486
  690. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +0 -5
  691. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +0 -5
  692. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +0 -5
  693. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +0 -5
  694. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +0 -5
  695. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +0 -5
  696. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +0 -5
  697. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +0 -5
  698. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +0 -5
  699. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +0 -5
  700. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +0 -5
  701. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +0 -5
  702. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +0 -5
  703. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +0 -5
  704. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +0 -5
  705. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +0 -5
  706. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +0 -5
  707. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +0 -5
  708. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +0 -5
  709. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +0 -5
  710. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +0 -5
  711. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +0 -5
  712. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +0 -5
  713. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +0 -5
  714. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +0 -5
  715. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +0 -5
  716. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +0 -5
  717. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +0 -5
  718. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +0 -5
  719. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +0 -5
  720. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +0 -5
  721. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +0 -5
  722. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +0 -5
  723. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +0 -5
  724. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +0 -5
  725. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +0 -5
  726. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +0 -5
  727. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +0 -5
  728. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +0 -5
  729. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +0 -5
  730. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +0 -5
  731. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +0 -5
  732. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +0 -5
  733. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +0 -5
  734. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +0 -5
  735. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +0 -5
  736. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +0 -5
  737. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +0 -5
  738. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +0 -5
  739. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +0 -5
  740. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +0 -5
  741. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +0 -5
  742. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +0 -5
  743. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +0 -5
  744. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +0 -5
  745. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +0 -5
  746. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +0 -5
  747. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +0 -5
  748. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +0 -5
  749. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +0 -5
  750. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +0 -5
  751. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +0 -5
  752. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +0 -5
  753. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +0 -5
  754. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +0 -5
  755. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +0 -5
  756. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +0 -5
  757. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +0 -5
  758. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +0 -5
  759. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +0 -5
  760. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +0 -5
  761. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +0 -5
  762. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +0 -5
  763. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +0 -5
  764. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +0 -5
  765. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +0 -5
  766. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +0 -5
  767. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +0 -5
  768. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +0 -5
  769. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +0 -5
  770. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +0 -5
  771. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +0 -5
  772. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +0 -5
  773. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +0 -5
  774. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +0 -5
  775. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +0 -5
  776. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +0 -6886
  777. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +0 -154
  778. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +0 -105
  779. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +0 -55
  780. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +0 -60
  781. package/cpp/llama.cpp/ggml/src/ggml-zdnn/ggml-zdnn-impl.h +0 -97
  782. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  783. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  784. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  785. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  786. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  787. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  788. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  789. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  790. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  791. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  792. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  793. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  794. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  795. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  796. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  797. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  798. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  799. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  800. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  801. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  802. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  803. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  804. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  805. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  806. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  807. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  808. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  809. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  810. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  811. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  812. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  813. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  814. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  815. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  816. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  817. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  818. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  819. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  820. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  821. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  822. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  823. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  824. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  825. package/cpp/llama.cpp/models/templates/ByteDance-Seed-OSS.jinja +0 -171
  826. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +0 -202
  827. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +0 -156
  828. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +0 -124
  829. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +0 -152
  830. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +0 -152
  831. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +0 -62
  832. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +0 -54
  833. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +0 -85
  834. package/cpp/llama.cpp/models/templates/README.md +0 -25
  835. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +0 -1
  836. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +0 -1
  837. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +0 -57
  838. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +0 -4
  839. package/cpp/llama.cpp/models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja +0 -59
  840. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +0 -76
  841. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +0 -34
  842. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +0 -58
  843. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +0 -287
  844. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +0 -109
  845. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +0 -93
  846. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +0 -109
  847. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +0 -8
  848. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +0 -87
  849. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +0 -43
  850. package/cpp/llama.cpp/models/templates/openai-gpt-oss-120b.jinja +0 -331
  851. package/cpp/llama.cpp/models/templates/unsloth-mistral-Devstral-Small-2507.jinja +0 -105
  852. package/cpp/llama.cpp/prompts/LLM-questions.txt +0 -49
  853. package/cpp/llama.cpp/prompts/alpaca.txt +0 -1
  854. package/cpp/llama.cpp/prompts/assistant.txt +0 -31
  855. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  856. package/cpp/llama.cpp/prompts/chat-with-bob.txt +0 -7
  857. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  858. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  859. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  860. package/cpp/llama.cpp/prompts/chat.txt +0 -28
  861. package/cpp/llama.cpp/prompts/dan-modified.txt +0 -1
  862. package/cpp/llama.cpp/prompts/dan.txt +0 -1
  863. package/cpp/llama.cpp/prompts/mnemonics.txt +0 -93
  864. package/cpp/llama.cpp/prompts/parallel-questions.txt +0 -43
  865. package/cpp/llama.cpp/prompts/reason-act.txt +0 -18
  866. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  867. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  868. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
  869. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4247
  870. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +0 -76
  871. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +0 -354
  872. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +0 -25
  873. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +0 -145
  874. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +0 -66
  875. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +0 -256
  876. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +0 -2492
  877. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +0 -202
  878. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -1391
  879. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +0 -17
  880. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +0 -32
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +0 -76
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +0 -354
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +0 -25
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +0 -145
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +0 -66
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +0 -256
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +0 -2492
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +0 -202
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -1391
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +0 -17
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +0 -32
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +0 -76
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +0 -354
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +0 -25
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +0 -145
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +0 -66
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +0 -256
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +0 -2492
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +0 -202
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -1391
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +0 -17
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +0 -32
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  906. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  907. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  908. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5561
  909. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
  910. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +0 -354
  911. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +0 -25
  912. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
  913. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +0 -66
  914. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +0 -256
  915. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +0 -2492
  916. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +0 -202
  917. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -1391
  918. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +0 -35
  919. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +0 -17
  920. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  921. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  922. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  923. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5524
  924. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4246
  925. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
  926. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
  927. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
  928. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
  929. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
  930. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
  931. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +0 -35
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  937. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  938. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  939. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5558
  940. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +0 -76
  941. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +0 -354
  942. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +0 -25
  943. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +0 -145
  944. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +0 -66
  945. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +0 -256
  946. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +0 -2492
  947. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +0 -202
  948. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -1391
  949. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +0 -32
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +0 -17
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  952. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +0 -20
  953. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  954. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +0 -5520
  955. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +0 -4243
  956. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +0 -76
  957. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +0 -354
  958. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +0 -25
  959. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +0 -145
  960. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +0 -66
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +0 -256
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +0 -2492
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +0 -202
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -1391
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +0 -32
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +0 -17
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  968. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp → feature-tests/bfloat16.comp} +0 -0
  969. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp → feature-tests/coopmat.comp} +0 -0
  970. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp → feature-tests/coopmat2.comp} +0 -0
  971. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp → feature-tests/integer_dot.comp} +0 -0
  972. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp → generic_head.glsl} +0 -0
  973. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp → generic_unary_head.glsl} +0 -0
  974. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp → glu_main.glsl} +0 -0
  975. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp → rte.glsl} +0 -0
  976. /package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp → utils.glsl} +0 -0
@@ -9,8 +9,11 @@
9
9
  #include <minja/chat-template.hpp>
10
10
  #include <minja/minja.hpp>
11
11
 
12
+ #include <algorithm>
12
13
  #include <cstdio>
14
+ #include <cctype>
13
15
  #include <exception>
16
+ #include <functional>
14
17
  #include <iostream>
15
18
  #include <optional>
16
19
  #include <stdexcept>
@@ -163,6 +166,19 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
163
166
  throw std::runtime_error("Invalid tool_choice: " + tool_choice);
164
167
  }
165
168
 
169
+ bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
170
+ common_chat_templates_inputs dummy_inputs;
171
+ common_chat_msg msg;
172
+ msg.role = "user";
173
+ msg.content = "test";
174
+ dummy_inputs.messages = {msg};
175
+ dummy_inputs.enable_thinking = false;
176
+ const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
177
+ dummy_inputs.enable_thinking = true;
178
+ const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
179
+ return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
180
+ }
181
+
166
182
  template <>
167
183
  std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
168
184
  std::vector<common_chat_msg> msgs;
@@ -297,7 +313,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
297
313
  }
298
314
  if (!msg.reasoning_content.empty()) {
299
315
  jmsg["reasoning_content"] = msg.reasoning_content;
300
- jmsg["thinking"] = msg.reasoning_content; // gpt-oss
301
316
  }
302
317
  if (!msg.tool_name.empty()) {
303
318
  jmsg["name"] = msg.tool_name;
@@ -612,17 +627,28 @@ const char * common_chat_format_name(common_chat_format format) {
612
627
  case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
613
628
  case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
614
629
  case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
630
+ case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
615
631
  case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
616
632
  case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
617
633
  case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
618
634
  case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
619
635
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
620
636
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
637
+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
621
638
  case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
622
639
  case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
623
640
  case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
624
641
  case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
625
642
  case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
643
+ case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
644
+ case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
645
+ case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
646
+ case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
647
+ case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
648
+ case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
649
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
650
+ case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
651
+ case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
626
652
  default:
627
653
  throw std::runtime_error("Unknown chat format");
628
654
  }
@@ -684,11 +710,13 @@ static void parse_json_tool_calls(
684
710
  size_t from = std::string::npos;
685
711
  auto first = true;
686
712
  while (true) {
713
+ auto start_pos = builder.pos();
687
714
  auto res = function_regex_start_only && first
688
715
  ? builder.try_consume_regex(*function_regex_start_only)
689
716
  : function_regex
690
717
  ? builder.try_find_regex(*function_regex, from)
691
718
  : std::nullopt;
719
+
692
720
  if (res) {
693
721
  std::string name;
694
722
  if (get_function_name) {
@@ -723,6 +751,8 @@ static void parse_json_tool_calls(
723
751
  return;
724
752
  }
725
753
  throw common_chat_msg_partial_exception("incomplete tool call");
754
+ } else {
755
+ builder.move_to(start_pos);
726
756
  }
727
757
  break;
728
758
  }
@@ -782,6 +812,7 @@ static std::string apply(
782
812
  }
783
813
  tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt;
784
814
  tmpl_inputs.extra_context = inputs.extra_context;
815
+ tmpl_inputs.extra_context["enable_thinking"] = inputs.enable_thinking;
785
816
  if (additional_context) {
786
817
  tmpl_inputs.extra_context.merge_patch(*additional_context);
787
818
  }
@@ -963,6 +994,185 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
963
994
  data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
964
995
  return data;
965
996
  }
997
+
998
+
999
+ // Case-insensitive find
1000
+ static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
1001
+ auto it = std::search(
1002
+ haystack.begin() + pos, haystack.end(),
1003
+ needle.begin(), needle.end(),
1004
+ [](char a, char b) { return std::tolower(a) == std::tolower(b); }
1005
+ );
1006
+ return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
1007
+ }
1008
+
1009
+ static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1010
+ common_chat_params data;
1011
+ const auto is_json_schema_provided = !inputs.json_schema.is_null();
1012
+ const auto is_grammar_provided = !inputs.grammar.empty();
1013
+ const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
1014
+
1015
+ // the logic requires potentially modifying the messages
1016
+ auto tweaked_messages = inputs.messages;
1017
+
1018
+ auto replace_json_schema_marker = [](json & messages) -> bool {
1019
+ static std::string marker1 = "force json schema.\n";
1020
+ static std::string marker2 = "force json schema.";
1021
+
1022
+ if (messages.empty() || messages.at(0).at("role") != "system") {
1023
+ return false;
1024
+ }
1025
+
1026
+ std::string content = messages.at(0).at("content");
1027
+
1028
+ for (const auto & marker : {marker1, marker2}) {
1029
+ const auto pos = ifind_string(content, marker);
1030
+ if (pos != std::string::npos) {
1031
+ content.replace(pos, marker.length(), "");
1032
+ // inject modified content back into the messages
1033
+ messages.at(0).at("content") = content;
1034
+ return true;
1035
+ }
1036
+ }
1037
+
1038
+ return false;
1039
+ };
1040
+
1041
+ // Lfm2 model does not natively work with json, but can generally understand the tools structure
1042
+ //
1043
+ // Example of the pytorch dialog structure:
1044
+ // <|startoftext|><|im_start|>system
1045
+ // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
1046
+ // <|im_start|>user
1047
+ // What is the current status of candidate ID 12345?<|im_end|>
1048
+ // <|im_start|>assistant
1049
+ // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
1050
+ // <|im_start|>tool
1051
+ // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
1052
+ // <|im_start|>assistant
1053
+ // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
1054
+ //
1055
+ // For the llama server compatibility with json tools semantic,
1056
+ // the client can add "Follow json schema." line into the system message prompt to force the json output.
1057
+ //
1058
+ if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
1059
+ // server/utils.hpp prohibits that branch for the custom grammar anyways
1060
+ throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
1061
+ } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
1062
+ LOG_INF("%s: Using tools to build a grammar\n", __func__);
1063
+
1064
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1065
+ auto schemas = json::array();
1066
+ foreach_function(inputs.tools, [&](const json & tool) {
1067
+ const auto & function = tool.at("function");
1068
+ schemas.push_back({
1069
+ {"type", "object"},
1070
+ {"properties", {
1071
+ {"name", {
1072
+ {"type", "string"},
1073
+ {"const", function.at("name")},
1074
+ }},
1075
+ {"arguments", function.at("parameters")},
1076
+ }},
1077
+ {"required", json::array({"name", "arguments", "id"})},
1078
+ });
1079
+ });
1080
+ auto schema = json {
1081
+ {"type", "array"},
1082
+ {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1083
+ {"minItems", 1},
1084
+ };
1085
+ if (!inputs.parallel_tool_calls) {
1086
+ schema["maxItems"] = 1;
1087
+ }
1088
+
1089
+ builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
1090
+ });
1091
+ // model has no concept of tool selection mode choice,
1092
+ // if the system prompt rendered correctly it will produce a tool call
1093
+ // the grammar goes inside the tool call body
1094
+ data.grammar_lazy = true;
1095
+ data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
1096
+ data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
1097
+ data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
1098
+ } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
1099
+ LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
1100
+ // output those tokens
1101
+ data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
1102
+ } else if (is_json_schema_provided) {
1103
+ LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
1104
+ data.grammar = json_schema_to_grammar(inputs.json_schema);
1105
+ } else if (is_grammar_provided) {
1106
+ LOG_INF("%s: Using provided grammar\n", __func__);
1107
+ data.grammar = inputs.grammar;
1108
+ } else {
1109
+ LOG_INF("%s: Using content relying on the template\n", __func__);
1110
+ }
1111
+
1112
+ data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
1113
+ LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
1114
+
1115
+ return data;
1116
+ }
1117
+
1118
+ static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
1119
+ common_chat_params data;
1120
+ data.prompt = apply(tmpl, inputs);
1121
+ data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
1122
+ data.preserved_tokens = {
1123
+ "[THINK]",
1124
+ "[/THINK]",
1125
+ };
1126
+
1127
+ if (inputs.tools.is_array() && !inputs.tools.empty()) {
1128
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1129
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1130
+ auto schemas = json::array();
1131
+ foreach_function(inputs.tools, [&](const json & tool) {
1132
+ const auto & function = tool.at("function");
1133
+ schemas.push_back({
1134
+ {"type", "object"},
1135
+ {"properties", {
1136
+ {"name", {
1137
+ {"type", "string"},
1138
+ {"const", function.at("name")},
1139
+ }},
1140
+ {"arguments", function.at("parameters")},
1141
+ {"id", {
1142
+ {"type", "string"},
1143
+ {"pattern", "^[a-zA-Z0-9]{9}$"},
1144
+ }},
1145
+ }},
1146
+ {"required", json::array({"name", "arguments", "id"})},
1147
+ });
1148
+ });
1149
+ auto schema = json {
1150
+ {"type", "array"},
1151
+ {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1152
+ {"minItems", 1},
1153
+ };
1154
+ if (!inputs.parallel_tool_calls) {
1155
+ schema["maxItems"] = 1;
1156
+ }
1157
+ builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
1158
+ });
1159
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
1160
+ data.preserved_tokens.push_back("[TOOL_CALLS]");
1161
+ } else {
1162
+ data.grammar_lazy = false;
1163
+ if (!inputs.json_schema.is_null()) {
1164
+ if (!inputs.grammar.empty()) {
1165
+ throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
1166
+ }
1167
+ data.grammar = json_schema_to_grammar(inputs.json_schema);
1168
+ } else {
1169
+ data.grammar = inputs.grammar;
1170
+ }
1171
+ }
1172
+
1173
+ return data;
1174
+ }
1175
+
966
1176
  static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
967
1177
  if (!builder.syntax().parse_tool_calls) {
968
1178
  builder.add_content(builder.consume_rest());
@@ -973,6 +1183,18 @@ static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
973
1183
  parse_prefixed_json_tool_call_array(builder, prefix);
974
1184
  }
975
1185
 
1186
+ static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
1187
+ builder.try_parse_reasoning("[THINK]", "[/THINK]");
1188
+
1189
+ if (!builder.syntax().parse_tool_calls) {
1190
+ builder.add_content(builder.consume_rest());
1191
+ return;
1192
+ }
1193
+
1194
+ static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
1195
+ parse_prefixed_json_tool_call_array(builder, prefix);
1196
+ }
1197
+
976
1198
  static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
977
1199
  common_chat_params data;
978
1200
 
@@ -1184,7 +1406,139 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
1184
1406
  });
1185
1407
  return data;
1186
1408
  }
1409
+
1410
+ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1411
+ common_chat_params data;
1412
+
1413
+ // Generate the prompt using the apply() function with the template
1414
+ data.prompt = apply(tmpl, inputs);
1415
+ data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
1416
+
1417
+ // Handle thinking tags appropriately based on inputs.enable_thinking
1418
+ if (string_ends_with(data.prompt, "<think>\n")) {
1419
+ if (!inputs.enable_thinking) {
1420
+ data.prompt += "</think>";
1421
+ } else {
1422
+ data.thinking_forced_open = true;
1423
+ }
1424
+ }
1425
+
1426
+ // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
1427
+ if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
1428
+ data.grammar_lazy = true;
1429
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1430
+ auto schemas = json::array();
1431
+ foreach_function(inputs.tools, [&](const json & tool) {
1432
+ const auto & function = tool.at("function");
1433
+ schemas.push_back({
1434
+ { "type", "object" },
1435
+ { "properties",
1436
+ {
1437
+ { "name",
1438
+ {
1439
+ { "type", "string" },
1440
+ { "const", function.at("name") },
1441
+ } },
1442
+ { "arguments", function.at("parameters") },
1443
+ } },
1444
+ { "required", json::array({ "name", "arguments" }) },
1445
+ });
1446
+ });
1447
+ auto schema = json{
1448
+ { "type", "array" },
1449
+ { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
1450
+ { "minItems", 1 },
1451
+ };
1452
+ if (!inputs.parallel_tool_calls) {
1453
+ schema["maxItems"] = 1;
1454
+ }
1455
+ builder.add_rule("root",
1456
+ std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1457
+ "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
1458
+ " \"</TOOLCALL>\"");
1459
+ });
1460
+ data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1461
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1462
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1463
+ std::string(data.thinking_forced_open ?
1464
+ "[\\s\\S]*?(</think>\\s*)" :
1465
+ "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1466
+ "(<TOOLCALL>)[\\s\\S]*" });
1467
+ }
1468
+ return data;
1469
+ }
1470
+
1471
+ static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
1472
+ common_chat_params data;
1473
+
1474
+ // Generate the prompt using the apply() function with the template
1475
+ data.prompt = apply(tmpl, inputs);
1476
+ data.format = COMMON_CHAT_FORMAT_APERTUS;
1477
+
1478
+ // Handle thinking tags appropriately based on inputs.enable_thinking
1479
+ if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
1480
+ if (!inputs.enable_thinking) {
1481
+ data.prompt += "<|inner_suffix|>";
1482
+ } else {
1483
+ data.thinking_forced_open = true;
1484
+ }
1485
+ }
1486
+
1487
+ // When tools are present, build grammar for the <|tools_prefix|> format
1488
+ if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
1489
+ data.grammar_lazy = true;
1490
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1491
+ auto schemas = json::array();
1492
+ foreach_function(inputs.tools, [&](const json & tool) {
1493
+ const auto & function = tool.at("function");
1494
+ schemas.push_back({
1495
+ { "type", "object" },
1496
+ { "properties",
1497
+ {
1498
+ { function.at("name"), function.at("parameters") }
1499
+ } },
1500
+ { "required", json::array({ function.at("name") }) },
1501
+ });
1502
+ });
1503
+ auto schema = json{
1504
+ { "type", "array" },
1505
+ { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
1506
+ { "minItems", 1 },
1507
+ };
1508
+ if (!inputs.parallel_tool_calls) {
1509
+ schema["maxItems"] = 1;
1510
+ }
1511
+ builder.add_rule("root",
1512
+ std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
1513
+ "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
1514
+ });
1515
+ data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1516
+ // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
1517
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1518
+ std::string(data.thinking_forced_open ?
1519
+ "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
1520
+ "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
1521
+ "(<\\|tools_prefix\\|>)[\\s\\S]*" });
1522
+ data.preserved_tokens = {
1523
+ "<|system_start|>",
1524
+ "<|system_end|>",
1525
+ "<|developer_start|>",
1526
+ "<|developer_end|>",
1527
+ "<|user_start|>",
1528
+ "<|user_end|>",
1529
+ "<|assistant_start|>",
1530
+ "<|assistant_end|>",
1531
+ "<|inner_prefix|>",
1532
+ "<|inner_suffix|>",
1533
+ "<|tools_prefix|>",
1534
+ "<|tools_suffix|>",
1535
+ };
1536
+ }
1537
+ return data;
1538
+ }
1187
1539
  static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
1540
+ builder.try_parse_reasoning("<think>", "</think>");
1541
+
1188
1542
  if (!builder.syntax().parse_tool_calls) {
1189
1543
  builder.add_content(builder.consume_rest());
1190
1544
  return;
@@ -1313,6 +1667,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
1313
1667
  }
1314
1668
  return data;
1315
1669
  }
1670
+
1671
+ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1672
+ common_chat_params data;
1673
+
1674
+ // Pass thinking context for DeepSeek V3.1 template
1675
+ json additional_context = {
1676
+ {"thinking", inputs.enable_thinking},
1677
+ };
1678
+
1679
+ auto prompt = apply(tmpl, inputs,
1680
+ /* messages_override= */ inputs.messages,
1681
+ /* tools_override= */ std::nullopt,
1682
+ additional_context);
1683
+ data.prompt = prompt;
1684
+ data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
1685
+ if (string_ends_with(data.prompt, "<think>")) {
1686
+ if (!inputs.enable_thinking) {
1687
+ data.prompt += "</think>";
1688
+ } else {
1689
+ data.thinking_forced_open = true;
1690
+ }
1691
+ }
1692
+ if (inputs.tools.is_array() && !inputs.tools.empty()) {
1693
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
1694
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1695
+ std::vector<std::string> tool_rules;
1696
+ foreach_function(inputs.tools, [&](const json & tool) {
1697
+ const auto & function = tool.at("function");
1698
+ std::string name = function.at("name");
1699
+ auto parameters = function.at("parameters");
1700
+ builder.resolve_refs(parameters);
1701
+ tool_rules.push_back(builder.add_rule(name + "-call",
1702
+ "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>"
1703
+ "\" " + builder.add_schema(name + "-args", parameters) + " "
1704
+ "\"<|tool▁call▁end|>\""));
1705
+ });
1706
+ // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1707
+ // so we accept common variants (then it's all constrained)
1708
+ builder.add_rule("root",
1709
+ std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1710
+ "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
1711
+ "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
1712
+ "\"<|tool▁calls▁end|>\""
1713
+ " space");
1714
+ data.grammar_triggers.push_back({
1715
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1716
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1717
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1718
+ std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1719
+ "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
1720
+ });
1721
+ data.preserved_tokens = {
1722
+ "<think>",
1723
+ "</think>",
1724
+ "<|tool▁calls▁begin|>",
1725
+ "<|tool▁call▁begin|>",
1726
+ "<|tool▁sep|>",
1727
+ "<|tool▁call▁end|>",
1728
+ "<|tool▁calls▁end|>",
1729
+ };
1730
+ });
1731
+ }
1732
+ return data;
1733
+ }
1734
+
1316
1735
  static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
1317
1736
  builder.try_parse_reasoning("<think>", "</think>");
1318
1737
  if (!builder.syntax().parse_tool_calls) {
@@ -1334,9 +1753,357 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
1334
1753
  tool_calls_end);
1335
1754
  }
1336
1755
 
1756
+ static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
1757
+ static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
1758
+
1759
+ static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
1760
+ static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1761
+ static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1762
+
1763
+ if (!builder.syntax().parse_tool_calls) {
1764
+ LOG_DBG("%s: not parse_tool_calls\n", __func__);
1765
+ builder.add_content(builder.consume_rest());
1766
+ return;
1767
+ }
1768
+
1769
+ LOG_DBG("%s: parse_tool_calls\n", __func__);
1770
+
1771
+ parse_json_tool_calls(
1772
+ builder,
1773
+ /* block_open= */ tool_calls_begin,
1774
+ /* function_regex_start_only= */ std::nullopt,
1775
+ function_regex,
1776
+ close_regex,
1777
+ tool_calls_end);
1778
+ }
1779
+
1780
+ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
1781
+ // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1782
+ // First try to parse using the standard reasoning parsing method
1783
+ LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
1784
+
1785
+ auto start_pos = builder.pos();
1786
+ auto found_end_think = builder.try_find_literal("</think>");
1787
+ builder.move_to(start_pos);
1788
+
1789
+ if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
1790
+ LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
1791
+ common_chat_parse_deepseek_v3_1_content(builder);
1792
+ } else if (builder.try_parse_reasoning("<think>", "</think>")) {
1793
+ // If reasoning was parsed successfully, the remaining content is regular content
1794
+ LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
1795
+ // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1796
+ common_chat_parse_deepseek_v3_1_content(builder);
1797
+ } else {
1798
+ if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1799
+ LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
1800
+ common_chat_parse_deepseek_v3_1_content(builder);
1801
+ return;
1802
+ }
1803
+ // If no reasoning tags found, check if we should treat everything as reasoning
1804
+ if (builder.syntax().thinking_forced_open) {
1805
+ // If thinking is forced open but no tags found, treat everything as reasoning
1806
+ LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
1807
+ builder.add_reasoning_content(builder.consume_rest());
1808
+ } else {
1809
+ LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
1810
+ // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1811
+ common_chat_parse_deepseek_v3_1_content(builder);
1812
+ }
1813
+ }
1814
+ }
1815
+
1816
+
1817
+ static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
1818
+ common_chat_params data;
1819
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1820
+
1821
+ data.prompt = apply(tmpl, params);
1822
+ data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
1823
+
1824
+ // Handle thinking tags based on prompt ending
1825
+ if (string_ends_with(data.prompt, "<think>\n")) {
1826
+ if (!params.enable_thinking) {
1827
+ // Close the thinking tag immediately if thinking is disabled
1828
+ data.prompt += "</think>\n\n";
1829
+ } else {
1830
+ // Mark thinking as forced open (template started with <think>)
1831
+ data.thinking_forced_open = true;
1832
+ }
1833
+ }
1834
+
1835
+ // Preserve MiniMax-M2 special tokens
1836
+ data.preserved_tokens = {
1837
+ "<think>",
1838
+ "</think>",
1839
+ "<minimax:tool_call>",
1840
+ "</minimax:tool_call>",
1841
+ };
1842
+
1843
+ // build grammar for tool call
1844
+ static const xml_tool_call_format form {
1845
+ /* form.scope_start = */ "<minimax:tool_call>\n",
1846
+ /* form.tool_start = */ "<invoke name=\"",
1847
+ /* form.tool_sep = */ "\">\n",
1848
+ /* form.key_start = */ "<parameter name=\"",
1849
+ /* form.key_val_sep = */ "\">",
1850
+ /* form.val_end = */ "</parameter>\n",
1851
+ /* form.tool_end = */ "</invoke>\n",
1852
+ /* form.scope_end = */ "</minimax:tool_call>",
1853
+ };
1854
+ build_grammar_xml_tool_call(data, params.tools, form);
1855
+
1856
+ return data;
1857
+ }
1858
+
1859
+ static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
1860
+ static const xml_tool_call_format form {
1861
+ /* form.scope_start = */ "<minimax:tool_call>",
1862
+ /* form.tool_start = */ "<invoke name=\"",
1863
+ /* form.tool_sep = */ "\">",
1864
+ /* form.key_start = */ "<parameter name=\"",
1865
+ /* form.key_val_sep = */ "\">",
1866
+ /* form.val_end = */ "</parameter>",
1867
+ /* form.tool_end = */ "</invoke>",
1868
+ /* form.scope_end = */ "</minimax:tool_call>",
1869
+ };
1870
+ builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1871
+ }
1872
+
1873
+ static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
1874
+ common_chat_params data;
1875
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1876
+
1877
+ data.prompt = apply(tmpl, params);
1878
+ data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
1879
+
1880
+ data.preserved_tokens = {
1881
+ "<tool_call>",
1882
+ "</tool_call>",
1883
+ "<function=",
1884
+ "</function>",
1885
+ "<parameter=",
1886
+ "</parameter>",
1887
+ };
1888
+
1889
+ // build grammar for tool call
1890
+ static const xml_tool_call_format form {
1891
+ /* form.scope_start = */ "<tool_call>\n",
1892
+ /* form.tool_start = */ "<function=",
1893
+ /* form.tool_sep = */ ">\n",
1894
+ /* form.key_start = */ "<parameter=",
1895
+ /* form.key_val_sep = */ ">\n",
1896
+ /* form.val_end = */ "\n</parameter>\n",
1897
+ /* form.tool_end = */ "</function>\n",
1898
+ /* form.scope_end = */ "</tool_call>",
1899
+ };
1900
+ build_grammar_xml_tool_call(data, params.tools, form);
1901
+
1902
+ return data;
1903
+ }
1904
+
1905
+ static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
1906
+ static const xml_tool_call_format form = ([]() {
1907
+ xml_tool_call_format form {};
1908
+ form.scope_start = "<tool_call>";
1909
+ form.tool_start = "<function=";
1910
+ form.tool_sep = ">";
1911
+ form.key_start = "<parameter=";
1912
+ form.key_val_sep = ">";
1913
+ form.val_end = "</parameter>";
1914
+ form.tool_end = "</function>";
1915
+ form.scope_end = "</tool_call>";
1916
+ form.trim_raw_argval = true;
1917
+ return form;
1918
+ })();
1919
+ builder.consume_reasoning_with_xml_tool_calls(form);
1920
+ }
1921
+
1922
+ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
1923
+ common_chat_params data;
1924
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1925
+
1926
+ data.prompt = apply(tmpl, params);
1927
+ data.format = COMMON_CHAT_FORMAT_KIMI_K2;
1928
+
1929
+ data.preserved_tokens = {
1930
+ "<think>",
1931
+ "</think>",
1932
+ "<|tool_calls_section_begin|>",
1933
+ "<|tool_call_begin|>",
1934
+ "<|tool_call_argument_begin|>",
1935
+ "<|tool_call_end|>",
1936
+ "<|tool_calls_section_end|>",
1937
+ "<|im_end|>",
1938
+ "<|im_system|>",
1939
+ "<|im_middle|>",
1940
+ };
1941
+
1942
+ data.additional_stops.insert(data.additional_stops.end(), {
1943
+ "<|im_end|>",
1944
+ "<|im_middle|>"
1945
+ });
1946
+ // build grammar for tool call
1947
+ static const xml_tool_call_format form = ([]() {
1948
+ xml_tool_call_format form {};
1949
+ form.scope_start = "<|tool_calls_section_begin|>";
1950
+ form.tool_start = "<|tool_call_begin|>";
1951
+ form.tool_sep = "<|tool_call_argument_begin|>{";
1952
+ form.key_start = "\"";
1953
+ form.key_val_sep = "\": ";
1954
+ form.val_end = ", ";
1955
+ form.tool_end = "}<|tool_call_end|>";
1956
+ form.scope_end = "<|tool_calls_section_end|>";
1957
+ form.raw_argval = false;
1958
+ form.last_val_end = "";
1959
+ return form;
1960
+ })();
1961
+ build_grammar_xml_tool_call(data, params.tools, form);
1962
+
1963
+ return data;
1964
+ }
1965
+
1966
+ static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
1967
+ static const xml_tool_call_format form = ([]() {
1968
+ xml_tool_call_format form {};
1969
+ form.scope_start = "<|tool_calls_section_begin|>";
1970
+ form.tool_start = "<|tool_call_begin|>";
1971
+ form.tool_sep = "<|tool_call_argument_begin|>{";
1972
+ form.key_start = "\"";
1973
+ form.key_val_sep = "\": ";
1974
+ form.val_end = ", ";
1975
+ form.tool_end = "}<|tool_call_end|>";
1976
+ form.scope_end = "<|tool_calls_section_end|>";
1977
+ form.raw_argval = false;
1978
+ form.last_val_end = "";
1979
+ return form;
1980
+ })();
1981
+ builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
1982
+ }
1983
+
1984
+ static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
1985
+ common_chat_params data;
1986
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1987
+
1988
+ data.prompt = apply(tmpl, params);
1989
+ data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
1990
+
1991
+ data.preserved_tokens = {
1992
+ "<thinking>",
1993
+ "</thinking>",
1994
+ "<tool_calls>",
1995
+ "</tool_calls>",
1996
+ };
1997
+
1998
+ // build grammar for tool call
1999
+ static const xml_tool_call_format form = ([]() {
2000
+ xml_tool_call_format form {};
2001
+ form.scope_start = "<tool_calls>[";
2002
+ form.tool_start = "{\"name\": \"";
2003
+ form.tool_sep = "\", \"arguments\": {";
2004
+ form.key_start = "\"";
2005
+ form.key_val_sep = "\": ";
2006
+ form.val_end = ", ";
2007
+ form.tool_end = "}, ";
2008
+ form.scope_end = "]</tool_calls>";
2009
+ form.raw_argval = false;
2010
+ form.last_val_end = "";
2011
+ form.last_tool_end = "}";
2012
+ return form;
2013
+ })();
2014
+ build_grammar_xml_tool_call(data, params.tools, form);
2015
+
2016
+ return data;
2017
+ }
2018
+
2019
+ static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
2020
+ static const xml_tool_call_format form = ([]() {
2021
+ xml_tool_call_format form {};
2022
+ form.scope_start = "<tool_calls>[";
2023
+ form.tool_start = "{\"name\": \"";
2024
+ form.tool_sep = "\", \"arguments\": {";
2025
+ form.key_start = "\"";
2026
+ form.key_val_sep = "\": ";
2027
+ form.val_end = ", ";
2028
+ form.tool_end = "}, ";
2029
+ form.scope_end = "]</tool_calls>";
2030
+ form.raw_argval = false;
2031
+ form.last_val_end = "";
2032
+ form.last_tool_end = "}";
2033
+ return form;
2034
+ })();
2035
+ builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
2036
+ }
2037
+
2038
+ static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
2039
+ common_chat_params data;
2040
+ data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2041
+
2042
+ data.prompt = apply(tmpl, params);
2043
+ data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
2044
+
2045
+ data.preserved_tokens = {
2046
+ "<tool_call>",
2047
+ "</tool_call>",
2048
+ };
2049
+
2050
+ // build grammar for tool call
2051
+ static const xml_tool_call_format form = ([]() {
2052
+ xml_tool_call_format form {};
2053
+ form.scope_start = "\n";
2054
+ form.tool_start = "<tool_call>\n{\"name\": \"";
2055
+ form.tool_sep = "\", \"arguments\": {";
2056
+ form.key_start = "\"";
2057
+ form.key_val_sep = "\": ";
2058
+ form.val_end = ", ";
2059
+ form.tool_end = "}\n</tool_call>";
2060
+ form.scope_end = "";
2061
+ form.raw_argval = false;
2062
+ form.last_val_end = "";
2063
+ return form;
2064
+ })();
2065
+ build_grammar_xml_tool_call(data, params.tools, form);
2066
+
2067
+ return data;
2068
+ }
2069
+
2070
+ static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
2071
+ static const xml_tool_call_format form = ([]() {
2072
+ xml_tool_call_format form {};
2073
+ form.scope_start = "";
2074
+ form.tool_start = "<tool_call>\n{\"name\": \"";
2075
+ form.tool_sep = "\", \"arguments\": {";
2076
+ form.key_start = "\"";
2077
+ form.key_val_sep = "\": ";
2078
+ form.val_end = ", ";
2079
+ form.tool_end = "}\n</tool_call>";
2080
+ form.scope_end = "";
2081
+ form.raw_argval = false;
2082
+ form.last_val_end = "";
2083
+ return form;
2084
+ })();
2085
+ builder.consume_reasoning_with_xml_tool_calls(form);
2086
+ }
2087
+
1337
2088
  static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
1338
2089
  common_chat_params data;
1339
- auto prompt = apply(tmpl, inputs);
2090
+
2091
+ // Copy reasoning to the "thinking" field as expected by the gpt-oss template
2092
+ auto adjusted_messages = json::array();
2093
+ for (const auto & msg : inputs.messages) {
2094
+ auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
2095
+ auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
2096
+
2097
+ if (has_reasoning_content && has_tool_calls) {
2098
+ auto adjusted_message = msg;
2099
+ adjusted_message["thinking"] = msg.at("reasoning_content");
2100
+ adjusted_messages.push_back(adjusted_message);
2101
+ } else {
2102
+ adjusted_messages.push_back(msg);
2103
+ }
2104
+ }
2105
+
2106
+ auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
1340
2107
 
1341
2108
  // Check if we need to replace the return token with end token during
1342
2109
  // inference and without generation prompt. For more details see:
@@ -1411,17 +2178,36 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
1411
2178
  );
1412
2179
  });
1413
2180
 
1414
- auto recipient_in_role = builder.add_rule("recipient_in_role",
1415
- "\"<|start|>assistant\"? \" to=functions.\" ( " +
1416
- string_join(tool_rules_recipient_in_role, " | ") + " )"
1417
- );
1418
-
1419
2181
  auto recipient_in_channel = builder.add_rule("recipient_in_channel",
1420
2182
  channel + " \" to=functions.\" ( " +
1421
2183
  string_join(tool_rules_recipient_in_channel, " | ") + " )"
1422
2184
  );
1423
2185
 
1424
- builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
2186
+ if (data.grammar_lazy) {
2187
+ auto recipient_in_role = builder.add_rule("recipient_in_role",
2188
+ "\"<|start|>assistant\"? \" to=functions.\" ( " +
2189
+ string_join(tool_rules_recipient_in_role, " | ") + " )"
2190
+ );
2191
+
2192
+ builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
2193
+ } else {
2194
+ auto not_end = builder.add_rule("not-end",
2195
+ "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
2196
+ auto analysis = builder.add_rule("analysis",
2197
+ "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
2198
+ auto commentary = builder.add_rule("commentary",
2199
+ "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
2200
+
2201
+ auto recipient_in_role = builder.add_rule("recipient_in_role",
2202
+ "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
2203
+ );
2204
+
2205
+ builder.add_rule("root",
2206
+ "( " + analysis + " \"<|start|>assistant\" )? " +
2207
+ "( " + commentary + " \"<|start|>assistant\" )? " +
2208
+ "( " + recipient_in_role + " | " + recipient_in_channel + " )"
2209
+ );
2210
+ }
1425
2211
 
1426
2212
  // Trigger on tool calls that appear in the commentary channel
1427
2213
  data.grammar_triggers.push_back({
@@ -1533,13 +2319,109 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
1533
2319
  }
1534
2320
  }
1535
2321
 
2322
+ static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
2323
+ common_chat_params data;
2324
+ data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2325
+
2326
+ std::string prompt = apply(tmpl, inputs);
2327
+
2328
+ // match the existing trimming behavior
2329
+ if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
2330
+ prompt.erase(0, tmpl.bos_token().size());
2331
+ }
2332
+ if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
2333
+ prompt.erase(prompt.size() - tmpl.eos_token().size());
2334
+ }
2335
+ if (string_ends_with(prompt, "<think>")) {
2336
+ if (!inputs.enable_thinking) {
2337
+ prompt += "</think>";
2338
+ } else {
2339
+ data.thinking_forced_open = true;
2340
+ }
2341
+ }
2342
+
2343
+ // add GLM preserved tokens
2344
+ data.preserved_tokens = {
2345
+ "<|endoftext|>",
2346
+ "[MASK]",
2347
+ "[gMASK]",
2348
+ "[sMASK]",
2349
+ "<sop>",
2350
+ "<eop>",
2351
+ "<|system|>",
2352
+ "<|user|>",
2353
+ "<|assistant|>",
2354
+ "<|observation|>",
2355
+ "<|begin_of_image|>",
2356
+ "<|end_of_image|>",
2357
+ "<|begin_of_video|>",
2358
+ "<|end_of_video|>",
2359
+ "<|begin_of_audio|>",
2360
+ "<|end_of_audio|>",
2361
+ "<|begin_of_transcription|>",
2362
+ "<|end_of_transcription|>",
2363
+ "<|code_prefix|>",
2364
+ "<|code_middle|>",
2365
+ "<|code_suffix|>",
2366
+ "/nothink",
2367
+ "<think>",
2368
+ "</think>",
2369
+ "<tool_call>",
2370
+ "</tool_call>",
2371
+ "<arg_key>",
2372
+ "</arg_key>",
2373
+ "<arg_value>",
2374
+ "</arg_value>"
2375
+ };
2376
+
2377
+ // extra GLM 4.5 stop word
2378
+ data.additional_stops.insert(data.additional_stops.end(), {
2379
+ "<|user|>",
2380
+ "<|observation|>"
2381
+ });
2382
+
2383
+ // build grammar for tool call
2384
+ static const xml_tool_call_format form {
2385
+ /* form.scope_start = */ "",
2386
+ /* form.tool_start = */ "\n<tool_call>",
2387
+ /* form.tool_sep = */ "\n",
2388
+ /* form.key_start = */ "<arg_key>",
2389
+ /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
2390
+ /* form.val_end = */ "</arg_value>\n",
2391
+ /* form.tool_end = */ "</tool_call>\n",
2392
+ /* form.scope_end = */ "",
2393
+ };
2394
+ build_grammar_xml_tool_call(data, inputs.tools, form);
2395
+
2396
+ data.prompt = prompt;
2397
+ data.format = COMMON_CHAT_FORMAT_GLM_4_5;
2398
+ return data;
2399
+ }
2400
+
2401
+ static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
2402
+ static const xml_tool_call_format form {
2403
+ /* form.scope_start = */ "",
2404
+ /* form.tool_start = */ "<tool_call>",
2405
+ /* form.tool_sep = */ "",
2406
+ /* form.key_start = */ "<arg_key>",
2407
+ /* form.key_val_sep = */ "</arg_key>",
2408
+ /* form.val_end = */ "</arg_value>",
2409
+ /* form.tool_end = */ "</tool_call>",
2410
+ /* form.scope_end = */ "",
2411
+ /* form.key_val_sep2 = */ "<arg_value>",
2412
+ };
2413
+ builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
2414
+ }
2415
+
1536
2416
  static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1537
2417
  LOG_DBG("%s\n", __func__);
1538
2418
  common_chat_params data;
1539
- data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ json(), json {
2419
+ const std::optional<json> tools_override = json();
2420
+ const std::optional<json> additional_context = json {
1540
2421
  {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
1541
2422
  {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
1542
- });
2423
+ };
2424
+ data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, tools_override, additional_context);
1543
2425
  if (inputs.tools.is_array() && !inputs.tools.empty()) {
1544
2426
  data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1545
2427
  data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -1830,7 +2712,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
1830
2712
  // If thinking_forced_open, then we capture the </think> tag in the grammar,
1831
2713
  // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1832
2714
  std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
1833
- "(\\s*"
2715
+ "\\s*("
1834
2716
  "(?:<tool_call>"
1835
2717
  "|<function"
1836
2718
  "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
@@ -2025,15 +2907,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
2025
2907
 
2026
2908
  static void common_chat_parse_granite(common_chat_msg_parser & builder) {
2027
2909
  // Parse thinking tags
2910
+ static const common_regex start_think_regex(regex_escape("<think>"));
2911
+ static const common_regex end_think_regex(regex_escape("</think>"));
2912
+ // Granite models output partial tokens such as "<" and "<think".
2913
+ // By leveraging try_consume_regex()/try_find_regex() throwing
2914
+ // common_chat_msg_partial_exception for these partial tokens,
2915
+ // processing is interrupted and the tokens are not passed to add_content().
2916
+ if (auto res = builder.try_consume_regex(start_think_regex)) {
2917
+ // Restore position for try_parse_reasoning()
2918
+ builder.move_to(res->groups[0].begin);
2919
+ builder.try_find_regex(end_think_regex, std::string::npos, false);
2920
+ // Restore position for try_parse_reasoning()
2921
+ builder.move_to(res->groups[0].begin);
2922
+ }
2028
2923
  builder.try_parse_reasoning("<think>", "</think>");
2029
2924
 
2030
- // Parse response tags using regex
2031
- static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
2032
- if (auto res = builder.try_find_regex(response_regex)) {
2033
- // Extract the content between the tags (capture group 1)
2034
- auto content = builder.str(res->groups[1]);
2035
- builder.add_content(content);
2036
- builder.move_to(res->groups[0].end);
2925
+ // Parse response tags
2926
+ static const common_regex start_response_regex(regex_escape("<response>"));
2927
+ static const common_regex end_response_regex(regex_escape("</response>"));
2928
+ // Granite models output partial tokens such as "<" and "<response".
2929
+ // Same hack as reasoning parsing.
2930
+ if (builder.try_consume_regex(start_response_regex)) {
2931
+ builder.try_find_regex(end_response_regex);
2037
2932
  }
2038
2933
 
2039
2934
  if (!builder.syntax().parse_tool_calls) {
@@ -2046,108 +2941,154 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
2046
2941
  if (auto res = builder.try_find_regex(tool_call_regex)) {
2047
2942
  builder.move_to(res->groups[0].end);
2048
2943
 
2944
+ // Expect JSON array of tool calls
2945
+ if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
2946
+ if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
2947
+ throw common_chat_msg_partial_exception("incomplete tool call");
2948
+ }
2949
+ }
2950
+ } else {
2951
+ builder.add_content(builder.consume_rest());
2952
+ }
2953
+ }
2954
+
2955
+ static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
2956
+ // Parse thinking tags
2957
+ builder.try_parse_reasoning("<think>", "</think>");
2958
+ if (!builder.syntax().parse_tool_calls) {
2959
+ builder.add_content(builder.consume_rest());
2960
+ return;
2961
+ }
2962
+
2963
+ // Look for tool calls
2964
+ static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
2965
+ if (auto res = builder.try_find_regex(tool_call_regex)) {
2966
+ builder.move_to(res->groups[0].end);
2967
+
2049
2968
  // Expect JSON array of tool calls
2050
2969
  auto tool_calls_data = builder.consume_json();
2051
2970
  if (tool_calls_data.json.is_array()) {
2052
- if (!builder.add_tool_calls(tool_calls_data.json)) {
2053
- builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
2971
+ if (!builder.try_consume_literal("</TOOLCALL>")) {
2972
+ throw common_chat_msg_partial_exception("Incomplete tool call");
2054
2973
  }
2974
+ builder.add_tool_calls(tool_calls_data.json);
2055
2975
  } else {
2056
- builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
2976
+ throw common_chat_msg_partial_exception("Incomplete tool call");
2057
2977
  }
2058
- } else {
2978
+ }
2979
+ builder.add_content(builder.consume_rest());
2980
+ }
2981
+
2982
+ static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
2983
+ // Parse thinking tags
2984
+ builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
2985
+ if (!builder.syntax().parse_tool_calls) {
2059
2986
  builder.add_content(builder.consume_rest());
2987
+ return;
2060
2988
  }
2989
+
2990
+ // Look for tool calls
2991
+ static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
2992
+ if (auto res = builder.try_find_regex(tool_call_regex)) {
2993
+ builder.move_to(res->groups[0].end);
2994
+
2995
+ auto tool_calls_data = builder.consume_json();
2996
+ if (tool_calls_data.json.is_array()) {
2997
+ builder.consume_spaces();
2998
+ if (!builder.try_consume_literal("<|tools_suffix|>")) {
2999
+ throw common_chat_msg_partial_exception("Incomplete tool call");
3000
+ }
3001
+ for (const auto & value : tool_calls_data.json) {
3002
+ if (value.is_object()) {
3003
+ builder.add_tool_call_short_form(value);
3004
+ }
3005
+ }
3006
+ } else {
3007
+ throw common_chat_msg_partial_exception("Incomplete tool call");
3008
+ }
3009
+ }
3010
+ builder.add_content(builder.consume_rest());
2061
3011
  }
2062
3012
 
2063
- static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
2064
- // Parse thinking tags first - this handles the main reasoning content
2065
- builder.try_parse_reasoning("<seed:think>", "</seed:think>");
2066
3013
 
3014
+ static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
2067
3015
  if (!builder.syntax().parse_tool_calls) {
2068
3016
  builder.add_content(builder.consume_rest());
2069
3017
  return;
2070
3018
  }
2071
3019
 
2072
- // Parse tool calls - Seed-OSS uses <seed:tool_call> format
2073
- static const common_regex tool_call_begin_regex("<seed:tool_call>");
2074
- static const common_regex tool_call_end_regex("</seed:tool_call>");
2075
- static const common_regex function_regex("<function=([^>]+)>");
2076
- static const common_regex param_regex("<parameter=([^>]+)>");
3020
+ // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
3021
+ static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
3022
+ static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
2077
3023
 
2078
- while (auto tool_res = builder.try_find_regex(tool_call_begin_regex)) {
2079
- builder.consume_spaces(); // Consume whitespace after <seed:tool_call>
3024
+ // Loop through all tool calls
3025
+ while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
3026
+ builder.move_to(res->groups[0].end);
2080
3027
 
2081
- // Look for function call inside tool call, ignore any content before it
2082
- if (auto func_res = builder.try_find_regex(function_regex, std::string::npos, false)) {
2083
- auto function_name = builder.str(func_res->groups[1]);
3028
+ // Parse JSON array format: [{"name": "...", "arguments": {...}}]
3029
+ auto tool_calls_data = builder.consume_json();
2084
3030
 
2085
- // Parse Seed-OSS parameters <parameter=name>value</parameter>
2086
- json args = json::object();
2087
- // Parse all parameters
2088
- while (auto param_res = builder.try_find_regex(param_regex, std::string::npos, false)) {
2089
- // again, ignore noise around parameters
2090
- auto param_name = builder.str(param_res->groups[1]);
2091
- builder.move_to(param_res->groups[0].end);
2092
- builder.consume_spaces(); // Consume whitespace after parameter
2093
- auto savedPos = builder.pos();
2094
- if (auto param_parse = builder.try_find_literal("</parameter>")) {
2095
- auto param = param_parse->prelude;
2096
- builder.move_to(savedPos);
2097
- try {
2098
- if (auto param_res = builder.try_consume_json()) {
2099
- args[param_name] = param_res->json;
2100
- } else {
2101
- args[param_name] = param;
2102
- }
2103
- } catch (json::exception &) {
2104
- args[param_name] = param;
2105
- }
2106
- } else {
2107
- throw common_chat_msg_partial_exception("Incomplete tool parameter");
3031
+ // Consume end marker
3032
+ builder.consume_spaces();
3033
+ if (!builder.try_consume_regex(tool_call_end_regex)) {
3034
+ throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
3035
+ }
3036
+
3037
+ // Process each tool call in the array
3038
+ if (tool_calls_data.json.is_array()) {
3039
+ for (const auto & tool_call : tool_calls_data.json) {
3040
+ if (!tool_call.is_object()) {
3041
+ throw common_chat_msg_partial_exception("Tool call must be an object");
2108
3042
  }
2109
- }
2110
- // Look for closing function tag
2111
- auto end_func = builder.try_find_literal("</function>");
2112
- if (end_func) {
2113
- builder.move_to(end_func->groups[0].end);
2114
- builder.consume_spaces(); // Consume whitespace after </function>
2115
-
2116
- // Add the tool call with parsed arguments, but only if we REALLY got the literal
2117
- auto eaten_fragment = builder.input().substr(end_func->groups[0].begin, end_func->groups[0].end);
2118
- auto funlen = std::string("</function>").length();
2119
- if (eaten_fragment.length() >= funlen && eaten_fragment.substr(0, funlen) == std::string("</function>")) {
2120
- if (!builder.add_tool_call(function_name, "", args.dump())) {
2121
- throw common_chat_msg_partial_exception("Incomplete tool call");
3043
+
3044
+ if (!tool_call.contains("name")) {
3045
+ throw common_chat_msg_partial_exception("Tool call missing 'name' field");
3046
+ }
3047
+
3048
+ std::string function_name = tool_call.at("name");
3049
+ std::string arguments = "{}";
3050
+
3051
+ if (tool_call.contains("arguments")) {
3052
+ if (tool_call.at("arguments").is_object()) {
3053
+ arguments = tool_call.at("arguments").dump();
3054
+ } else if (tool_call.at("arguments").is_string()) {
3055
+ arguments = tool_call.at("arguments");
2122
3056
  }
2123
- } else {
3057
+ }
3058
+
3059
+ if (!builder.add_tool_call(function_name, "", arguments)) {
2124
3060
  throw common_chat_msg_partial_exception("Incomplete tool call");
2125
3061
  }
2126
- } else {
2127
- throw common_chat_msg_partial_exception("Incomplete tool call");
2128
- }
2129
- // Look for closing tool call tag
2130
- if (auto end_tool = builder.try_find_regex(tool_call_end_regex, std::string::npos, false)) {
2131
- builder.move_to(end_tool->groups[0].end);
2132
- builder.consume_spaces(); // Consume trailing whitespace after tool call
2133
- } else {
2134
- throw common_chat_msg_partial_exception("Incomplete tool call");
2135
3062
  }
2136
3063
  } else {
2137
- // No function found - don't consume content here, let it be handled at the end
2138
- break;
3064
+ throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
2139
3065
  }
3066
+
3067
+ // Consume any trailing whitespace after this tool call
3068
+ builder.consume_spaces();
2140
3069
  }
2141
3070
 
2142
- // Consume any remaining whitespace after all tool call processing
2143
- builder.consume_spaces();
3071
+ // Consume any remaining content after all tool calls
2144
3072
  auto remaining = builder.consume_rest();
2145
- // If there's any non-whitespace content remaining, add it as content
2146
3073
  if (!string_strip(remaining).empty()) {
2147
3074
  builder.add_content(remaining);
2148
3075
  }
2149
3076
  }
2150
3077
 
3078
+ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
3079
+ static const xml_tool_call_format form {
3080
+ /* form.scope_start = */ "<seed:tool_call>",
3081
+ /* form.tool_start = */ "<function=",
3082
+ /* form.tool_sep = */ ">",
3083
+ /* form.key_start = */ "<parameter=",
3084
+ /* form.key_val_sep = */ ">",
3085
+ /* form.val_end = */ "</parameter>",
3086
+ /* form.tool_end = */ "</function>",
3087
+ /* form.scope_end = */ "</seed:tool_call>",
3088
+ };
3089
+ builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
3090
+ }
3091
+
2151
3092
  static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
2152
3093
  common_chat_params data;
2153
3094
  data.prompt = apply(tmpl, inputs);
@@ -2263,6 +3204,12 @@ static common_chat_params common_chat_templates_apply_jinja(
2263
3204
  }
2264
3205
  }
2265
3206
 
3207
+ // DeepSeek V3.1: detect based on specific patterns in the template
3208
+ if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
3209
+ params.json_schema.is_null()) {
3210
+ return common_chat_params_init_deepseek_v3_1(tmpl, params);
3211
+ }
3212
+
2266
3213
  // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
2267
3214
  if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
2268
3215
  return common_chat_params_init_deepseek_r1(tmpl, params);
@@ -2278,6 +3225,35 @@ static common_chat_params common_chat_templates_apply_jinja(
2278
3225
  return common_chat_params_init_granite(tmpl, params);
2279
3226
  }
2280
3227
 
3228
+ // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
3229
+ if (src.find("[gMASK]<sop>") != std::string::npos &&
3230
+ src.find("<arg_key>") != std::string::npos &&
3231
+ src.find("<arg_value>") != std::string::npos &&
3232
+ params.json_schema.is_null()) {
3233
+ return common_chat_params_init_glm_4_5(tmpl, params);
3234
+ }
3235
+
3236
+ // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
3237
+ // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
3238
+ // Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
3239
+ if (src.find("<tool_call>") != std::string::npos &&
3240
+ src.find("<function>") != std::string::npos &&
3241
+ src.find("<function=") != std::string::npos &&
3242
+ src.find("<parameters>") != std::string::npos &&
3243
+ src.find("<parameter=") != std::string::npos) {
3244
+ return common_chat_params_init_qwen3_coder_xml(tmpl, params);
3245
+ }
3246
+
3247
+ // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
3248
+ if (src.find("<tools>") != std::string::npos &&
3249
+ src.find("# Tools") != std::string::npos &&
3250
+ src.find("</tools>") != std::string::npos &&
3251
+ src.find("<tool_calls>") != std::string::npos &&
3252
+ src.find("</tool_calls>") != std::string::npos &&
3253
+ src.find("<tool_response>") != std::string::npos) {
3254
+ return common_chat_params_init_xiaomi_mimo(tmpl, params);
3255
+ }
3256
+
2281
3257
  // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
2282
3258
  if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
2283
3259
  return common_chat_params_init_hermes_2_pro(tmpl, params);
@@ -2293,6 +3269,45 @@ static common_chat_params common_chat_templates_apply_jinja(
2293
3269
  return common_chat_params_init_seed_oss(tmpl, params, inputs);
2294
3270
  }
2295
3271
 
3272
+ // Nemotron v2
3273
+ if (src.find("<SPECIAL_10>") != std::string::npos) {
3274
+ return common_chat_params_init_nemotron_v2(tmpl, params);
3275
+ }
3276
+
3277
+ // Apertus format detection
3278
+ if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
3279
+ return common_chat_params_init_apertus(tmpl, params);
3280
+ }
3281
+
3282
+ // LFM2 (w/ tools)
3283
+ if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
3284
+ src.find("]<|tool_list_end|>") != std::string::npos) {
3285
+ return common_chat_params_init_lfm2(tmpl, params);
3286
+ }
3287
+
3288
+ // MiniMax-M2 format detection
3289
+ if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
3290
+ return common_chat_params_init_minimax_m2(tmpl, params);
3291
+ }
3292
+
3293
+ // Kimi K2 format detection
3294
+ if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
3295
+ src.find("<|tool_calls_section_begin|>") != std::string::npos &&
3296
+ src.find("## Return of") != std::string::npos) {
3297
+ return common_chat_params_init_kimi_k2(tmpl, params);
3298
+ }
3299
+
3300
+ // Apriel 1.5 format detection
3301
+ if (src.find("<thinking>") != std::string::npos &&
3302
+ src.find("</thinking>") != std::string::npos &&
3303
+ src.find("<available_tools>") != std::string::npos &&
3304
+ src.find("<|assistant|>") != std::string::npos &&
3305
+ src.find("<|tool_result|>") != std::string::npos &&
3306
+ src.find("<tool_calls>[") != std::string::npos &&
3307
+ src.find("]</tool_calls>") != std::string::npos) {
3308
+ return common_chat_params_init_apriel_1_5(tmpl, params);
3309
+ }
3310
+
2296
3311
  // Use generic handler when mixing tools + JSON schema.
2297
3312
  // TODO: support that mix in handlers below.
2298
3313
  if ((params.tools.is_array() && params.json_schema.is_object())) {
@@ -2321,6 +3336,10 @@ static common_chat_params common_chat_templates_apply_jinja(
2321
3336
  return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
2322
3337
  }
2323
3338
 
3339
+ if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
3340
+ return common_chat_params_init_magistral(tmpl, params);
3341
+ }
3342
+
2324
3343
  // Plain handler (no tools)
2325
3344
  if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
2326
3345
  return common_chat_params_init_without_tools(tmpl, params);
@@ -2340,7 +3359,7 @@ static common_chat_params common_chat_templates_apply_legacy(
2340
3359
  const struct common_chat_templates * tmpls,
2341
3360
  const struct common_chat_templates_inputs & inputs)
2342
3361
  {
2343
- int alloc_size = 0;
3362
+ size_t alloc_size = 0;
2344
3363
  std::vector<llama_chat_message> chat;
2345
3364
  std::vector<std::string> contents;
2346
3365
 
@@ -2362,7 +3381,8 @@ static common_chat_params common_chat_templates_apply_legacy(
2362
3381
  const auto & msg = inputs.messages[i];
2363
3382
  const auto & content = contents[i];
2364
3383
  chat.push_back({msg.role.c_str(), content.c_str()});
2365
- alloc_size += (msg.role.size() + content.size()) * 1.25;
3384
+ size_t msg_size = msg.role.size() + content.size();
3385
+ alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
2366
3386
  }
2367
3387
 
2368
3388
  std::vector<char> buf(alloc_size);
@@ -2384,6 +3404,11 @@ static common_chat_params common_chat_templates_apply_legacy(
2384
3404
  res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
2385
3405
  }
2386
3406
 
3407
+ // for safety, we check the result again
3408
+ if (res < 0 || (size_t) res > buf.size()) {
3409
+ throw std::runtime_error("failed to apply chat template, try using --jinja");
3410
+ }
3411
+
2387
3412
  common_chat_params params;
2388
3413
  params.prompt = std::string(buf.data(), res);
2389
3414
  if (!inputs.json_schema.empty()) {
@@ -2405,6 +3430,7 @@ common_chat_params common_chat_templates_apply(
2405
3430
  }
2406
3431
 
2407
3432
  static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
3433
+ builder.try_parse_reasoning("<think>", "</think>");
2408
3434
  builder.add_content(builder.consume_rest());
2409
3435
  }
2410
3436
 
@@ -2421,6 +3447,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
2421
3447
  case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
2422
3448
  common_chat_parse_mistral_nemo(builder);
2423
3449
  break;
3450
+ case COMMON_CHAT_FORMAT_MAGISTRAL:
3451
+ common_chat_parse_magistral(builder);
3452
+ break;
2424
3453
  case COMMON_CHAT_FORMAT_LLAMA_3_X:
2425
3454
  common_chat_parse_llama_3_1(builder);
2426
3455
  break;
@@ -2430,6 +3459,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
2430
3459
  case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
2431
3460
  common_chat_parse_deepseek_r1(builder);
2432
3461
  break;
3462
+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
3463
+ common_chat_parse_deepseek_v3_1(builder);
3464
+ break;
2433
3465
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
2434
3466
  common_chat_parse_functionary_v3_2(builder);
2435
3467
  break;
@@ -2454,6 +3486,33 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
2454
3486
  case COMMON_CHAT_FORMAT_SEED_OSS:
2455
3487
  common_chat_parse_seed_oss(builder);
2456
3488
  break;
3489
+ case COMMON_CHAT_FORMAT_NEMOTRON_V2:
3490
+ common_chat_parse_nemotron_v2(builder);
3491
+ break;
3492
+ case COMMON_CHAT_FORMAT_APERTUS:
3493
+ common_chat_parse_apertus(builder);
3494
+ break;
3495
+ case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
3496
+ common_chat_parse_lfm2(builder);
3497
+ break;
3498
+ case COMMON_CHAT_FORMAT_MINIMAX_M2:
3499
+ common_chat_parse_minimax_m2(builder);
3500
+ break;
3501
+ case COMMON_CHAT_FORMAT_GLM_4_5:
3502
+ common_chat_parse_glm_4_5(builder);
3503
+ break;
3504
+ case COMMON_CHAT_FORMAT_KIMI_K2:
3505
+ common_chat_parse_kimi_k2(builder);
3506
+ break;
3507
+ case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
3508
+ common_chat_parse_qwen3_coder_xml(builder);
3509
+ break;
3510
+ case COMMON_CHAT_FORMAT_APRIEL_1_5:
3511
+ common_chat_parse_apriel_1_5(builder);
3512
+ break;
3513
+ case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
3514
+ common_chat_parse_xiaomi_mimo(builder);
3515
+ break;
2457
3516
  default:
2458
3517
  throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
2459
3518
  }