@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -1,10 +1,125 @@
1
1
  #include "chat.h"
2
+ #include "chat-parser.h"
3
+ #include "common.h"
2
4
  #include "json-schema-to-grammar.h"
3
5
  #include "log.h"
6
+ #include "json-partial.h"
4
7
  #include "minja/chat-template.hpp"
5
8
  #include "minja/minja.hpp"
9
+ #include "regex-partial.h"
6
10
 
11
+ #include <cstdio>
12
+ #include <exception>
13
+ #include <iostream>
7
14
  #include <optional>
15
+ #include <stdexcept>
16
+ #include <string>
17
+ #include <vector>
18
+
19
+
20
+ static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
21
+ auto time = std::chrono::system_clock::to_time_t(now);
22
+ auto local_time = *std::localtime(&time);
23
+ std::ostringstream ss;
24
+ ss << std::put_time(&local_time, format.c_str());
25
+ auto res = ss.str();
26
+ return res;
27
+ }
28
+
29
+ static std::string string_diff(const std::string & last, const std::string & current) {
30
+ if (last.empty()) {
31
+ return current;
32
+ }
33
+ if (!string_starts_with(current, last)) {
34
+ if (string_starts_with(last, current)) {
35
+ // This happens if the last generation ended on a partial stop word (not erased),
36
+ // and the current ended on a stop word (erased).
37
+ return "";
38
+ }
39
+ throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
40
+ }
41
+ return current.substr(last.size());
42
+ }
43
+
44
+ static bool has_content_or_tool_calls(const common_chat_msg & msg) {
45
+ return !msg.content.empty() || !msg.tool_calls.empty();
46
+ }
47
+
48
+ template <>
49
+ json common_chat_msg::to_json_oaicompat() const
50
+ {
51
+ json message {
52
+ {"role", "assistant"},
53
+ };
54
+ if (!reasoning_content.empty()) {
55
+ message["reasoning_content"] = reasoning_content;
56
+ }
57
+ if (content.empty() && !tool_calls.empty()) {
58
+ message["content"] = json();
59
+ } else {
60
+ message["content"] = content;
61
+ }
62
+ if (!tool_calls.empty()) {
63
+ auto arr = json::array();
64
+ for (const auto & tc : tool_calls) {
65
+ arr.push_back({
66
+ {"type", "function"},
67
+ {"function", {
68
+ {"name", tc.name},
69
+ {"arguments", tc.arguments},
70
+ }},
71
+ {"id", tc.id},
72
+ // // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
73
+ // // We only generate a random id for the ones that don't generate one by themselves
74
+ // // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
75
+ // {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
76
+ });
77
+ }
78
+ message["tool_calls"] = arr;
79
+ }
80
+ return message;
81
+ }
82
+
83
+ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
84
+ std::vector<common_chat_msg_diff> diffs;
85
+ // if (previous_msg.reasoning_content != current.reasoning_content) {
86
+ // auto & diff = diffs.emplace_back();
87
+ // diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, current.reasoning_content);
88
+ // }
89
+ if (previous_msg.content != new_msg.content) {
90
+ auto & diff = diffs.emplace_back();
91
+ diff.content_delta = string_diff(previous_msg.content, new_msg.content);
92
+ }
93
+
94
+ if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
95
+ throw std::runtime_error("Invalid diff: now finding less tool calls!");
96
+ }
97
+
98
+ if (!previous_msg.tool_calls.empty()) {
99
+ auto idx = previous_msg.tool_calls.size() - 1;
100
+ const auto & pref = previous_msg.tool_calls[idx];
101
+ const auto & newf = new_msg.tool_calls[idx];
102
+ if (pref.name != newf.name) {
103
+ throw std::runtime_error("Invalid diff: tool call mismatch!");
104
+ }
105
+ auto args_diff = string_diff(pref.arguments, newf.arguments);
106
+ if (!args_diff.empty() || pref.id != newf.id) {
107
+ auto & diff = diffs.emplace_back();
108
+ diff.tool_call_index = idx;
109
+ if (pref.id != newf.id) {
110
+ diff.tool_call_delta.id = newf.id;
111
+ diff.tool_call_delta.name = newf.name;
112
+ }
113
+ diff.tool_call_delta.arguments = args_diff;
114
+ }
115
+ }
116
+ for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
117
+ auto & diff = diffs.emplace_back();
118
+ diff.tool_call_index = idx;
119
+ diff.tool_call_delta = new_msg.tool_calls[idx];
120
+ }
121
+ return diffs;
122
+ }
8
123
 
9
124
  typedef minja::chat_template common_chat_template;
10
125
 
@@ -23,7 +138,8 @@ struct templates_params {
23
138
  bool stream;
24
139
  std::string grammar;
25
140
  bool add_generation_prompt = true;
26
- bool extract_reasoning = true;
141
+ bool enable_thinking = true;
142
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
27
143
  };
28
144
 
29
145
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
@@ -267,6 +383,32 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
267
383
  return result;
268
384
  }
269
385
 
386
+ template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
387
+ json delta = json::object();
388
+ // if (!diff.reasoning_content_delta.empty()) {
389
+ // delta["reasoning_content"] = msg.reasoning_content;
390
+ // }
391
+ if (!diff.content_delta.empty()) {
392
+ delta["content"] = diff.content_delta;
393
+ }
394
+ if (diff.tool_call_index != std::string::npos) {
395
+ json tool_call;
396
+ tool_call["index"] = diff.tool_call_index;
397
+ if (!diff.tool_call_delta.id.empty()) {
398
+ tool_call["id"] = diff.tool_call_delta.id;
399
+ tool_call["type"] = "function";
400
+ }
401
+ json function = json::object();
402
+ if (!diff.tool_call_delta.name.empty()) {
403
+ function["name"] = diff.tool_call_delta.name;
404
+ }
405
+ function["arguments"] = diff.tool_call_delta.arguments;
406
+ tool_call["function"] = function;
407
+ delta["tool_calls"] = json::array({tool_call});
408
+ }
409
+ return delta;
410
+ }
411
+
270
412
  bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
271
413
  if (use_jinja) {
272
414
  try {
@@ -434,7 +576,7 @@ common_chat_templates_ptr common_chat_templates_init(
434
576
  return tmpls;
435
577
  }
436
578
 
437
- std::string common_chat_format_name(common_chat_format format) {
579
+ const char * common_chat_format_name(common_chat_format format) {
438
580
  switch (format) {
439
581
  case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
440
582
  case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
@@ -442,182 +584,127 @@ std::string common_chat_format_name(common_chat_format format) {
442
584
  case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
443
585
  case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
444
586
  case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
445
- case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)";
446
587
  case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
447
588
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
448
589
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
449
590
  case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
450
- case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
451
591
  case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
452
- case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
453
592
  default:
454
593
  throw std::runtime_error("Unknown chat format");
455
594
  }
456
595
  }
457
596
 
458
- static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) {
459
- // // https://json.nlohmann.me/features/parsing/sax_interface/
460
- struct json_error_locator : public nlohmann::json_sax<json> {
461
- std::size_t position;
462
- bool found_error;
463
-
464
- json_error_locator() : position(0), found_error(false) {}
465
-
466
- bool parse_error(std::size_t position, const std::string &, const json::exception &) override { // NOLINT
467
- this->position = position - 1;
468
- this->found_error = true;
469
- return false;
470
- }
471
- bool null() override { return true; } // NOLINT
472
- bool boolean(bool) override { return true; } // NOLINT
473
- bool number_integer(number_integer_t) override { return true; } // NOLINT
474
- bool number_unsigned(number_unsigned_t) override { return true; } // NOLINT
475
- bool number_float(number_float_t, const string_t &) override { return true; } // NOLINT
476
- bool string(string_t &) override { return true; } // NOLINT
477
- bool binary(binary_t &) override { return true; } // NOLINT
478
- bool start_object(std::size_t) override { return true; } // NOLINT
479
- bool key(string_t &) override { return true; } // NOLINT
480
- bool end_object() override { return true; }
481
- bool start_array(std::size_t) override { return true; } // NOLINT
482
- bool end_array() override { return true; }
483
- };
484
- json_error_locator err_loc;
485
- json::sax_parse(it, end, &err_loc);
486
-
487
- std::string::const_iterator temptative_end;
488
- if (err_loc.found_error) {
489
- temptative_end = it + err_loc.position;
490
- } else {
491
- temptative_end = end;
492
- }
493
- std::string json_sub {it, temptative_end};
494
- try {
495
- out = json::parse(json_sub);
496
- it = temptative_end;
497
- return true;
498
- } catch (const std::exception &) {
499
- return false;
500
- }
501
- }
502
-
503
- static bool parse_literal(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
504
- auto expected_it = expected.begin();
505
- auto tmp_it = it;
506
- while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
507
- ++tmp_it;
508
- ++expected_it;
509
- }
510
- if (expected_it == expected.end()) {
511
- it = tmp_it;
512
- return true;
513
- }
514
- return false;
515
- }
516
-
517
- static std::optional<std::smatch> parse_pattern(std::string::const_iterator & it, const std::string::const_iterator & end, const std::regex & expected) {
518
- std::smatch match;
519
- if (std::regex_match(it, end, match, expected)) {
520
- it = match.suffix().first;
521
- return match;
597
+ const char * common_reasoning_format_name(common_reasoning_format format) {
598
+ switch (format) {
599
+ case COMMON_REASONING_FORMAT_NONE: return "none";
600
+ case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
601
+ default:
602
+ throw std::runtime_error("Unknown reasoning format");
522
603
  }
523
- return std::nullopt;
524
604
  }
525
605
 
526
- static void consume_spaces(std::string::const_iterator & it, const std::string::const_iterator & end) {
527
- while (it != end && std::isspace(*it)) {
528
- ++it;
606
+ static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
607
+ std::string arguments;
608
+ if (builder.is_partial()) {
609
+ arguments = (json {{"code", code + builder.healing_marker()}}).dump();
610
+ auto idx = arguments.find(builder.healing_marker());
611
+ if (idx != std::string::npos) {
612
+ arguments.resize(idx);
613
+ }
614
+ } else {
615
+ arguments = (json {{"code", code}}).dump();
529
616
  }
617
+ return arguments;
530
618
  }
531
619
 
532
620
  /**
533
621
  * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
534
622
  * Aggregates the prefix, suffix and in-between text into the content.
535
623
  */
536
- static common_chat_msg parse_json_tool_calls(
537
- const std::string& input,
538
- const std::optional<std::regex> & trigger_opt,
539
- const std::regex & function_regex,
540
- const std::regex & close_regex,
541
- bool allow_raw_python = false) {
542
- std::smatch match;
543
-
544
- common_chat_msg result;
545
- result.role = "assistant";
546
-
547
-
548
- auto end = input.end();
549
- auto it = input.begin();
550
-
551
- if (trigger_opt) {
552
- if (!std::regex_search(it, end, match, *trigger_opt)) {
553
- result.content = input;
554
- return result;
555
- }
556
- result.content = match.prefix().str();
557
- it = match.suffix().first;
558
- }
624
+ static void parse_json_tool_calls(
625
+ common_chat_msg_parser & builder,
626
+ const std::optional<common_regex> & block_open,
627
+ const std::optional<common_regex> & function_regex_start_only,
628
+ const std::optional<common_regex> & function_regex,
629
+ const common_regex & close_regex,
630
+ const std::optional<common_regex> & block_close,
631
+ bool allow_raw_python = false,
632
+ const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
633
+
634
+ auto parse_tool_calls = [&]() {
635
+ size_t from = std::string::npos;
636
+ auto first = true;
637
+ while (true) {
638
+ auto res = function_regex_start_only && first
639
+ ? builder.try_consume_regex(*function_regex_start_only)
640
+ : function_regex
641
+ ? builder.try_find_regex(*function_regex, from)
642
+ : std::nullopt;
643
+ if (res) {
644
+ std::string name;
645
+ if (get_function_name) {
646
+ name = get_function_name(*res);
647
+ } else {
648
+ GGML_ASSERT(res->groups.size() == 2);
649
+ name = builder.str(res->groups[1]);
650
+ }
651
+ first = false;
652
+ if (name.empty()) {
653
+ // get_function_name signalled us that we should skip this match and treat it as content.
654
+ from = res->groups[0].begin + 1;
655
+ continue;
656
+ }
657
+ from = std::string::npos;
559
658
 
560
- while (it != end) {
561
- std::sregex_iterator rend;
562
- std::sregex_iterator rit(it, end, function_regex);
563
- if (rit == rend) {
564
- result.content += std::string(it, end);
659
+ auto maybe_raw_python = name == "python" && allow_raw_python;
660
+ if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
661
+ if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
662
+ if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
663
+ throw common_chat_msg_partial_exception("incomplete tool call");
664
+ }
665
+ builder.consume_regex(close_regex);
666
+ }
667
+ continue;
668
+ }
669
+ if (maybe_raw_python) {
670
+ auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
671
+ if (!builder.add_tool_call(name, "", arguments)) {
672
+ throw common_chat_msg_partial_exception("incomplete tool call");
673
+ }
674
+ return;
675
+ }
676
+ throw common_chat_msg_partial_exception("incomplete tool call");
677
+ }
565
678
  break;
566
679
  }
567
- auto name = rit->str(1);
568
- result.content += std::string(it, rit->prefix().second);
569
- it = rit->suffix().first;
570
-
571
- json arguments;
572
- if (parse_json(it, end, arguments)) {
573
- if (!std::regex_search(it, end, match, close_regex)) {
574
- throw std::runtime_error("Malformed input, missing closing pattern: " + input);
575
- }
576
- it = match.suffix().first;
577
- result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
578
- } else {
579
- if (allow_raw_python && name == "python") {
580
- result.tool_calls.push_back({name, json({{"code", std::string(it, end)}}).dump(), /* id= */ ""});
581
- break;
582
- }
583
- throw std::runtime_error("Failed to parse json tool call arguments: " + input);
680
+ if (block_close) {
681
+ builder.consume_regex(*block_close);
584
682
  }
585
- }
586
-
587
- if (!result.tool_calls.empty()) {
588
- if (!string_strip(result.content).empty()) {
589
- LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
683
+ builder.consume_spaces();
684
+ builder.add_content(builder.consume_rest());
685
+ };
686
+ if (block_open) {
687
+ if (auto res = builder.try_find_regex(*block_open)) {
688
+ parse_tool_calls();
689
+ } else {
690
+ builder.add_content(builder.consume_rest());
590
691
  }
591
- result.content = "";
692
+ } else {
693
+ parse_tool_calls();
592
694
  }
593
- return result;
594
695
  }
595
696
 
596
- static common_chat_tool_call process_tool_call(const json & tool_call) {
597
- const auto & arguments = tool_call.at("arguments");
598
- return {
599
- /* .name = */ tool_call.at("name"),
600
- /* .arguments = */ arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
601
- /* .id = */ tool_call.contains("id") ? tool_call.at("id") : "",
602
- };
603
- }
604
- static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
605
- auto content_end = input.find(prefix);
606
- size_t tc_start = std::string::npos;
607
-
608
- common_chat_msg result;
609
- result.role = "assistant";
610
- if (content_end == std::string::npos) {
611
- result.content = input;
612
- } else {
613
- tc_start = content_end + prefix.size() - rstrip_prefix;
614
- result.content = input.substr(0, content_end);
615
- auto tool_calls = json::parse(input.substr(tc_start));
616
- for (const auto & tool_call : tool_calls) {
617
- result.tool_calls.emplace_back(process_tool_call(tool_call));
697
+ static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
698
+ static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
699
+ if (auto res = builder.try_find_regex(prefix)) {
700
+ builder.move_back(rstrip_prefix);
701
+ auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
702
+ if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
703
+ throw common_chat_msg_partial_exception("incomplete tool call array");
618
704
  }
705
+ } else {
706
+ builder.add_content(builder.consume_rest());
619
707
  }
620
- return result;
621
708
  }
622
709
 
623
710
  static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
@@ -744,29 +831,36 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
744
831
  data.format = COMMON_CHAT_FORMAT_GENERIC;
745
832
  return data;
746
833
  }
747
- static common_chat_msg common_chat_parse_generic(const std::string & input) {
748
- json data = json::parse(input);
749
- common_chat_msg result;
750
- result.role = "assistant";
751
- if (data.contains("tool_calls")) {
752
- for (const auto & tool_call : data.at("tool_calls")) {
753
- result.tool_calls.push_back({
754
- tool_call.at("name"),
755
- tool_call.at("arguments").dump(),
756
- tool_call.contains("id") ? tool_call.at("id") : "",
757
- });
834
+ static void common_chat_parse_generic(common_chat_msg_parser & builder) {
835
+ if (!builder.syntax().parse_tool_calls) {
836
+ builder.add_content(builder.consume_rest());
837
+ return;
838
+ }
839
+ static const std::vector<std::vector<std::string>> content_paths = {
840
+ {"response"},
841
+ };
842
+ static const std::vector<std::vector<std::string>> args_paths = {
843
+ {"tool_call", "arguments"},
844
+ {"tool_calls", "arguments"},
845
+ };
846
+ auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
847
+ if (data.value.contains("tool_calls")) {
848
+ if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
849
+ throw common_chat_msg_partial_exception("incomplete tool calls");
758
850
  }
759
- } else if (data.contains("tool_call")) {
760
- result.tool_calls.push_back({
761
- data.at("tool_call").at("name"),
762
- data.at("tool_call").at("arguments").dump(),
763
- /* id= */ "",
764
- });
765
- } else if (data.contains("response")) {
766
- const auto & response = data.at("response");
767
- result.content = response.is_string() ? response.get<std::string>() : response.dump(2);
851
+ } else if (data.value.contains("tool_call")) {
852
+ if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
853
+ throw common_chat_msg_partial_exception("incomplete tool call");
854
+ }
855
+ } else if (data.value.contains("response")) {
856
+ const auto & response = data.value.at("response");
857
+ builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
858
+ if (data.is_partial) {
859
+ throw common_chat_msg_partial_exception("incomplete response");
860
+ }
861
+ } else {
862
+ throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
768
863
  }
769
- return result;
770
864
  }
771
865
 
772
866
  static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -813,12 +907,44 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
813
907
  data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
814
908
  return data;
815
909
  }
816
- static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) {
817
- return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]");
910
+ static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
911
+ if (!builder.syntax().parse_tool_calls) {
912
+ builder.add_content(builder.consume_rest());
913
+ return;
914
+ }
915
+
916
+ static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
917
+ parse_prefixed_json_tool_call_array(builder, prefix);
818
918
  }
819
919
 
820
920
  static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
821
921
  common_chat_params data;
922
+
923
+ auto adjusted_messages = json::array();
924
+ for (const auto & msg : inputs.messages) {
925
+ auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
926
+ auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
927
+ if (has_reasoning_content && has_tool_calls) {
928
+ auto adjusted_message = msg;
929
+ adjusted_message["tool_plan"] = msg.at("reasoning_content");
930
+ adjusted_message.erase("reasoning_content");
931
+ adjusted_messages.push_back(adjusted_message);
932
+ } else {
933
+ adjusted_messages.push_back(msg);
934
+ }
935
+ }
936
+ data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
937
+ data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
938
+ if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
939
+ if (!inputs.enable_thinking) {
940
+ data.prompt += "<|END_THINKING|>";
941
+ } else {
942
+ data.thinking_forced_open = true;
943
+ }
944
+ } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
945
+ data.prompt += "<|START_THINKING|><|END_THINKING|>";
946
+ }
947
+
822
948
  data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
823
949
  data.grammar = build_grammar([&](const common_grammar_builder & builder) {
824
950
  auto schemas = json::array();
@@ -849,11 +975,16 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
849
975
  if (!inputs.parallel_tool_calls) {
850
976
  schema["maxItems"] = 1;
851
977
  }
852
- builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
978
+ builder.add_rule("root",
979
+ std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
980
+ "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
853
981
  });
854
982
  data.grammar_triggers.push_back({
855
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
856
- "<|START_ACTION|>",
983
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
984
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
985
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
986
+ std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
987
+ "(<\\|START_ACTION\\|>)[\\s\\S]*"
857
988
  });
858
989
  data.preserved_tokens = {
859
990
  "<|START_ACTION|>",
@@ -863,61 +994,40 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
863
994
  "<|START_THINKING|>",
864
995
  "<|END_THINKING|>",
865
996
  };
866
- auto adjusted_messages = json::array();
867
- for (const auto & msg : inputs.messages) {
868
- auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
869
- auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
870
- if (has_reasoning_content && has_tool_calls) {
871
- auto adjusted_message = msg;
872
- adjusted_message["tool_plan"] = msg.at("reasoning_content");
873
- adjusted_message.erase("reasoning_content");
874
- adjusted_messages.push_back(adjusted_message);
875
- } else {
876
- adjusted_messages.push_back(msg);
877
- }
878
- }
879
- data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
880
- data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B;
881
997
  return data;
882
998
  }
883
- static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
884
- static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
885
- static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
886
- static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
887
-
888
- std::smatch match;
889
-
890
- common_chat_msg result;
891
- result.role = "assistant";
892
-
893
- std::string rest = input;
894
999
 
895
- if (std::regex_match(rest, match, thought_regex)) {
896
- if (extract_reasoning) {
897
- result.reasoning_content = match[2].str();
898
- } else if (!match[2].str().empty()) {
899
- // Let the unparsed thinking tags through in content only if their insides aren't empty.
900
- result.content = match[1].str();
1000
+ static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
1001
+ builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
1002
+
1003
+ static const common_regex start_action_regex("<\\|START_ACTION\\|>");
1004
+ static const common_regex end_action_regex("<\\|END_ACTION\\|>");
1005
+ static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
1006
+ static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
1007
+
1008
+ if (auto res = builder.try_find_regex(start_action_regex)) {
1009
+ // If we didn't extract thoughts, prelude includes them.
1010
+ auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
1011
+ for (const auto & tool_call : tool_calls.value) {
1012
+ std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
1013
+ std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
1014
+ std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
1015
+ if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
1016
+ throw common_chat_msg_partial_exception("incomplete tool call");
1017
+ }
901
1018
  }
902
- rest = match[3].str();
903
- }
904
- if (std::regex_match(rest, match, action_regex)) {
905
- auto actions_str = match[1].str();
906
- auto actions = json::parse(actions_str);
907
- for (const auto & action : actions) {
908
- result.tool_calls.push_back({
909
- /* .name = */ action.at("tool_name"),
910
- /* .arguments = */ action.at("parameters").dump(),
911
- /* .id = */ action.at("tool_call_id"),
912
- });
1019
+ if (tool_calls.is_partial) {
1020
+ throw common_chat_msg_partial_exception("incomplete tool call");
1021
+ }
1022
+ builder.consume_regex(end_action_regex);
1023
+ } else if (auto res = builder.try_find_regex(start_response_regex)) {
1024
+ if (!builder.try_find_regex(end_response_regex)) {
1025
+ builder.add_content(builder.consume_rest());
1026
+ throw common_chat_msg_partial_exception(end_response_regex.str());
913
1027
  }
914
- } else if (std::regex_match(rest, match, response_regex)) {
915
- auto response = match[1].str();
916
- result.content += response;
917
1028
  } else {
918
- result.content += rest;
1029
+ builder.add_content(builder.consume_rest());
919
1030
  }
920
- return result;
921
1031
  }
922
1032
 
923
1033
  static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
@@ -939,116 +1049,172 @@ static void expect_tool_parameters(const std::string & name, const json & parame
939
1049
  }
940
1050
  }
941
1051
 
942
- static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
1052
+ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
943
1053
  auto builtin_tools = json::array();
944
1054
  common_chat_params data;
945
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
946
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
947
- std::vector<std::string> tool_rules;
948
-
949
- auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
950
- if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
951
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
952
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
953
- expect_tool_parameters(name, parameters, {"query"});
954
- } else if (name == "python" || name == "code_interpreter") {
955
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
956
- expect_tool_parameters(name, parameters, {"code"});
957
- } else {
958
- return false;
959
- }
1055
+ if (!inputs.tools.is_null()) {
1056
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1057
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1058
+ std::vector<std::string> tool_rules;
960
1059
 
961
- std::vector<std::string> kvs;
962
- for (const auto & [key, value] : parameters.at("properties").items()) {
963
- kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
964
- }
1060
+ auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
1061
+ if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
1062
+ // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
1063
+ // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
1064
+ expect_tool_parameters(name, parameters, {"query"});
1065
+ } else if (name == "python" || name == "code_interpreter") {
1066
+ // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
1067
+ expect_tool_parameters(name, parameters, {"code"});
1068
+ } else {
1069
+ return false;
1070
+ }
965
1071
 
966
- tool_rules.push_back(
967
- builder.add_rule(
968
- name + "-call",
969
- "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
970
- builtin_tools.push_back(name);
1072
+ std::vector<std::string> kvs;
1073
+ for (const auto & [key, value] : parameters.at("properties").items()) {
1074
+ kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
1075
+ }
971
1076
 
972
- return true;
973
- };
1077
+ tool_rules.push_back(
1078
+ builder.add_rule(
1079
+ name + "-call",
1080
+ "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
1081
+ builtin_tools.push_back(name);
974
1082
 
975
- foreach_function(inputs.tools, [&](const json & tool) {
976
- const auto & function = tool.at("function");
977
- std::string name = function.at("name");
978
- auto parameters = function.at("parameters");
979
- builder.resolve_refs(parameters);
1083
+ return true;
1084
+ };
1085
+
1086
+ foreach_function(inputs.tools, [&](const json & tool) {
1087
+ const auto & function = tool.at("function");
1088
+ std::string name = function.at("name");
1089
+ auto parameters = function.at("parameters");
1090
+ builder.resolve_refs(parameters);
980
1091
 
981
- // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
982
- if (allow_python_tag_builtin_tools) {
983
- handle_builtin_tool(name, parameters);
1092
+ // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
1093
+ if (allow_python_tag_builtin_tools) {
1094
+ handle_builtin_tool(name, parameters);
1095
+ }
1096
+ tool_rules.push_back(
1097
+ builder.add_rule(
1098
+ name + "-call",
1099
+ "\"{\" space "
1100
+ "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
1101
+ " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
1102
+ " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
1103
+ "\"}\" space"));
1104
+ });
1105
+ // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
1106
+ data.grammar_triggers.push_back({
1107
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1108
+ "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
1109
+ });
1110
+ if (!builtin_tools.empty()) {
1111
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1112
+ data.preserved_tokens.push_back("<|python_tag|>");
984
1113
  }
985
- tool_rules.push_back(
986
- builder.add_rule(
987
- name + "-call",
988
- "\"{\" space "
989
- "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
990
- " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
991
- " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
992
- "\"}\" space"));
1114
+ // Allow a few empty lines on top of the usual constrained json schema space rule.
1115
+ builder.add_rule("root", string_join(tool_rules, " | "));
1116
+ data.additional_stops.push_back("<|eom_id|>");
993
1117
  });
994
- // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
995
- data.grammar_triggers.push_back({
996
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
997
- "\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
998
- });
999
- if (!builtin_tools.empty()) {
1000
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1001
- data.preserved_tokens.push_back("<|python_tag|>");
1002
- }
1003
- // Allow a few empty lines on top of the usual constrained json schema space rule.
1004
- builder.add_rule("root", string_join(tool_rules, " | "));
1005
- });
1006
- data.additional_stops.push_back("<|eom_id|>");
1118
+ data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
1119
+ ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
1120
+ : COMMON_CHAT_FORMAT_LLAMA_3_X;
1121
+ } else {
1122
+ data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
1123
+ }
1007
1124
  data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
1125
+ {"date_string", format_time(inputs.now, "%d %b %Y")},
1008
1126
  {"tools_in_user_message", false},
1009
1127
  {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
1010
1128
  });
1011
- data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
1012
- ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
1013
- : COMMON_CHAT_FORMAT_LLAMA_3_X;
1014
1129
  return data;
1015
1130
  }
1016
- static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
1017
- // TODO: tighten & simplify the parser, don't accept leading text context.
1018
- static const std::regex function_regex(
1131
+ static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
1132
+ if (!builder.syntax().parse_tool_calls) {
1133
+ builder.add_content(builder.consume_rest());
1134
+ return;
1135
+ }
1136
+
1137
+ static const common_regex function_regex(
1019
1138
  "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
1020
- static const std::regex close_regex("\\}\\s*");
1021
- static const std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
1139
+ static const common_regex close_regex("\\}\\s*");
1140
+
1141
+ static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
1142
+ static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
1022
1143
 
1023
1144
  if (with_builtin_tools) {
1024
- std::smatch match;
1025
- if (std::regex_match(input, match, builtin_call_regex)) {
1026
- try {
1027
- auto name = match[1].str();
1028
- auto arg_name = match[2].str();
1029
- auto arg_value_str = match[3].str();
1030
- auto arg_value = json::parse(arg_value_str);
1031
-
1032
- common_chat_msg msg;
1033
- msg.role = "assistant";
1034
- msg.tool_calls.push_back({
1035
- /* .name = */ name,
1036
- /* .arguments = */ (json {
1037
- {arg_name, arg_value},
1038
- }).dump(),
1039
- /* .id = */ "",
1040
- });
1041
- return msg;
1042
- } catch (const std::exception & e) {
1043
- LOG_WRN("Failed to parse builtin tool call arguments (%s): %s", e.what(), input.c_str());
1145
+ static const common_regex builtin_call_regex("<\\|python_tag\\|>");
1146
+ if (auto res = builder.try_find_regex(builtin_call_regex)) {
1147
+ auto fun_res = builder.consume_regex(function_name_regex);
1148
+ auto function_name = builder.str(fun_res.groups[1]);
1149
+
1150
+ common_healing_marker healing_marker;
1151
+ json args = json::object();
1152
+ while (true) {
1153
+ if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
1154
+ auto arg_name = builder.str(arg_res->groups[1]);
1155
+ auto partial = builder.consume_json();
1156
+ args[arg_name] = partial.json;
1157
+ healing_marker.marker = partial.healing_marker.marker;
1158
+ healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
1159
+ builder.consume_spaces();
1160
+ if (!builder.try_consume_literal(",")) {
1161
+ break;
1162
+ }
1163
+ } else {
1164
+ break;
1165
+ }
1166
+ }
1167
+ builder.consume_literal(")");
1168
+ builder.consume_spaces();
1169
+
1170
+ auto arguments = args.dump();
1171
+ if (!builder.add_tool_call(function_name, "", arguments)) {
1172
+ throw common_chat_msg_partial_exception("Incomplete tool call");
1044
1173
  }
1174
+ return;
1045
1175
  }
1046
1176
  }
1047
- return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
1177
+ parse_json_tool_calls(
1178
+ builder,
1179
+ /* block_open= */ std::nullopt,
1180
+ /* function_regex_start_only= */ function_regex,
1181
+ /* function_regex= */ std::nullopt,
1182
+ close_regex,
1183
+ std::nullopt);
1184
+
1048
1185
  }
1049
1186
 
1050
1187
  static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1051
1188
  common_chat_params data;
1189
+ auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1190
+
1191
+ // Hacks to fix the official (broken) prompt.
1192
+ // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
1193
+ // until the official template is fixed.
1194
+ if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
1195
+ // Don't leave the chat dangling after tool results
1196
+ if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
1197
+ prompt += "<|end▁of▁sentence|>";
1198
+ if (inputs.add_generation_prompt) {
1199
+ prompt += "<|Assistant|>";
1200
+ }
1201
+ }
1202
+ // Fix up tool call delta example added by Minja
1203
+ prompt = std::regex_replace(
1204
+ prompt,
1205
+ std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
1206
+ "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
1207
+ }
1208
+ data.prompt = prompt;
1209
+ data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1210
+ if (string_ends_with(data.prompt, "<think>\n")) {
1211
+ if (!inputs.enable_thinking) {
1212
+ data.prompt += "</think>";
1213
+ } else {
1214
+ data.thinking_forced_open = true;
1215
+ }
1216
+ }
1217
+
1052
1218
  if (inputs.tools.is_array() && !inputs.tools.empty()) {
1053
1219
  data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
1054
1220
  data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -1059,21 +1225,25 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
1059
1225
  auto parameters = function.at("parameters");
1060
1226
  builder.resolve_refs(parameters);
1061
1227
  tool_rules.push_back(builder.add_rule(name + "-call",
1062
- "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
1228
+ "( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n"
1063
1229
  "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
1064
1230
  "\"```<|tool▁call▁end|>\""));
1065
1231
  });
1066
1232
  // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1067
1233
  // so we accept common variants (then it's all constrained)
1068
1234
  builder.add_rule("root",
1069
- "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" ) "
1235
+ std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1236
+ "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
1070
1237
  "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
1071
1238
  "\"<|tool▁calls▁end|>\""
1072
1239
  " space");
1073
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool▁calls▁begin|>"});
1074
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls_begin|>"});
1075
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool calls begin|>"});
1076
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool\\_calls\\_begin|>"});
1240
+ data.grammar_triggers.push_back({
1241
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1242
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1243
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1244
+ std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1245
+ "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
1246
+ });
1077
1247
  data.preserved_tokens = {
1078
1248
  "<think>",
1079
1249
  "</think>",
@@ -1085,72 +1255,34 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
1085
1255
  };
1086
1256
  });
1087
1257
  }
1088
- auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1089
-
1090
- // Hacks to fix the official (broken) prompt.
1091
- // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
1092
- // until the official template is fixed.
1093
- if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
1094
- // Don't leave the chat dangling after tool results
1095
- if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
1096
- prompt += "<|end▁of▁sentence|>";
1097
- if (inputs.add_generation_prompt) {
1098
- prompt += "<|Assistant|>";
1099
- }
1100
- }
1101
- // Fix up tool call delta example added by Minja
1102
- prompt = std::regex_replace(
1103
- prompt,
1104
- std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
1105
- "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
1106
- }
1107
- data.prompt = prompt;
1108
- data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1109
1258
  return data;
1110
1259
  }
1111
- static common_chat_msg handle_think_tag_prelude(const std::string & input, bool extract_reasoning, const std::function<common_chat_msg(const std::string &)> & rest_parser) {
1112
- std::smatch match;
1113
- static const std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
1114
- if (std::regex_match(input, match, reasoning_content_regex)) {
1115
- auto rest = match[3].str();
1116
- auto msg = rest_parser(rest);
1117
- auto reasoning_content = string_strip(match[2].str());
1118
- if (extract_reasoning) {
1119
- msg.reasoning_content = reasoning_content;
1120
- } else if (!reasoning_content.empty()) {
1121
- std::ostringstream content;
1122
- content << "<think>" << reasoning_content << "</think>" << msg.content;
1123
- msg.content = content.str();
1124
- }
1125
- return msg;
1126
- }
1127
- return rest_parser(input);
1128
- }
1129
- static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
1130
- return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
1131
- static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
1132
- static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
1133
- static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
1134
-
1135
- common_chat_msg msg;
1136
- msg.role = "assistant";
1137
- std::smatch match;
1138
- if (std::regex_search(input, match, tool_calls_regex)) {
1139
- auto tool_calls = match[1].str();
1140
- auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
1141
- msg.tool_calls = std::move(msg2.tool_calls);
1142
- } else {
1143
- msg.content = input;
1144
- }
1145
- return msg;
1146
- });
1260
+ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
1261
+ builder.try_parse_reasoning("<think>", "</think>");
1262
+ if (!builder.syntax().parse_tool_calls) {
1263
+ builder.add_content(builder.consume_rest());
1264
+ return;
1265
+ }
1266
+
1267
+ static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1268
+ static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1269
+ static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
1270
+ static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
1271
+
1272
+ parse_json_tool_calls(
1273
+ builder,
1274
+ /* block_open= */ tool_calls_begin,
1275
+ /* function_regex_start_only= */ std::nullopt,
1276
+ function_regex,
1277
+ close_regex,
1278
+ tool_calls_end);
1147
1279
  }
1148
1280
 
1149
1281
  static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1150
1282
  LOG_DBG("%s\n", __func__);
1151
1283
  common_chat_params data;
1152
1284
  data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
1153
- {"datetime", "Jan 29 2025 13:00:00 GMT"},
1285
+ {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
1154
1286
  {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
1155
1287
  });
1156
1288
  if (inputs.tools.is_array() && !inputs.tools.empty()) {
@@ -1191,13 +1323,19 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
1191
1323
  }
1192
1324
  return data;
1193
1325
  }
1194
- static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) {
1195
- return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1);
1326
+ static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
1327
+ if (!builder.syntax().parse_tool_calls) {
1328
+ builder.add_content(builder.consume_rest());
1329
+ return;
1330
+ }
1331
+ static const common_regex prefix(regex_escape(" functools["));
1332
+ parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
1196
1333
  }
1197
1334
 
1198
1335
  static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1199
1336
  // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
1200
1337
  // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
1338
+ // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
1201
1339
  common_chat_params data;
1202
1340
  data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1203
1341
  data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
@@ -1211,24 +1349,21 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
1211
1349
  std::string name = function.at("name");
1212
1350
  auto parameters = function.at("parameters");
1213
1351
  builder.resolve_refs(parameters);
1352
+ std::string args_pattern = "[\\s\\S]*";
1214
1353
  auto args_rule = builder.add_schema(name + "-args", parameters);
1215
- first_tool_rules.push_back(builder.add_rule(name + "-call", "( \"assistant<|end_header_id|>\\n\" )? \"" + name + "\\n\" " + args_rule));
1216
- subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
1217
- data.grammar_triggers.push_back({
1218
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
1219
- regex_escape(name + "\n"),
1220
- });
1221
- data.grammar_triggers.push_back({
1222
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
1223
- regex_escape("assistant<|end_header_id|>\n" + name + "\n"),
1224
- });
1225
- data.grammar_triggers.push_back({
1226
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1227
- regex_escape(">>>" + name + "\n"),
1228
- });
1354
+ if (name == "python") {
1355
+ args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
1356
+ } else {
1357
+ args_pattern = "\\{" + args_pattern;
1358
+ }
1359
+ auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
1360
+ first_tool_rules.push_back(call_rule);
1361
+ if (inputs.parallel_tool_calls) {
1362
+ subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
1363
+ }
1229
1364
  data.grammar_triggers.push_back({
1230
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1231
- ">>>assistant<|end_header_id|>\n" + name,
1365
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1366
+ "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
1232
1367
  });
1233
1368
  });
1234
1369
  data.preserved_tokens = {
@@ -1246,319 +1381,311 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
1246
1381
  }
1247
1382
  return data;
1248
1383
  }
1249
-
1250
- static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
1251
- static const std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
1252
- static const std::regex close_regex(R"($|(?=>>>))");
1253
-
1254
- std::string content;
1255
- auto it = input.begin();
1256
- const auto end = input.end();
1257
-
1258
- if (parse_literal(it, end, "all\n")) {
1259
- std::smatch match;
1260
- if (std::regex_search(it, end, match, function_regex)) {
1261
- auto fun_it = match.prefix().second;
1262
- content = std::string(it, fun_it);
1263
- it = fun_it;
1264
- } else {
1265
- common_chat_msg res;
1266
- res.role = "assistant";
1267
- res.content = std::string(it, end);
1268
- return res;
1269
- }
1270
- }
1271
- // TODO: tighten & simplify.
1272
- try {
1273
- auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex, /* allow_raw_python= */ true);
1274
- res.content = content + res.content;
1275
- return res;
1276
- } catch (const std::exception & e) {
1277
- LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what());
1278
- common_chat_msg res;
1279
- res.role = "assistant";
1280
- res.content = input;
1281
- return res;
1282
- }
1384
+ static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
1385
+ static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
1386
+ static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
1387
+ static const common_regex close_regex(R"(\s*)");
1388
+
1389
+ parse_json_tool_calls(
1390
+ builder,
1391
+ std::nullopt,
1392
+ function_regex_start_only,
1393
+ function_regex,
1394
+ close_regex,
1395
+ std::nullopt,
1396
+ /* allow_raw_python= */ true,
1397
+ /* get_function_name= */ [&](const auto & res) -> std::string {
1398
+ auto at_start = res.groups[0].begin == 0;
1399
+ auto name = builder.str(res.groups[1]);
1400
+ if (!name.empty() && name.back() == '{') {
1401
+ // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
1402
+ builder.move_back(1);
1403
+ }
1404
+ auto idx = name.find_last_not_of("\n{");
1405
+ name = name.substr(0, idx + 1);
1406
+ if (at_start && name == "all") {
1407
+ return "";
1408
+ }
1409
+ return name;
1410
+ });
1283
1411
  }
1284
1412
 
1285
1413
  static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1286
1414
  // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
1287
1415
  common_chat_params data;
1288
- json tools = inputs.tools.is_null() ? inputs.tools : json::array();
1289
- std::string python_code_argument_name;
1290
- auto has_raw_python = false;
1291
1416
 
1292
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1293
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1294
- std::vector<std::string> tool_rules;
1295
- foreach_function(inputs.tools, [&](const json & tool) {
1296
- const auto & function = tool.at("function");
1297
- const auto & parameters = function.at("parameters");
1298
- std::string name = function.at("name");
1299
- if (name == "python" || name == "ipython") {
1300
- if (!parameters.contains("type")) {
1301
- throw std::runtime_error("Missing type in python tool");
1302
- }
1303
- has_raw_python = true;
1304
- const auto & type = parameters.at("type");
1305
- if (type == "object") {
1306
- auto properties = parameters.at("properties");
1307
- for (auto it = properties.begin(); it != properties.end(); ++it) {
1308
- if (it.value().at("type") == "string") {
1309
- if (!python_code_argument_name.empty()) {
1310
- throw std::runtime_error("Multiple string arguments found in python tool");
1417
+ if (!inputs.tools.is_null()) {
1418
+ std::string python_code_argument_name;
1419
+ auto has_raw_python = false;
1420
+
1421
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1422
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1423
+ std::vector<std::string> tool_rules;
1424
+ foreach_function(inputs.tools, [&](const json & tool) {
1425
+ const auto & function = tool.at("function");
1426
+ const auto & parameters = function.at("parameters");
1427
+ std::string name = function.at("name");
1428
+ if (name == "python" || name == "ipython") {
1429
+ if (!parameters.contains("type")) {
1430
+ throw std::runtime_error("Missing type in python tool");
1431
+ }
1432
+ has_raw_python = true;
1433
+ const auto & type = parameters.at("type");
1434
+ if (type == "object") {
1435
+ auto properties = parameters.at("properties");
1436
+ for (auto it = properties.begin(); it != properties.end(); ++it) {
1437
+ if (it.value().at("type") == "string") {
1438
+ if (!python_code_argument_name.empty()) {
1439
+ throw std::runtime_error("Multiple string arguments found in python tool");
1440
+ }
1441
+ python_code_argument_name = it.key();
1311
1442
  }
1312
- python_code_argument_name = it.key();
1313
1443
  }
1444
+ if (python_code_argument_name.empty()) {
1445
+ throw std::runtime_error("No string argument found in python tool");
1446
+ }
1447
+ } else if (type != "string") {
1448
+ throw std::runtime_error("Invalid type in python tool: " + type.dump());
1314
1449
  }
1315
- if (python_code_argument_name.empty()) {
1316
- throw std::runtime_error("No string argument found in python tool");
1317
- }
1318
- } else if (type != "string") {
1319
- throw std::runtime_error("Invalid type in python tool: " + type.dump());
1320
1450
  }
1451
+ tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
1452
+ });
1453
+ if (has_raw_python) {
1454
+ tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
1455
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1456
+ data.preserved_tokens.push_back("<|python_tag|>");
1321
1457
  }
1322
- tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
1458
+ auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
1459
+ builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1460
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
1323
1461
  });
1324
- if (has_raw_python) {
1325
- tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
1326
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1327
- data.preserved_tokens.push_back("<|python_tag|>");
1328
- }
1329
- auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
1330
- builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1331
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
1332
- });
1462
+ data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
1463
+ } else {
1464
+ data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
1465
+ }
1333
1466
 
1334
1467
  data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1335
1468
  // TODO: if (has_raw_python)
1336
- data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
1337
1469
  return data;
1338
1470
  }
1339
- static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
1471
+ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
1472
+ if (!builder.syntax().parse_tool_calls) {
1473
+ builder.add_content(builder.consume_rest());
1474
+ return;
1475
+ }
1340
1476
  // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
1341
- static const std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
1342
- std::smatch match;
1343
- if (std::regex_search(input, match, python_tag_regex)) {
1344
- auto code = match[1].str();
1345
- common_chat_msg msg;
1346
- msg.role = "assistant";
1347
- msg.content = match.prefix().str();
1348
- msg.tool_calls.push_back({
1349
- /* .name = */ "python",
1350
- /* .arguments = */ (json {{"code", code}}).dump(),
1351
- /* .id = */ "",
1352
- });
1353
- return msg;
1477
+ static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
1478
+
1479
+ static const common_regex function_regex(R"(<function=(\w+)>)");
1480
+ static const common_regex close_regex(R"(</function>)");
1481
+
1482
+ parse_json_tool_calls(
1483
+ builder,
1484
+ /* block_open= */ std::nullopt,
1485
+ /* function_regex_start_only= */ std::nullopt,
1486
+ function_regex,
1487
+ close_regex,
1488
+ std::nullopt);
1489
+
1490
+ if (auto res = builder.try_find_regex(python_tag_regex)) {
1491
+ auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
1492
+ builder.add_tool_call("python", "", arguments);
1493
+ return;
1354
1494
  }
1355
- static const std::regex function_regex(R"(<function=(\w+)>)");
1356
- static const std::regex close_regex(R"(</function>)");
1357
- // TODO: tighten & simplify.
1358
- return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
1359
1495
  }
1360
1496
 
1361
1497
  static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
1362
1498
  common_chat_params data;
1363
- // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1364
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1365
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1366
- std::vector<std::string> tool_rules;
1367
- std::vector<std::string> tool_call_alts;
1368
- foreach_function(inputs.tools, [&](const json & tool) {
1369
- const auto & function = tool.at("function");
1370
- std::string name = function.at("name");
1371
- auto parameters = function.at("parameters");
1372
- builder.resolve_refs(parameters);
1373
- tool_rules.push_back(builder.add_schema(name + "-call", {
1374
- {"type", "object"},
1375
- {"properties", json {
1376
- {"name", json {{"const", name}}},
1377
- {"arguments", parameters},
1378
- }},
1379
- {"required", json::array({"name", "arguments"})},
1380
- }));
1381
- tool_call_alts.push_back(builder.add_rule(
1382
- name + "-function-tag",
1383
- "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
1384
- builder.add_schema(name + "-args", parameters) + " "
1385
- "\"</function>\" space"));
1386
1499
 
1387
- data.grammar_triggers.push_back({
1388
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1389
- "<function=" + name + ">",
1500
+ json additional_context = {
1501
+ {"enable_thinking", inputs.enable_thinking},
1502
+ };
1503
+
1504
+ data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, additional_context);
1505
+ data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
1506
+ if (string_ends_with(data.prompt, "<think>\n")) {
1507
+ if (!inputs.enable_thinking) {
1508
+ data.prompt += "</think>";
1509
+ } else {
1510
+ data.thinking_forced_open = true;
1511
+ }
1512
+ }
1513
+
1514
+ if (!inputs.tools.is_null()) {
1515
+ // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1516
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1517
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1518
+ std::vector<std::string> tool_rules;
1519
+ std::vector<std::string> tool_call_alts;
1520
+ std::vector<std::string> escaped_names;
1521
+ foreach_function(inputs.tools, [&](const json & tool) {
1522
+ const auto & function = tool.at("function");
1523
+ std::string name = function.at("name");
1524
+ auto parameters = function.at("parameters");
1525
+ builder.resolve_refs(parameters);
1526
+ tool_rules.push_back(builder.add_schema(name + "-call", {
1527
+ {"type", "object"},
1528
+ {"properties", json {
1529
+ {"name", json {{"const", name}}},
1530
+ {"arguments", parameters},
1531
+ }},
1532
+ {"required", json::array({"name", "arguments"})},
1533
+ }));
1534
+ tool_call_alts.push_back(builder.add_rule(
1535
+ name + "-function-tag",
1536
+ "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
1537
+ builder.add_schema(name + "-args", parameters) + " "
1538
+ "\"</function>\" space"));
1539
+
1540
+ data.grammar_triggers.push_back({
1541
+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1542
+ "<function=" + name + ">",
1543
+ });
1544
+ auto escaped_name = regex_escape(name);
1545
+ data.grammar_triggers.push_back({
1546
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1547
+ "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
1548
+ });
1549
+ escaped_names.push_back(escaped_name);
1390
1550
  });
1391
- auto escaped_name = regex_escape(name);
1551
+ auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
1552
+ std::vector<std::string> alt_tags {
1553
+ any_tool_call,
1554
+ "\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
1555
+ // The rest is just to accommodate common "good bad" outputs.
1556
+ "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
1557
+ "\"<response>\" space " + any_tool_call + " \"</response>\"",
1558
+ "\"<tools>\" space " + any_tool_call + " \"</tools>\"",
1559
+ "\"<json>\" space " + any_tool_call + " \"</json>\"",
1560
+ "\"<xml>\" space " + any_tool_call + " \"</xml>\"",
1561
+ "\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
1562
+ };
1563
+ auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
1564
+ tool_call_alts.push_back(wrappable_tool_call);
1565
+ tool_call_alts.push_back(
1566
+ "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
1567
+ auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
1568
+ builder.add_rule("root",
1569
+ std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1570
+ (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
1571
+ // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1392
1572
  data.grammar_triggers.push_back({
1393
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1394
- "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
1573
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1574
+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1575
+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1576
+ std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
1577
+ "(\\s*"
1578
+ "(?:<tool_call>"
1579
+ "|<function"
1580
+ "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1581
+ "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
1582
+ ")"
1583
+ ")[\\s\\S]*"
1584
+ ),
1395
1585
  });
1586
+ data.preserved_tokens = {
1587
+ "<think>",
1588
+ "</think>",
1589
+ "<tool_call>",
1590
+ "</tool_call>",
1591
+ "<function",
1592
+ "<tools>",
1593
+ "</tools>",
1594
+ "<response>",
1595
+ "</response>",
1596
+ "<function_call>",
1597
+ "</function_call>",
1598
+ "<json>",
1599
+ "</json>",
1600
+ "<JSON>",
1601
+ "</JSON>",
1602
+ "```",
1603
+ "```json",
1604
+ "```xml",
1605
+ };
1396
1606
  });
1397
- auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
1398
- std::vector<std::string> alt_tags {
1399
- any_tool_call,
1400
- "\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
1401
- // The rest is just to accommodate common "good bad" outputs.
1402
- "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
1403
- "\"<response>\" space " + any_tool_call + " \"</response>\"",
1404
- "\"<tools>\" space " + any_tool_call + " \"</tools>\"",
1405
- "\"<json>\" space " + any_tool_call + " \"</json>\"",
1406
- "\"<xml>\" space " + any_tool_call + " \"</xml>\"",
1407
- "\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
1408
- };
1409
- auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
1410
- tool_call_alts.push_back(wrappable_tool_call);
1411
- tool_call_alts.push_back(
1412
- "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
1413
- auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
1414
- builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1415
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"});
1416
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function"});
1417
- // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1418
- data.grammar_triggers.push_back({
1419
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
1420
- "(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
1421
- });
1422
- data.preserved_tokens = {
1423
- "<think>",
1424
- "</think>",
1425
- "<tool_call>",
1426
- "</tool_call>",
1427
- "<function",
1428
- "<tools>",
1429
- "</tools>",
1430
- "<response>",
1431
- "</response>",
1432
- "<function_call>",
1433
- "</function_call>",
1434
- "<json>",
1435
- "</json>",
1436
- "<JSON>",
1437
- "</JSON>",
1438
- "```",
1439
- "```json",
1440
- "```xml",
1441
- };
1442
- });
1607
+ }
1443
1608
 
1444
- data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1445
- data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
1446
1609
  return data;
1447
1610
  }
1448
- static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input, bool extract_reasoning) {
1449
- return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
1450
- static const std::regex open_regex(
1451
- "(?:"
1452
- "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
1453
- "(<tool_call>" // match 2 (open_tag)
1454
- "|<function_call>"
1455
- "|<tool>"
1456
- "|<tools>"
1457
- "|<response>"
1458
- "|<json>"
1459
- "|<xml>"
1460
- "|<JSON>"
1611
+ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
1612
+ builder.try_parse_reasoning("<think>", "</think>");
1613
+ if (!builder.syntax().parse_tool_calls) {
1614
+ builder.add_content(builder.consume_rest());
1615
+ return;
1616
+ }
1617
+
1618
+ static const common_regex open_regex(
1619
+ "(?:"
1620
+ "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
1621
+ "(" // match 2 (open_tag)
1622
+ "<tool_call>"
1623
+ "|<function_call>"
1624
+ "|<tool>"
1625
+ "|<tools>"
1626
+ "|<response>"
1627
+ "|<json>"
1628
+ "|<xml>"
1629
+ "|<JSON>"
1461
1630
  ")?"
1462
- "(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest)
1463
- ")"
1464
- "|"
1465
- "(?:<function=([^>]+)>" // match 4 (function name)
1466
- "|<function name=\"([^\"]+)\">)" // match 5 (function name again)
1467
- "([\\s\\S]*)" // match 6 (function arguments + rest)})"
1468
- );
1469
-
1470
- try {
1471
- common_chat_msg msg;
1472
- msg.role = "assistant";
1473
-
1474
- std::string::const_iterator it = input.begin();
1475
- const std::string::const_iterator end = input.end();
1476
- std::smatch match;
1477
-
1478
- while (it != end) {
1479
- if (std::regex_search(it, end, match, open_regex)) {
1480
- // Add content before the match
1481
- msg.content += std::string(it, match[0].first);
1482
-
1483
- auto block_start = match[1].str();
1484
- std::string block_end = block_start.empty() ? "" : "```";
1485
-
1486
- auto open_tag = match[2].str();
1487
- std::string close_tag;
1488
-
1489
- if (match[3].matched) {
1490
- close_tag = open_tag.empty() ? "" : "</" + open_tag.substr(1);
1491
- auto json_it = match[3].first;
1492
- json tool_call;
1493
- if (parse_json(json_it, end, tool_call) && tool_call.contains("name") && tool_call.contains("arguments")) {
1631
+ "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
1632
+ ")"
1633
+ "|<function=([^>]+)>" // match 4 (function name)
1634
+ "|<function name=\"([^\"]+)\">" // match 5 (function name again)
1635
+ );
1636
+
1637
+ if (auto res = builder.try_find_regex(open_regex)) {
1638
+ const auto & block_start = res->groups[1];
1639
+ std::string block_end = block_start.empty() ? "" : "```";
1640
+
1641
+ const auto & open_tag = res->groups[2];
1642
+ std::string close_tag;
1643
+
1644
+ if (!res->groups[3].empty()) {
1645
+ builder.move_to(res->groups[3].begin);
1646
+ close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
1647
+
1648
+ if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
1649
+ if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
1650
+ throw common_chat_msg_partial_exception("incomplete tool call");
1651
+ }
1652
+ builder.consume_spaces();
1653
+ builder.consume_literal(close_tag);
1654
+ builder.consume_spaces();
1655
+ if (!block_end.empty()) {
1656
+ builder.consume_literal(block_end);
1657
+ builder.consume_spaces();
1658
+ }
1659
+ builder.add_content(builder.consume_rest());
1660
+ } else {
1661
+ throw common_chat_msg_partial_exception("failed to parse tool call");
1662
+ }
1663
+ } else {
1664
+ auto function_name = builder.str(res->groups[4]);
1665
+ if (function_name.empty()) {
1666
+ function_name = builder.str(res->groups[5]);
1667
+ }
1668
+ GGML_ASSERT(!function_name.empty());
1494
1669
 
1495
- msg.tool_calls.emplace_back(process_tool_call(tool_call));
1496
- it = json_it; // Move iterator past parsed JSON
1670
+ close_tag = "</function>";
1497
1671
 
1498
- // Handle close tags
1499
- consume_spaces(it, end);
1500
- if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
1501
- throw std::runtime_error("Failed to parse closing tag");
1502
- }
1503
- consume_spaces(it, end);
1504
- if (!block_end.empty() && !parse_literal(it, end, block_end)) {
1505
- throw std::runtime_error("Failed to parse block end");
1506
- }
1507
- consume_spaces(it, end);
1508
- } else {
1509
- // Not a valid tool call, treat as content
1510
- msg.content += std::string(match[0].first, match[0].second);
1511
- it = match[0].second;
1512
- }
1513
- } else {
1514
- auto function_name = match[4].str();
1515
- if (function_name.empty()) {
1516
- function_name = match[5].str();
1517
- }
1518
- GGML_ASSERT(!function_name.empty());
1519
-
1520
- close_tag = "</function>";
1521
- // Start parsing from after the opening tags
1522
- auto json_it = match[6].first;
1523
- json arguments;
1524
- if (parse_json(json_it, end, arguments)) {
1525
- msg.tool_calls.emplace_back(process_tool_call({
1526
- {"name", function_name},
1527
- {"arguments", arguments},
1528
- }));
1529
- it = json_it; // Move iterator past parsed JSON
1530
-
1531
- // Handle close tags
1532
- consume_spaces(it, end);
1533
- if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
1534
- throw std::runtime_error("Failed to parse closing tag");
1535
- }
1536
- consume_spaces(it, end);
1537
- if (!block_end.empty() && !parse_literal(it, end, block_end)) {
1538
- throw std::runtime_error("Failed to parse block end");
1539
- }
1540
- consume_spaces(it, end);
1541
- } else {
1542
- // Not a valid tool call, treat as content
1543
- msg.content += std::string(match[0].first, match[0].second);
1544
- it = match[0].second;
1545
- }
1546
- }
1547
- } else {
1548
- // Add remaining content
1549
- msg.content += std::string(it, end);
1550
- break;
1672
+ if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
1673
+ if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
1674
+ throw common_chat_msg_partial_exception("incomplete tool call");
1675
+ }
1676
+ builder.consume_spaces();
1677
+ builder.consume_literal(close_tag);
1678
+ builder.consume_spaces();
1679
+ if (!block_end.empty()) {
1680
+ builder.consume_literal(block_end);
1681
+ builder.consume_spaces();
1551
1682
  }
1552
1683
  }
1553
- return msg;
1554
- } catch (const std::exception & e) {
1555
- LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
1556
- common_chat_msg msg;
1557
- msg.role = "assistant";
1558
- msg.content = input;
1559
- return msg;
1684
+ builder.add_content(builder.consume_rest());
1560
1685
  }
1561
- });
1686
+ } else {
1687
+ builder.add_content(builder.consume_rest());
1688
+ }
1562
1689
  }
1563
1690
 
1564
1691
  static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
@@ -1590,9 +1717,10 @@ static common_chat_params common_chat_templates_apply_jinja(
1590
1717
  const auto & caps = tmpl.original_caps();
1591
1718
  params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
1592
1719
  params.add_generation_prompt = inputs.add_generation_prompt;
1593
- params.extract_reasoning = inputs.extract_reasoning;
1594
1720
  params.tool_choice = inputs.tool_choice;
1721
+ params.enable_thinking = inputs.enable_thinking;
1595
1722
  params.grammar = inputs.grammar;
1723
+ params.now = inputs.now;
1596
1724
  if (!inputs.json_schema.empty()) {
1597
1725
  params.json_schema = json::parse(inputs.json_schema);
1598
1726
  }
@@ -1624,7 +1752,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1624
1752
  }
1625
1753
 
1626
1754
  // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1627
- if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) {
1755
+ if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
1628
1756
  return common_chat_params_init_hermes_2_pro(tmpl, params);
1629
1757
  }
1630
1758
 
@@ -1644,21 +1772,21 @@ static common_chat_params common_chat_templates_apply_jinja(
1644
1772
  return common_chat_params_init_firefunction_v2(tmpl, params);
1645
1773
  }
1646
1774
 
1647
- // Plain handler (no tools)
1648
- if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1649
- return common_chat_params_init_without_tools(tmpl, params);
1650
- }
1651
-
1652
1775
  // Functionary v3.1 (w/ tools)
1653
1776
  if (src.find("<|start_header_id|>") != std::string::npos
1654
1777
  && src.find("<function=") != std::string::npos) {
1655
1778
  return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
1656
1779
  }
1657
1780
 
1658
- // Llama 3.1, 3.2, 3.3 (w/ tools)
1781
+ // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
1659
1782
  if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
1660
1783
  auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
1661
- return common_chat_params_init_llama_3_1_tool_calls(tmpl, params, allow_python_tag_builtin_tools);
1784
+ return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
1785
+ }
1786
+
1787
+ // Plain handler (no tools)
1788
+ if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1789
+ return common_chat_params_init_without_tools(tmpl, params);
1662
1790
  }
1663
1791
 
1664
1792
  // Mistral Nemo (w/ tools)
@@ -1738,44 +1866,64 @@ common_chat_params common_chat_templates_apply(
1738
1866
  : common_chat_templates_apply_legacy(tmpls, inputs);
1739
1867
  }
1740
1868
 
1741
- static common_chat_msg common_chat_parse_content_only(const std::string & input) {
1742
- common_chat_msg msg;
1743
- msg.role = "assistant";
1744
- msg.content = input;
1745
- return msg;
1869
+ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
1870
+ builder.add_content(builder.consume_rest());
1746
1871
  }
1747
1872
 
1748
- common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) {
1749
- switch (format) {
1873
+ static void common_chat_parse(common_chat_msg_parser & builder) {
1874
+ LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
1875
+
1876
+ switch (builder.syntax().format) {
1750
1877
  case COMMON_CHAT_FORMAT_CONTENT_ONLY:
1751
- return common_chat_parse_content_only(input);
1878
+ common_chat_parse_content_only(builder);
1879
+ break;
1752
1880
  case COMMON_CHAT_FORMAT_GENERIC:
1753
- return common_chat_parse_generic(input);
1881
+ common_chat_parse_generic(builder);
1882
+ break;
1754
1883
  case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
1755
- return common_chat_parse_mistral_nemo(input);
1884
+ common_chat_parse_mistral_nemo(builder);
1885
+ break;
1756
1886
  case COMMON_CHAT_FORMAT_LLAMA_3_X:
1757
- return common_chat_parse_llama_3_1(input);
1887
+ common_chat_parse_llama_3_1(builder);
1888
+ break;
1758
1889
  case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
1759
- return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
1890
+ common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
1891
+ break;
1760
1892
  case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
1761
- return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ false);
1762
- case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING:
1763
- return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true);
1893
+ common_chat_parse_deepseek_r1(builder);
1894
+ break;
1764
1895
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
1765
- return common_chat_parse_functionary_v3_2(input);
1896
+ common_chat_parse_functionary_v3_2(builder);
1897
+ break;
1766
1898
  case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
1767
- return common_chat_parse_functionary_v3_1_llama_3_1(input);
1899
+ common_chat_parse_functionary_v3_1_llama_3_1(builder);
1900
+ break;
1768
1901
  case COMMON_CHAT_FORMAT_HERMES_2_PRO:
1769
- return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ false);
1770
- case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING:
1771
- return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true);
1902
+ common_chat_parse_hermes_2_pro(builder);
1903
+ break;
1772
1904
  case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
1773
- return common_chat_parse_firefunction_v2(input);
1905
+ common_chat_parse_firefunction_v2(builder);
1906
+ break;
1774
1907
  case COMMON_CHAT_FORMAT_COMMAND_R7B:
1775
- return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false);
1776
- case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
1777
- return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
1908
+ common_chat_parse_command_r7b(builder);
1909
+ break;
1778
1910
  default:
1779
- throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
1911
+ throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
1780
1912
  }
1913
+ builder.finish();
1914
+ }
1915
+
1916
+ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1917
+ common_chat_msg_parser builder(input, is_partial, syntax);
1918
+ try {
1919
+ common_chat_parse(builder);
1920
+ } catch (const common_chat_msg_partial_exception & ex) {
1921
+ LOG_DBG("Partial parse: %s\n", ex.what());
1922
+ if (!is_partial) {
1923
+ throw std::runtime_error(ex.what());
1924
+ }
1925
+ }
1926
+ auto msg = builder.result();
1927
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1928
+ return msg;
1781
1929
  }