cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
@@ -1,529 +1,537 @@
1
- /*
2
- Copyright 2024 Google LLC
3
-
4
- Use of this source code is governed by an MIT-style
5
- license that can be found in the LICENSE file or at
6
- https://opensource.org/licenses/MIT.
7
- */
8
- // SPDX-License-Identifier: MIT
9
- #pragma once
10
-
11
- #include "minja.hpp"
12
- #include "json.hpp"
13
- #include <string>
14
- #include <vector>
15
-
16
- using json = nlohmann::ordered_json;
17
-
18
- namespace minja {
19
-
20
- struct chat_template_caps {
21
- bool supports_tools = false;
22
- bool supports_tool_calls = false;
23
- bool supports_tool_responses = false;
24
- bool supports_system_role = false;
25
- bool supports_parallel_tool_calls = false;
26
- bool supports_tool_call_id = false;
27
- // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
28
- // Most other templates (and OpenAI's API) expect the arguments object to be stringified.
29
- bool requires_object_arguments = false;
30
- // CohereForAI/c4ai-command-r-plus simple variant
31
- bool requires_non_null_content = false;
32
- // MiniMaxAI/MiniMax-Text-01 special
33
- bool requires_typed_content = false;
34
- };
35
-
36
- struct chat_template_inputs {
37
- nlohmann::ordered_json messages;
38
- nlohmann::ordered_json tools;
39
- bool add_generation_prompt = true;
40
- nlohmann::ordered_json extra_context;
41
- std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
42
- };
43
-
44
- struct chat_template_options {
45
- bool apply_polyfills = true;
46
- bool use_bos_token = true;
47
- bool use_eos_token = true;
48
- bool define_strftime_now = true;
49
-
50
- bool polyfill_tools = true;
51
- bool polyfill_tool_call_examples = true;
52
- bool polyfill_tool_calls = true;
53
- bool polyfill_tool_responses = true;
54
- bool polyfill_system_role = true;
55
- bool polyfill_object_arguments = true;
56
- bool polyfill_typed_content = true;
57
- };
58
-
59
- class chat_template {
60
-
61
- private:
62
- chat_template_caps caps_;
63
- std::string source_;
64
- std::string bos_token_;
65
- std::string eos_token_;
66
- std::shared_ptr<minja::TemplateNode> template_root_;
67
- std::string tool_call_example_;
68
-
69
- std::string try_raw_render(
70
- const nlohmann::ordered_json & messages,
71
- const nlohmann::ordered_json & tools,
72
- bool add_generation_prompt,
73
- const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
74
- {
75
- try {
76
- chat_template_inputs inputs;
77
- inputs.messages = messages;
78
- inputs.tools = tools;
79
- inputs.add_generation_prompt = add_generation_prompt;
80
- inputs.extra_context = extra_context;
81
- // Use fixed date for tests
82
- inputs.now = std::chrono::system_clock::from_time_t(0);
83
-
84
- chat_template_options opts;
85
- opts.apply_polyfills = false;
86
-
87
- auto prompt = apply(inputs, opts);
88
- // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
89
- return prompt;
90
- } catch (const std::exception & e) {
91
- // fprintf(stderr, "try_raw_render error: %s\n", e.what());
92
- return "";
93
- }
94
- }
95
-
96
- public:
97
-
98
- chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
99
- : source_(source), bos_token_(bos_token), eos_token_(eos_token)
100
- {
101
- template_root_ = minja::Parser::parse(source_, {
102
- /* .trim_blocks = */ true,
103
- /* .lstrip_blocks = */ true,
104
- /* .keep_trailing_newline = */ false,
105
- });
106
-
107
- auto contains = [](const std::string & haystack, const std::string & needle) {
108
- return haystack.find(needle) != std::string::npos;
109
- };
110
-
111
- const std::string user_needle = "<User Needle>";
112
- const std::string sys_needle = "<System Needle>";
113
- const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
114
- const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};
115
-
116
- caps_.requires_typed_content =
117
- !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
118
- && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);
119
-
120
- const auto dummy_user_msg = caps_.requires_typed_content
121
- ? dummy_typed_user_msg
122
- : dummy_str_user_msg;
123
- const json needle_system_msg = {
124
- {"role", "system"},
125
- {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
126
- };
127
-
128
- caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);
129
-
130
- auto out = try_raw_render(json::array({
131
- dummy_user_msg
132
- }), json::array({
133
- {
134
- {"name", "some_tool"},
135
- {"type", "function"},
136
- {"function", {
137
- {"name", "some_tool"},
138
- {"description", "Some tool."},
139
- {"parameters", {
140
- {"type", "object"},
141
- {"properties", {
142
- {"arg", {
143
- {"type", "string"},
144
- {"description", "Some argument."},
145
- }},
146
- }},
147
- {"required", json::array({ "arg" })},
148
- }},
149
- }},
150
- },
151
- }), false);
152
- caps_.supports_tools = contains(out, "some_tool");
153
-
154
- auto make_tool_calls_msg = [&](const json & tool_calls) {
155
- return json {
156
- {"role", "assistant"},
157
- {"content", nullptr},
158
- {"tool_calls", tool_calls},
159
- };
160
- };
161
- auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
162
- return json {
163
- {"id", "call_1___"},
164
- {"type", "function"},
165
- {"function", {
166
- {"arguments", arguments},
167
- {"name", tool_name},
168
- }},
169
- };
170
- };
171
- const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};
172
-
173
- // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
174
- out = try_raw_render(json::array({
175
- dummy_user_msg,
176
- make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
177
- }), {}, false);
178
- auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
179
- out = try_raw_render(json::array({
180
- dummy_user_msg,
181
- make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
182
- }), {}, false);
183
- auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
184
-
185
- caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
186
- caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
187
- auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
188
- auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
189
- caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);
190
-
191
- if (caps_.supports_tool_calls) {
192
- auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
193
- auto tc1 = make_tool_call("test_tool1", dummy_args);
194
- auto tc2 = make_tool_call("test_tool2", dummy_args);
195
- auto out = try_raw_render(json::array({
196
- dummy_user_msg,
197
- make_tool_calls_msg(json::array({tc1, tc2})),
198
- }), {}, false);
199
- caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");
200
-
201
- out = try_raw_render(json::array({
202
- dummy_user_msg,
203
- make_tool_calls_msg(json::array({tc1})),
204
- {
205
- {"role", "tool"},
206
- {"name", "test_tool1"},
207
- {"content", "Some response!"},
208
- {"tool_call_id", "call_911_"},
209
- }
210
- }), {}, false);
211
- caps_.supports_tool_responses = contains(out, "Some response!");
212
- caps_.supports_tool_call_id = contains(out, "call_911_");
213
- }
214
-
215
- try {
216
- if (!caps_.supports_tools) {
217
- const json user_msg {
218
- {"role", "user"},
219
- {"content", "Hey"},
220
- };
221
- const json args {
222
- {"arg1", "some_value"},
223
- };
224
- const json tool_call_msg {
225
- {"role", "assistant"},
226
- {"content", nullptr},
227
- {"tool_calls", json::array({
228
- {
229
- // TODO: detect if requires numerical id or fixed length == 6 like Nemo
230
- {"id", "call_1___"},
231
- {"type", "function"},
232
- {"function", {
233
- {"name", "tool_name"},
234
- {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
235
- }},
236
- },
237
- })},
238
- };
239
- std::string prefix, full;
240
- {
241
- chat_template_inputs inputs;
242
- inputs.messages = json::array({user_msg});
243
- inputs.add_generation_prompt = true;
244
- prefix = apply(inputs);
245
- }
246
- {
247
- chat_template_inputs inputs;
248
- inputs.messages = json::array({user_msg, tool_call_msg});
249
- inputs.add_generation_prompt = false;
250
- full = apply(inputs);
251
- }
252
- auto eos_pos_last = full.rfind(eos_token_);
253
- if (eos_pos_last == prefix.size() - eos_token_.size() ||
254
- (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
255
- full = full.substr(0, eos_pos_last);
256
- }
257
- size_t common_prefix_length = 0;
258
- for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
259
- if (prefix[i] != full[i]) {
260
- break;
261
- }
262
- if (prefix[i] == '<') {
263
- // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
264
- // but it removes thinking tags for past messages.
265
- // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
266
- continue;
267
- }
268
- common_prefix_length = i + 1;
269
- }
270
- auto example = full.substr(common_prefix_length);
271
- if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
272
- fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
273
- } else {
274
- tool_call_example_ = example;
275
- }
276
- }
277
- } catch (const std::exception & e) {
278
- fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
279
- }
280
- }
281
-
282
- const std::string & source() const { return source_; }
283
- const std::string & bos_token() const { return bos_token_; }
284
- const std::string & eos_token() const { return eos_token_; }
285
- const chat_template_caps & original_caps() const { return caps_; }
286
-
287
- // Deprecated, please use the form with chat_template_inputs and chat_template_options
288
- std::string apply(
289
- const nlohmann::ordered_json & messages,
290
- const nlohmann::ordered_json & tools,
291
- bool add_generation_prompt,
292
- const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
293
- bool apply_polyfills = true)
294
- {
295
- fprintf(stderr, "[%s] Deprecated!\n", __func__);
296
- chat_template_inputs inputs;
297
- inputs.messages = messages;
298
- inputs.tools = tools;
299
- inputs.add_generation_prompt = add_generation_prompt;
300
- inputs.extra_context = extra_context;
301
- inputs.now = std::chrono::system_clock::now();
302
-
303
- chat_template_options opts;
304
- opts.apply_polyfills = apply_polyfills;
305
-
306
- return apply(inputs, opts);
307
- }
308
-
309
- std::string apply(
310
- const chat_template_inputs & inputs,
311
- const chat_template_options & opts = chat_template_options()) const
312
- {
313
- json actual_messages;
314
-
315
- auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
316
- auto has_tool_calls = false;
317
- auto has_tool_responses = false;
318
- auto has_string_content = false;
319
- for (const auto & message : inputs.messages) {
320
- if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
321
- has_tool_calls = true;
322
- }
323
- if (message.contains("role") && message["role"] == "tool") {
324
- has_tool_responses = true;
325
- }
326
- if (message.contains("content") && message["content"].is_string()) {
327
- has_string_content = true;
328
- }
329
- }
330
-
331
- auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
332
- auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
333
- auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
334
- auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
335
- auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
336
- auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
337
- auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;
338
-
339
- auto needs_polyfills = opts.apply_polyfills && (false
340
- || polyfill_system_role
341
- || polyfill_tools
342
- || polyfill_tool_calls
343
- || polyfill_tool_responses
344
- || polyfill_object_arguments
345
- || polyfill_typed_content
346
- );
347
-
348
- if (needs_polyfills) {
349
- actual_messages = json::array();
350
-
351
- auto add_message = [&](const json & msg) {
352
- if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
353
- actual_messages.push_back({
354
- {"role", msg.at("role")},
355
- {"content", {{
356
- {"type", "text"},
357
- {"text", msg.at("content")},
358
- }}},
359
- });
360
- } else {
361
- actual_messages.push_back(msg);
362
- }
363
- };
364
-
365
- std::string pending_system;
366
- auto flush_sys = [&]() {
367
- if (!pending_system.empty()) {
368
- add_message({
369
- {"role", "user"},
370
- {"content", pending_system},
371
- });
372
- pending_system.clear();
373
- }
374
- };
375
-
376
- json adjusted_messages;
377
- if (polyfill_tools) {
378
- adjusted_messages = add_system(inputs.messages,
379
- "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
380
- (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
381
- } else {
382
- adjusted_messages = inputs.messages;
383
- }
384
-
385
- for (const auto & message_ : adjusted_messages) {
386
- auto message = message_;
387
- if (!message.contains("role") || !message.contains("content")) {
388
- throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
389
- }
390
- std::string role = message.at("role");
391
-
392
- if (message.contains("tool_calls")) {
393
- if (polyfill_object_arguments || polyfill_tool_calls) {
394
- for (auto & tool_call : message.at("tool_calls")) {
395
- if (tool_call["type"] == "function") {
396
- auto & function = tool_call.at("function");
397
- auto & arguments = function.at("arguments");
398
- if (arguments.is_string()) {
399
- try {
400
- arguments = json::parse(arguments.get<std::string>());
401
- } catch (const std::exception & ecvt) {
402
- fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
403
- }
404
- }
405
- }
406
- }
407
- }
408
- if (polyfill_tool_calls) {
409
- auto content = message.at("content");
410
- auto tool_calls = json::array();
411
- for (const auto & tool_call : message.at("tool_calls")) {
412
- if (tool_call.at("type") != "function") {
413
- continue;
414
- }
415
- const auto & function = tool_call.at("function");
416
- auto tc = json {
417
- {"name", function.at("name")},
418
- {"arguments", function.at("arguments")},
419
- };
420
- if (tool_call.contains("id")) {
421
- tc["id"] = tool_call["id"];
422
- }
423
- tool_calls.push_back(tc);
424
- }
425
- auto obj = json {
426
- {"tool_calls", tool_calls},
427
- };
428
- if (!content.is_null() && content != "") {
429
- obj["content"] = content;
430
- }
431
- message["content"] = obj.dump(2);
432
- message.erase("tool_calls");
433
- }
434
- }
435
- if (polyfill_tool_responses && role == "tool") {
436
- message["role"] = "user";
437
- auto obj = json {
438
- {"tool_response", {
439
- {"content", message.at("content")},
440
- }},
441
- };
442
- if (message.contains("name")) {
443
- obj["tool_response"]["name"] = message.at("name");
444
- }
445
- if (message.contains("tool_call_id")) {
446
- obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
447
- }
448
- message["content"] = obj.dump(2);
449
- message.erase("name");
450
- }
451
-
452
- if (!message["content"].is_null() && polyfill_system_role) {
453
- std::string content = message.at("content");
454
- if (role == "system") {
455
- if (!pending_system.empty()) pending_system += "\n";
456
- pending_system += content;
457
- continue;
458
- } else {
459
- if (role == "user") {
460
- if (!pending_system.empty()) {
461
- message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
462
- pending_system.clear();
463
- }
464
- } else {
465
- flush_sys();
466
- }
467
- }
468
- }
469
- add_message(message);
470
- }
471
- flush_sys();
472
- } else {
473
- actual_messages = inputs.messages;
474
- }
475
-
476
- auto context = minja::Context::make(json({
477
- {"messages", actual_messages},
478
- {"add_generation_prompt", inputs.add_generation_prompt},
479
- }));
480
- context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
481
- context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
482
- if (opts.define_strftime_now) {
483
- auto now = inputs.now;
484
- context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
485
- args.expectArgs("strftime_now", {1, 1}, {0, 0});
486
- auto format = args.args[0].get<std::string>();
487
-
488
- auto time = std::chrono::system_clock::to_time_t(now);
489
- auto local_time = *std::localtime(&time);
490
- std::ostringstream ss;
491
- ss << std::put_time(&local_time, format.c_str());
492
- return ss.str();
493
- }));
494
- }
495
- if (!inputs.tools.is_null()) {
496
- context->set("tools", minja::Value(inputs.tools));
497
- }
498
- if (!inputs.extra_context.is_null()) {
499
- for (auto & kv : inputs.extra_context.items()) {
500
- context->set(kv.key(), minja::Value(kv.value()));
501
- }
502
- }
503
-
504
- auto ret = template_root_->render(context);
505
- // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
506
- // fprintf(stderr, "apply: %s\n\n", ret.c_str());
507
- return ret;
508
- }
509
-
510
- static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
511
- json messages_with_system = messages;
512
-
513
- if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
514
- std::string existing_system = messages_with_system.at(0).at("content");
515
- messages_with_system[0] = json {
516
- {"role", "system"},
517
- {"content", existing_system + "\n\n" + system_prompt},
518
- };
519
- } else {
520
- messages_with_system.insert(messages_with_system.begin(), json {
521
- {"role", "system"},
522
- {"content", system_prompt},
523
- });
524
- }
525
- return messages_with_system;
526
- }
527
- };
528
-
529
- } // namespace minja
1
+ /*
2
+ Copyright 2024 Google LLC
3
+
4
+ Use of this source code is governed by an MIT-style
5
+ license that can be found in the LICENSE file or at
6
+ https://opensource.org/licenses/MIT.
7
+ */
8
+ // SPDX-License-Identifier: MIT
9
+ #pragma once
10
+
11
+ #include "minja.hpp"
12
+
13
+ #include <chrono>
14
+ #include <cstddef>
15
+ #include <cstdio>
16
+ #include <exception>
17
+ #include <iomanip>
18
+ #include <memory>
19
+ #include <sstream>
20
+ #include <string>
21
+ #include <vector>
22
+
23
+ #include <json.hpp>
24
+
25
+ using json = nlohmann::ordered_json;
26
+
27
+ namespace minja {
28
+
29
// Capabilities detected for a chat template by rendering probe inputs
// (see the chat_template constructor). Features a template does not
// support natively are emulated ("polyfilled") by chat_template::apply.
struct chat_template_caps {
    bool supports_tools = false;              // renders a "tools" array passed alongside messages
    bool supports_tool_calls = false;         // renders assistant messages carrying "tool_calls"
    bool supports_tool_responses = false;     // renders messages with role == "tool"
    bool supports_system_role = false;        // renders messages with role == "system"
    bool supports_parallel_tool_calls = false; // renders more than one tool call in a single message
    bool supports_tool_call_id = false;       // renders the "tool_call_id" field of tool responses
    // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
    // Most other templates (and OpenAI's API) expect the arguments object to be stringified.
    bool requires_object_arguments = false;
    // CohereForAI/c4ai-command-r-plus simple variant
    bool requires_non_null_content = false;
    // MiniMaxAI/MiniMax-Text-01 special
    bool requires_typed_content = false;
};
44
+
45
// Inputs for a single render of a chat template (see chat_template::apply).
struct chat_template_inputs {
    nlohmann::ordered_json messages;      // chat messages array; each entry needs "role" and "content"
    nlohmann::ordered_json tools;         // optional tool definitions (null or a JSON array)
    bool add_generation_prompt = true;    // passed through to the template as "add_generation_prompt"
    nlohmann::ordered_json extra_context; // additional key/value pairs exposed as template variables
    // Timestamp used by the template's strftime_now() helper; defaults to the current time.
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
};
52
+
53
// Options controlling a single render of a chat template (see chat_template::apply).
struct chat_template_options {
    bool apply_polyfills = true;     // master switch: when false, no polyfill below is applied
    bool use_bos_token = true;       // expose the real bos_token to the template (else "")
    bool use_eos_token = true;       // expose the real eos_token to the template (else "")
    bool define_strftime_now = true; // define the strftime_now(format) template function

    // Individual polyfills; each only takes effect when the template lacks the
    // corresponding capability (see chat_template_caps) and the inputs need it.
    bool polyfill_tools = true;              // inject tool definitions into the system prompt
    bool polyfill_tool_call_examples = true; // append an inferred tool-call syntax example
    bool polyfill_tool_calls = true;         // serialize "tool_calls" into message content
    bool polyfill_tool_responses = true;     // rewrite role "tool" messages as user messages
    bool polyfill_system_role = true;        // fold system messages into user messages
    bool polyfill_object_arguments = true;   // parse stringified tool arguments into objects
    bool polyfill_typed_content = true;      // wrap string content as [{"type":"text","text":...}]
};
67
+
68
// Wraps a Jinja-style chat template (parsed with minja) together with its BOS/EOS
// tokens. On construction, the template's capabilities are probed by rendering
// dummy conversations; apply() then emulates any missing features ("polyfills")
// so that callers can always pass OpenAI-style messages/tools.
class chat_template {

  private:
    chat_template_caps caps_;                           // probed capabilities (see constructor)
    std::string source_;                                // raw template source text
    std::string bos_token_;
    std::string eos_token_;
    std::shared_ptr<minja::TemplateNode> template_root_; // parsed template AST
    std::string tool_call_example_;                     // inferred example of the template's tool-call syntax (may stay empty)

    // Renders the template with polyfills disabled, swallowing any exception and
    // returning "" on failure. Used only for capability probing: a probe "passes"
    // when the rendered output contains the needle string that was fed in.
    std::string try_raw_render(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
    {
        try {
            chat_template_inputs inputs;
            inputs.messages = messages;
            inputs.tools = tools;
            inputs.add_generation_prompt = add_generation_prompt;
            inputs.extra_context = extra_context;
            // Use fixed date for tests
            inputs.now = std::chrono::system_clock::from_time_t(0);

            chat_template_options opts;
            opts.apply_polyfills = false;

            auto prompt = apply(inputs, opts);
            // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
            return prompt;
        } catch (const std::exception & e) {
            // fprintf(stderr, "try_raw_render error: %s\n", e.what());
            return "";
        }
    }

  public:

    // Parses `source` and probes the template's capabilities by rendering dummy
    // conversations and checking whether known needle strings survive into the
    // output. Probe order matters: requires_typed_content is detected first so
    // later probes can use the content shape the template accepts.
    chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
        : source_(source), bos_token_(bos_token), eos_token_(eos_token)
    {
        template_root_ = minja::Parser::parse(source_, {
            /* .trim_blocks = */ true,
            /* .lstrip_blocks = */ true,
            /* .keep_trailing_newline = */ false,
        });

        auto contains = [](const std::string & haystack, const std::string & needle) {
            return haystack.find(needle) != std::string::npos;
        };

        const std::string user_needle = "<User Needle>";
        const std::string sys_needle = "<System Needle>";
        const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
        const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};

        // Typed content is required when plain string content is dropped but the
        // [{"type":"text",...}] form is rendered.
        caps_.requires_typed_content =
            !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
            && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);

        // All subsequent probes use whichever content shape the template accepts.
        const auto dummy_user_msg = caps_.requires_typed_content
            ? dummy_typed_user_msg
            : dummy_str_user_msg;
        const json needle_system_msg = {
            {"role", "system"},
            {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
        };

        caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);

        // Probe tools support: pass one dummy tool definition and check whether
        // its name shows up in the rendered prompt.
        auto out = try_raw_render(json::array({
            dummy_user_msg
        }), json::array({
            {
                {"name", "some_tool"},
                {"type", "function"},
                {"function", {
                    {"name", "some_tool"},
                    {"description", "Some tool."},
                    {"parameters", {
                        {"type", "object"},
                        {"properties", {
                            {"arg", {
                                {"type", "string"},
                                {"description", "Some argument."},
                            }},
                        }},
                        {"required", json::array({ "arg" })},
                    }},
                }},
            },
        }), false);
        caps_.supports_tools = contains(out, "some_tool");

        auto make_tool_calls_msg = [&](const json & tool_calls) {
            return json {
                {"role", "assistant"},
                {"content", nullptr},
                {"tool_calls", tool_calls},
            };
        };
        auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
            return json {
                {"id", "call_1___"},
                {"type", "function"},
                {"function", {
                    {"arguments", arguments},
                    {"name", tool_name},
                }},
            };
        };
        const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};

        // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
        // Render the same tool call twice — once with stringified arguments, once
        // with an arguments object — to tell which form the template expects.
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
        }), {}, false);
        auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
        }), {}, false);
        auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");

        caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
        caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
        // Non-null content is required when "" renders fine but null breaks the render.
        auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
        auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
        caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);

        if (caps_.supports_tool_calls) {
            auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
            auto tc1 = make_tool_call("test_tool1", dummy_args);
            auto tc2 = make_tool_call("test_tool2", dummy_args);
            auto out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1, tc2})),
            }), {}, false);
            caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");

            out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1})),
                {
                    {"role", "tool"},
                    {"name", "test_tool1"},
                    {"content", "Some response!"},
                    {"tool_call_id", "call_911_"},
                }
            }), {}, false);
            caps_.supports_tool_responses = contains(out, "Some response!");
            caps_.supports_tool_call_id = contains(out, "call_911_");
        }

        // For templates without native tools support, infer an example of the
        // tool-call syntax by diffing a render that ends at the generation prompt
        // against a render that additionally contains one tool call; the suffix
        // past the common prefix is the example shown in the polyfilled system prompt.
        try {
            if (!caps_.supports_tools) {
                const json user_msg {
                    {"role", "user"},
                    {"content", "Hey"},
                };
                const json args {
                    {"arg1", "some_value"},
                };
                const json tool_call_msg {
                    {"role", "assistant"},
                    {"content", nullptr},
                    {"tool_calls", json::array({
                        {
                            // TODO: detect if requires numerical id or fixed length == 6 like Nemo
                            {"id", "call_1___"},
                            {"type", "function"},
                            {"function", {
                                {"name", "tool_name"},
                                {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
                            }},
                        },
                    })},
                };
                std::string prefix, full;
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg});
                    inputs.add_generation_prompt = true;
                    prefix = apply(inputs);
                }
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg, tool_call_msg});
                    inputs.add_generation_prompt = false;
                    full = apply(inputs);
                }
                // Strip a trailing EOS token (possibly followed by a newline) so it
                // doesn't end up inside the example.
                auto eos_pos_last = full.rfind(eos_token_);
                if (eos_pos_last == prefix.size() - eos_token_.size() ||
                        (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
                    full = full.substr(0, eos_pos_last);
                }
                size_t common_prefix_length = 0;
                for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
                    if (prefix[i] != full[i]) {
                        break;
                    }
                    if (prefix[i] == '<') {
                        // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
                        // but it removes thinking tags for past messages.
                        // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
                        continue;
                    }
                    common_prefix_length = i + 1;
                }
                auto example = full.substr(common_prefix_length);
                if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                    fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
                } else {
                    tool_call_example_ = example;
                }
            }
        } catch (const std::exception & e) {
            fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
        }
    }

    const std::string & source() const { return source_; }
    const std::string & bos_token() const { return bos_token_; }
    const std::string & eos_token() const { return eos_token_; }
    const chat_template_caps & original_caps() const { return caps_; }

    // Deprecated, please use the form with chat_template_inputs and chat_template_options
    std::string apply(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
        bool apply_polyfills = true)
    {
        fprintf(stderr, "[%s] Deprecated!\n", __func__);
        chat_template_inputs inputs;
        inputs.messages = messages;
        inputs.tools = tools;
        inputs.add_generation_prompt = add_generation_prompt;
        inputs.extra_context = extra_context;
        inputs.now = std::chrono::system_clock::now();

        chat_template_options opts;
        opts.apply_polyfills = apply_polyfills;

        return apply(inputs, opts);
    }

    // Renders the template for the given inputs. When opts.apply_polyfills is set
    // and the inputs use a feature the template lacks (per caps_), the messages
    // are first rewritten so that the template can still render them. Throws
    // std::runtime_error if a message lacks "role" or "content" while polyfilling.
    std::string apply(
        const chat_template_inputs & inputs,
        const chat_template_options & opts = chat_template_options()) const
    {
        json actual_messages;

        // Scan the inputs to decide which polyfills could be needed at all.
        auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
        auto has_tool_calls = false;
        auto has_tool_responses = false;
        auto has_string_content = false;
        for (const auto & message : inputs.messages) {
            if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
                has_tool_calls = true;
            }
            if (message.contains("role") && message["role"] == "tool") {
                has_tool_responses = true;
            }
            if (message.contains("content") && message["content"].is_string()) {
                has_string_content = true;
            }
        }

        // A polyfill is active only if enabled, needed by the inputs, and not
        // natively supported by the template.
        auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
        auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
        auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
        auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
        auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
        auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
        auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;

        auto needs_polyfills = opts.apply_polyfills && (false
            || polyfill_system_role
            || polyfill_tools
            || polyfill_tool_calls
            || polyfill_tool_responses
            || polyfill_object_arguments
            || polyfill_typed_content
        );

        if (needs_polyfills) {
            actual_messages = json::array();

            // Appends a message, converting string content to the typed
            // [{"type":"text","text":...}] form when required.
            auto add_message = [&](const json & msg) {
                if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
                    actual_messages.push_back({
                        {"role", msg.at("role")},
                        {"content", {{
                            {"type", "text"},
                            {"text", msg.at("content")},
                        }}},
                    });
                } else {
                    actual_messages.push_back(msg);
                }
            };

            // System content accumulated while polyfilling the system role; it is
            // merged into the next user message, or flushed as its own user message.
            std::string pending_system;
            auto flush_sys = [&]() {
                if (!pending_system.empty()) {
                    add_message({
                        {"role", "user"},
                        {"content", pending_system},
                    });
                    pending_system.clear();
                }
            };

            json adjusted_messages;
            if (polyfill_tools) {
                // Inject the tool definitions (and, if available, an example of
                // the tool-call syntax) into the system prompt.
                adjusted_messages = add_system(inputs.messages,
                    "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
                    (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
            } else {
                adjusted_messages = inputs.messages;
            }

            for (const auto & message_ : adjusted_messages) {
                auto message = message_; // mutable copy; polyfills rewrite it in place
                if (!message.contains("role") || !message.contains("content")) {
                    throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
                }
                std::string role = message.at("role");

                if (message.contains("tool_calls")) {
                    if (polyfill_object_arguments || polyfill_tool_calls) {
                        // Convert stringified tool arguments back into JSON objects;
                        // unparseable strings are left as-is (logged, not fatal).
                        for (auto & tool_call : message.at("tool_calls")) {
                            if (tool_call["type"] == "function") {
                                auto & function = tool_call.at("function");
                                auto & arguments = function.at("arguments");
                                if (arguments.is_string()) {
                                    try {
                                        arguments = json::parse(arguments.get<std::string>());
                                    } catch (const std::exception & ecvt) {
                                        fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
                                    }
                                }
                            }
                        }
                    }
                    if (polyfill_tool_calls) {
                        // Serialize the tool calls (plus any original content) into
                        // the message content, since the template can't render them.
                        auto content = message.at("content");
                        auto tool_calls = json::array();
                        for (const auto & tool_call : message.at("tool_calls")) {
                            if (tool_call.at("type") != "function") {
                                continue;
                            }
                            const auto & function = tool_call.at("function");
                            auto tc = json {
                                {"name", function.at("name")},
                                {"arguments", function.at("arguments")},
                            };
                            if (tool_call.contains("id")) {
                                tc["id"] = tool_call["id"];
                            }
                            tool_calls.push_back(tc);
                        }
                        auto obj = json {
                            {"tool_calls", tool_calls},
                        };
                        if (!content.is_null() && !content.empty()) {
                            obj["content"] = content;
                        }
                        message["content"] = obj.dump(2);
                        message.erase("tool_calls");
                    }
                }
                if (polyfill_tool_responses && role == "tool") {
                    // Rewrite the tool response as a user message carrying a
                    // JSON-encoded "tool_response" object.
                    message["role"] = "user";
                    auto obj = json {
                        {"tool_response", json::object()},
                    };
                    if (message.contains("name")) {
                        obj["tool_response"]["tool"] = message.at("name");
                    }
                    obj["tool_response"]["content"] = message.at("content");
                    if (message.contains("tool_call_id")) {
                        obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
                    }
                    message["content"] = obj.dump(2);
                    message.erase("name");
                }

                if (!message["content"].is_null() && polyfill_system_role) {
                    std::string content = message.at("content");
                    if (role == "system") {
                        // Buffer system content; it is prepended to the next user message.
                        if (!pending_system.empty()) pending_system += "\n";
                        pending_system += content;
                        continue;
                    } else {
                        if (role == "user") {
                            if (!pending_system.empty()) {
                                message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
                                pending_system.clear();
                            }
                        } else {
                            flush_sys();
                        }
                    }
                }
                add_message(message);
            }
            flush_sys();
        } else {
            actual_messages = inputs.messages;
        }

        // Build the template context: messages, flags, tokens, helper functions,
        // tools, and any extra variables supplied by the caller.
        auto context = minja::Context::make(json({
            {"messages", actual_messages},
            {"add_generation_prompt", inputs.add_generation_prompt},
        }));
        context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
        context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
        if (opts.define_strftime_now) {
            auto now = inputs.now;
            context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
                args.expectArgs("strftime_now", {1, 1}, {0, 0});
                auto format = args.args[0].get<std::string>();

                // NOTE(review): std::localtime is not thread-safe on most
                // platforms — confirm single-threaded use or switch to a
                // reentrant variant if templates render concurrently.
                auto time = std::chrono::system_clock::to_time_t(now);
                auto local_time = *std::localtime(&time);
                std::ostringstream ss;
                ss << std::put_time(&local_time, format.c_str());
                return ss.str();
            }));
        }
        if (!inputs.tools.is_null()) {
            context->set("tools", minja::Value(inputs.tools));
        }
        if (!inputs.extra_context.is_null()) {
            for (auto & kv : inputs.extra_context.items()) {
                context->set(kv.key(), minja::Value(kv.value()));
            }
        }

        auto ret = template_root_->render(context);
        // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
        // fprintf(stderr, "apply: %s\n\n", ret.c_str());
        return ret;
    }

    // Returns a copy of `messages` with `system_prompt` merged into the leading
    // system message (appended after a blank line), or inserted as a new leading
    // system message if there is none.
    static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
        json messages_with_system = messages;

        if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") {
            std::string existing_system = messages_with_system.at(0).at("content");
            messages_with_system[0] = json {
                {"role", "system"},
                {"content", existing_system + "\n\n" + system_prompt},
            };
        } else {
            messages_with_system.insert(messages_with_system.begin(), json {
                {"role", "system"},
                {"content", system_prompt},
            });
        }
        return messages_with_system;
    }
};
536
+
537
+ } // namespace minja