cui-llama.rn 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/android/src/main/CMakeLists.txt +2 -2
  2. package/android/src/main/jni.cpp +12 -10
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/chat-template.hpp +529 -529
  12. package/cpp/chat.cpp +959 -265
  13. package/cpp/chat.h +135 -0
  14. package/cpp/common.cpp +2064 -1996
  15. package/cpp/common.h +700 -744
  16. package/cpp/ggml-alloc.c +1039 -1030
  17. package/cpp/ggml-alloc.h +1 -1
  18. package/cpp/ggml-backend-impl.h +255 -255
  19. package/cpp/ggml-backend-reg.cpp +586 -582
  20. package/cpp/ggml-backend.cpp +2004 -2002
  21. package/cpp/ggml-backend.h +354 -354
  22. package/cpp/ggml-common.h +1851 -1851
  23. package/cpp/ggml-cpp.h +39 -39
  24. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  25. package/cpp/ggml-cpu-aarch64.h +8 -8
  26. package/cpp/ggml-cpu-impl.h +531 -380
  27. package/cpp/ggml-cpu-quants.c +12527 -11517
  28. package/cpp/ggml-cpu-traits.cpp +36 -36
  29. package/cpp/ggml-cpu-traits.h +38 -38
  30. package/cpp/ggml-cpu.c +15766 -14485
  31. package/cpp/ggml-cpu.cpp +655 -633
  32. package/cpp/ggml-cpu.h +138 -135
  33. package/cpp/ggml-impl.h +567 -567
  34. package/cpp/ggml-metal-impl.h +235 -0
  35. package/cpp/ggml-metal.h +66 -66
  36. package/cpp/ggml-metal.m +5146 -5002
  37. package/cpp/ggml-opt.cpp +854 -854
  38. package/cpp/ggml-opt.h +216 -216
  39. package/cpp/ggml-quants.c +5238 -5238
  40. package/cpp/ggml-threading.h +14 -14
  41. package/cpp/ggml.c +6529 -6524
  42. package/cpp/ggml.h +2198 -2194
  43. package/cpp/gguf.cpp +1329 -1329
  44. package/cpp/gguf.h +202 -202
  45. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  46. package/cpp/json-schema-to-grammar.h +21 -22
  47. package/cpp/json.hpp +24766 -24766
  48. package/cpp/llama-adapter.cpp +347 -347
  49. package/cpp/llama-adapter.h +74 -74
  50. package/cpp/llama-arch.cpp +1513 -1492
  51. package/cpp/llama-arch.h +403 -402
  52. package/cpp/llama-batch.cpp +368 -368
  53. package/cpp/llama-batch.h +88 -88
  54. package/cpp/llama-chat.cpp +588 -587
  55. package/cpp/llama-chat.h +53 -53
  56. package/cpp/llama-context.cpp +1775 -1775
  57. package/cpp/llama-context.h +128 -128
  58. package/cpp/llama-cparams.cpp +1 -1
  59. package/cpp/llama-cparams.h +37 -37
  60. package/cpp/llama-cpp.h +30 -30
  61. package/cpp/llama-grammar.cpp +1219 -1219
  62. package/cpp/llama-grammar.h +173 -164
  63. package/cpp/llama-hparams.cpp +71 -71
  64. package/cpp/llama-hparams.h +139 -139
  65. package/cpp/llama-impl.cpp +167 -167
  66. package/cpp/llama-impl.h +61 -61
  67. package/cpp/llama-kv-cache.cpp +718 -718
  68. package/cpp/llama-kv-cache.h +219 -218
  69. package/cpp/llama-mmap.cpp +600 -590
  70. package/cpp/llama-mmap.h +68 -68
  71. package/cpp/llama-model-loader.cpp +1124 -1124
  72. package/cpp/llama-model-loader.h +167 -167
  73. package/cpp/llama-model.cpp +4087 -4023
  74. package/cpp/llama-model.h +370 -370
  75. package/cpp/llama-sampling.cpp +2558 -2525
  76. package/cpp/llama-sampling.h +32 -32
  77. package/cpp/llama-vocab.cpp +3264 -3252
  78. package/cpp/llama-vocab.h +125 -125
  79. package/cpp/llama.cpp +10284 -10137
  80. package/cpp/llama.h +1354 -1340
  81. package/cpp/log.cpp +393 -423
  82. package/cpp/log.h +132 -132
  83. package/cpp/minja/chat-template.hpp +529 -0
  84. package/cpp/minja/minja.hpp +2915 -0
  85. package/cpp/minja.hpp +2915 -2883
  86. package/cpp/rn-llama.cpp +20 -37
  87. package/cpp/rn-llama.h +12 -2
  88. package/cpp/sampling.cpp +570 -532
  89. package/cpp/sgemm.cpp +2598 -2598
  90. package/cpp/sgemm.h +14 -14
  91. package/cpp/speculative.cpp +278 -277
  92. package/cpp/speculative.h +28 -28
  93. package/package.json +1 -1
  94. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  95. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  96. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  97. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  98. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  99. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  100. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  101. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  102. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  103. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  104. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  105. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  106. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  107. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  108. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  109. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  110. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  111. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  112. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  113. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  114. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  115. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  116. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  117. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  118. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  119. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  120. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  122. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  124. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  125. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  126. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  127. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  128. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  129. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  130. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  132. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  134. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  135. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  136. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  194. package/android/src/main/build-arm64/Makefile +0 -1862
  195. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  196. package/cpp/chat.hpp +0 -55
  197. package/cpp/rn-llama.hpp +0 -913
@@ -1,529 +1,529 @@
1
- /*
2
- Copyright 2024 Google LLC
3
-
4
- Use of this source code is governed by an MIT-style
5
- license that can be found in the LICENSE file or at
6
- https://opensource.org/licenses/MIT.
7
- */
8
- // SPDX-License-Identifier: MIT
9
- #pragma once
10
-
11
- #include "minja.hpp"
12
- #include "json.hpp"
13
- #include <string>
14
- #include <vector>
15
-
16
- using json = nlohmann::ordered_json;
17
-
18
- namespace minja {
19
-
20
- struct chat_template_caps {
21
- bool supports_tools = false;
22
- bool supports_tool_calls = false;
23
- bool supports_tool_responses = false;
24
- bool supports_system_role = false;
25
- bool supports_parallel_tool_calls = false;
26
- bool supports_tool_call_id = false;
27
- // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
28
- // Most other templates (and OpenAI's API) expect the arguments object to be stringified.
29
- bool requires_object_arguments = false;
30
- // CohereForAI/c4ai-command-r-plus simple variant
31
- bool requires_non_null_content = false;
32
- // MiniMaxAI/MiniMax-Text-01 special
33
- bool requires_typed_content = false;
34
- };
35
-
36
- struct chat_template_inputs {
37
- nlohmann::ordered_json messages;
38
- nlohmann::ordered_json tools;
39
- bool add_generation_prompt = true;
40
- nlohmann::ordered_json extra_context;
41
- std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
42
- };
43
-
44
- struct chat_template_options {
45
- bool apply_polyfills = true;
46
- bool use_bos_token = true;
47
- bool use_eos_token = true;
48
- bool define_strftime_now = true;
49
-
50
- bool polyfill_tools = true;
51
- bool polyfill_tool_call_examples = true;
52
- bool polyfill_tool_calls = true;
53
- bool polyfill_tool_responses = true;
54
- bool polyfill_system_role = true;
55
- bool polyfill_object_arguments = true;
56
- bool polyfill_typed_content = true;
57
- };
58
-
59
- class chat_template {
60
-
61
- private:
62
- chat_template_caps caps_;
63
- std::string source_;
64
- std::string bos_token_;
65
- std::string eos_token_;
66
- std::shared_ptr<minja::TemplateNode> template_root_;
67
- std::string tool_call_example_;
68
-
69
- std::string try_raw_render(
70
- const nlohmann::ordered_json & messages,
71
- const nlohmann::ordered_json & tools,
72
- bool add_generation_prompt,
73
- const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
74
- {
75
- try {
76
- chat_template_inputs inputs;
77
- inputs.messages = messages;
78
- inputs.tools = tools;
79
- inputs.add_generation_prompt = add_generation_prompt;
80
- inputs.extra_context = extra_context;
81
- // Use fixed date for tests
82
- inputs.now = std::chrono::system_clock::from_time_t(0);
83
-
84
- chat_template_options opts;
85
- opts.apply_polyfills = false;
86
-
87
- auto prompt = apply(inputs, opts);
88
- // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
89
- return prompt;
90
- } catch (const std::exception & e) {
91
- // fprintf(stderr, "try_raw_render error: %s\n", e.what());
92
- return "";
93
- }
94
- }
95
-
96
- public:
97
-
98
- chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
99
- : source_(source), bos_token_(bos_token), eos_token_(eos_token)
100
- {
101
- template_root_ = minja::Parser::parse(source_, {
102
- /* .trim_blocks = */ true,
103
- /* .lstrip_blocks = */ true,
104
- /* .keep_trailing_newline = */ false,
105
- });
106
-
107
- auto contains = [](const std::string & haystack, const std::string & needle) {
108
- return haystack.find(needle) != std::string::npos;
109
- };
110
-
111
- const std::string user_needle = "<User Needle>";
112
- const std::string sys_needle = "<System Needle>";
113
- const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
114
- const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};
115
-
116
- caps_.requires_typed_content =
117
- !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
118
- && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);
119
-
120
- const auto dummy_user_msg = caps_.requires_typed_content
121
- ? dummy_typed_user_msg
122
- : dummy_str_user_msg;
123
- const json needle_system_msg = {
124
- {"role", "system"},
125
- {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
126
- };
127
-
128
- caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);
129
-
130
- auto out = try_raw_render(json::array({
131
- dummy_user_msg
132
- }), json::array({
133
- {
134
- {"name", "some_tool"},
135
- {"type", "function"},
136
- {"function", {
137
- {"name", "some_tool"},
138
- {"description", "Some tool."},
139
- {"parameters", {
140
- {"type", "object"},
141
- {"properties", {
142
- {"arg", {
143
- {"type", "string"},
144
- {"description", "Some argument."},
145
- }},
146
- }},
147
- {"required", json::array({ "arg" })},
148
- }},
149
- }},
150
- },
151
- }), false);
152
- caps_.supports_tools = contains(out, "some_tool");
153
-
154
- auto make_tool_calls_msg = [&](const json & tool_calls) {
155
- return json {
156
- {"role", "assistant"},
157
- {"content", nullptr},
158
- {"tool_calls", tool_calls},
159
- };
160
- };
161
- auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
162
- return json {
163
- {"id", "call_1___"},
164
- {"type", "function"},
165
- {"function", {
166
- {"arguments", arguments},
167
- {"name", tool_name},
168
- }},
169
- };
170
- };
171
- const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};
172
-
173
- // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
174
- out = try_raw_render(json::array({
175
- dummy_user_msg,
176
- make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
177
- }), {}, false);
178
- auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
179
- out = try_raw_render(json::array({
180
- dummy_user_msg,
181
- make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
182
- }), {}, false);
183
- auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
184
-
185
- caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
186
- caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
187
- auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
188
- auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
189
- caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);
190
-
191
- if (caps_.supports_tool_calls) {
192
- auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
193
- auto tc1 = make_tool_call("test_tool1", dummy_args);
194
- auto tc2 = make_tool_call("test_tool2", dummy_args);
195
- auto out = try_raw_render(json::array({
196
- dummy_user_msg,
197
- make_tool_calls_msg(json::array({tc1, tc2})),
198
- }), {}, false);
199
- caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");
200
-
201
- out = try_raw_render(json::array({
202
- dummy_user_msg,
203
- make_tool_calls_msg(json::array({tc1})),
204
- {
205
- {"role", "tool"},
206
- {"name", "test_tool1"},
207
- {"content", "Some response!"},
208
- {"tool_call_id", "call_911_"},
209
- }
210
- }), {}, false);
211
- caps_.supports_tool_responses = contains(out, "Some response!");
212
- caps_.supports_tool_call_id = contains(out, "call_911_");
213
- }
214
-
215
- try {
216
- if (!caps_.supports_tools) {
217
- const json user_msg {
218
- {"role", "user"},
219
- {"content", "Hey"},
220
- };
221
- const json args {
222
- {"arg1", "some_value"},
223
- };
224
- const json tool_call_msg {
225
- {"role", "assistant"},
226
- {"content", nullptr},
227
- {"tool_calls", json::array({
228
- {
229
- // TODO: detect if requires numerical id or fixed length == 6 like Nemo
230
- {"id", "call_1___"},
231
- {"type", "function"},
232
- {"function", {
233
- {"name", "tool_name"},
234
- {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
235
- }},
236
- },
237
- })},
238
- };
239
- std::string prefix, full;
240
- {
241
- chat_template_inputs inputs;
242
- inputs.messages = json::array({user_msg});
243
- inputs.add_generation_prompt = true;
244
- prefix = apply(inputs);
245
- }
246
- {
247
- chat_template_inputs inputs;
248
- inputs.messages = json::array({user_msg, tool_call_msg});
249
- inputs.add_generation_prompt = false;
250
- full = apply(inputs);
251
- }
252
- auto eos_pos_last = full.rfind(eos_token_);
253
- if (eos_pos_last == prefix.size() - eos_token_.size() ||
254
- (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
255
- full = full.substr(0, eos_pos_last);
256
- }
257
- size_t common_prefix_length = 0;
258
- for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
259
- if (prefix[i] != full[i]) {
260
- break;
261
- }
262
- if (prefix[i] == '<') {
263
- // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
264
- // but it removes thinking tags for past messages.
265
- // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
266
- continue;
267
- }
268
- common_prefix_length = i + 1;
269
- }
270
- auto example = full.substr(common_prefix_length);
271
- if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
272
- fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
273
- } else {
274
- tool_call_example_ = example;
275
- }
276
- }
277
- } catch (const std::exception & e) {
278
- fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
279
- }
280
- }
281
-
282
- const std::string & source() const { return source_; }
283
- const std::string & bos_token() const { return bos_token_; }
284
- const std::string & eos_token() const { return eos_token_; }
285
- const chat_template_caps & original_caps() const { return caps_; }
286
-
287
- // Deprecated, please use the form with chat_template_inputs and chat_template_options
288
- std::string apply(
289
- const nlohmann::ordered_json & messages,
290
- const nlohmann::ordered_json & tools,
291
- bool add_generation_prompt,
292
- const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
293
- bool apply_polyfills = true)
294
- {
295
- fprintf(stderr, "[%s] Deprecated!\n", __func__);
296
- chat_template_inputs inputs;
297
- inputs.messages = messages;
298
- inputs.tools = tools;
299
- inputs.add_generation_prompt = add_generation_prompt;
300
- inputs.extra_context = extra_context;
301
- inputs.now = std::chrono::system_clock::now();
302
-
303
- chat_template_options opts;
304
- opts.apply_polyfills = apply_polyfills;
305
-
306
- return apply(inputs, opts);
307
- }
308
-
309
- std::string apply(
310
- const chat_template_inputs & inputs,
311
- const chat_template_options & opts = chat_template_options()) const
312
- {
313
- json actual_messages;
314
-
315
- auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
316
- auto has_tool_calls = false;
317
- auto has_tool_responses = false;
318
- auto has_string_content = false;
319
- for (const auto & message : inputs.messages) {
320
- if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
321
- has_tool_calls = true;
322
- }
323
- if (message.contains("role") && message["role"] == "tool") {
324
- has_tool_responses = true;
325
- }
326
- if (message.contains("content") && message["content"].is_string()) {
327
- has_string_content = true;
328
- }
329
- }
330
-
331
- auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
332
- auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
333
- auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
334
- auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
335
- auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
336
- auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
337
- auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;
338
-
339
- auto needs_polyfills = opts.apply_polyfills && (false
340
- || polyfill_system_role
341
- || polyfill_tools
342
- || polyfill_tool_calls
343
- || polyfill_tool_responses
344
- || polyfill_object_arguments
345
- || polyfill_typed_content
346
- );
347
-
348
- if (needs_polyfills) {
349
- actual_messages = json::array();
350
-
351
- auto add_message = [&](const json & msg) {
352
- if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
353
- actual_messages.push_back({
354
- {"role", msg.at("role")},
355
- {"content", {{
356
- {"type", "text"},
357
- {"text", msg.at("content")},
358
- }}},
359
- });
360
- } else {
361
- actual_messages.push_back(msg);
362
- }
363
- };
364
-
365
- std::string pending_system;
366
- auto flush_sys = [&]() {
367
- if (!pending_system.empty()) {
368
- add_message({
369
- {"role", "user"},
370
- {"content", pending_system},
371
- });
372
- pending_system.clear();
373
- }
374
- };
375
-
376
- json adjusted_messages;
377
- if (polyfill_tools) {
378
- adjusted_messages = add_system(inputs.messages,
379
- "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
380
- (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
381
- } else {
382
- adjusted_messages = inputs.messages;
383
- }
384
-
385
- for (const auto & message_ : adjusted_messages) {
386
- auto message = message_;
387
- if (!message.contains("role") || !message.contains("content")) {
388
- throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
389
- }
390
- std::string role = message.at("role");
391
-
392
- if (message.contains("tool_calls")) {
393
- if (polyfill_object_arguments || polyfill_tool_calls) {
394
- for (auto & tool_call : message.at("tool_calls")) {
395
- if (tool_call["type"] == "function") {
396
- auto & function = tool_call.at("function");
397
- auto & arguments = function.at("arguments");
398
- if (arguments.is_string()) {
399
- try {
400
- arguments = json::parse(arguments.get<std::string>());
401
- } catch (const std::exception & ecvt) {
402
- fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
403
- }
404
- }
405
- }
406
- }
407
- }
408
- if (polyfill_tool_calls) {
409
- auto content = message.at("content");
410
- auto tool_calls = json::array();
411
- for (const auto & tool_call : message.at("tool_calls")) {
412
- if (tool_call.at("type") != "function") {
413
- continue;
414
- }
415
- const auto & function = tool_call.at("function");
416
- auto tc = json {
417
- {"name", function.at("name")},
418
- {"arguments", function.at("arguments")},
419
- };
420
- if (tool_call.contains("id")) {
421
- tc["id"] = tool_call["id"];
422
- }
423
- tool_calls.push_back(tc);
424
- }
425
- auto obj = json {
426
- {"tool_calls", tool_calls},
427
- };
428
- if (!content.is_null() && content != "") {
429
- obj["content"] = content;
430
- }
431
- message["content"] = obj.dump(2);
432
- message.erase("tool_calls");
433
- }
434
- }
435
- if (polyfill_tool_responses && role == "tool") {
436
- message["role"] = "user";
437
- auto obj = json {
438
- {"tool_response", {
439
- {"content", message.at("content")},
440
- }},
441
- };
442
- if (message.contains("name")) {
443
- obj["tool_response"]["name"] = message.at("name");
444
- }
445
- if (message.contains("tool_call_id")) {
446
- obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
447
- }
448
- message["content"] = obj.dump(2);
449
- message.erase("name");
450
- }
451
-
452
- if (!message["content"].is_null() && polyfill_system_role) {
453
- std::string content = message.at("content");
454
- if (role == "system") {
455
- if (!pending_system.empty()) pending_system += "\n";
456
- pending_system += content;
457
- continue;
458
- } else {
459
- if (role == "user") {
460
- if (!pending_system.empty()) {
461
- message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
462
- pending_system.clear();
463
- }
464
- } else {
465
- flush_sys();
466
- }
467
- }
468
- }
469
- add_message(message);
470
- }
471
- flush_sys();
472
- } else {
473
- actual_messages = inputs.messages;
474
- }
475
-
476
- auto context = minja::Context::make(json({
477
- {"messages", actual_messages},
478
- {"add_generation_prompt", inputs.add_generation_prompt},
479
- }));
480
- context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
481
- context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
482
- if (opts.define_strftime_now) {
483
- auto now = inputs.now;
484
- context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
485
- args.expectArgs("strftime_now", {1, 1}, {0, 0});
486
- auto format = args.args[0].get<std::string>();
487
-
488
- auto time = std::chrono::system_clock::to_time_t(now);
489
- auto local_time = *std::localtime(&time);
490
- std::ostringstream ss;
491
- ss << std::put_time(&local_time, format.c_str());
492
- return ss.str();
493
- }));
494
- }
495
- if (!inputs.tools.is_null()) {
496
- context->set("tools", minja::Value(inputs.tools));
497
- }
498
- if (!inputs.extra_context.is_null()) {
499
- for (auto & kv : inputs.extra_context.items()) {
500
- context->set(kv.key(), minja::Value(kv.value()));
501
- }
502
- }
503
-
504
- auto ret = template_root_->render(context);
505
- // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
506
- // fprintf(stderr, "apply: %s\n\n", ret.c_str());
507
- return ret;
508
- }
509
-
510
- static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
511
- json messages_with_system = messages;
512
-
513
- if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
514
- std::string existing_system = messages_with_system.at(0).at("content");
515
- messages_with_system[0] = json {
516
- {"role", "system"},
517
- {"content", existing_system + "\n\n" + system_prompt},
518
- };
519
- } else {
520
- messages_with_system.insert(messages_with_system.begin(), json {
521
- {"role", "system"},
522
- {"content", system_prompt},
523
- });
524
- }
525
- return messages_with_system;
526
- }
527
- };
528
-
529
- } // namespace minja
1
+ /*
2
+ Copyright 2024 Google LLC
3
+
4
+ Use of this source code is governed by an MIT-style
5
+ license that can be found in the LICENSE file or at
6
+ https://opensource.org/licenses/MIT.
7
+ */
8
+ // SPDX-License-Identifier: MIT
9
+ #pragma once
10
+
11
+ #include "minja.hpp"
12
+ #include "json.hpp"
13
+ #include <string>
14
+ #include <vector>
15
+
16
+ using json = nlohmann::ordered_json;
17
+
18
+ namespace minja {
19
+
20
// Capabilities of a chat template, probed empirically by chat_template's
// constructor (it renders small synthetic conversations and inspects the
// output). `supports_*` flags record what the template renders natively;
// `requires_*` flags record input shapes the template insists on, which
// chat_template::apply() polyfills when they differ from the caller's input.
struct chat_template_caps {
    // Rendering with a top-level `tools` array makes the tool name appear in the output.
    bool supports_tools = false;
    // Assistant `tool_calls` entries (and their arguments) are rendered.
    bool supports_tool_calls = false;
    // The content of `role == "tool"` messages is rendered.
    bool supports_tool_responses = false;
    // The content of `role == "system"` messages is rendered.
    bool supports_system_role = false;
    // Two tool calls in one assistant message are both rendered.
    bool supports_parallel_tool_calls = false;
    // The `tool_call_id` of a tool response is rendered.
    bool supports_tool_call_id = false;
    // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
    // Most other templates (and OpenAI's API) expect the arguments object to be stringified.
    bool requires_object_arguments = false;
    // CohereForAI/c4ai-command-r-plus simple variant
    bool requires_non_null_content = false;
    // MiniMaxAI/MiniMax-Text-01 special
    bool requires_typed_content = false;
};
35
+
36
// Inputs to chat_template::apply(): an OpenAI-style conversation plus
// optional tools and extra variables exposed to the Jinja context.
struct chat_template_inputs {
    nlohmann::ordered_json messages;       // array of {role, content, ...} messages
    nlohmann::ordered_json tools;          // array of tool specs, or null for none
    bool add_generation_prompt = true;     // set as the template's `add_generation_prompt` variable
    nlohmann::ordered_json extra_context;  // each key/value is set as an additional template variable
    // Timestamp returned by the template's strftime_now() helper;
    // overridable so renders can be made reproducible in tests.
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
};
43
+
44
// Knobs controlling chat_template::apply(). Each polyfill_* flag only takes
// effect when the probed template capabilities (chat_template_caps) show the
// template actually needs that workaround.
struct chat_template_options {
    bool apply_polyfills = true;       // master switch: disable to render inputs verbatim
    bool use_bos_token = true;         // expose the real bos_token to the template ("" otherwise)
    bool use_eos_token = true;         // expose the real eos_token to the template ("" otherwise)
    bool define_strftime_now = true;   // define the strftime_now(format) template function

    bool polyfill_tools = true;               // describe tools in a system prompt if unsupported
    bool polyfill_tool_call_examples = true;  // append an inferred tool-call syntax example to that prompt
    bool polyfill_tool_calls = true;          // serialize tool_calls into message content if unsupported
    bool polyfill_tool_responses = true;      // rewrite tool messages as user messages if unsupported
    bool polyfill_system_role = true;         // fold system prompts into user messages if unsupported
    bool polyfill_object_arguments = true;    // parse stringified arguments into objects if required
    bool polyfill_typed_content = true;       // wrap string content as [{type:"text",...}] if required
};
58
+
59
// Wraps a Jinja-style chat template (parsed with minja), probes its
// capabilities at construction time, and renders OpenAI-style message arrays
// into prompts, applying polyfills for features the template lacks.
class chat_template {

  private:
    chat_template_caps caps_;                           // probed in the constructor
    std::string source_;                                // raw template source
    std::string bos_token_;
    std::string eos_token_;
    std::shared_ptr<minja::TemplateNode> template_root_; // parsed template AST
    std::string tool_call_example_;                      // inferred example, used by the tools polyfill

    // Renders the given messages/tools with all polyfills disabled and a fixed
    // timestamp; returns "" on any render error. Used only for capability probing.
    std::string try_raw_render(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
    {
        try {
            chat_template_inputs inputs;
            inputs.messages = messages;
            inputs.tools = tools;
            inputs.add_generation_prompt = add_generation_prompt;
            inputs.extra_context = extra_context;
            // Use fixed date for tests
            inputs.now = std::chrono::system_clock::from_time_t(0);

            chat_template_options opts;
            opts.apply_polyfills = false;

            auto prompt = apply(inputs, opts);
            // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
            return prompt;
        } catch (const std::exception & e) {
            // fprintf(stderr, "try_raw_render error: %s\n", e.what());
            return "";
        }
    }

  public:

    // Parses `source` and probes caps_ by rendering synthetic conversations
    // containing needle strings, then checking whether each needle survives
    // into the output. May write diagnostics to stderr but does not throw for
    // probe failures (try_raw_render swallows render errors).
    chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
        : source_(source), bos_token_(bos_token), eos_token_(eos_token)
    {
        template_root_ = minja::Parser::parse(source_, {
            /* .trim_blocks = */ true,
            /* .lstrip_blocks = */ true,
            /* .keep_trailing_newline = */ false,
        });

        auto contains = [](const std::string & haystack, const std::string & needle) {
            return haystack.find(needle) != std::string::npos;
        };

        const std::string user_needle = "<User Needle>";
        const std::string sys_needle = "<System Needle>";
        const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
        const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};

        // Typed content is required when a plain string drops the needle but
        // the [{type:"text",...}] form keeps it.
        caps_.requires_typed_content =
            !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
            && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);

        // All subsequent probes use whichever content shape the template accepts.
        const auto dummy_user_msg = caps_.requires_typed_content
            ? dummy_typed_user_msg
            : dummy_str_user_msg;
        const json needle_system_msg = {
            {"role", "system"},
            {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
        };

        caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);

        // Probe tools support with a minimal single-tool schema.
        auto out = try_raw_render(json::array({
            dummy_user_msg
        }), json::array({
            {
                {"name", "some_tool"},
                {"type", "function"},
                {"function", {
                    {"name", "some_tool"},
                    {"description", "Some tool."},
                    {"parameters", {
                        {"type", "object"},
                        {"properties", {
                            {"arg", {
                                {"type", "string"},
                                {"description", "Some argument."},
                            }},
                        }},
                        {"required", json::array({ "arg" })},
                    }},
                }},
            },
        }), false);
        caps_.supports_tools = contains(out, "some_tool");

        auto make_tool_calls_msg = [&](const json & tool_calls) {
            return json {
                {"role", "assistant"},
                {"content", nullptr},
                {"tool_calls", tool_calls},
            };
        };
        auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
            return json {
                {"id", "call_1___"},
                {"type", "function"},
                {"function", {
                    {"arguments", arguments},
                    {"name", tool_name},
                }},
            };
        };
        const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};

        // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
        // Probe with stringified arguments first, then with an object.
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
        }), {}, false);
        auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
        }), {}, false);
        auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");

        caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
        caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
        // Non-null content is required when "" renders the preceding user
        // needle but null makes the whole render fail/drop it.
        auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
        auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
        caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);

        if (caps_.supports_tool_calls) {
            auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
            auto tc1 = make_tool_call("test_tool1", dummy_args);
            auto tc2 = make_tool_call("test_tool2", dummy_args);
            auto out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1, tc2})),
            }), {}, false);
            caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");

            out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1})),
                {
                    {"role", "tool"},
                    {"name", "test_tool1"},
                    {"content", "Some response!"},
                    {"tool_call_id", "call_911_"},
                }
            }), {}, false);
            caps_.supports_tool_responses = contains(out, "Some response!");
            caps_.supports_tool_call_id = contains(out, "call_911_");
        }

        // For templates without native tool support, infer a tool-call example
        // by diffing a render with vs. without an assistant tool-call message;
        // the divergent suffix is stored in tool_call_example_ for the polyfill.
        try {
            if (!caps_.supports_tools) {
                const json user_msg {
                    {"role", "user"},
                    {"content", "Hey"},
                };
                const json args {
                    {"arg1", "some_value"},
                };
                const json tool_call_msg {
                    {"role", "assistant"},
                    {"content", nullptr},
                    {"tool_calls", json::array({
                        {
                            // TODO: detect if requires numerical id or fixed length == 6 like Nemo
                            {"id", "call_1___"},
                            {"type", "function"},
                            {"function", {
                                {"name", "tool_name"},
                                {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
                            }},
                        },
                    })},
                };
                std::string prefix, full;
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg});
                    inputs.add_generation_prompt = true;
                    prefix = apply(inputs);
                }
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg, tool_call_msg});
                    inputs.add_generation_prompt = false;
                    full = apply(inputs);
                }
                // Strip a trailing eos token (possibly followed by a newline)
                // so it doesn't end up in the example.
                auto eos_pos_last = full.rfind(eos_token_);
                if (eos_pos_last == prefix.size() - eos_token_.size() ||
                    (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
                    full = full.substr(0, eos_pos_last);
                }
                size_t common_prefix_length = 0;
                for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
                    if (prefix[i] != full[i]) {
                        break;
                    }
                    if (prefix[i] == '<') {
                        // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
                        // but it removes thinking tags for past messages.
                        // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
                        continue;
                    }
                    common_prefix_length = i + 1;
                }
                auto example = full.substr(common_prefix_length);
                if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                    fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
                } else {
                    tool_call_example_ = example;
                }
            }
        } catch (const std::exception & e) {
            fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
        }
    }

    // Accessors for the raw template source, special tokens, and probed caps.
    const std::string & source() const { return source_; }
    const std::string & bos_token() const { return bos_token_; }
    const std::string & eos_token() const { return eos_token_; }
    const chat_template_caps & original_caps() const { return caps_; }

    // Deprecated, please use the form with chat_template_inputs and chat_template_options
    std::string apply(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
        bool apply_polyfills = true)
    {
        fprintf(stderr, "[%s] Deprecated!\n", __func__);
        chat_template_inputs inputs;
        inputs.messages = messages;
        inputs.tools = tools;
        inputs.add_generation_prompt = add_generation_prompt;
        inputs.extra_context = extra_context;
        inputs.now = std::chrono::system_clock::now();

        chat_template_options opts;
        opts.apply_polyfills = apply_polyfills;

        return apply(inputs, opts);
    }

    // Renders inputs.messages through the template, first rewriting the
    // messages (per opts and caps_) to express features the template cannot
    // handle natively. Throws std::runtime_error if a message lacks 'role' or
    // 'content' when polyfills are applied; render errors propagate from minja.
    std::string apply(
        const chat_template_inputs & inputs,
        const chat_template_options & opts = chat_template_options()) const
    {
        json actual_messages;

        // Scan the conversation to see which polyfills could be relevant.
        auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
        auto has_tool_calls = false;
        auto has_tool_responses = false;
        auto has_string_content = false;
        for (const auto & message : inputs.messages) {
            if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
                has_tool_calls = true;
            }
            if (message.contains("role") && message["role"] == "tool") {
                has_tool_responses = true;
            }
            if (message.contains("content") && message["content"].is_string()) {
                has_string_content = true;
            }
        }

        // A polyfill applies only if the input needs it AND the template lacks it.
        auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
        auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
        auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
        auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
        auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
        auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
        auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;

        auto needs_polyfills = opts.apply_polyfills && (false
            || polyfill_system_role
            || polyfill_tools
            || polyfill_tool_calls
            || polyfill_tool_responses
            || polyfill_object_arguments
            || polyfill_typed_content
        );

        if (needs_polyfills) {
            actual_messages = json::array();

            // Appends msg, wrapping string content as typed content if required.
            auto add_message = [&](const json & msg) {
                if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
                    actual_messages.push_back({
                        {"role", msg.at("role")},
                        {"content", {{
                            {"type", "text"},
                            {"text", msg.at("content")},
                        }}},
                    });
                } else {
                    actual_messages.push_back(msg);
                }
            };

            // Accumulated system text awaiting a user message to merge into
            // (system-role polyfill); flushed as a standalone user message otherwise.
            std::string pending_system;
            auto flush_sys = [&]() {
                if (!pending_system.empty()) {
                    add_message({
                        {"role", "user"},
                        {"content", pending_system},
                    });
                    pending_system.clear();
                }
            };

            json adjusted_messages;
            if (polyfill_tools) {
                adjusted_messages = add_system(inputs.messages,
                    "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
                    (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
            } else {
                adjusted_messages = inputs.messages;
            }

            for (const auto & message_ : adjusted_messages) {
                auto message = message_;
                if (!message.contains("role") || !message.contains("content")) {
                    throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
                }
                std::string role = message.at("role");

                if (message.contains("tool_calls")) {
                    if (polyfill_object_arguments || polyfill_tool_calls) {
                        // Convert stringified JSON arguments into objects in place.
                        for (auto & tool_call : message.at("tool_calls")) {
                            if (tool_call["type"] == "function") {
                                auto & function = tool_call.at("function");
                                auto & arguments = function.at("arguments");
                                if (arguments.is_string()) {
                                    try {
                                        arguments = json::parse(arguments.get<std::string>());
                                    } catch (const std::exception & ecvt) {
                                        fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
                                    }
                                }
                            }
                        }
                    }
                    if (polyfill_tool_calls) {
                        // Serialize the tool calls into the message content and
                        // drop the unsupported tool_calls field.
                        auto content = message.at("content");
                        auto tool_calls = json::array();
                        for (const auto & tool_call : message.at("tool_calls")) {
                            if (tool_call.at("type") != "function") {
                                continue;
                            }
                            const auto & function = tool_call.at("function");
                            auto tc = json {
                                {"name", function.at("name")},
                                {"arguments", function.at("arguments")},
                            };
                            if (tool_call.contains("id")) {
                                tc["id"] = tool_call["id"];
                            }
                            tool_calls.push_back(tc);
                        }
                        auto obj = json {
                            {"tool_calls", tool_calls},
                        };
                        if (!content.is_null() && content != "") {
                            obj["content"] = content;
                        }
                        message["content"] = obj.dump(2);
                        message.erase("tool_calls");
                    }
                }
                if (polyfill_tool_responses && role == "tool") {
                    // Recast the tool response as a user message carrying a
                    // serialized tool_response object.
                    message["role"] = "user";
                    auto obj = json {
                        {"tool_response", {
                            {"content", message.at("content")},
                        }},
                    };
                    if (message.contains("name")) {
                        obj["tool_response"]["name"] = message.at("name");
                    }
                    if (message.contains("tool_call_id")) {
                        obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
                    }
                    message["content"] = obj.dump(2);
                    message.erase("name");
                }

                if (!message["content"].is_null() && polyfill_system_role) {
                    std::string content = message.at("content");
                    if (role == "system") {
                        // Buffer system text; it will be prepended to the next
                        // user message (or flushed as its own user message).
                        if (!pending_system.empty()) pending_system += "\n";
                        pending_system += content;
                        continue;
                    } else {
                        if (role == "user") {
                            if (!pending_system.empty()) {
                                message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
                                pending_system.clear();
                            }
                        } else {
                            flush_sys();
                        }
                    }
                }
                add_message(message);
            }
            flush_sys();
        } else {
            actual_messages = inputs.messages;
        }

        // Build the Jinja context and render.
        auto context = minja::Context::make(json({
            {"messages", actual_messages},
            {"add_generation_prompt", inputs.add_generation_prompt},
        }));
        context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
        context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
        if (opts.define_strftime_now) {
            auto now = inputs.now;
            // NOTE(review): std::localtime is not thread-safe on all platforms — confirm single-threaded use.
            context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
                args.expectArgs("strftime_now", {1, 1}, {0, 0});
                auto format = args.args[0].get<std::string>();

                auto time = std::chrono::system_clock::to_time_t(now);
                auto local_time = *std::localtime(&time);
                std::ostringstream ss;
                ss << std::put_time(&local_time, format.c_str());
                return ss.str();
            }));
        }
        if (!inputs.tools.is_null()) {
            context->set("tools", minja::Value(inputs.tools));
        }
        if (!inputs.extra_context.is_null()) {
            for (auto & kv : inputs.extra_context.items()) {
                context->set(kv.key(), minja::Value(kv.value()));
            }
        }

        auto ret = template_root_->render(context);
        // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
        // fprintf(stderr, "apply: %s\n\n", ret.c_str());
        return ret;
    }

    // Returns a copy of `messages` with `system_prompt` merged into the
    // leading system message, or prepended as a new one if there is none.
    static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
        json messages_with_system = messages;

        if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
            std::string existing_system = messages_with_system.at(0).at("content");
            messages_with_system[0] = json {
                {"role", "system"},
                {"content", existing_system + "\n\n" + system_prompt},
            };
        } else {
            messages_with_system.insert(messages_with_system.begin(), json {
                {"role", "system"},
                {"content", system_prompt},
            });
        }
        return messages_with_system;
    }
};
528
+
529
+ } // namespace minja