cui-llama.rn 1.4.3 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134):
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
@@ -0,0 +1,529 @@
1
+ /*
2
+ Copyright 2024 Google LLC
3
+
4
+ Use of this source code is governed by an MIT-style
5
+ license that can be found in the LICENSE file or at
6
+ https://opensource.org/licenses/MIT.
7
+ */
8
+ // SPDX-License-Identifier: MIT
9
+ #pragma once
10
+
11
+ #include "minja.hpp"
12
+ #include "json.hpp"
13
+ #include <string>
14
+ #include <vector>
15
+
16
+ using json = nlohmann::ordered_json;
17
+
18
+ namespace minja {
19
+
20
// Feature flags describing what a given chat template can natively render.
// All flags start pessimistic (false) and are flipped on by the probing done
// in the chat_template constructor.
struct chat_template_caps {
    bool supports_tools               {false};
    bool supports_tool_calls          {false};
    bool supports_tool_responses      {false};
    bool supports_system_role         {false};
    bool supports_parallel_tool_calls {false};
    bool supports_tool_call_id        {false};
    // meta-llama/Llama-3.1-8B-Instruct expects tool-call arguments as a JSON
    // object; most other templates (and OpenAI's API) expect a stringified
    // arguments object instead.
    bool requires_object_arguments    {false};
    // Needed by the simple variant of CohereForAI/c4ai-command-r-plus, which
    // drops messages whose content is null.
    bool requires_non_null_content    {false};
    // MiniMaxAI/MiniMax-Text-01 special case: content must be a typed-part
    // array ([{"type": "text", ...}]) rather than a bare string.
    bool requires_typed_content       {false};
};
35
+
36
+ struct chat_template_inputs {
37
+ nlohmann::ordered_json messages;
38
+ nlohmann::ordered_json tools;
39
+ bool add_generation_prompt = true;
40
+ nlohmann::ordered_json extra_context;
41
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
42
+ };
43
+
44
// Knobs controlling how chat_template::apply renders, and which polyfills it
// is allowed to use. Everything defaults to enabled.
struct chat_template_options {
    bool apply_polyfills     {true};
    bool use_bos_token       {true};
    bool use_eos_token       {true};
    bool define_strftime_now {true};

    // Individual polyfill switches, honored only when apply_polyfills is set.
    bool polyfill_tools              {true};
    bool polyfill_tool_call_examples {true};
    bool polyfill_tool_calls         {true};
    bool polyfill_tool_responses     {true};
    bool polyfill_system_role        {true};
    bool polyfill_object_arguments   {true};
    bool polyfill_typed_content      {true};
};
58
+
59
/**
 * Wraps a Jinja-style chat template and probes it at construction time for
 * the features it supports (tools, tool calls/responses, system role, typed
 * content, ...). apply() then renders a message list, transparently
 * "polyfilling" any unsupported feature by rewriting messages into forms the
 * template can handle.
 */
class chat_template {

  private:
    // Capabilities inferred by the constructor's trial renders.
    chat_template_caps caps_;
    std::string source_;
    std::string bos_token_;
    std::string eos_token_;
    std::shared_ptr<minja::TemplateNode> template_root_;
    // Example tool-call snippet inferred from the template itself; appended
    // to the polyfilled tools system prompt when available.
    std::string tool_call_example_;

    // Renders `messages` with all polyfills disabled, returning "" on any
    // error. Used only for capability probing: a probe "succeeds" when the
    // needle string survives into the rendered output.
    std::string try_raw_render(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
    {
        try {
            chat_template_inputs inputs;
            inputs.messages = messages;
            inputs.tools = tools;
            inputs.add_generation_prompt = add_generation_prompt;
            inputs.extra_context = extra_context;
            // Use fixed date for tests
            inputs.now = std::chrono::system_clock::from_time_t(0);

            chat_template_options opts;
            opts.apply_polyfills = false;

            auto prompt = apply(inputs, opts);
            // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
            return prompt;
        } catch (const std::exception & e) {
            // Swallow render errors: a failed probe simply means "needle not
            // found", i.e. the capability is treated as unsupported.
            // fprintf(stderr, "try_raw_render error: %s\n", e.what());
            return "";
        }
    }

  public:

    // Parses `source` and runs a series of trial renders with needle strings
    // to populate caps_. May render the template many times; construction is
    // therefore relatively expensive and templates should be reused.
    chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
        : source_(source), bos_token_(bos_token), eos_token_(eos_token)
    {
        template_root_ = minja::Parser::parse(source_, {
            /* .trim_blocks = */ true,
            /* .lstrip_blocks = */ true,
            /* .keep_trailing_newline = */ false,
        });

        auto contains = [](const std::string & haystack, const std::string & needle) {
            return haystack.find(needle) != std::string::npos;
        };

        const std::string user_needle = "<User Needle>";
        const std::string sys_needle = "<System Needle>";
        const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
        const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};

        // Typed content is required iff a plain string content is dropped but
        // the typed-part form is rendered.
        caps_.requires_typed_content =
            !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
            && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);

        const auto dummy_user_msg = caps_.requires_typed_content
            ? dummy_typed_user_msg
            : dummy_str_user_msg;
        const json needle_system_msg = {
            {"role", "system"},
            {"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
        };

        caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);

        // Probe tool support with a minimal OpenAI-style function schema.
        auto out = try_raw_render(json::array({
            dummy_user_msg
        }), json::array({
            {
                {"name", "some_tool"},
                {"type", "function"},
                {"function", {
                    {"name", "some_tool"},
                    {"description", "Some tool."},
                    {"parameters", {
                        {"type", "object"},
                        {"properties", {
                            {"arg", {
                                {"type", "string"},
                                {"description", "Some argument."},
                            }},
                        }},
                        {"required", json::array({ "arg" })},
                    }},
                }},
            },
        }), false);
        caps_.supports_tools = contains(out, "some_tool");

        auto make_tool_calls_msg = [&](const json & tool_calls) {
            return json {
                {"role", "assistant"},
                {"content", nullptr},
                {"tool_calls", tool_calls},
            };
        };
        auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
            return json {
                {"id", "call_1___"},
                {"type", "function"},
                {"function", {
                    {"arguments", arguments},
                    {"name", tool_name},
                }},
            };
        };
        const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};

        // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
        }), {}, false);
        auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
        out = try_raw_render(json::array({
            dummy_user_msg,
            make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
        }), {}, false);
        auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");

        caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
        caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
        // Non-null content is required iff an empty-string assistant message
        // renders fine but a null-content one makes the render fail/drop.
        auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
        auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
        caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);

        if (caps_.supports_tool_calls) {
            // Probe parallel tool calls and tool-response / tool_call_id
            // handling, using the argument style detected above.
            auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
            auto tc1 = make_tool_call("test_tool1", dummy_args);
            auto tc2 = make_tool_call("test_tool2", dummy_args);
            auto out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1, tc2})),
            }), {}, false);
            caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");

            out = try_raw_render(json::array({
                dummy_user_msg,
                make_tool_calls_msg(json::array({tc1})),
                {
                    {"role", "tool"},
                    {"name", "test_tool1"},
                    {"content", "Some response!"},
                    {"tool_call_id", "call_911_"},
                }
            }), {}, false);
            caps_.supports_tool_responses = contains(out, "Some response!");
            caps_.supports_tool_call_id = contains(out, "call_911_");
        }

        try {
            if (!caps_.supports_tools) {
                // Template has no native tool support: render a synthetic
                // tool-call turn and diff it against a plain prompt to infer
                // an example tool-call syntax for the polyfilled system
                // prompt.
                const json user_msg {
                    {"role", "user"},
                    {"content", "Hey"},
                };
                const json args {
                    {"arg1", "some_value"},
                };
                const json tool_call_msg {
                    {"role", "assistant"},
                    {"content", nullptr},
                    {"tool_calls", json::array({
                        {
                            // TODO: detect if requires numerical id or fixed length == 6 like Nemo
                            {"id", "call_1___"},
                            {"type", "function"},
                            {"function", {
                                {"name", "tool_name"},
                                {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
                            }},
                        },
                    })},
                };
                std::string prefix, full;
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg});
                    inputs.add_generation_prompt = true;
                    prefix = apply(inputs);
                }
                {
                    chat_template_inputs inputs;
                    inputs.messages = json::array({user_msg, tool_call_msg});
                    inputs.add_generation_prompt = false;
                    full = apply(inputs);
                }
                // Strip a trailing EOS token (possibly followed by a newline)
                // so it does not end up inside the example.
                auto eos_pos_last = full.rfind(eos_token_);
                if (eos_pos_last == prefix.size() - eos_token_.size() ||
                    (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
                    full = full.substr(0, eos_pos_last);
                }
                size_t common_prefix_length = 0;
                for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
                    if (prefix[i] != full[i]) {
                        break;
                    }
                    if (prefix[i] == '<') {
                        // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
                        // but it removes thinking tags for past messages.
                        // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
                        continue;
                    }
                    common_prefix_length = i + 1;
                }
                auto example = full.substr(common_prefix_length);
                if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
                    fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
                } else {
                    tool_call_example_ = example;
                }
            }
        } catch (const std::exception & e) {
            // Best-effort: an example is a nice-to-have; never fail
            // construction over it.
            fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
        }
    }

    const std::string & source() const { return source_; }
    const std::string & bos_token() const { return bos_token_; }
    const std::string & eos_token() const { return eos_token_; }
    // Capabilities as detected at construction; not affected by polyfills.
    const chat_template_caps & original_caps() const { return caps_; }

    // Deprecated, please use the form with chat_template_inputs and chat_template_options
    std::string apply(
        const nlohmann::ordered_json & messages,
        const nlohmann::ordered_json & tools,
        bool add_generation_prompt,
        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
        bool apply_polyfills = true)
    {
        fprintf(stderr, "[%s] Deprecated!\n", __func__);
        chat_template_inputs inputs;
        inputs.messages = messages;
        inputs.tools = tools;
        inputs.add_generation_prompt = add_generation_prompt;
        inputs.extra_context = extra_context;
        inputs.now = std::chrono::system_clock::now();

        chat_template_options opts;
        opts.apply_polyfills = apply_polyfills;

        return apply(inputs, opts);
    }

    /**
     * Renders the given messages/tools into a prompt string.
     *
     * When opts.apply_polyfills is on, messages using features the template
     * does not support (per caps_) are rewritten first: tool schemas get
     * folded into a system prompt, tool calls/responses become JSON text
     * content, system messages get merged into user turns, and string
     * content is wrapped as typed parts where required.
     */
    std::string apply(
        const chat_template_inputs & inputs,
        const chat_template_options & opts = chat_template_options()) const
    {
        json actual_messages;

        // First pass: figure out which features this request actually uses.
        auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
        auto has_tool_calls = false;
        auto has_tool_responses = false;
        auto has_string_content = false;
        for (const auto & message : inputs.messages) {
            if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
                has_tool_calls = true;
            }
            if (message.contains("role") && message["role"] == "tool") {
                has_tool_responses = true;
            }
            if (message.contains("content") && message["content"].is_string()) {
                has_string_content = true;
            }
        }

        // A polyfill is active only if requested, needed by the input, and
        // not natively supported by the template.
        auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
        auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
        auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
        auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
        auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
        auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
        auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;

        auto needs_polyfills = opts.apply_polyfills && (false
            || polyfill_system_role
            || polyfill_tools
            || polyfill_tool_calls
            || polyfill_tool_responses
            || polyfill_object_arguments
            || polyfill_typed_content
        );

        if (needs_polyfills) {
            actual_messages = json::array();

            // Appends a message, wrapping string content as a typed text
            // part when the template requires it.
            auto add_message = [&](const json & msg) {
                if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
                    actual_messages.push_back({
                        {"role", msg.at("role")},
                        {"content", {{
                            {"type", "text"},
                            {"text", msg.at("content")},
                        }}},
                    });
                } else {
                    actual_messages.push_back(msg);
                }
            };

            // Accumulated system text awaiting a user turn to merge into.
            std::string pending_system;
            auto flush_sys = [&]() {
                if (!pending_system.empty()) {
                    add_message({
                        {"role", "user"},
                        {"content", pending_system},
                    });
                    pending_system.clear();
                }
            };

            json adjusted_messages;
            if (polyfill_tools) {
                adjusted_messages = add_system(inputs.messages,
                    "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
                    (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
            } else {
                adjusted_messages = inputs.messages;
            }

            for (const auto & message_ : adjusted_messages) {
                auto message = message_;
                if (!message.contains("role") || !message.contains("content")) {
                    throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
                }
                std::string role = message.at("role");

                if (message.contains("tool_calls")) {
                    if (polyfill_object_arguments || polyfill_tool_calls) {
                        // Normalize stringified arguments back into objects.
                        for (auto & tool_call : message.at("tool_calls")) {
                            if (tool_call["type"] == "function") {
                                auto & function = tool_call.at("function");
                                auto & arguments = function.at("arguments");
                                if (arguments.is_string()) {
                                    try {
                                        arguments = json::parse(arguments.get<std::string>());
                                    } catch (const std::exception & ecvt) {
                                        fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
                                    }
                                }
                            }
                        }
                    }
                    if (polyfill_tool_calls) {
                        // Template can't render tool calls: serialize them as
                        // a JSON blob inside the assistant's content.
                        auto content = message.at("content");
                        auto tool_calls = json::array();
                        for (const auto & tool_call : message.at("tool_calls")) {
                            if (tool_call.at("type") != "function") {
                                continue;
                            }
                            const auto & function = tool_call.at("function");
                            auto tc = json {
                                {"name", function.at("name")},
                                {"arguments", function.at("arguments")},
                            };
                            if (tool_call.contains("id")) {
                                tc["id"] = tool_call["id"];
                            }
                            tool_calls.push_back(tc);
                        }
                        auto obj = json {
                            {"tool_calls", tool_calls},
                        };
                        if (!content.is_null() && content != "") {
                            obj["content"] = content;
                        }
                        message["content"] = obj.dump(2);
                        message.erase("tool_calls");
                    }
                }
                if (polyfill_tool_responses && role == "tool") {
                    // Template can't render tool results: present them as a
                    // user message carrying a JSON "tool_response" blob.
                    message["role"] = "user";
                    auto obj = json {
                        {"tool_response", {
                            {"content", message.at("content")},
                        }},
                    };
                    if (message.contains("name")) {
                        obj["tool_response"]["name"] = message.at("name");
                    }
                    if (message.contains("tool_call_id")) {
                        obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
                    }
                    message["content"] = obj.dump(2);
                    message.erase("name");
                }

                if (!message["content"].is_null() && polyfill_system_role) {
                    // Template can't render system messages: buffer system
                    // text and prepend it to the next user message (or emit
                    // it as its own user turn before other roles).
                    std::string content = message.at("content");
                    if (role == "system") {
                        if (!pending_system.empty()) pending_system += "\n";
                        pending_system += content;
                        continue;
                    } else {
                        if (role == "user") {
                            if (!pending_system.empty()) {
                                message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
                                pending_system.clear();
                            }
                        } else {
                            flush_sys();
                        }
                    }
                }
                add_message(message);
            }
            flush_sys();
        } else {
            actual_messages = inputs.messages;
        }

        auto context = minja::Context::make(json({
            {"messages", actual_messages},
            {"add_generation_prompt", inputs.add_generation_prompt},
        }));
        context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
        context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
        if (opts.define_strftime_now) {
            auto now = inputs.now;
            // NOTE(review): std::localtime is not thread-safe / uses the
            // process locale — confirm acceptable for concurrent renders.
            context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
                args.expectArgs("strftime_now", {1, 1}, {0, 0});
                auto format = args.args[0].get<std::string>();

                auto time = std::chrono::system_clock::to_time_t(now);
                auto local_time = *std::localtime(&time);
                std::ostringstream ss;
                ss << std::put_time(&local_time, format.c_str());
                return ss.str();
            }));
        }
        if (!inputs.tools.is_null()) {
            context->set("tools", minja::Value(inputs.tools));
        }
        if (!inputs.extra_context.is_null()) {
            for (auto & kv : inputs.extra_context.items()) {
                context->set(kv.key(), minja::Value(kv.value()));
            }
        }

        auto ret = template_root_->render(context);
        // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
        // fprintf(stderr, "apply: %s\n\n", ret.c_str());
        return ret;
    }

    // Returns `messages` with `system_prompt` merged into the leading system
    // message (appended after a blank line) or inserted as a new one.
    static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
        json messages_with_system = messages;

        if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
            std::string existing_system = messages_with_system.at(0).at("content");
            messages_with_system[0] = json {
                {"role", "system"},
                {"content", existing_system + "\n\n" + system_prompt},
            };
        } else {
            messages_with_system.insert(messages_with_system.begin(), json {
                {"role", "system"},
                {"content", system_prompt},
            });
        }
        return messages_with_system;
    }
};
528
+
529
+ } // namespace minja