@fugood/llama.node 1.4.14 → 1.5.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +13 -6
- package/lib/index.js +2 -2
- package/lib/index.ts +8 -3
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +77 -65
- package/src/LlamaContext.cpp +31 -34
- package/src/llama.cpp/CMakeLists.txt +24 -8
- package/src/llama.cpp/common/CMakeLists.txt +15 -34
- package/src/llama.cpp/common/arg.cpp +59 -10
- package/src/llama.cpp/common/chat-parser.cpp +115 -0
- package/src/llama.cpp/common/chat.cpp +356 -34
- package/src/llama.cpp/common/chat.h +17 -13
- package/src/llama.cpp/common/common.cpp +0 -1
- package/src/llama.cpp/common/common.h +30 -25
- package/src/llama.cpp/common/debug.cpp +165 -0
- package/src/llama.cpp/common/debug.h +43 -0
- package/src/llama.cpp/common/download.cpp +12 -342
- package/src/llama.cpp/common/download.h +6 -0
- package/src/llama.cpp/common/jinja/caps.cpp +237 -0
- package/src/llama.cpp/common/jinja/caps.h +24 -0
- package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
- package/src/llama.cpp/common/jinja/lexer.h +157 -0
- package/src/llama.cpp/common/jinja/parser.cpp +591 -0
- package/src/llama.cpp/common/jinja/parser.h +21 -0
- package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
- package/src/llama.cpp/common/jinja/runtime.h +628 -0
- package/src/llama.cpp/common/jinja/string.cpp +207 -0
- package/src/llama.cpp/common/jinja/string.h +58 -0
- package/src/llama.cpp/common/jinja/utils.h +49 -0
- package/src/llama.cpp/common/jinja/value.cpp +1221 -0
- package/src/llama.cpp/common/jinja/value.h +464 -0
- package/src/llama.cpp/common/preset.cpp +12 -2
- package/src/llama.cpp/common/sampling.cpp +52 -19
- package/src/llama.cpp/ggml/include/ggml.h +39 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
- package/src/llama.cpp/include/llama-cpp.h +3 -1
- package/src/llama.cpp/include/llama.h +29 -2
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +7 -13
- package/src/llama.cpp/src/llama-adapter.h +1 -3
- package/src/llama.cpp/src/llama-arch.cpp +35 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +20 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +232 -144
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +31 -43
- package/src/llama.cpp/src/llama-hparams.cpp +0 -36
- package/src/llama.cpp/src/llama-hparams.h +38 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
- package/src/llama.cpp/src/llama-kv-cache.h +0 -2
- package/src/llama.cpp/src/llama-mmap.cpp +13 -6
- package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
- package/src/llama.cpp/src/llama-model.cpp +215 -97
- package/src/llama.cpp/src/llama-model.h +3 -2
- package/src/llama.cpp/src/llama-sampling.cpp +170 -13
- package/src/llama.cpp/src/llama-vocab.cpp +37 -24
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
- package/src/llama.cpp/src/models/models.h +13 -2
- package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
|
@@ -7,6 +7,14 @@
|
|
|
7
7
|
#include "log.h"
|
|
8
8
|
#include "regex-partial.h"
|
|
9
9
|
|
|
10
|
+
// #include <minja/chat-template.hpp>
|
|
11
|
+
// #include <minja/minja.hpp>
|
|
12
|
+
|
|
13
|
+
#include "jinja/parser.h"
|
|
14
|
+
#include "jinja/value.h"
|
|
15
|
+
#include "jinja/runtime.h"
|
|
16
|
+
#include "jinja/caps.h"
|
|
17
|
+
|
|
10
18
|
#include <algorithm>
|
|
11
19
|
#include <cstdio>
|
|
12
20
|
#include <cctype>
|
|
@@ -132,6 +140,77 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
|
|
|
132
140
|
return diffs;
|
|
133
141
|
}
|
|
134
142
|
|
|
143
|
+
using chat_template_caps = jinja::caps;
|
|
144
|
+
|
|
145
|
+
struct common_chat_template {
|
|
146
|
+
jinja::program prog;
|
|
147
|
+
std::string bos_tok;
|
|
148
|
+
std::string eos_tok;
|
|
149
|
+
std::string src;
|
|
150
|
+
chat_template_caps caps;
|
|
151
|
+
|
|
152
|
+
common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
|
|
153
|
+
jinja::lexer lexer;
|
|
154
|
+
auto lexer_res = lexer.tokenize(src);
|
|
155
|
+
this->prog = jinja::parse_from_tokens(lexer_res);
|
|
156
|
+
|
|
157
|
+
this->src = lexer_res.source;
|
|
158
|
+
this->bos_tok = bos_token;
|
|
159
|
+
this->eos_tok = eos_token;
|
|
160
|
+
|
|
161
|
+
this->caps = jinja::caps_get(prog);
|
|
162
|
+
// LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
const std::string & source() const { return src; }
|
|
166
|
+
const std::string & bos_token() const { return bos_tok; }
|
|
167
|
+
const std::string & eos_token() const { return eos_tok; }
|
|
168
|
+
|
|
169
|
+
// TODO: this is ugly, refactor it somehow
|
|
170
|
+
json add_system(const json & messages, const std::string & system_prompt) const {
|
|
171
|
+
GGML_ASSERT(messages.is_array());
|
|
172
|
+
auto msgs_copy = messages;
|
|
173
|
+
if (!caps.supports_system_role) {
|
|
174
|
+
if (msgs_copy.empty()) {
|
|
175
|
+
msgs_copy.insert(msgs_copy.begin(), json{
|
|
176
|
+
{"role", "user"},
|
|
177
|
+
{"content", system_prompt}
|
|
178
|
+
});
|
|
179
|
+
} else {
|
|
180
|
+
auto & first_msg = msgs_copy[0];
|
|
181
|
+
if (!first_msg.contains("content")) {
|
|
182
|
+
first_msg["content"] = "";
|
|
183
|
+
}
|
|
184
|
+
first_msg["content"] = system_prompt + "\n\n"
|
|
185
|
+
+ first_msg["content"].get<std::string>();
|
|
186
|
+
}
|
|
187
|
+
} else {
|
|
188
|
+
if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
|
|
189
|
+
msgs_copy.insert(msgs_copy.begin(), json{
|
|
190
|
+
{"role", "system"},
|
|
191
|
+
{"content", system_prompt}
|
|
192
|
+
});
|
|
193
|
+
} else if (msgs_copy[0].at("role") == "system") {
|
|
194
|
+
msgs_copy[0]["content"] = system_prompt;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
return msgs_copy;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
chat_template_caps original_caps() const {
|
|
201
|
+
return caps;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
};
|
|
205
|
+
|
|
206
|
+
struct common_chat_templates {
|
|
207
|
+
bool add_bos;
|
|
208
|
+
bool add_eos;
|
|
209
|
+
bool has_explicit_template; // Model had builtin template or template overridde was specified.
|
|
210
|
+
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
|
|
211
|
+
std::unique_ptr<common_chat_template> template_tool_use;
|
|
212
|
+
};
|
|
213
|
+
|
|
135
214
|
struct templates_params {
|
|
136
215
|
json messages;
|
|
137
216
|
json tools;
|
|
@@ -148,6 +227,7 @@ struct templates_params {
|
|
|
148
227
|
bool add_bos;
|
|
149
228
|
bool add_eos;
|
|
150
229
|
bool is_inference = true;
|
|
230
|
+
bool mark_input = true; // whether to mark input strings in the jinja context
|
|
151
231
|
};
|
|
152
232
|
|
|
153
233
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
|
|
@@ -521,18 +601,49 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
|
|
|
521
601
|
return tmpls->has_explicit_template;
|
|
522
602
|
}
|
|
523
603
|
|
|
524
|
-
|
|
525
|
-
if (variant
|
|
526
|
-
if (
|
|
604
|
+
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
605
|
+
if (!variant.empty()) {
|
|
606
|
+
if (variant == "tool_use") {
|
|
527
607
|
if (tmpls->template_tool_use) {
|
|
528
|
-
return tmpls->template_tool_use->source()
|
|
608
|
+
return tmpls->template_tool_use->source();
|
|
529
609
|
}
|
|
530
|
-
return
|
|
610
|
+
return "";
|
|
531
611
|
} else {
|
|
532
|
-
LOG_DBG("%s: unknown template variant: %s\n", __func__, variant);
|
|
612
|
+
LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
|
|
533
613
|
}
|
|
534
614
|
}
|
|
535
|
-
return tmpls->template_default->source()
|
|
615
|
+
return tmpls->template_default->source();
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
619
|
+
common_chat_template_caps result;
|
|
620
|
+
const common_chat_template * tmpl = nullptr;
|
|
621
|
+
|
|
622
|
+
if (!variant.empty() && variant == "tool_use") {
|
|
623
|
+
tmpl = tmpls->template_tool_use.get();
|
|
624
|
+
} else {
|
|
625
|
+
tmpl = tmpls->template_default.get();
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
if (tmpl) {
|
|
629
|
+
auto caps = tmpl->original_caps();
|
|
630
|
+
result.supports_tools = caps.supports_tools;
|
|
631
|
+
result.supports_tool_calls = caps.supports_tool_calls;
|
|
632
|
+
result.supports_system_role = caps.supports_system_role;
|
|
633
|
+
result.supports_parallel_tool_calls = caps.supports_parallel_tool_calls;
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
return result;
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant) {
|
|
640
|
+
if (variant.empty() || variant == "default") {
|
|
641
|
+
return tmpls->template_default != nullptr;
|
|
642
|
+
}
|
|
643
|
+
if (variant == "tool_use") {
|
|
644
|
+
return tmpls->template_tool_use != nullptr;
|
|
645
|
+
}
|
|
646
|
+
return false;
|
|
536
647
|
}
|
|
537
648
|
|
|
538
649
|
common_chat_templates_ptr common_chat_templates_init(
|
|
@@ -614,14 +725,16 @@ common_chat_templates_ptr common_chat_templates_init(
|
|
|
614
725
|
tmpls->add_bos = add_bos;
|
|
615
726
|
tmpls->add_eos = add_eos;
|
|
616
727
|
try {
|
|
617
|
-
tmpls->template_default = std::make_unique<
|
|
728
|
+
tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
|
|
618
729
|
} catch (const std::exception & e) {
|
|
619
|
-
LOG_ERR("%s:
|
|
620
|
-
|
|
730
|
+
LOG_ERR("%s: error: %s\n", __func__, e.what());
|
|
731
|
+
LOG_ERR("%s: failed to initialize chat template\n", __func__);
|
|
732
|
+
LOG_ERR("%s: please consider disabling jinja via --no-jinja, or using another chat template\n", __func__);
|
|
733
|
+
throw e;
|
|
621
734
|
}
|
|
622
735
|
if (!template_tool_use_src.empty()) {
|
|
623
736
|
try {
|
|
624
|
-
tmpls->template_tool_use = std::make_unique<
|
|
737
|
+
tmpls->template_tool_use = std::make_unique<common_chat_template>(template_tool_use_src, token_bos, token_eos);
|
|
625
738
|
} catch (const std::exception & e) {
|
|
626
739
|
LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n", __func__, e.what());
|
|
627
740
|
}
|
|
@@ -657,6 +770,7 @@ const char * common_chat_format_name(common_chat_format format) {
|
|
|
657
770
|
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
|
658
771
|
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
|
659
772
|
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
|
|
773
|
+
case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
|
|
660
774
|
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
|
|
661
775
|
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
|
|
662
776
|
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
|
|
@@ -725,27 +839,44 @@ static std::string apply(
|
|
|
725
839
|
const std::optional<json> & tools_override = std::nullopt,
|
|
726
840
|
const std::optional<json> & additional_context = std::nullopt)
|
|
727
841
|
{
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
842
|
+
jinja::context ctx(tmpl.source());
|
|
843
|
+
|
|
844
|
+
nlohmann::ordered_json inp = nlohmann::ordered_json{
|
|
845
|
+
{"messages", messages_override.has_value() ? *messages_override : inputs.messages},
|
|
846
|
+
{"tools", tools_override.has_value() ? *tools_override : inputs.tools},
|
|
847
|
+
{"bos_token", tmpl.bos_token()},
|
|
848
|
+
{"eos_token", tmpl.eos_token()},
|
|
849
|
+
};
|
|
850
|
+
if (inputs.extra_context.is_object()) {
|
|
851
|
+
// TODO: do we need to merge, or replacing is fine?
|
|
852
|
+
for (const auto & [k, v] : inputs.extra_context.items()) {
|
|
853
|
+
inp[k] = v;
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
if (additional_context.has_value()) {
|
|
857
|
+
// TODO: merge properly instead of overwriting (matching old behavior)
|
|
858
|
+
for (const auto & [k, v] : additional_context->items()) {
|
|
859
|
+
inp[k] = v;
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
if (inputs.add_generation_prompt) {
|
|
863
|
+
inp["add_generation_prompt"] = true;
|
|
864
|
+
}
|
|
865
|
+
// Remove tools key when null, so templates can check "{% if tools is defined %}"
|
|
866
|
+
if (inp["tools"].is_null() || (inp["tools"].is_array() && inp["tools"].empty())) {
|
|
867
|
+
inp.erase("tools");
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
jinja::global_from_json(ctx, inp, inputs.mark_input);
|
|
871
|
+
|
|
872
|
+
// render
|
|
873
|
+
jinja::runtime runtime(ctx);
|
|
874
|
+
const jinja::value results = runtime.execute(tmpl.prog);
|
|
875
|
+
auto parts = runtime.gather_string_parts(results);
|
|
876
|
+
|
|
877
|
+
std::string result = parts->as_string().str();
|
|
878
|
+
|
|
879
|
+
// TODO: improve this later
|
|
749
880
|
if (inputs.add_bos && string_starts_with(result, tmpl.bos_token())) {
|
|
750
881
|
result = result.substr(tmpl.bos_token().size());
|
|
751
882
|
}
|
|
@@ -832,10 +963,17 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
|
|
|
832
963
|
builder.add_schema("root", schema);
|
|
833
964
|
});
|
|
834
965
|
|
|
835
|
-
auto tweaked_messages =
|
|
966
|
+
auto tweaked_messages = tmpl.add_system(
|
|
836
967
|
inputs.messages,
|
|
837
968
|
"Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
|
|
838
969
|
|
|
970
|
+
// ensure all messages has "content" field
|
|
971
|
+
for (auto & message : tweaked_messages) {
|
|
972
|
+
if (!message.contains("content") || message["content"].is_null()) {
|
|
973
|
+
message["content"] = "";
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
|
|
839
977
|
data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
|
|
840
978
|
data.format = COMMON_CHAT_FORMAT_GENERIC;
|
|
841
979
|
return data;
|
|
@@ -1350,7 +1488,7 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
|
|
|
1350
1488
|
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
|
|
1351
1489
|
{"date_string", format_time(inputs.now, "%d %b %Y")},
|
|
1352
1490
|
{"tools_in_user_message", false},
|
|
1353
|
-
{"builtin_tools", builtin_tools
|
|
1491
|
+
{"builtin_tools", builtin_tools},
|
|
1354
1492
|
});
|
|
1355
1493
|
return data;
|
|
1356
1494
|
}
|
|
@@ -2526,6 +2664,65 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t
|
|
|
2526
2664
|
return data;
|
|
2527
2665
|
}
|
|
2528
2666
|
|
|
2667
|
+
static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
2668
|
+
common_chat_params data;
|
|
2669
|
+
|
|
2670
|
+
data.prompt = apply(tmpl, inputs);
|
|
2671
|
+
data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
|
|
2672
|
+
if (string_ends_with(data.prompt, "<think>\n")) {
|
|
2673
|
+
if (!inputs.enable_thinking) {
|
|
2674
|
+
data.prompt += "</think>\n\n";
|
|
2675
|
+
} else {
|
|
2676
|
+
data.thinking_forced_open = true;
|
|
2677
|
+
}
|
|
2678
|
+
}
|
|
2679
|
+
|
|
2680
|
+
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
|
2681
|
+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
|
|
2682
|
+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
2683
|
+
std::vector<std::string> tool_rules;
|
|
2684
|
+
foreach_function(inputs.tools, [&](const json & tool) {
|
|
2685
|
+
const auto & function = tool.at("function");
|
|
2686
|
+
std::string name = function.at("name");
|
|
2687
|
+
auto parameters = function.at("parameters");
|
|
2688
|
+
builder.resolve_refs(parameters);
|
|
2689
|
+
// Expect: <tool_call>{"name": "<name>", "arguments": {...}}</tool_call>
|
|
2690
|
+
tool_rules.push_back(builder.add_rule(
|
|
2691
|
+
name + "-call",
|
|
2692
|
+
"\"<tool_call>\" space " +
|
|
2693
|
+
builder.add_schema(name + "-obj", json{
|
|
2694
|
+
{"type", "object"},
|
|
2695
|
+
{"properties", {
|
|
2696
|
+
{"name", json{{"const", name}}},
|
|
2697
|
+
{"arguments", parameters},
|
|
2698
|
+
}},
|
|
2699
|
+
{"required", json::array({"name", "arguments"})},
|
|
2700
|
+
}) +
|
|
2701
|
+
" space \"</tool_call>\" space"));
|
|
2702
|
+
});
|
|
2703
|
+
|
|
2704
|
+
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
|
|
2705
|
+
builder.add_rule("root",
|
|
2706
|
+
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
|
2707
|
+
(inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
|
|
2708
|
+
|
|
2709
|
+
data.grammar_triggers.push_back({
|
|
2710
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
2711
|
+
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)?" : "") +
|
|
2712
|
+
"(<tool_call>)[\\s\\S]*"
|
|
2713
|
+
});
|
|
2714
|
+
data.preserved_tokens = {
|
|
2715
|
+
"<think>",
|
|
2716
|
+
"</think>",
|
|
2717
|
+
"<tool_call>",
|
|
2718
|
+
"</tool_call>",
|
|
2719
|
+
};
|
|
2720
|
+
});
|
|
2721
|
+
}
|
|
2722
|
+
|
|
2723
|
+
return data;
|
|
2724
|
+
}
|
|
2725
|
+
|
|
2529
2726
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
2530
2727
|
common_chat_params data;
|
|
2531
2728
|
data.prompt = apply(tmpl, inputs);
|
|
@@ -2596,6 +2793,107 @@ static common_chat_params common_chat_params_init_seed_oss(
|
|
|
2596
2793
|
return data;
|
|
2597
2794
|
}
|
|
2598
2795
|
|
|
2796
|
+
// various workarounds for known issues with certain templates or model behaviors
|
|
2797
|
+
// TODO @ngxson : improve this (how?)
|
|
2798
|
+
namespace workaround {
|
|
2799
|
+
|
|
2800
|
+
// if first message is system and template does not support it, merge it with next message
|
|
2801
|
+
static void system_message_not_supported(json & messages) {
|
|
2802
|
+
if (!messages.empty() && messages.front().at("role") == "system") {
|
|
2803
|
+
if (messages.size() > 1) {
|
|
2804
|
+
LOG_DBG("Merging system prompt into next message\n");
|
|
2805
|
+
auto & first_msg = messages.front();
|
|
2806
|
+
auto & second_msg = messages[1];
|
|
2807
|
+
second_msg["content"] = first_msg.at("content").get<std::string>()
|
|
2808
|
+
+ "\n" + second_msg.at("content").get<std::string>();
|
|
2809
|
+
messages.erase(messages.begin());
|
|
2810
|
+
} else {
|
|
2811
|
+
LOG_WRN("Removing system prompt due to template not supporting system role\n");
|
|
2812
|
+
messages.erase(messages.begin());
|
|
2813
|
+
}
|
|
2814
|
+
}
|
|
2815
|
+
}
|
|
2816
|
+
|
|
2817
|
+
static void func_args_not_string(json & messages) {
|
|
2818
|
+
GGML_ASSERT(messages.is_array());
|
|
2819
|
+
for (auto & message : messages) {
|
|
2820
|
+
if (message.contains("tool_calls")) {
|
|
2821
|
+
for (auto & tool_call : message["tool_calls"]) {
|
|
2822
|
+
if (tool_call.contains("function") && tool_call["function"].contains("arguments")) {
|
|
2823
|
+
auto & args = tool_call["function"]["arguments"];
|
|
2824
|
+
if (args.is_string()) {
|
|
2825
|
+
try {
|
|
2826
|
+
args = json::parse(args.get<std::string>());
|
|
2827
|
+
} catch (const std::exception & e) {
|
|
2828
|
+
throw std::runtime_error("Failed to parse tool call arguments as JSON: " + std::string(e.what()));
|
|
2829
|
+
}
|
|
2830
|
+
}
|
|
2831
|
+
}
|
|
2832
|
+
}
|
|
2833
|
+
}
|
|
2834
|
+
}
|
|
2835
|
+
}
|
|
2836
|
+
|
|
2837
|
+
static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
|
|
2838
|
+
GGML_ASSERT(messages.is_array());
|
|
2839
|
+
for (auto & message : messages) {
|
|
2840
|
+
if (message.contains("tool_calls")) {
|
|
2841
|
+
auto tool_calls_new = json{
|
|
2842
|
+
{"tool_calls", message.at("tool_calls")}
|
|
2843
|
+
};
|
|
2844
|
+
message.erase("tool_calls");
|
|
2845
|
+
auto content = message.at("content");
|
|
2846
|
+
std::string content_new = content.is_null() ? "" : content.get<std::string>();
|
|
2847
|
+
message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
|
|
2848
|
+
}
|
|
2849
|
+
}
|
|
2850
|
+
}
|
|
2851
|
+
|
|
2852
|
+
// TODO @ngxson : we may remove support for generic schema in the future
|
|
2853
|
+
static void use_generic_schema(json & messages) {
|
|
2854
|
+
GGML_ASSERT(messages.is_array());
|
|
2855
|
+
for (auto & message : messages) {
|
|
2856
|
+
if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
|
|
2857
|
+
auto & tool_calls = message.at("tool_calls");
|
|
2858
|
+
for (auto & tool_call : tool_calls) {
|
|
2859
|
+
if (tool_call.contains("type") && tool_call.at("type") == "function" &&
|
|
2860
|
+
tool_call.contains("function") && tool_call.at("function").is_object()) {
|
|
2861
|
+
// Copy values before erasing to avoid use-after-free
|
|
2862
|
+
json name_value;
|
|
2863
|
+
json arguments_value;
|
|
2864
|
+
json id_value;
|
|
2865
|
+
const auto & function = tool_call.at("function");
|
|
2866
|
+
if (function.contains("name")) {
|
|
2867
|
+
name_value = function.at("name");
|
|
2868
|
+
}
|
|
2869
|
+
if (function.contains("arguments")) {
|
|
2870
|
+
arguments_value = function.at("arguments");
|
|
2871
|
+
}
|
|
2872
|
+
if (tool_call.contains("id")) {
|
|
2873
|
+
id_value = tool_call.at("id");
|
|
2874
|
+
}
|
|
2875
|
+
// Now safely erase and assign in the correct order
|
|
2876
|
+
tool_call.erase("type");
|
|
2877
|
+
tool_call.erase("function");
|
|
2878
|
+
tool_call.erase("id");
|
|
2879
|
+
// Reassign in desired order: name, arguments, id
|
|
2880
|
+
if (!name_value.is_null()) {
|
|
2881
|
+
tool_call["name"] = name_value;
|
|
2882
|
+
}
|
|
2883
|
+
if (!arguments_value.is_null()) {
|
|
2884
|
+
tool_call["arguments"] = arguments_value;
|
|
2885
|
+
}
|
|
2886
|
+
if (!id_value.is_null()) {
|
|
2887
|
+
tool_call["id"] = id_value;
|
|
2888
|
+
}
|
|
2889
|
+
}
|
|
2890
|
+
}
|
|
2891
|
+
}
|
|
2892
|
+
}
|
|
2893
|
+
}
|
|
2894
|
+
|
|
2895
|
+
} // namespace workaround
|
|
2896
|
+
|
|
2599
2897
|
static common_chat_params common_chat_templates_apply_jinja(
|
|
2600
2898
|
const struct common_chat_templates * tmpls,
|
|
2601
2899
|
const struct common_chat_templates_inputs & inputs)
|
|
@@ -2617,6 +2915,10 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2617
2915
|
params.add_bos = tmpls->add_bos;
|
|
2618
2916
|
params.add_eos = tmpls->add_eos;
|
|
2619
2917
|
|
|
2918
|
+
if (!tmpl.original_caps().supports_system_role) {
|
|
2919
|
+
workaround::system_message_not_supported(params.messages);
|
|
2920
|
+
}
|
|
2921
|
+
|
|
2620
2922
|
params.extra_context = json::object();
|
|
2621
2923
|
for (auto el : inputs.chat_template_kwargs) {
|
|
2622
2924
|
params.extra_context[el.first] = json::parse(el.second);
|
|
@@ -2655,11 +2957,15 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2655
2957
|
|
|
2656
2958
|
// Command R7B: : use handler in all cases except json schema (thinking / tools).
|
|
2657
2959
|
if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
|
|
2960
|
+
workaround::func_args_not_string(params.messages);
|
|
2658
2961
|
return common_chat_params_init_command_r7b(tmpl, params);
|
|
2659
2962
|
}
|
|
2660
2963
|
|
|
2661
2964
|
// Granite (IBM) - detects thinking / tools support
|
|
2662
2965
|
if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
|
|
2966
|
+
workaround::func_args_not_string(params.messages);
|
|
2967
|
+
workaround::use_generic_schema(params.messages);
|
|
2968
|
+
workaround::move_tool_calls_to_content(params.messages);
|
|
2663
2969
|
return common_chat_params_init_granite(tmpl, params);
|
|
2664
2970
|
}
|
|
2665
2971
|
|
|
@@ -2668,6 +2974,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2668
2974
|
src.find("<arg_key>") != std::string::npos &&
|
|
2669
2975
|
src.find("<arg_value>") != std::string::npos &&
|
|
2670
2976
|
params.json_schema.is_null()) {
|
|
2977
|
+
workaround::func_args_not_string(params.messages);
|
|
2671
2978
|
return common_chat_params_init_glm_4_5(tmpl, params);
|
|
2672
2979
|
}
|
|
2673
2980
|
|
|
@@ -2679,6 +2986,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2679
2986
|
src.find("<function=") != std::string::npos &&
|
|
2680
2987
|
src.find("<parameters>") != std::string::npos &&
|
|
2681
2988
|
src.find("<parameter=") != std::string::npos) {
|
|
2989
|
+
workaround::func_args_not_string(params.messages);
|
|
2682
2990
|
// Nemotron 3 Nano 30B A3B
|
|
2683
2991
|
if (src.find("<think>") != std::string::npos) {
|
|
2684
2992
|
return common_chat_params_init_nemotron_v3(tmpl, params);
|
|
@@ -2696,6 +3004,13 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2696
3004
|
return common_chat_params_init_xiaomi_mimo(tmpl, params);
|
|
2697
3005
|
}
|
|
2698
3006
|
|
|
3007
|
+
// EXAONE MoE format detection
|
|
3008
|
+
if (src.find("<tool_call>") != std::string::npos &&
|
|
3009
|
+
src.find("<tool_result>") != std::string::npos &&
|
|
3010
|
+
src.find("<|tool_declare|>") != std::string::npos) {
|
|
3011
|
+
return common_chat_params_init_exaone_moe(tmpl, params);
|
|
3012
|
+
}
|
|
3013
|
+
|
|
2699
3014
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
|
2700
3015
|
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
|
2701
3016
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
|
@@ -2708,6 +3023,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2708
3023
|
|
|
2709
3024
|
// Seed-OSS
|
|
2710
3025
|
if (src.find("<seed:think>") != std::string::npos) {
|
|
3026
|
+
workaround::func_args_not_string(params.messages);
|
|
2711
3027
|
return common_chat_params_init_seed_oss(tmpl, params, inputs);
|
|
2712
3028
|
}
|
|
2713
3029
|
|
|
@@ -2729,6 +3045,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2729
3045
|
|
|
2730
3046
|
// MiniMax-M2 format detection
|
|
2731
3047
|
if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
|
|
3048
|
+
workaround::func_args_not_string(params.messages);
|
|
2732
3049
|
return common_chat_params_init_minimax_m2(tmpl, params);
|
|
2733
3050
|
}
|
|
2734
3051
|
|
|
@@ -2775,6 +3092,7 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2775
3092
|
// Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
|
|
2776
3093
|
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
|
|
2777
3094
|
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
|
|
3095
|
+
workaround::func_args_not_string(params.messages);
|
|
2778
3096
|
return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
|
|
2779
3097
|
}
|
|
2780
3098
|
|
|
@@ -2803,10 +3121,14 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
|
2803
3121
|
|
|
2804
3122
|
// Mistral Nemo (w/ tools)
|
|
2805
3123
|
if (src.find("[TOOL_CALLS]") != std::string::npos) {
|
|
3124
|
+
workaround::func_args_not_string(params.messages);
|
|
2806
3125
|
return common_chat_params_init_mistral_nemo(tmpl, params);
|
|
2807
3126
|
}
|
|
2808
3127
|
|
|
2809
3128
|
// Generic fallback
|
|
3129
|
+
workaround::func_args_not_string(params.messages);
|
|
3130
|
+
workaround::use_generic_schema(params.messages);
|
|
3131
|
+
workaround::move_tool_calls_to_content(params.messages);
|
|
2810
3132
|
return common_chat_params_init_generic(tmpl, params);
|
|
2811
3133
|
}
|
|
2812
3134
|
|
|
@@ -10,18 +10,7 @@
|
|
|
10
10
|
#include <vector>
|
|
11
11
|
#include <map>
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
#include "minja/minja.hpp"
|
|
15
|
-
|
|
16
|
-
typedef minja::chat_template common_chat_template;
|
|
17
|
-
|
|
18
|
-
struct common_chat_templates {
|
|
19
|
-
bool add_bos;
|
|
20
|
-
bool add_eos;
|
|
21
|
-
bool has_explicit_template; // Model had builtin template or template overridde was specified.
|
|
22
|
-
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
|
|
23
|
-
std::unique_ptr<common_chat_template> template_tool_use;
|
|
24
|
-
};
|
|
13
|
+
struct common_chat_templates;
|
|
25
14
|
|
|
26
15
|
struct common_chat_tool_call {
|
|
27
16
|
std::string name;
|
|
@@ -136,6 +125,7 @@ enum common_chat_format {
|
|
|
136
125
|
COMMON_CHAT_FORMAT_APRIEL_1_5,
|
|
137
126
|
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
|
138
127
|
COMMON_CHAT_FORMAT_SOLAR_OPEN,
|
|
128
|
+
COMMON_CHAT_FORMAT_EXAONE_MOE,
|
|
139
129
|
|
|
140
130
|
// These are intended to be parsed by the PEG parser
|
|
141
131
|
COMMON_CHAT_FORMAT_PEG_SIMPLE,
|
|
@@ -201,7 +191,7 @@ common_chat_templates_ptr common_chat_templates_init(
|
|
|
201
191
|
const std::string & eos_token_override = "");
|
|
202
192
|
|
|
203
193
|
bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
|
|
204
|
-
|
|
194
|
+
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
|
|
205
195
|
|
|
206
196
|
|
|
207
197
|
struct common_chat_params common_chat_templates_apply(
|
|
@@ -232,6 +222,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
|
232
222
|
|
|
233
223
|
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
|
234
224
|
|
|
225
|
+
// Template capabilities structure (for exposing capabilities to external code)
|
|
226
|
+
struct common_chat_template_caps {
|
|
227
|
+
bool supports_tools = true;
|
|
228
|
+
bool supports_tool_calls = true;
|
|
229
|
+
bool supports_system_role = true;
|
|
230
|
+
bool supports_parallel_tool_calls = true;
|
|
231
|
+
};
|
|
232
|
+
|
|
233
|
+
// Get template capabilities for a specific variant ("" for default, "tool_use" for tool_use template)
|
|
234
|
+
common_chat_template_caps common_chat_templates_get_caps(const struct common_chat_templates * tmpls, const std::string & variant = "");
|
|
235
|
+
|
|
236
|
+
// Check if a template variant exists
|
|
237
|
+
bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
|
|
238
|
+
|
|
235
239
|
// Parses a JSON array of messages in OpenAI's chat completion API format.
|
|
236
240
|
// T can be std::string containing JSON or nlohmann::ordered_json
|
|
237
241
|
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
|
|
@@ -1172,7 +1172,6 @@ common_init_result::common_init_result(common_params & params) :
|
|
|
1172
1172
|
pimpl->samplers_seq_config[i] = { i, common_sampler_get(pimpl->samplers[i].get()) };
|
|
1173
1173
|
}
|
|
1174
1174
|
|
|
1175
|
-
// TODO: temporarily gated behind a flag
|
|
1176
1175
|
if (params.sampling.backend_sampling) {
|
|
1177
1176
|
cparams.samplers = pimpl->samplers_seq_config.data();
|
|
1178
1177
|
cparams.n_samplers = pimpl->samplers_seq_config.size();
|