@fugood/llama.node 1.4.6 → 1.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +8 -0
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +25 -26
- package/src/LlamaContext.cpp +2 -2
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +364 -193
- package/src/llama.cpp/common/arg.h +43 -2
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +36 -18
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +1 -1
- package/src/llama.cpp/common/chat-parser.cpp +3 -2
- package/src/llama.cpp/common/chat-peg-parser.cpp +16 -2
- package/src/llama.cpp/common/chat.cpp +272 -0
- package/src/llama.cpp/common/common.cpp +130 -67
- package/src/llama.cpp/common/common.h +40 -16
- package/src/llama.cpp/common/console.cpp +680 -47
- package/src/llama.cpp/common/console.h +30 -8
- package/src/llama.cpp/common/download.cpp +69 -25
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +132 -3
- package/src/llama.cpp/common/json-schema-to-grammar.h +20 -0
- package/src/llama.cpp/common/log.cpp +5 -0
- package/src/llama.cpp/common/log.h +1 -0
- package/src/llama.cpp/common/peg-parser.cpp +1 -1
- package/src/llama.cpp/common/preset.cpp +206 -0
- package/src/llama.cpp/common/preset.h +32 -0
- package/src/llama.cpp/common/sampling.cpp +91 -92
- package/src/llama.cpp/common/sampling.h +11 -6
- package/src/llama.cpp/common/speculative.cpp +1 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/include/ggml-alloc.h +9 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +7 -8
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +69 -39
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +2 -1
- package/src/llama.cpp/include/llama.h +18 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-arch.cpp +1890 -2248
- package/src/llama.cpp/src/llama-arch.h +9 -2
- package/src/llama.cpp/src/llama-batch.cpp +12 -2
- package/src/llama.cpp/src/llama-batch.h +4 -2
- package/src/llama.cpp/src/llama-context.cpp +99 -29
- package/src/llama.cpp/src/llama-context.h +9 -3
- package/src/llama.cpp/src/llama-grammar.cpp +233 -33
- package/src/llama.cpp/src/llama-grammar.h +20 -1
- package/src/llama.cpp/src/llama-graph.cpp +85 -17
- package/src/llama.cpp/src/llama-graph.h +17 -4
- package/src/llama.cpp/src/llama-hparams.cpp +6 -0
- package/src/llama.cpp/src/llama-hparams.h +5 -1
- package/src/llama.cpp/src/llama-impl.cpp +4 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +90 -42
- package/src/llama.cpp/src/llama-kv-cache.h +19 -2
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model.cpp +123 -52
- package/src/llama.cpp/src/llama-model.h +1 -0
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +2 -1
- package/src/llama.cpp/src/llama.cpp +675 -1
- package/src/llama.cpp/src/models/deepseek2.cpp +9 -5
- package/src/llama.cpp/src/models/{gemma3-iswa.cpp → gemma3.cpp} +30 -5
- package/src/llama.cpp/src/models/glm4-moe.cpp +28 -11
- package/src/llama.cpp/src/models/glm4.cpp +27 -4
- package/src/llama.cpp/src/models/models.h +8 -7
- package/src/llama.cpp/src/models/nemotron-h.cpp +35 -6
- package/src/llama.cpp/src/models/qwen2.cpp +12 -3
- package/src/llama.cpp/src/models/qwen3next.cpp +81 -266

package/src/llama.cpp/common/arg.h:

```diff
@@ -3,8 +3,10 @@
 #include "common.h"
 
 #include <set>
+#include <map>
 #include <string>
 #include <vector>
+#include <cstring>
 
 //
 // CLI argument parsing
@@ -14,6 +16,7 @@ struct common_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::set<enum llama_example> excludes = {};
    std::vector<const char *> args;
+    std::vector<const char *> args_neg; // for negated args like --no-xxx
     const char * value_hint = nullptr; // help text or example for arg value
     const char * value_hint_2 = nullptr; // for second arg value
     const char * env = nullptr;
@@ -23,6 +26,9 @@ struct common_arg {
     void (*handler_string) (common_params & params, const std::string &) = nullptr;
     void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
     void (*handler_int) (common_params & params, int) = nullptr;
+    void (*handler_bool) (common_params & params, bool) = nullptr;
+
+    common_arg() = default;
 
     common_arg(
         const std::initializer_list<const char *> & args,
@@ -44,6 +50,13 @@ struct common_arg {
         void (*handler)(common_params & params)
     ) : args(args), help(help), handler_void(handler) {}
 
+    common_arg(
+        const std::initializer_list<const char *> & args,
+        const std::initializer_list<const char *> & args_neg,
+        const std::string & help,
+        void (*handler)(common_params & params, bool)
+    ) : args(args), args_neg(args_neg), help(help), handler_bool(handler) {}
+
     // support 2 values for arg
     common_arg(
         const std::initializer_list<const char *> & args,
@@ -61,9 +74,33 @@ struct common_arg {
     bool is_exclude(enum llama_example ex);
     bool get_value_from_env(std::string & output) const;
     bool has_value_from_env() const;
-    std::string to_string();
+    std::string to_string() const;
+
+    // for using as key in std::map
+    bool operator<(const common_arg& other) const {
+        if (args.empty() || other.args.empty()) {
+            return false;
+        }
+        return strcmp(args[0], other.args[0]) < 0;
+    }
+    bool operator==(const common_arg& other) const {
+        if (args.empty() || other.args.empty()) {
+            return false;
+        }
+        return strcmp(args[0], other.args[0]) == 0;
+    }
+
+    // get all args and env vars (including negated args/env)
+    std::vector<std::string> get_args() const;
+    std::vector<std::string> get_env() const;
 };
 
+namespace common_arg_utils {
+    bool is_truthy(const std::string & value);
+    bool is_falsey(const std::string & value);
+    bool is_autoy(const std::string & value);
+}
+
 struct common_params_context {
     enum llama_example ex = LLAMA_EXAMPLE_COMMON;
     common_params & params;
@@ -76,7 +113,11 @@ struct common_params_context {
 // if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
 bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
 
-//
+// parse input arguments from CLI into a map
+// TODO: support repeated args in the future
+bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map);
+
+// initialize argument parser context - used by test-arg-parser and preset
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
 
 struct common_remote_params {
```
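
The notable change in `arg.h` is the negated-argument machinery: a boolean flag can now declare `--no-xxx` counterparts via `args_neg`, a dedicated `handler_bool` receives `true`/`false` accordingly, and `common_arg_utils` centralizes truthy/falsey/auto string checks (presumably for env-var and flag values). Below is a minimal standalone sketch of that dispatch pattern; the accepted token sets and the `EXAMPLE_WARMUP` variable are illustrative assumptions, not llama.cpp's actual implementation:

```cpp
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-ins for common_arg_utils::is_truthy / is_falsey;
// the exact token sets llama.cpp accepts are an assumption here.
static bool is_truthy(const std::string & v) { return v == "1" || v == "true" || v == "on"; }
static bool is_falsey(const std::string & v) { return v == "0" || v == "false" || v == "off"; }

int main(int argc, char ** argv) {
    bool warmup = true; // stand-in for a common_params field toggled by a handler_bool

    // Hypothetical env var, checked the way a negatable arg's env value might be.
    if (const char * env = std::getenv("EXAMPLE_WARMUP")) {
        if (is_truthy(env)) { warmup = true; }
        if (is_falsey(env)) { warmup = false; }
    }

    // A positive form sets the flag, its args_neg counterpart clears it,
    // mirroring a handler_bool invoked with true/false.
    std::vector<const char *> args     = {"--warmup"};
    std::vector<const char *> args_neg = {"--no-warmup"};
    for (int i = 1; i < argc; i++) {
        for (auto a : args)     { if (std::strcmp(argv[i], a) == 0) { warmup = true;  } }
        for (auto a : args_neg) { if (std::strcmp(argv[i], a) == 0) { warmup = false; } }
    }

    std::cout << "warmup=" << (warmup ? "on" : "off") << "\n";
    return 0;
}
```

The `operator<`/`operator==` overloads exist so `common_arg` can serve as the key of the `std::map` filled by the new `common_params_to_map`, comparing entries by their first flag name.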

package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp:

```diff
@@ -724,16 +724,10 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
         if (reasoning_unclosed) {
             if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
                 unclosed_reasoning_content += content;
-                if (form.allow_toolcall_in_think) {
-                    builder.move_to(tc->groups[0].begin);
-                    if (!builder.try_consume_xml_tool_calls(form)) {
-                        unclosed_reasoning_content += tool_call_start;
-                        builder.move_to(tc->groups[0].end);
-                    }
-                } else {
+                if (!(form.allow_toolcall_in_think && tc)) {
                     unclosed_reasoning_content += tool_call_start;
+                    continue;
                 }
-                continue;
             } else {
                 reasoning_unclosed = false;
                 std::string reasoning_content;
@@ -781,8 +775,12 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
             }
         } else {
             // This <tool_call> start is in thinking block, skip this tool call
-
-
+            // This <tool_call> start is in thinking block
+            if (form.allow_toolcall_in_think) {
+                unclosed_reasoning_content = content.substr(think_start + start_think.size());
+            } else {
+                unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
+            }
             reasoning_unclosed = true;
             content.resize(think_start);
             toolcall_in_think = true;
@@ -805,14 +803,35 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
         }
 
         // remove potential partial suffix
-        if (
-
-
-
+        if (builder.pos() == builder.input().size()) {
+            if (unclosed_reasoning_content.empty()) {
+                rstrip(content);
+                trim_potential_partial_word(content);
+                rstrip(content);
+            } else {
+                rstrip(unclosed_reasoning_content);
+                trim_potential_partial_word(unclosed_reasoning_content);
+                rstrip(unclosed_reasoning_content);
+            }
+        }
+
+        // consume unclosed_reasoning_content if allow_toolcall_in_think is set
+        if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
+            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
+                builder.add_reasoning_content(unclosed_reasoning_content);
+            } else {
+                if (content.empty()) {
+                    content = start_think + unclosed_reasoning_content;
+                } else {
+                    content += "\n\n" + start_think;
+                    content += unclosed_reasoning_content;
+                }
+            }
+            unclosed_reasoning_content.clear();
         }
 
         // Add content
-        if (content.
+        if (!content.empty()) {
             // If there are multiple content blocks
             if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
                 builder.add_content("\n\n");
@@ -820,7 +839,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
             builder.add_content(content);
         }
 
-        // This <tool_call> start is in thinking block, skip this tool call
+        // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
         if (toolcall_in_think && !form.allow_toolcall_in_think) {
             continue;
         }
@@ -829,7 +848,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
         if (!tc) {
             GGML_ASSERT(builder.pos() == builder.input().size());
             GGML_ASSERT(unclosed_reasoning_content.empty());
-            GGML_ASSERT(!reasoning_unclosed);
+            if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
             break;
         }
 
@@ -854,7 +873,6 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
 
 /**
  * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
  */
 void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
     parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
```
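
The net effect of these hunks: when a message ends inside an unclosed `<think>` block, the leftover reasoning text is no longer dropped. It is either emitted as structured reasoning content or folded back into `content` behind a reopened `<think>` marker. A standalone paraphrase of that routing follows; the enum and function names here are illustrative, not the library's:

```cpp
#include <iostream>
#include <string>

enum class reasoning_format { none, structured };

// Paraphrase of the fallback added above: unclosed reasoning either becomes
// structured reasoning output, or is folded back into content behind <think>.
static void route_unclosed_reasoning(std::string & content,
                                     std::string & reasoning_out,
                                     const std::string & unclosed,
                                     reasoning_format fmt,
                                     bool reasoning_in_content,
                                     const std::string & start_think = "<think>") {
    if (fmt != reasoning_format::none && !reasoning_in_content) {
        reasoning_out += unclosed;          // like builder.add_reasoning_content(...)
    } else if (content.empty()) {
        content = start_think + unclosed;   // content starts with the reopened think tag
    } else {
        content += "\n\n" + start_think + unclosed;
    }
}

int main() {
    std::string content = "Hello.";
    std::string reasoning;
    route_unclosed_reasoning(content, reasoning, "still thinking", reasoning_format::none, false);
    std::cout << content << "\n"; // Hello.\n\n<think>still thinking
    return 0;
}
```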

package/src/llama.cpp/common/chat-parser-xml-toolcall.h:

```diff
@@ -31,7 +31,7 @@ struct xml_tool_call_format {
     std::optional<std::string> last_val_end = std::nullopt;
     std::optional<std::string> last_tool_end = std::nullopt;
     bool trim_raw_argval = false;
-    bool allow_toolcall_in_think = false;
+    bool allow_toolcall_in_think = false;
 };
 
 // make a GBNF that accept any strings except those containing any of the forbidden strings.
```

package/src/llama.cpp/common/chat-parser.cpp:

```diff
@@ -917,12 +917,13 @@ static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
         form.tool_start = "<|tool_call_begin|>";
         form.tool_sep = "<|tool_call_argument_begin|>{";
         form.key_start = "\"";
-        form.key_val_sep = "\":
-        form.val_end = ",
+        form.key_val_sep = "\":";
+        form.val_end = ",";
         form.tool_end = "}<|tool_call_end|>";
         form.scope_end = "<|tool_calls_section_end|>";
         form.raw_argval = false;
         form.last_val_end = "";
+        form.allow_toolcall_in_think = true;
         return form;
     })();
     builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
```
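
For Kimi-K2 the parser now also accepts tool calls inside an unclosed think block (`allow_toolcall_in_think = true`). Assembled from the delimiter fields visible in this hunk, a single serialized tool call would look roughly like the string below; the tool name and JSON payload are made up, and fields not shown here (such as a section-begin token) are omitted:

```cpp
#include <iostream>
#include <string>

int main() {
    // Reconstructed from the form fields above; illustrative only.
    std::string tool_call =
        "<|tool_call_begin|>get_weather"    // tool_start + tool name
        "<|tool_call_argument_begin|>{"     // tool_sep opens the JSON object
        "\"city\":\"Paris\""                // key_start / key_val_sep framing
        "}<|tool_call_end|>"                // tool_end
        "<|tool_calls_section_end|>";       // scope_end closes the section
    std::cout << tool_call << "\n";
    return 0;
}
```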

package/src/llama.cpp/common/chat-peg-parser.cpp:

```diff
@@ -1,8 +1,17 @@
 #include "chat-peg-parser.h"
 
-static std::string_view trim_trailing_space(std::string_view sv) {
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::ordered_json;
+
+static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
+    int count = 0;
     while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+        if (max != -1 && count <= max) {
+            break;
+        }
         sv.remove_suffix(1);
+        count++;
     }
     return sv;
 }
@@ -89,7 +98,7 @@ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
 
     if (is_arg_string && current_tool) {
         // Serialize to JSON, but exclude the end quote
-        std::string dumped = json(node.text).dump();
+        std::string dumped = json(trim_trailing_space(node.text)).dump();
         current_tool->arguments += dumped.substr(0, dumped.size() - 1);
         needs_closing_quote = true;
     }
@@ -97,6 +106,7 @@ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
     if (is_arg_close && current_tool) {
         if (needs_closing_quote) {
             current_tool->arguments += "\"";
+            needs_closing_quote = false;
         }
     }
 
@@ -105,6 +115,10 @@ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
     }
 
     if (is_tool_close && current_tool) {
+        if (needs_closing_quote) {
+            current_tool->arguments += "\"";
+            needs_closing_quote = false;
+        }
         current_tool->arguments += "}";
     }
 }
```
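
`trim_trailing_space` is copied below verbatim so its default behavior can be checked in isolation: with `max` left at `-1`, the `break` never fires and all trailing whitespace is stripped before the argument text is JSON-encoded, which is what the mapper change above relies on when serializing a partial string argument:

```cpp
#include <cctype>
#include <iostream>
#include <string_view>

// Verbatim copy of the helper added in the diff above.
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
    int count = 0;
    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
        if (max != -1 && count <= max) {
            break;
        }
        sv.remove_suffix(1);
        count++;
    }
    return sv;
}

int main() {
    // Default max = -1: every trailing whitespace character is removed.
    std::cout << '[' << trim_trailing_space("partial value \n") << "]\n"; // [partial value]
    return 0;
}
```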

package/src/llama.cpp/common/chat.cpp:

```diff
@@ -1,5 +1,6 @@
 #include "chat.h"
 #include "chat-parser.h"
+#include "chat-peg-parser.h"
 #include "common.h"
 #include "json-partial.h"
 #include "json-schema-to-grammar.h"
@@ -137,6 +138,7 @@ struct templates_params {
     common_chat_tool_choice tool_choice;
     json json_schema;
     bool parallel_tool_calls;
+    common_reasoning_format reasoning_format;
     bool stream;
     std::string grammar;
     bool add_generation_prompt = true;
```

```diff
@@ -576,6 +578,16 @@ common_chat_templates_ptr common_chat_templates_init(
             "{%- if false %}");
     }
 
+    // TODO @aldehir : this is a temporary fix, pending Minja changes
+    // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
+    if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
+        // search for the error message and patch it
+        && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
+        string_replace_all(default_template_src,
+            "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
+            "{%- if false %}");
+    }
+
     std::string token_bos = bos_token_override;
     std::string token_eos = eos_token_override;
     bool add_bos = false;
```

```diff
@@ -686,6 +698,25 @@ static void foreach_function(const json & tools, const std::function<void(const
     }
 }
 
+static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
+    if (!function.contains("parameters") || !function.at("parameters").is_object()) {
+        return;
+    }
+    const auto & params = function.at("parameters");
+    if (!params.contains("properties") || !params.at("properties").is_object()) {
+        return;
+    }
+    const auto & props = params.at("properties");
+    std::set<std::string> required;
+    if (params.contains("required") && params.at("required").is_array()) {
+        params.at("required").get_to(required);
+    }
+    for (const auto & [name, prop] : props.items()) {
+        bool is_required = (required.find(name) != required.end());
+        fn(name, prop, is_required);
+    }
+}
+
 static std::string apply(
     const common_chat_template & tmpl,
     const struct templates_params & inputs,
```
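
`foreach_parameter` walks a function's JSON-schema `properties`, flagging each property as required or optional. Here is a self-contained illustration of the same traversal over a typical OpenAI-style tool definition (requires nlohmann/json; the `get_weather` schema is made up):

```cpp
#include <iostream>
#include <set>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

int main() {
    // A typical OpenAI-style function definition, as consumed by foreach_parameter.
    json function = json::parse(R"({
        "name": "get_weather",
        "parameters": {
            "type": "object",
            "properties": {
                "city":  { "type": "string" },
                "units": { "type": "string", "enum": ["C", "F"] }
            },
            "required": ["city"]
        }
    })");

    const auto & params = function.at("parameters");
    std::set<std::string> required;
    if (params.contains("required") && params.at("required").is_array()) {
        params.at("required").get_to(required);
    }
    // Same iteration order and required-check as the helper above.
    for (const auto & [name, prop] : params.at("properties").items()) {
        bool is_required = required.count(name) != 0;
        std::cout << name << (is_required ? " (required)" : " (optional)") << "\n";
    }
    return 0;
}
```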

````diff
@@ -974,6 +1005,118 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
     return data;
 }
 
+static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
+    auto adjusted_messages = json::array();
+    for (const auto & msg : inputs.messages) {
+        auto role = msg.value("role", "");
+        if (role != "system" && role != "assistant") {
+            // Only adjust system and assistant messages. Interestingly, the system message may contain thinking.
+            adjusted_messages.push_back(msg);
+            continue;
+        }
+
+        auto content = json::array();
+
+        // If message contains `reasoning_content`, add it as a block of type `thinking`
+        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
+            content.push_back({
+                {"type", "thinking"},
+                {"thinking", msg.at("reasoning_content").get<std::string>()},
+            });
+        }
+
+        // If message contains `content`, add it as a block of type `text`
+        if (msg.contains("content")) {
+            if (msg.at("content").is_string()) {
+                content.push_back({
+                    {"type", "text"},
+                    {"text", msg.at("content").get<std::string>()},
+                });
+            } else if (msg.at("content").is_array()) {
+                auto blocks = msg.at("content");
+                content.insert(content.end(), blocks.begin(), blocks.end());
+            }
+        }
+
+        auto adjusted = msg;
+        adjusted["content"] = content;
+        adjusted.erase("reasoning_content");
+        adjusted_messages.push_back(adjusted);
+    }
+
+    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar = true;
+
+    data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
+    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = {
+        "[THINK]",
+        "[/THINK]",
+        "[TOOL_CALLS]",
+        "[ARGS]",
+    };
+
+    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+        auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
+
+        // Response format parser
+        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
+            // Ministral wants to emit json surrounded by code fences
+            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
+        }
+
+        // Tool call parser
+        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
+            auto tool_choice = p.choice();
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                const auto & schema = function.at("parameters");
+
+                tool_choice |= p.rule("tool-" + name,
+                    p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
+                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
+                );
+            });
+
+            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+            auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
+
+            return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
+        }
+
+        // Content only parser
+        include_grammar = false;
+        return reasoning << p.content(p.rest());
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto schema = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        data.grammar_triggers = {
+            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
+        };
+    }
+
+    return data;
+}
+
 static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
````
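
Before templating, the Ministral 3 handler rewrites assistant (and system) messages into the typed-content form its Jinja template expects. A small sketch of the transformation on one message (nlohmann/json assumed; the message text is made up):

```cpp
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

int main() {
    // An assistant message as it arrives from the OpenAI-compatible API...
    json msg = {
        {"role", "assistant"},
        {"reasoning_content", "The user greeted me."},
        {"content", "Hello!"},
    };

    // ...and the typed-content shape produced by the adjustment loop above:
    // reasoning_content becomes a "thinking" block, content becomes a "text" block.
    json adjusted = msg;
    adjusted["content"] = json::array({
        json{{"type", "thinking"}, {"thinking", msg.at("reasoning_content").get<std::string>()}},
        json{{"type", "text"},     {"text",     msg.at("content").get<std::string>()}},
    });
    adjusted.erase("reasoning_content");

    std::cout << adjusted.dump(2) << "\n";
    return 0;
}
```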

```diff
@@ -1272,6 +1415,123 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_
     return data;
 }
 
+static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    data.prompt = apply(tmpl, inputs);
+    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
+
+    // Handle thinking tags appropriately based on inputs.enable_thinking
+    if (string_ends_with(data.prompt, "<think>\n")) {
+        if (!inputs.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+
+    data.preserved_tokens = {
+        "<think>",
+        "</think>",
+        "<tool_call>",
+        "</tool_call>",
+    };
+
+    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar = true;
+
+    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
+        auto reasoning = p.eps();
+        if (inputs.enable_thinking && extract_reasoning) {
+            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
+            if (data.thinking_forced_open) {
+                reasoning = reasoning_content;
+            }
+        }
+
+        // Response format parser
+        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
+            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
+        }
+
+        // Tool call parser
+        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
+            auto tool_choice = p.choice();
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+
+                auto schema_info = common_schema_info();
+                schema_info.resolve_refs(parameters);
+
+                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
+                auto tool_close = p.literal("</function>\n");
+                auto args = p.sequence();
+                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
+                    "\n</parameter>",
+                    "\n<parameter=",
+                    "\n</function>"
+                }));
+
+                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
+                    auto rule_name = "tool-" + name + "-arg-" + param_name;
+
+                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
+                    auto arg_close = p.literal("</parameter>\n");
+                    auto arg_value = p.eps();
+
+                    if (schema_info.resolves_to_string(param_schema)) {
+                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
+                    } else {
+                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
+                    }
+
+                    // Model may or my not close with </parameter>
+                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
+                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
+                });
+
+                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
+            });
+
+            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
+            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
+
+            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
+        }
+
+        // Content only parser
+        include_grammar = false;
+        return reasoning << p.content(p.rest());
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto schema = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        data.grammar_triggers = {
+            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
+        };
+    }
+
+    return data;
+}
+
+
 static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
 
```
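
The PEG rules above encode Nemotron v3's XML-ish tool-call syntax. Read off the literals, one call on the wire looks like the example below; the tool and argument names are invented, and the `</parameter>` close is optional per the rule comment:

```cpp
#include <iostream>

int main() {
    // The tool-call shape implied by the parser rules above (illustrative values).
    const char * example =
        "<tool_call>\n"
        "<function=get_weather>\n"
        "<parameter=city>\n"
        "Paris\n"            // string-typed args are written raw, terminated by a newline
        "</parameter>\n"
        "</function>\n"
        "</tool_call>";
    std::cout << example << "\n";
    return 0;
}
```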

```diff
@@ -2328,6 +2588,7 @@ static common_chat_params common_chat_templates_apply_jinja(
     params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
     params.add_generation_prompt = inputs.add_generation_prompt;
     params.tool_choice = inputs.tool_choice;
+    params.reasoning_format = inputs.reasoning_format;
     params.enable_thinking = inputs.enable_thinking;
     params.grammar = inputs.grammar;
     params.now = inputs.now;
@@ -2396,6 +2657,10 @@ static common_chat_params common_chat_templates_apply_jinja(
         src.find("<function=") != std::string::npos &&
         src.find("<parameters>") != std::string::npos &&
         src.find("<parameter=") != std::string::npos) {
+        // Nemotron 3 Nano 30B A3B
+        if (src.find("<think>") != std::string::npos) {
+            return common_chat_params_init_nemotron_v3(tmpl, params);
+        }
         return common_chat_params_init_qwen3_coder_xml(tmpl, params);
     }
 
```

```diff
@@ -2491,6 +2756,13 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
     }
 
+    // Ministral/Mistral Large 3
+    if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
+        src.find("[TOOL_CALLS]") != std::string::npos &&
+        src.find("[ARGS]") != std::string::npos) {
+        return common_chat_params_init_ministral_3(tmpl, params);
+    }
+
     if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
         return common_chat_params_init_magistral(tmpl, params);
     }
```