@fugood/llama.node 1.4.6 → 1.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/lib/binding.ts +8 -0
  2. package/package.json +15 -15
  3. package/scripts/llama.cpp.patch +25 -26
  4. package/src/LlamaContext.cpp +2 -2
  5. package/src/llama.cpp/common/CMakeLists.txt +2 -0
  6. package/src/llama.cpp/common/arg.cpp +364 -193
  7. package/src/llama.cpp/common/arg.h +43 -2
  8. package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +36 -18
  9. package/src/llama.cpp/common/chat-parser-xml-toolcall.h +1 -1
  10. package/src/llama.cpp/common/chat-parser.cpp +3 -2
  11. package/src/llama.cpp/common/chat-peg-parser.cpp +16 -2
  12. package/src/llama.cpp/common/chat.cpp +272 -0
  13. package/src/llama.cpp/common/common.cpp +130 -67
  14. package/src/llama.cpp/common/common.h +40 -16
  15. package/src/llama.cpp/common/console.cpp +680 -47
  16. package/src/llama.cpp/common/console.h +30 -8
  17. package/src/llama.cpp/common/download.cpp +69 -25
  18. package/src/llama.cpp/common/json-schema-to-grammar.cpp +132 -3
  19. package/src/llama.cpp/common/json-schema-to-grammar.h +20 -0
  20. package/src/llama.cpp/common/log.cpp +5 -0
  21. package/src/llama.cpp/common/log.h +1 -0
  22. package/src/llama.cpp/common/peg-parser.cpp +1 -1
  23. package/src/llama.cpp/common/preset.cpp +206 -0
  24. package/src/llama.cpp/common/preset.h +32 -0
  25. package/src/llama.cpp/common/sampling.cpp +91 -92
  26. package/src/llama.cpp/common/sampling.h +11 -6
  27. package/src/llama.cpp/common/speculative.cpp +1 -1
  28. package/src/llama.cpp/ggml/CMakeLists.txt +5 -0
  29. package/src/llama.cpp/ggml/include/ggml-alloc.h +9 -0
  30. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -0
  31. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
  32. package/src/llama.cpp/ggml/include/ggml.h +7 -8
  33. package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
  34. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +3 -0
  35. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +69 -39
  37. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +2 -1
  39. package/src/llama.cpp/include/llama.h +18 -1
  40. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  41. package/src/llama.cpp/src/llama-arch.cpp +1890 -2248
  42. package/src/llama.cpp/src/llama-arch.h +9 -2
  43. package/src/llama.cpp/src/llama-batch.cpp +12 -2
  44. package/src/llama.cpp/src/llama-batch.h +4 -2
  45. package/src/llama.cpp/src/llama-context.cpp +99 -29
  46. package/src/llama.cpp/src/llama-context.h +9 -3
  47. package/src/llama.cpp/src/llama-grammar.cpp +233 -33
  48. package/src/llama.cpp/src/llama-grammar.h +20 -1
  49. package/src/llama.cpp/src/llama-graph.cpp +85 -17
  50. package/src/llama.cpp/src/llama-graph.h +17 -4
  51. package/src/llama.cpp/src/llama-hparams.cpp +6 -0
  52. package/src/llama.cpp/src/llama-hparams.h +5 -1
  53. package/src/llama.cpp/src/llama-impl.cpp +4 -0
  54. package/src/llama.cpp/src/llama-kv-cache.cpp +90 -42
  55. package/src/llama.cpp/src/llama-kv-cache.h +19 -2
  56. package/src/llama.cpp/src/llama-memory-hybrid.cpp +1 -1
  57. package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
  58. package/src/llama.cpp/src/llama-model-loader.h +2 -0
  59. package/src/llama.cpp/src/llama-model.cpp +123 -52
  60. package/src/llama.cpp/src/llama-model.h +1 -0
  61. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  62. package/src/llama.cpp/src/llama-vocab.cpp +2 -1
  63. package/src/llama.cpp/src/llama.cpp +675 -1
  64. package/src/llama.cpp/src/models/deepseek2.cpp +9 -5
  65. package/src/llama.cpp/src/models/{gemma3-iswa.cpp → gemma3.cpp} +30 -5
  66. package/src/llama.cpp/src/models/glm4-moe.cpp +28 -11
  67. package/src/llama.cpp/src/models/glm4.cpp +27 -4
  68. package/src/llama.cpp/src/models/models.h +8 -7
  69. package/src/llama.cpp/src/models/nemotron-h.cpp +35 -6
  70. package/src/llama.cpp/src/models/qwen2.cpp +12 -3
  71. package/src/llama.cpp/src/models/qwen3next.cpp +81 -266
@@ -3,8 +3,10 @@
3
3
  #include "common.h"
4
4
 
5
5
  #include <set>
6
+ #include <map>
6
7
  #include <string>
7
8
  #include <vector>
9
+ #include <cstring>
8
10
 
9
11
  //
10
12
  // CLI argument parsing
@@ -14,6 +16,7 @@ struct common_arg {
14
16
  std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
15
17
  std::set<enum llama_example> excludes = {};
16
18
  std::vector<const char *> args;
19
+ std::vector<const char *> args_neg; // for negated args like --no-xxx
17
20
  const char * value_hint = nullptr; // help text or example for arg value
18
21
  const char * value_hint_2 = nullptr; // for second arg value
19
22
  const char * env = nullptr;
@@ -23,6 +26,9 @@ struct common_arg {
23
26
  void (*handler_string) (common_params & params, const std::string &) = nullptr;
24
27
  void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
25
28
  void (*handler_int) (common_params & params, int) = nullptr;
29
+ void (*handler_bool) (common_params & params, bool) = nullptr;
30
+
31
+ common_arg() = default;
26
32
 
27
33
  common_arg(
28
34
  const std::initializer_list<const char *> & args,
@@ -44,6 +50,13 @@ struct common_arg {
44
50
  void (*handler)(common_params & params)
45
51
  ) : args(args), help(help), handler_void(handler) {}
46
52
 
53
+ common_arg(
54
+ const std::initializer_list<const char *> & args,
55
+ const std::initializer_list<const char *> & args_neg,
56
+ const std::string & help,
57
+ void (*handler)(common_params & params, bool)
58
+ ) : args(args), args_neg(args_neg), help(help), handler_bool(handler) {}
59
+
47
60
  // support 2 values for arg
48
61
  common_arg(
49
62
  const std::initializer_list<const char *> & args,
@@ -61,9 +74,33 @@ struct common_arg {
61
74
  bool is_exclude(enum llama_example ex);
62
75
  bool get_value_from_env(std::string & output) const;
63
76
  bool has_value_from_env() const;
64
- std::string to_string();
77
+ std::string to_string() const;
78
+
79
+ // for using as key in std::map
80
+ bool operator<(const common_arg& other) const {
81
+ if (args.empty() || other.args.empty()) {
82
+ return false;
83
+ }
84
+ return strcmp(args[0], other.args[0]) < 0;
85
+ }
86
+ bool operator==(const common_arg& other) const {
87
+ if (args.empty() || other.args.empty()) {
88
+ return false;
89
+ }
90
+ return strcmp(args[0], other.args[0]) == 0;
91
+ }
92
+
93
+ // get all args and env vars (including negated args/env)
94
+ std::vector<std::string> get_args() const;
95
+ std::vector<std::string> get_env() const;
65
96
  };
66
97
 
98
+ namespace common_arg_utils {
99
+ bool is_truthy(const std::string & value);
100
+ bool is_falsey(const std::string & value);
101
+ bool is_autoy(const std::string & value);
102
+ }
103
+
67
104
  struct common_params_context {
68
105
  enum llama_example ex = LLAMA_EXAMPLE_COMMON;
69
106
  common_params & params;
@@ -76,7 +113,11 @@ struct common_params_context {
76
113
  // if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
77
114
  bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
78
115
 
79
- // function to be used by test-arg-parser
116
+ // parse input arguments from CLI into a map
117
+ // TODO: support repeated args in the future
118
+ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map);
119
+
120
+ // initialize argument parser context - used by test-arg-parser and preset
80
121
  common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
81
122
 
82
123
  struct common_remote_params {
@@ -724,16 +724,10 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
724
724
  if (reasoning_unclosed) {
725
725
  if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
726
726
  unclosed_reasoning_content += content;
727
- if (form.allow_toolcall_in_think) {
728
- builder.move_to(tc->groups[0].begin);
729
- if (!builder.try_consume_xml_tool_calls(form)) {
730
- unclosed_reasoning_content += tool_call_start;
731
- builder.move_to(tc->groups[0].end);
732
- }
733
- } else {
727
+ if (!(form.allow_toolcall_in_think && tc)) {
734
728
  unclosed_reasoning_content += tool_call_start;
729
+ continue;
735
730
  }
736
- continue;
737
731
  } else {
738
732
  reasoning_unclosed = false;
739
733
  std::string reasoning_content;
@@ -781,8 +775,12 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
781
775
  }
782
776
  } else {
783
777
  // This <tool_call> start is in thinking block, skip this tool call
784
- auto pos = think_start + start_think.size();
785
- unclosed_reasoning_content = content.substr(pos) + tool_call_start;
778
+ // This <tool_call> start is in thinking block
779
+ if (form.allow_toolcall_in_think) {
780
+ unclosed_reasoning_content = content.substr(think_start + start_think.size());
781
+ } else {
782
+ unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
783
+ }
786
784
  reasoning_unclosed = true;
787
785
  content.resize(think_start);
788
786
  toolcall_in_think = true;
@@ -805,14 +803,35 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
805
803
  }
806
804
 
807
805
  // remove potential partial suffix
808
- if (content.size() > 0 && builder.pos() == builder.input().size() && unclosed_reasoning_content.empty()) {
809
- rstrip(content);
810
- trim_potential_partial_word(content);
811
- rstrip(content);
806
+ if (builder.pos() == builder.input().size()) {
807
+ if (unclosed_reasoning_content.empty()) {
808
+ rstrip(content);
809
+ trim_potential_partial_word(content);
810
+ rstrip(content);
811
+ } else {
812
+ rstrip(unclosed_reasoning_content);
813
+ trim_potential_partial_word(unclosed_reasoning_content);
814
+ rstrip(unclosed_reasoning_content);
815
+ }
816
+ }
817
+
818
+ // consume unclosed_reasoning_content if allow_toolcall_in_think is set
819
+ if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
820
+ if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
821
+ builder.add_reasoning_content(unclosed_reasoning_content);
822
+ } else {
823
+ if (content.empty()) {
824
+ content = start_think + unclosed_reasoning_content;
825
+ } else {
826
+ content += "\n\n" + start_think;
827
+ content += unclosed_reasoning_content;
828
+ }
829
+ }
830
+ unclosed_reasoning_content.clear();
812
831
  }
813
832
 
814
833
  // Add content
815
- if (content.size() != 0) {
834
+ if (!content.empty()) {
816
835
  // If there are multiple content blocks
817
836
  if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
818
837
  builder.add_content("\n\n");
@@ -820,7 +839,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
820
839
  builder.add_content(content);
821
840
  }
822
841
 
823
- // This <tool_call> start is in thinking block, skip this tool call
842
+ // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
824
843
  if (toolcall_in_think && !form.allow_toolcall_in_think) {
825
844
  continue;
826
845
  }
@@ -829,7 +848,7 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
829
848
  if (!tc) {
830
849
  GGML_ASSERT(builder.pos() == builder.input().size());
831
850
  GGML_ASSERT(unclosed_reasoning_content.empty());
832
- GGML_ASSERT(!reasoning_unclosed);
851
+ if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
833
852
  break;
834
853
  }
835
854
 
@@ -854,7 +873,6 @@ inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, cons
854
873
 
855
874
  /**
856
875
  * Parse content uses reasoning and XML-Style tool call
857
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
858
876
  */
859
877
  void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
860
878
  parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
@@ -31,7 +31,7 @@ struct xml_tool_call_format {
31
31
  std::optional<std::string> last_val_end = std::nullopt;
32
32
  std::optional<std::string> last_tool_end = std::nullopt;
33
33
  bool trim_raw_argval = false;
34
- bool allow_toolcall_in_think = false; // TODO: UNTESTED!!!
34
+ bool allow_toolcall_in_think = false;
35
35
  };
36
36
 
37
37
  // make a GBNF that accept any strings except those containing any of the forbidden strings.
@@ -917,12 +917,13 @@ static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
917
917
  form.tool_start = "<|tool_call_begin|>";
918
918
  form.tool_sep = "<|tool_call_argument_begin|>{";
919
919
  form.key_start = "\"";
920
- form.key_val_sep = "\": ";
921
- form.val_end = ", ";
920
+ form.key_val_sep = "\":";
921
+ form.val_end = ",";
922
922
  form.tool_end = "}<|tool_call_end|>";
923
923
  form.scope_end = "<|tool_calls_section_end|>";
924
924
  form.raw_argval = false;
925
925
  form.last_val_end = "";
926
+ form.allow_toolcall_in_think = true;
926
927
  return form;
927
928
  })();
928
929
  builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
@@ -1,8 +1,17 @@
1
1
  #include "chat-peg-parser.h"
2
2
 
3
- static std::string_view trim_trailing_space(std::string_view sv) {
3
+ #include <nlohmann/json.hpp>
4
+
5
+ using json = nlohmann::ordered_json;
6
+
7
+ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
8
+ int count = 0;
4
9
  while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
10
+ if (max != -1 && count <= max) {
11
+ break;
12
+ }
5
13
  sv.remove_suffix(1);
14
+ count++;
6
15
  }
7
16
  return sv;
8
17
  }
@@ -89,7 +98,7 @@ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
89
98
 
90
99
  if (is_arg_string && current_tool) {
91
100
  // Serialize to JSON, but exclude the end quote
92
- std::string dumped = json(node.text).dump();
101
+ std::string dumped = json(trim_trailing_space(node.text)).dump();
93
102
  current_tool->arguments += dumped.substr(0, dumped.size() - 1);
94
103
  needs_closing_quote = true;
95
104
  }
@@ -97,6 +106,7 @@ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
97
106
  if (is_arg_close && current_tool) {
98
107
  if (needs_closing_quote) {
99
108
  current_tool->arguments += "\"";
109
+ needs_closing_quote = false;
100
110
  }
101
111
  }
102
112
 
@@ -105,6 +115,10 @@ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
105
115
  }
106
116
 
107
117
  if (is_tool_close && current_tool) {
118
+ if (needs_closing_quote) {
119
+ current_tool->arguments += "\"";
120
+ needs_closing_quote = false;
121
+ }
108
122
  current_tool->arguments += "}";
109
123
  }
110
124
  }
@@ -1,5 +1,6 @@
1
1
  #include "chat.h"
2
2
  #include "chat-parser.h"
3
+ #include "chat-peg-parser.h"
3
4
  #include "common.h"
4
5
  #include "json-partial.h"
5
6
  #include "json-schema-to-grammar.h"
@@ -137,6 +138,7 @@ struct templates_params {
137
138
  common_chat_tool_choice tool_choice;
138
139
  json json_schema;
139
140
  bool parallel_tool_calls;
141
+ common_reasoning_format reasoning_format;
140
142
  bool stream;
141
143
  std::string grammar;
142
144
  bool add_generation_prompt = true;
@@ -576,6 +578,16 @@ common_chat_templates_ptr common_chat_templates_init(
576
578
  "{%- if false %}");
577
579
  }
578
580
 
581
+ // TODO @aldehir : this is a temporary fix, pending Minja changes
582
+ // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
583
+ if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
584
+ // search for the error message and patch it
585
+ && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
586
+ string_replace_all(default_template_src,
587
+ "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
588
+ "{%- if false %}");
589
+ }
590
+
579
591
  std::string token_bos = bos_token_override;
580
592
  std::string token_eos = eos_token_override;
581
593
  bool add_bos = false;
@@ -686,6 +698,25 @@ static void foreach_function(const json & tools, const std::function<void(const
686
698
  }
687
699
  }
688
700
 
701
+ static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
702
+ if (!function.contains("parameters") || !function.at("parameters").is_object()) {
703
+ return;
704
+ }
705
+ const auto & params = function.at("parameters");
706
+ if (!params.contains("properties") || !params.at("properties").is_object()) {
707
+ return;
708
+ }
709
+ const auto & props = params.at("properties");
710
+ std::set<std::string> required;
711
+ if (params.contains("required") && params.at("required").is_array()) {
712
+ params.at("required").get_to(required);
713
+ }
714
+ for (const auto & [name, prop] : props.items()) {
715
+ bool is_required = (required.find(name) != required.end());
716
+ fn(name, prop, is_required);
717
+ }
718
+ }
719
+
689
720
  static std::string apply(
690
721
  const common_chat_template & tmpl,
691
722
  const struct templates_params & inputs,
@@ -974,6 +1005,118 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
974
1005
  return data;
975
1006
  }
976
1007
 
1008
+ static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
1009
+ common_chat_params data;
1010
+
1011
+ // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
1012
+ auto adjusted_messages = json::array();
1013
+ for (const auto & msg : inputs.messages) {
1014
+ auto role = msg.value("role", "");
1015
+ if (role != "system" && role != "assistant") {
1016
+ // Only adjust system and assistant messages. Interestingly, the system message may contain thinking.
1017
+ adjusted_messages.push_back(msg);
1018
+ continue;
1019
+ }
1020
+
1021
+ auto content = json::array();
1022
+
1023
+ // If message contains `reasoning_content`, add it as a block of type `thinking`
1024
+ if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
1025
+ content.push_back({
1026
+ {"type", "thinking"},
1027
+ {"thinking", msg.at("reasoning_content").get<std::string>()},
1028
+ });
1029
+ }
1030
+
1031
+ // If message contains `content`, add it as a block of type `text`
1032
+ if (msg.contains("content")) {
1033
+ if (msg.at("content").is_string()) {
1034
+ content.push_back({
1035
+ {"type", "text"},
1036
+ {"text", msg.at("content").get<std::string>()},
1037
+ });
1038
+ } else if (msg.at("content").is_array()) {
1039
+ auto blocks = msg.at("content");
1040
+ content.insert(content.end(), blocks.begin(), blocks.end());
1041
+ }
1042
+ }
1043
+
1044
+ auto adjusted = msg;
1045
+ adjusted["content"] = content;
1046
+ adjusted.erase("reasoning_content");
1047
+ adjusted_messages.push_back(adjusted);
1048
+ }
1049
+
1050
+ auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
1051
+ auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
1052
+ auto include_grammar = true;
1053
+
1054
+ data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
1055
+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
1056
+ data.preserved_tokens = {
1057
+ "[THINK]",
1058
+ "[/THINK]",
1059
+ "[TOOL_CALLS]",
1060
+ "[ARGS]",
1061
+ };
1062
+
1063
+ auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
1064
+ auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
1065
+
1066
+ // Response format parser
1067
+ if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
1068
+ // Ministral wants to emit json surrounded by code fences
1069
+ return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
1070
+ }
1071
+
1072
+ // Tool call parser
1073
+ if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
1074
+ auto tool_choice = p.choice();
1075
+ foreach_function(inputs.tools, [&](const json & tool) {
1076
+ const auto & function = tool.at("function");
1077
+ std::string name = function.at("name");
1078
+ const auto & schema = function.at("parameters");
1079
+
1080
+ tool_choice |= p.rule("tool-" + name,
1081
+ p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
1082
+ + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
1083
+ );
1084
+ });
1085
+
1086
+ auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
1087
+ auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
1088
+ auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
1089
+
1090
+ return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
1091
+ }
1092
+
1093
+ // Content only parser
1094
+ include_grammar = false;
1095
+ return reasoning << p.content(p.rest());
1096
+ });
1097
+
1098
+ data.parser = parser.save();
1099
+
1100
+ if (include_grammar) {
1101
+ data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
1102
+
1103
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1104
+ foreach_function(inputs.tools, [&](const json & tool) {
1105
+ const auto & function = tool.at("function");
1106
+ auto schema = function.at("parameters");
1107
+ builder.resolve_refs(schema);
1108
+ });
1109
+ parser.build_grammar(builder, data.grammar_lazy);
1110
+ });
1111
+
1112
+ data.grammar_triggers = {
1113
+ {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
1114
+ };
1115
+ }
1116
+
1117
+ return data;
1118
+ }
1119
+
977
1120
  static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
978
1121
  common_chat_params data;
979
1122
  data.prompt = apply(tmpl, inputs);
@@ -1272,6 +1415,123 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_
1272
1415
  return data;
1273
1416
  }
1274
1417
 
1418
+ static common_chat_params common_chat_params_init_nemotron_v3(const common_chat_template & tmpl, const struct templates_params & inputs) {
1419
+ common_chat_params data;
1420
+
1421
+ data.prompt = apply(tmpl, inputs);
1422
+ data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
1423
+
1424
+ // Handle thinking tags appropriately based on inputs.enable_thinking
1425
+ if (string_ends_with(data.prompt, "<think>\n")) {
1426
+ if (!inputs.enable_thinking) {
1427
+ data.prompt += "</think>";
1428
+ } else {
1429
+ data.thinking_forced_open = true;
1430
+ }
1431
+ }
1432
+
1433
+ data.preserved_tokens = {
1434
+ "<think>",
1435
+ "</think>",
1436
+ "<tool_call>",
1437
+ "</tool_call>",
1438
+ };
1439
+
1440
+ auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
1441
+ auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
1442
+ auto include_grammar = true;
1443
+
1444
+ auto parser = build_chat_peg_constructed_parser([&](auto & p) {
1445
+ auto reasoning = p.eps();
1446
+ if (inputs.enable_thinking && extract_reasoning) {
1447
+ auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
1448
+ if (data.thinking_forced_open) {
1449
+ reasoning = reasoning_content;
1450
+ }
1451
+ }
1452
+
1453
+ // Response format parser
1454
+ if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
1455
+ return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
1456
+ }
1457
+
1458
+ // Tool call parser
1459
+ if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
1460
+ auto tool_choice = p.choice();
1461
+ foreach_function(inputs.tools, [&](const json & tool) {
1462
+ const auto & function = tool.at("function");
1463
+ std::string name = function.at("name");
1464
+ auto parameters = function.at("parameters");
1465
+
1466
+ auto schema_info = common_schema_info();
1467
+ schema_info.resolve_refs(parameters);
1468
+
1469
+ auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
1470
+ auto tool_close = p.literal("</function>\n");
1471
+ auto args = p.sequence();
1472
+ auto arg_string = p.rule("xml-arg-string", p.until_one_of({
1473
+ "\n</parameter>",
1474
+ "\n<parameter=",
1475
+ "\n</function>"
1476
+ }));
1477
+
1478
+ foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
1479
+ auto rule_name = "tool-" + name + "-arg-" + param_name;
1480
+
1481
+ auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
1482
+ auto arg_close = p.literal("</parameter>\n");
1483
+ auto arg_value = p.eps();
1484
+
1485
+ if (schema_info.resolves_to_string(param_schema)) {
1486
+ arg_value = p.tool_arg_string_value(arg_string) + "\n";
1487
+ } else {
1488
+ arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
1489
+ }
1490
+
1491
+ // Model may or may not close with </parameter>
1492
+ auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
1493
+ args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
1494
+ });
1495
+
1496
+ tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
1497
+ });
1498
+
1499
+ auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
1500
+ auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
1501
+ auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
1502
+ auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
1503
+
1504
+ return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
1505
+ }
1506
+
1507
+ // Content only parser
1508
+ include_grammar = false;
1509
+ return reasoning << p.content(p.rest());
1510
+ });
1511
+
1512
+ data.parser = parser.save();
1513
+
1514
+ if (include_grammar) {
1515
+ data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
1516
+
1517
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1518
+ foreach_function(inputs.tools, [&](const json & tool) {
1519
+ const auto & function = tool.at("function");
1520
+ auto schema = function.at("parameters");
1521
+ builder.resolve_refs(schema);
1522
+ });
1523
+ parser.build_grammar(builder, data.grammar_lazy);
1524
+ });
1525
+
1526
+ data.grammar_triggers = {
1527
+ {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
1528
+ };
1529
+ }
1530
+
1531
+ return data;
1532
+ }
1533
+
1534
+
1275
1535
  static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
1276
1536
  common_chat_params data;
1277
1537
 
@@ -2328,6 +2588,7 @@ static common_chat_params common_chat_templates_apply_jinja(
2328
2588
  params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
2329
2589
  params.add_generation_prompt = inputs.add_generation_prompt;
2330
2590
  params.tool_choice = inputs.tool_choice;
2591
+ params.reasoning_format = inputs.reasoning_format;
2331
2592
  params.enable_thinking = inputs.enable_thinking;
2332
2593
  params.grammar = inputs.grammar;
2333
2594
  params.now = inputs.now;
@@ -2396,6 +2657,10 @@ static common_chat_params common_chat_templates_apply_jinja(
2396
2657
  src.find("<function=") != std::string::npos &&
2397
2658
  src.find("<parameters>") != std::string::npos &&
2398
2659
  src.find("<parameter=") != std::string::npos) {
2660
+ // Nemotron 3 Nano 30B A3B
2661
+ if (src.find("<think>") != std::string::npos) {
2662
+ return common_chat_params_init_nemotron_v3(tmpl, params);
2663
+ }
2399
2664
  return common_chat_params_init_qwen3_coder_xml(tmpl, params);
2400
2665
  }
2401
2666
 
@@ -2491,6 +2756,13 @@ static common_chat_params common_chat_templates_apply_jinja(
2491
2756
  return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
2492
2757
  }
2493
2758
 
2759
+ // Ministral/Mistral Large 3
2760
+ if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
2761
+ src.find("[TOOL_CALLS]") != std::string::npos &&
2762
+ src.find("[ARGS]") != std::string::npos) {
2763
+ return common_chat_params_init_ministral_3(tmpl, params);
2764
+ }
2765
+
2494
2766
  if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
2495
2767
  return common_chat_params_init_magistral(tmpl, params);
2496
2768
  }