@fugood/llama.node 1.3.8 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.js +25 -18
- package/lib/binding.ts +19 -1
- package/lib/index.js +3 -3
- package/lib/index.ts +1 -1
- package/package.json +17 -17
- package/scripts/llama.cpp.patch +53 -4
- package/src/LlamaCompletionWorker.cpp +2 -2
- package/src/LlamaContext.cpp +6 -1
- package/src/llama.cpp/common/arg.cpp +1 -1
- package/src/llama.cpp/common/chat-parser.cpp +968 -0
- package/src/llama.cpp/common/chat.cpp +0 -952
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +336 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +11 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +234 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +6 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +48 -3
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-context.cpp +6 -2
- package/src/llama.cpp/src/llama-hparams.h +1 -1
- package/src/llama.cpp/src/llama-model.cpp +102 -5
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +13 -5
- package/src/llama.cpp/src/models/lfm2.cpp +5 -3
- package/src/llama.cpp/src/models/models.h +51 -1
- package/src/llama.cpp/src/models/qwen3next.cpp +1042 -0
|
@@ -13,6 +13,120 @@
|
|
|
13
13
|
|
|
14
14
|
using json = nlohmann::ordered_json;
|
|
15
15
|
|
|
16
|
+
// Searches the remaining input for `prefix`. When it is found, the parser is moved back by
// `rstrip_prefix` characters, the JSON that follows is consumed (with the "arguments" path
// passed to consume_json_with_dumped_args) and handed to add_tool_calls(). When the prefix
// is absent, everything left in the input is added as content.
//
// Throws common_chat_msg_partial_exception if the tool calls are rejected or the JSON is partial.
static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
                                                const common_regex & prefix,
                                                size_t rstrip_prefix = 0) {
    static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };

    if (!builder.try_find_regex(prefix)) {
        // No tool-call prefix anywhere: the remainder is plain content.
        builder.add_content(builder.consume_rest());
        return;
    }

    // Step back so the trailing characters of the prefix are parsed as part of the JSON.
    builder.move_back(rstrip_prefix);

    auto parsed = builder.consume_json_with_dumped_args(args_paths);
    const bool accepted = builder.add_tool_calls(parsed.value);
    if (!accepted || parsed.is_partial) {
        throw common_chat_msg_partial_exception("incomplete tool call array");
    }
}
|
|
30
|
+
|
|
31
|
+
// Serializes `code` as the JSON object {"code": ...} and returns the dumped text.
// While the builder is in partial mode, the healing marker is appended to the code before
// dumping, and the dumped text is truncated at the first occurrence of that marker.
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
    if (!builder.is_partial()) {
        return (json{
            { "code", code }
        }).dump();
    }

    std::string dumped = (json{
        { "code", code + builder.healing_marker() }
    }).dump();

    // Cut the output where the marker begins so the result ends with the partial code.
    const auto marker_at = dumped.find(builder.healing_marker());
    if (marker_at != std::string::npos) {
        dumped.resize(marker_at);
    }
    return dumped;
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
|
|
53
|
+
* Aggregates the prefix, suffix and in-between text into the content.
|
|
54
|
+
*/
|
|
55
|
+
static void parse_json_tool_calls(
|
|
56
|
+
common_chat_msg_parser & builder,
|
|
57
|
+
const std::optional<common_regex> & block_open,
|
|
58
|
+
const std::optional<common_regex> & function_regex_start_only,
|
|
59
|
+
const std::optional<common_regex> & function_regex,
|
|
60
|
+
const common_regex & close_regex,
|
|
61
|
+
const std::optional<common_regex> & block_close,
|
|
62
|
+
bool allow_raw_python = false,
|
|
63
|
+
const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
|
|
64
|
+
nullptr) {
|
|
65
|
+
auto parse_tool_calls = [&]() {
|
|
66
|
+
size_t from = std::string::npos;
|
|
67
|
+
auto first = true;
|
|
68
|
+
while (true) {
|
|
69
|
+
auto start_pos = builder.pos();
|
|
70
|
+
auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
|
|
71
|
+
function_regex ? builder.try_find_regex(*function_regex, from) :
|
|
72
|
+
std::nullopt;
|
|
73
|
+
|
|
74
|
+
if (res) {
|
|
75
|
+
std::string name;
|
|
76
|
+
if (get_function_name) {
|
|
77
|
+
name = get_function_name(*res);
|
|
78
|
+
} else {
|
|
79
|
+
GGML_ASSERT(res->groups.size() == 2);
|
|
80
|
+
name = builder.str(res->groups[1]);
|
|
81
|
+
}
|
|
82
|
+
first = false;
|
|
83
|
+
if (name.empty()) {
|
|
84
|
+
// get_function_name signalled us that we should skip this match and treat it as content.
|
|
85
|
+
from = res->groups[0].begin + 1;
|
|
86
|
+
continue;
|
|
87
|
+
}
|
|
88
|
+
from = std::string::npos;
|
|
89
|
+
|
|
90
|
+
auto maybe_raw_python = name == "python" && allow_raw_python;
|
|
91
|
+
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
|
|
92
|
+
if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
|
|
93
|
+
if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
|
|
94
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
95
|
+
}
|
|
96
|
+
builder.consume_regex(close_regex);
|
|
97
|
+
}
|
|
98
|
+
continue;
|
|
99
|
+
}
|
|
100
|
+
if (maybe_raw_python) {
|
|
101
|
+
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
|
|
102
|
+
if (!builder.add_tool_call(name, "", arguments)) {
|
|
103
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
104
|
+
}
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
108
|
+
} else {
|
|
109
|
+
builder.move_to(start_pos);
|
|
110
|
+
}
|
|
111
|
+
break;
|
|
112
|
+
}
|
|
113
|
+
if (block_close) {
|
|
114
|
+
builder.consume_regex(*block_close);
|
|
115
|
+
}
|
|
116
|
+
builder.consume_spaces();
|
|
117
|
+
builder.add_content(builder.consume_rest());
|
|
118
|
+
};
|
|
119
|
+
if (block_open) {
|
|
120
|
+
if (auto res = builder.try_find_regex(*block_open)) {
|
|
121
|
+
parse_tool_calls();
|
|
122
|
+
} else {
|
|
123
|
+
builder.add_content(builder.consume_rest());
|
|
124
|
+
}
|
|
125
|
+
} else {
|
|
126
|
+
parse_tool_calls();
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
16
130
|
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
|
|
17
131
|
: input_(input), is_partial_(is_partial), syntax_(syntax)
|
|
18
132
|
{
|
|
@@ -532,3 +646,857 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
|
|
|
532
646
|
void common_chat_msg_parser::clear_tools() {
|
|
533
647
|
result_.tool_calls.clear();
|
|
534
648
|
}
|
|
649
|
+
|
|
650
|
+
/**
|
|
651
|
+
* All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
|
|
652
|
+
* to reduce incremental compile time for parser changes.
|
|
653
|
+
*/
|
|
654
|
+
// Generic format: the whole message is one JSON document carrying either "tool_calls",
// "tool_call" or "response" at the top level. Checked in that order; the first key present wins.
// Throws common_chat_msg_partial_exception when the JSON is partial, when a tool call is
// rejected, or when none of the three keys is present.
static void common_chat_parse_generic(common_chat_msg_parser & builder) {
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const std::vector<std::vector<std::string>> content_paths = {
        {"response"},
    };
    static const std::vector<std::vector<std::string>> args_paths = {
        {"tool_call", "arguments"},
        {"tool_calls", "arguments"},
    };

    auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
    auto & payload = data.value;

    if (payload.contains("tool_calls")) {
        if (!builder.add_tool_calls(payload.at("tool_calls")) || data.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool calls");
        }
        return;
    }

    if (payload.contains("tool_call")) {
        if (!builder.add_tool_call(payload.at("tool_call")) || data.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
        return;
    }

    if (payload.contains("response")) {
        const auto & response = payload.at("response");
        // String responses are used verbatim; any other JSON value is pretty-printed.
        builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
        if (data.is_partial) {
            throw common_chat_msg_partial_exception("incomplete response");
        }
        return;
    }

    throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
}
|
|
685
|
+
|
|
686
|
+
// Mistral Nemo: tool calls are a JSON array introduced by the literal "[TOOL_CALLS]".
// With tool-call parsing disabled, the whole input is added as content.
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
    if (builder.syntax().parse_tool_calls) {
        static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
        parse_prefixed_json_tool_call_array(builder, prefix);
    } else {
        builder.add_content(builder.consume_rest());
    }
}
|
|
695
|
+
|
|
696
|
+
// Magistral: optional reasoning wrapped in [THINK]...[/THINK], then the same
// "[TOOL_CALLS]"-prefixed JSON array used by Mistral Nemo.
static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("[THINK]", "[/THINK]");

    if (builder.syntax().parse_tool_calls) {
        static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
        parse_prefixed_json_tool_call_array(builder, prefix);
    } else {
        builder.add_content(builder.consume_rest());
    }
}
|
|
707
|
+
|
|
708
|
+
// Command R7B: optional <|START_THINKING|>...<|END_THINKING|> reasoning, followed by either
// an action block (<|START_ACTION|> JSON <|END_ACTION|>) holding tool calls, a response block
// (<|START_RESPONSE|> ... <|END_RESPONSE|>), or plain content.
// Throws common_chat_msg_partial_exception for partial/rejected tool calls and for a response
// block that is missing its end marker.
static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");

    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");

    if (builder.try_find_regex(start_action_regex)) {
        // If we didn't extract thoughts, prelude includes them.
        auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
        for (const auto & tool_call : tool_calls.value) {
            // Missing fields default to the empty string.
            std::string name      = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
            std::string id        = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
            std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
        }
        if (tool_calls.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
        builder.consume_regex(end_action_regex);
        return;
    }

    if (builder.try_find_regex(start_response_regex)) {
        if (!builder.try_find_regex(end_response_regex)) {
            // The response is still open: keep what we have and report the missing end marker.
            builder.add_content(builder.consume_rest());
            throw common_chat_msg_partial_exception(end_response_regex.str());
        }
        return;
    }

    builder.add_content(builder.consume_rest());
}
|
|
740
|
+
|
|
741
|
+
// Llama 3.x: optional <think>...</think> reasoning, then tool calls.
//
// With `with_builtin_tools`, a "<|python_tag|>" marker introduces a builtin call written as
//   NAME.call(arg1=JSON, arg2=JSON, ...)
// which is converted into a JSON object of arguments. Otherwise (or when the marker is absent)
// the message is parsed as a JSON object of the form
//   {"type": "function", "name": NAME, "parameters": {...}}   ("type" is optional).
//
// Throws common_chat_msg_partial_exception when a tool call is rejected or incomplete.
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
    builder.try_parse_reasoning("<think>", "</think>");

    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex function_regex(
        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
    static const common_regex close_regex("\\}\\s*");

    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");

    if (with_builtin_tools) {
        static const common_regex builtin_call_regex("<\\|python_tag\\|>");
        if (builder.try_find_regex(builtin_call_regex)) {
            auto fun_res = builder.consume_regex(function_name_regex);
            auto function_name = builder.str(fun_res.groups[1]);

            // Collect `name=JSON` pairs separated by commas until no further argument name matches.
            // NOTE(review): the healing marker returned by consume_json() was previously copied into
            // a local that was never read; that dead local has been removed. Partial values are
            // stored as returned by consume_json().
            json args = json::object();
            while (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
                auto arg_name = builder.str(arg_res->groups[1]);
                auto partial = builder.consume_json();
                args[arg_name] = partial.json;
                builder.consume_spaces();
                if (!builder.try_consume_literal(",")) {
                    break;
                }
            }
            builder.consume_literal(")");
            builder.consume_spaces();

            auto arguments = args.dump();
            if (!builder.add_tool_call(function_name, "", arguments)) {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
            return;
        }
    }

    parse_json_tool_calls(
        builder,
        /* block_open= */ std::nullopt,
        /* function_regex_start_only= */ function_regex,
        /* function_regex= */ std::nullopt,
        close_regex,
        std::nullopt);
}
|
|
798
|
+
|
|
799
|
+
// DeepSeek R1: optional <think>...</think> reasoning, then a tool-call section of the form
//   <｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>NAME\n```json\nJSON\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>
//
// The delimiters inside DeepSeek's special tokens are the fullwidth vertical line U+FF5C ("｜"),
// not the ASCII pipe. An unescaped ASCII '|' in these patterns would be regex alternation
// (so "<|tool▁calls▁end|>" would match a lone "<"), hence the fullwidth character is used in
// every token below. The ASCII '|' characters that remain are intentional alternations between
// accepted spellings of the opening token.
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<think>", "</think>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?function<｜tool▁sep｜>([^\n]+)\n```json\n");
    static const common_regex close_regex("```[\\s\\r\\n]*<｜tool▁call▁end｜>");

    parse_json_tool_calls(
        builder,
        /* block_open= */ tool_calls_begin,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        tool_calls_end);
}
|
|
819
|
+
|
|
820
|
+
// Parses the non-reasoning part of a DeepSeek V3.1 message: either plain content (when tool-call
// parsing is disabled) or a tool-call section of the form
//   <｜tool▁calls▁begin｜><｜tool▁call▁begin｜>NAME<｜tool▁sep｜>JSON<｜tool▁call▁end｜><｜tool▁calls▁end｜>
//
// The delimiters inside DeepSeek's special tokens are the fullwidth vertical line U+FF5C ("｜").
// An unescaped ASCII '|' there would act as regex alternation and make the patterns match single
// characters such as "<", so the fullwidth character is used in every token below. The ASCII '|'
// characters that remain in tool_calls_begin are intentional alternations.
static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?([^\\n<]+)(?:<｜tool▁sep｜>)");

    static const common_regex close_regex("(?:[\\s]*)?<｜tool▁call▁end｜>");
    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");

    if (!builder.syntax().parse_tool_calls) {
        LOG_DBG("%s: not parse_tool_calls\n", __func__);
        builder.add_content(builder.consume_rest());
        return;
    }

    LOG_DBG("%s: parse_tool_calls\n", __func__);

    parse_json_tool_calls(
        builder,
        /* block_open= */ tool_calls_begin,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        tool_calls_end);
}
|
|
843
|
+
|
|
844
|
+
// DeepSeek V3.1: reasoning sits between "<think>" and "</think>" and is followed by regular
// content, which may contain tool calls. The cases are tried in this order:
//   1. thinking forced open, input complete, no "</think>" anywhere -> parse everything as content;
//   2. a reasoning block parses successfully                        -> parse the remainder as content;
//   3. reasoning format is NONE                                     -> parse everything as content;
//   4. thinking forced open                                         -> everything is reasoning;
//   5. otherwise                                                    -> parse everything as content.
static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());

    // Look ahead for the closing think tag, then rewind so parsing restarts from the same spot.
    auto rewind_pos = builder.pos();
    auto found_end_think = builder.try_find_literal("</think>");
    builder.move_to(rewind_pos);

    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
        return;
    }

    if (builder.try_parse_reasoning("<think>", "</think>")) {
        // Reasoning was extracted; what follows is regular content, possibly with tool calls.
        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
        return;
    }

    if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
        LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
        return;
    }

    if (builder.syntax().thinking_forced_open) {
        // No reasoning tags, but thinking was forced open: the whole input counts as reasoning.
        LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
        builder.add_reasoning_content(builder.consume_rest());
        return;
    }

    LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
    common_chat_parse_deepseek_v3_1_content(builder);
}
|
|
879
|
+
|
|
880
|
+
// MiniMax M2: <think>...</think> reasoning plus XML-style tool calls:
//   <minimax:tool_call><invoke name="NAME"><parameter name="KEY">VALUE</parameter>...</invoke></minimax:tool_call>
static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format fmt {};
        fmt.scope_start = "<minimax:tool_call>";
        fmt.tool_start  = "<invoke name=\"";
        fmt.tool_sep    = "\">";
        fmt.key_start   = "<parameter name=\"";
        fmt.key_val_sep = "\">";
        fmt.val_end     = "</parameter>";
        fmt.tool_end    = "</invoke>";
        fmt.scope_end   = "</minimax:tool_call>";
        return fmt;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
|
|
893
|
+
|
|
894
|
+
// Qwen3 Coder: XML-style tool calls, with raw argument values trimmed:
//   <tool_call><function=NAME><parameter=KEY>VALUE</parameter>...</function></tool_call>
static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format fmt {};
        fmt.scope_start     = "<tool_call>";
        fmt.tool_start      = "<function=";
        fmt.tool_sep        = ">";
        fmt.key_start       = "<parameter=";
        fmt.key_val_sep     = ">";
        fmt.val_end         = "</parameter>";
        fmt.tool_end        = "</function>";
        fmt.scope_end       = "</tool_call>";
        fmt.trim_raw_argval = true;
        return fmt;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form);
}
|
|
910
|
+
|
|
911
|
+
// Kimi K2: <think>...</think> reasoning plus tool calls whose arguments are a JSON object,
// described here through the generic XML tool-call format (non-raw argument values):
//   <|tool_calls_section_begin|><|tool_call_begin|>NAME<|tool_call_argument_begin|>{"KEY": VALUE, ...}<|tool_call_end|><|tool_calls_section_end|>
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format fmt {};
        fmt.scope_start  = "<|tool_calls_section_begin|>";
        fmt.tool_start   = "<|tool_call_begin|>";
        fmt.tool_sep     = "<|tool_call_argument_begin|>{";
        fmt.key_start    = "\"";
        fmt.key_val_sep  = "\": ";
        fmt.val_end      = ", ";
        fmt.tool_end     = "}<|tool_call_end|>";
        fmt.scope_end    = "<|tool_calls_section_end|>";
        fmt.raw_argval   = false;
        fmt.last_val_end = "";
        return fmt;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
|
|
928
|
+
|
|
929
|
+
// Apriel 1.5: <thinking>...</thinking> reasoning plus a JSON array of tool calls, described
// through the generic XML tool-call format (non-raw argument values):
//   <tool_calls>[{"name": "NAME", "arguments": {"KEY": VALUE, ...}}, ...]</tool_calls>
static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format fmt {};
        fmt.scope_start   = "<tool_calls>[";
        fmt.tool_start    = "{\"name\": \"";
        fmt.tool_sep      = "\", \"arguments\": {";
        fmt.key_start     = "\"";
        fmt.key_val_sep   = "\": ";
        fmt.val_end       = ", ";
        fmt.tool_end      = "}, ";
        fmt.scope_end     = "]</tool_calls>";
        fmt.raw_argval    = false;
        fmt.last_val_end  = "";
        fmt.last_tool_end = "}";
        return fmt;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
}
|
|
947
|
+
|
|
948
|
+
// Xiaomi MiMo: tool calls are JSON objects wrapped in <tool_call> tags with no enclosing scope,
// described through the generic XML tool-call format (non-raw argument values):
//   <tool_call>\n{"name": "NAME", "arguments": {"KEY": VALUE, ...}}\n</tool_call>
static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format fmt {};
        fmt.scope_start  = "";
        fmt.tool_start   = "<tool_call>\n{\"name\": \"";
        fmt.tool_sep     = "\", \"arguments\": {";
        fmt.key_start    = "\"";
        fmt.key_val_sep  = "\": ";
        fmt.val_end      = ", ";
        fmt.tool_end     = "}\n</tool_call>";
        fmt.scope_end    = "";
        fmt.raw_argval   = false;
        fmt.last_val_end = "";
        return fmt;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form);
}
|
|
965
|
+
|
|
966
|
+
// gpt-oss: the output is a sequence of messages, each made of a header, "<|message|>", a body
// and (usually) "<|end|>". The header decides how the body is handled:
//   - a header naming a recipient " to=functions.NAME" on the analysis/commentary channel
//     -> the body is the JSON arguments of a tool call to NAME;
//   - "<|channel|>analysis"                        -> reasoning (or content, depending on syntax);
//   - "<|channel|>final" / "<|channel|>commentary" -> content;
//   - anything else                                -> rewind and look for the next "<|start|>assistant".
// Throws common_chat_msg_partial_exception on an incomplete header or tool call.
static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
    static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
    static const std::string recipient("(?: to=functions\\.([^<\\s]+))");

    static const common_regex start_regex("<\\|start\\|>assistant");
    static const common_regex analysis_regex("<\\|channel\\|>analysis");
    static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
    static const common_regex preamble_regex("<\\|channel\\|>commentary");
    static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
    static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");

    // Returns the text up to the next "<|end|>" (optionally including the marker itself),
    // or everything that remains when there is no such marker.
    auto consume_end = [&](bool include_end = false) -> std::string {
        auto end_match = builder.try_find_literal("<|end|>");
        if (!end_match) {
            return builder.consume_rest();
        }
        return end_match->prelude + (include_end ? builder.str(end_match->groups[0]) : "");
    };

    // Consumes the JSON arguments of a call to `name`. The call is recorded only when tool-call
    // parsing is enabled, but partial arguments are an error either way.
    auto handle_tool_call = [&](const std::string & name) {
        auto args = builder.try_consume_json_with_dumped_args({{}});
        if (!args) {
            return;
        }
        const bool accepted = !builder.syntax().parse_tool_calls || builder.add_tool_call(name, "", args->value);
        if (!accepted || args->is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
    };

    // Searches `input` with `regex` and yields the match only when it is a full match.
    auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
        auto found = regex.search(input, 0, true);
        if (found.type != COMMON_REGEX_MATCH_TYPE_FULL) {
            return std::nullopt;
        }
        return found;
    };

    // When `header` matches `regex`, treats capture group `name_group` as the function name,
    // handles the tool call and returns true; otherwise returns false without side effects.
    auto try_tool_call = [&](const common_regex & regex, size_t name_group, const std::string & header) {
        auto match = regex_match(regex, header);
        if (!match) {
            return false;
        }
        auto group = match->groups[name_group];
        handle_tool_call(header.substr(group.begin, group.end - group.begin));
        return true;
    };

    do {
        auto header_start_pos = builder.pos();
        auto content_start = builder.try_find_literal("<|message|>");
        if (!content_start) {
            throw common_chat_msg_partial_exception("incomplete header");
        }

        auto header = content_start->prelude;

        // The recipient may come before or after the channel; the name is group 1 or 2 accordingly.
        if (try_tool_call(tool_call1_regex, 1, header) || try_tool_call(tool_call2_regex, 2, header)) {
            continue;
        }

        if (regex_match(analysis_regex, header)) {
            // Re-read the message from its header so the tags are seen by whichever path handles it.
            builder.move_to(header_start_pos);
            if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
                builder.add_content(consume_end(true));
            } else {
                builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
            }
            continue;
        }

        if (regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
            builder.add_content(consume_end());
            continue;
        }

        // Possibly a malformed message, attempt to recover by rolling
        // back to pick up the next <|start|>
        LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
        builder.move_to(header_start_pos);
    } while (builder.try_find_regex(start_regex, std::string::npos, false));

    // Anything left after the last message is only logged.
    auto remaining = builder.consume_rest();
    if (!remaining.empty()) {
        LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
    }
}
|
|
1053
|
+
|
|
1054
|
+
// GLM 4.5: <think>...</think> reasoning plus XML-style tool calls with no enclosing scope:
//   <tool_call>NAME<arg_key>KEY</arg_key><arg_value>VALUE</arg_value>...</tool_call>
static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
    // Fields are given positionally; the trailing comment names the field each value initializes.
    static const xml_tool_call_format form {
        "",               // scope_start
        "<tool_call>",    // tool_start
        "",               // tool_sep
        "<arg_key>",      // key_start
        "</arg_key>",     // key_val_sep
        "</arg_value>",   // val_end
        "</tool_call>",   // tool_end
        "",               // scope_end
        "<arg_value>",    // key_val_sep2
    };
    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
|
|
1068
|
+
|
|
1069
|
+
// FireFunction v2: tool calls are a JSON array introduced by " functools[". The parser is moved
// back by one character after the prefix so that the '[' is parsed as the start of the array.
static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
    if (builder.syntax().parse_tool_calls) {
        static const common_regex prefix(regex_escape(" functools["));
        parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
    } else {
        builder.add_content(builder.consume_rest());
    }
}
|
|
1077
|
+
|
|
1078
|
+
// Functionary v3.2: calls look like "NAME\n{...}" at the very start of the message and
// ">>>NAME\n{...}" afterwards; "python\n" may be followed by raw code instead of JSON, and a
// leading "all\n" is not a tool call.
static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
    static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
    static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
    static const common_regex close_regex(R"(\s*)");

    // Derives the function name from capture group 1, which still carries the trailing
    // newline and (for JSON calls) the opening brace. Returns "" to tell the caller to skip
    // the match.
    const auto extract_name = [&](const auto & res) -> std::string {
        const bool at_start = res.groups[0].begin == 0;
        auto name = builder.str(res.groups[1]);
        if (!name.empty() && name.back() == '{') {
            // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
            builder.move_back(1);
        }
        // Strip the trailing "\n" / "{" characters that the regex captured with the name.
        name.erase(name.find_last_not_of("\n{") + 1);
        if (at_start && name == "all") {
            return "";
        }
        return name;
    };

    parse_json_tool_calls(
        builder,
        std::nullopt,
        function_regex_start_only,
        function_regex,
        close_regex,
        std::nullopt,
        /* allow_raw_python= */ true,
        /* get_function_name= */ extract_name);
}
|
|
1106
|
+
|
|
1107
|
+
// Functionary v3.1 (Llama 3.1 flavour): tool calls are written as <function=NAME>JSON</function>.
// A "<|python_tag|>" marker followed by raw code is also handled as a call to the "python" tool.
static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
    static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));

    static const common_regex function_regex(R"(<function=(\w+)>)");
    static const common_regex close_regex(R"(</function>)");

    parse_json_tool_calls(
        builder,
        /* block_open= */ std::nullopt,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        std::nullopt);

    // NOTE(review): parse_json_tool_calls() appears to consume the remaining input as content
    // before returning, so this search may never find the tag - confirm the intended order.
    if (builder.try_find_regex(python_tag_regex)) {
        auto code_arguments = wrap_code_as_arguments(builder, builder.consume_rest());
        builder.add_tool_call("python", "", code_arguments);
    }
}
|
|
1132
|
+
|
|
1133
|
+
// Hermes 2 Pro: optional <think>...</think> reasoning, then any number of tool calls in one
// of two shapes, each optionally wrapped in a ``` / ```xml / ```json code fence:
//   1. a JSON object starting with {"name" ..., optionally inside an XML-like tag such as
//      <tool_call>, <function_call>, <tool>, ... (closed by the matching </tag>);
//   2. <function=NAME>JSON</function> or <function name="NAME">JSON</function>.
// Whatever follows the last tool call is added as content.
// Throws common_chat_msg_partial_exception on incomplete, rejected or unparsable tool calls.
static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<think>", "</think>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex open_regex(
        "(?:"
            "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
            "("                          // match 2 (open_tag)
                "<tool_call>"
                "|<function_call>"
                "|<tool>"
                "|<tools>"
                "|<response>"
                "|<json>"
                "|<xml>"
                "|<JSON>"
            ")?"
            "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
        ")"
        "|<function=([^>]+)>"            // match 4 (function name)
        "|<function name=\"([^\"]+)\">"  // match 5 (function name again)
    );

    while (auto res = builder.try_find_regex(open_regex)) {
        // A code fence was opened before the call, so one is expected to close after it.
        const bool fenced = !res->groups[1].empty();
        const std::string block_end = fenced ? "```" : "";

        // Consumes the closing tag and, when the call was fenced, the closing fence,
        // skipping whitespace around each.
        auto consume_closers = [&](const std::string & close_tag) {
            builder.consume_spaces();
            builder.consume_literal(close_tag);
            builder.consume_spaces();
            if (!block_end.empty()) {
                builder.consume_literal(block_end);
                builder.consume_spaces();
            }
        };

        if (!res->groups[3].empty()) {
            // Shape 1: rewind to the start of the JSON object so it can be parsed whole.
            builder.move_to(res->groups[3].begin);

            const auto & open_tag = res->groups[2];
            // "<tag>" -> "</tag>"; no closing tag when the JSON was not wrapped in one.
            const std::string close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);

            auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}});
            if (!tool_call) {
                throw common_chat_msg_partial_exception("failed to parse tool call");
            }
            if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
            consume_closers(close_tag);
            continue;
        }

        // Shape 2: the function name comes from group 4 or, failing that, group 5.
        auto function_name = builder.str(res->groups[4]);
        if (function_name.empty()) {
            function_name = builder.str(res->groups[5]);
        }
        GGML_ASSERT(!function_name.empty());

        if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
            if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
            consume_closers("</function>");
        }
    }

    builder.add_content(builder.consume_rest());
}
|
|
1210
|
+
|
|
1211
|
+
// Parser for Granite output: optional <think> reasoning, optional <response> wrapper,
// then either a "<|tool_call|>" marker followed by JSON tool calls, or plain content.
//
// Throws common_chat_msg_partial_exception when the builder rejects the tool calls or the
// consumed JSON is flagged as partial.
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
    // --- reasoning ---
    static const common_regex start_think_regex(regex_escape("<think>"));
    static const common_regex end_think_regex(regex_escape("</think>"));
    // Granite emits partial tokens such as "<" and "<think". try_consume_regex() and
    // try_find_regex() throw common_chat_msg_partial_exception on such partial tokens,
    // which interrupts processing before they could reach add_content().
    if (auto think_open = builder.try_consume_regex(start_think_regex)) {
        const auto think_begin = think_open->groups[0].begin;
        // Rewind before and after probing for the end tag so that
        // try_parse_reasoning() below starts at the opening tag.
        builder.move_to(think_begin);
        builder.try_find_regex(end_think_regex, std::string::npos, false);
        builder.move_to(think_begin);
    }
    builder.try_parse_reasoning("<think>", "</think>");

    // --- response wrapper ---
    static const common_regex start_response_regex(regex_escape("<response>"));
    static const common_regex end_response_regex(regex_escape("</response>"));
    // Partial tokens such as "<" and "<response" are handled with the same trick as above.
    if (builder.try_consume_regex(start_response_regex)) {
        builder.try_find_regex(end_response_regex);
    }

    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // --- tool calls ---
    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
    auto marker = builder.try_find_regex(tool_call_regex);
    if (!marker) {
        // No tool-call marker: the remainder is ordinary content.
        builder.add_content(builder.consume_rest());
        return;
    }
    builder.move_to(marker->groups[0].end);

    // A JSON array of tool calls is expected right after the marker.
    if (auto calls = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
        if (!builder.add_tool_calls(calls->value) || calls->is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
    }
}
|
|
1257
|
+
|
|
1258
|
+
// Parser for Nemotron v2 output: optional <think> reasoning followed by content and,
// optionally, one <TOOLCALL>[ ...json array... ]</TOOLCALL> section.
//
// Throws common_chat_msg_partial_exception("Incomplete tool call") when the JSON after
// <TOOLCALL> is not an array or is not immediately followed by </TOOLCALL>.
static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
    // Reasoning section
    builder.try_parse_reasoning("<think>", "</think>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // Tool-call section
    static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
    if (auto opener = builder.try_find_regex(tool_call_regex)) {
        builder.move_to(opener->groups[0].end);

        // The payload must be a JSON array ...
        auto payload = builder.consume_json();
        if (!payload.json.is_array()) {
            throw common_chat_msg_partial_exception("Incomplete tool call");
        }
        // ... terminated by the closing tag.
        if (!builder.try_consume_literal("</TOOLCALL>")) {
            throw common_chat_msg_partial_exception("Incomplete tool call");
        }
        builder.add_tool_calls(payload.json);
    }

    // Anything left over is content.
    builder.add_content(builder.consume_rest());
}
|
|
1284
|
+
|
|
1285
|
+
// Parser for Apertus output: reasoning delimited by <|inner_prefix|> / <|inner_suffix|>,
// followed by content and, optionally, a <|tools_prefix|>[ ... ]<|tools_suffix|> section
// whose array elements are handed to add_tool_call_short_form().
//
// Throws common_chat_msg_partial_exception("Incomplete tool call") when the JSON after the
// prefix is not an array or the suffix marker does not follow it.
static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
    // Reasoning section
    builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // Tool-call section
    static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
    if (auto opener = builder.try_find_regex(tool_call_regex)) {
        builder.move_to(opener->groups[0].end);

        auto payload = builder.consume_json();
        if (!payload.json.is_array()) {
            throw common_chat_msg_partial_exception("Incomplete tool call");
        }

        builder.consume_spaces();
        if (!builder.try_consume_literal("<|tools_suffix|>")) {
            throw common_chat_msg_partial_exception("Incomplete tool call");
        }

        // Non-object array elements are skipped.
        for (const auto & entry : payload.json) {
            if (!entry.is_object()) {
                continue;
            }
            builder.add_tool_call_short_form(entry);
        }
    }

    // Anything left over is content.
    builder.add_content(builder.consume_rest());
}
|
|
1315
|
+
|
|
1316
|
+
|
|
1317
|
+
// Parser for LFM2 output with JSON tool calls.
//
// Expected tool-call format (may occur several times in one message):
//   <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
//
// Text found before each start marker is added as content by try_find_regex() (it is
// called with add_prelude_to_content = true). Text after the last tool call is added as
// content only when it is not whitespace-only.
//
// Every malformed tool-call section is reported as common_chat_msg_partial_exception:
// missing end marker, payload that is not a JSON array, array element that is not an
// object, missing "name", "name" that is not a string, or a tool call rejected by the builder.
static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));

    // Loop through all tool-call sections
    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
        builder.move_to(res->groups[0].end);

        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
        auto tool_calls_data = builder.consume_json();

        // Consume end marker
        builder.consume_spaces();
        if (!builder.try_consume_regex(tool_call_end_regex)) {
            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
        }

        if (!tool_calls_data.json.is_array()) {
            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
        }

        // Process each tool call in the array
        for (const auto & tool_call : tool_calls_data.json) {
            if (!tool_call.is_object()) {
                throw common_chat_msg_partial_exception("Tool call must be an object");
            }
            if (!tool_call.contains("name")) {
                throw common_chat_msg_partial_exception("Tool call missing 'name' field");
            }

            // Converting a non-string JSON value to std::string throws a JSON type error,
            // which the public common_chat_parse() does not catch. Report it through the
            // same exception type as every other malformed tool call instead.
            const auto & name_field = tool_call.at("name");
            if (!name_field.is_string()) {
                throw common_chat_msg_partial_exception("Tool call 'name' must be a string");
            }
            const std::string function_name = name_field.get<std::string>();

            // "arguments" may be an object (serialised here) or an already-serialised
            // string; when absent or of any other type, an empty object is used.
            std::string arguments = "{}";
            if (tool_call.contains("arguments")) {
                const auto & args_field = tool_call.at("arguments");
                if (args_field.is_object()) {
                    arguments = args_field.dump();
                } else if (args_field.is_string()) {
                    arguments = args_field.get<std::string>();
                }
            }

            if (!builder.add_tool_call(function_name, "", arguments)) {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
        }

        // Consume any trailing whitespace after this tool-call section
        builder.consume_spaces();
    }

    // Keep what follows the last tool call, unless it is only whitespace
    auto remaining = builder.consume_rest();
    if (!string_strip(remaining).empty()) {
        builder.add_content(remaining);
    }
}
|
|
1380
|
+
|
|
1381
|
+
// Parser for Seed-OSS output. The XML-style tool-call delimiters below are passed,
// together with the <seed:think> reasoning tags, to
// consume_reasoning_with_xml_tool_calls(), which does the actual parsing.
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
    static const xml_tool_call_format seed_oss_format {
        /* scope_start = */ "<seed:tool_call>",
        /* tool_start  = */ "<function=",
        /* tool_sep    = */ ">",
        /* key_start   = */ "<parameter=",
        /* key_val_sep = */ ">",
        /* val_end     = */ "</parameter>",
        /* tool_end    = */ "</function>",
        /* scope_end   = */ "</seed:tool_call>",
    };

    builder.consume_reasoning_with_xml_tool_calls(seed_oss_format, "<seed:think>", "</seed:think>");
}
|
|
1394
|
+
|
|
1395
|
+
// Parser for formats without tool calls: the "<think>" / "</think>" pair is offered to
// try_parse_reasoning(), then everything that remains is added as content.
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<think>", "</think>");

    const auto remainder = builder.consume_rest();
    builder.add_content(remainder);
}
|
|
1399
|
+
|
|
1400
|
+
// Dispatches to the format-specific parser selected by builder.syntax().format and then
// calls builder.finish().
//
// Throws std::runtime_error("Unsupported format: <name>") for a format without a parser.
// Exceptions raised by the individual parsers propagate unchanged.
static void common_chat_parse(common_chat_msg_parser & builder) {
    const auto format = builder.syntax().format;

    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(format), builder.input().c_str());

    switch (format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY:                  common_chat_parse_content_only(builder);                              break;
        case COMMON_CHAT_FORMAT_GENERIC:                       common_chat_parse_generic(builder);                                   break;
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:                  common_chat_parse_mistral_nemo(builder);                              break;
        case COMMON_CHAT_FORMAT_MAGISTRAL:                     common_chat_parse_magistral(builder);                                 break;
        case COMMON_CHAT_FORMAT_LLAMA_3_X:                     common_chat_parse_llama_3_1(builder);                                 break;
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:  common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true); break;
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:                   common_chat_parse_deepseek_r1(builder);                               break;
        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:                 common_chat_parse_deepseek_v3_1(builder);                             break;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:              common_chat_parse_functionary_v3_2(builder);                          break;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:    common_chat_parse_functionary_v3_1_llama_3_1(builder);                break;
        case COMMON_CHAT_FORMAT_HERMES_2_PRO:                  common_chat_parse_hermes_2_pro(builder);                              break;
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:               common_chat_parse_firefunction_v2(builder);                           break;
        case COMMON_CHAT_FORMAT_COMMAND_R7B:                   common_chat_parse_command_r7b(builder);                               break;
        case COMMON_CHAT_FORMAT_GRANITE:                       common_chat_parse_granite(builder);                                   break;
        case COMMON_CHAT_FORMAT_GPT_OSS:                       common_chat_parse_gpt_oss(builder);                                   break;
        case COMMON_CHAT_FORMAT_SEED_OSS:                      common_chat_parse_seed_oss(builder);                                  break;
        case COMMON_CHAT_FORMAT_NEMOTRON_V2:                   common_chat_parse_nemotron_v2(builder);                               break;
        case COMMON_CHAT_FORMAT_APERTUS:                       common_chat_parse_apertus(builder);                                   break;
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:          common_chat_parse_lfm2(builder);                                      break;
        case COMMON_CHAT_FORMAT_MINIMAX_M2:                    common_chat_parse_minimax_m2(builder);                                break;
        case COMMON_CHAT_FORMAT_GLM_4_5:                       common_chat_parse_glm_4_5(builder);                                   break;
        case COMMON_CHAT_FORMAT_KIMI_K2:                       common_chat_parse_kimi_k2(builder);                                   break;
        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:               common_chat_parse_qwen3_coder_xml(builder);                           break;
        case COMMON_CHAT_FORMAT_APRIEL_1_5:                    common_chat_parse_apriel_1_5(builder);                                break;
        case COMMON_CHAT_FORMAT_XIAOMI_MIMO:                   common_chat_parse_xiaomi_mimo(builder);                               break;
        default:
            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(format));
    }

    builder.finish();
}
|
|
1484
|
+
|
|
1485
|
+
// Public entry point: parses `input` according to `syntax` and returns the resulting message.
//
// A common_chat_msg_partial_exception raised during parsing is logged and swallowed. When
// `is_partial` is false, the recorded tool calls are additionally cleared and the input is
// re-read from offset 0 with the content-only parser. Any other exception propagates to
// the caller.
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
    common_chat_msg_parser builder(input, is_partial, syntax);

    try {
        common_chat_parse(builder);
    } catch (const common_chat_msg_partial_exception & partial_err) {
        LOG_DBG("Partial parse: %s\n", partial_err.what());
        if (!is_partial) {
            // Complete input that still failed to parse: fall back to content-only parsing.
            builder.clear_tools();
            builder.move_to(0);
            common_chat_parse_content_only(builder);
        }
    }

    auto parsed = builder.result();
    if (!is_partial) {
        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({parsed}).at(0).dump().c_str());
    }
    return parsed;
}
|