@fugood/llama.node 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.js +1 -1
- package/lib/binding.ts +40 -14
- package/lib/index.js +4 -1
- package/lib/index.ts +13 -9
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +10 -10
- package/src/LlamaContext.cpp +36 -0
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/src/llama.cpp/common/chat-parser.h +10 -0
- package/src/llama.cpp/common/chat.cpp +461 -87
- package/src/llama.cpp/common/chat.h +6 -0
- package/src/llama.cpp/common/common.cpp +8 -1
- package/src/llama.cpp/common/common.h +12 -5
- package/src/llama.cpp/common/json-partial.cpp +19 -2
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
- package/src/llama.cpp/common/sampling.cpp +60 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
- package/src/llama.cpp/src/llama-grammar.cpp +17 -9
- package/src/llama.cpp/src/llama-impl.cpp +3 -3
- package/src/llama.cpp/src/llama-sampling.cpp +3 -6
- package/src/llama.cpp/src/llama-vocab.cpp +1 -0
|
@@ -0,0 +1,861 @@
|
|
|
1
|
+
#include "chat.h"
|
|
2
|
+
#include "chat-parser.h"
|
|
3
|
+
#include "common.h"
|
|
4
|
+
#include "json-partial.h"
|
|
5
|
+
#include "json-schema-to-grammar.h"
|
|
6
|
+
#include "log.h"
|
|
7
|
+
#include "regex-partial.h"
|
|
8
|
+
|
|
9
|
+
using json = nlohmann::ordered_json;
|
|
10
|
+
|
|
11
|
+
// Error type raised by the XML-style tool call parser in this file when it hits
// invalid syntax and can no longer restore the parser to its starting position.
class xml_toolcall_syntax_exception : public std::runtime_error {
  public:
    // `message` becomes the text reported by what().
    xml_toolcall_syntax_exception(const std::string & message)
        : std::runtime_error(message) {
    }
};
|
|
15
|
+
|
|
16
|
+
// Sorts `vec` in ascending order (operator<) and removes duplicate elements in place.
template<typename T>
inline void sort_uniq(std::vector<T> &vec) {
    std::sort(vec.begin(), vec.end());
    // After sorting, equal elements are adjacent, so std::unique can compact them.
    const auto first_duplicate = std::unique(vec.begin(), vec.end());
    vec.erase(first_duplicate, vec.end());
}
|
|
21
|
+
|
|
22
|
+
// Returns true when every character of `str` is whitespace according to std::isspace.
// An empty range yields true.
template<typename T>
inline bool all_space(const T &str) {
    // Convert to unsigned char first: std::isspace is undefined for negative char values.
    for (const unsigned char ch : str) {
        if (std::isspace(ch) == 0) {
            return false;
        }
    }
    return true;
}
|
|
26
|
+
|
|
27
|
+
// Returns the length to which `s` can be cut so that it does not end in the middle of a
// multi-byte UTF-8 sequence. Only the last (up to) four bytes are inspected:
//   - trailing ASCII byte, or a lead byte with enough bytes after it -> s.size()
//   - lead byte whose sequence is incomplete, or an invalid lead byte -> index of that byte
//   - no lead/ASCII byte found within the inspected bytes           -> s.size() minus up to 3
static size_t utf8_truncate_safe(const std::string_view s) {
    const size_t total = s.size();
    size_t idx = total;
    for (size_t steps = 0; steps < 4 && idx > 0; ++steps) {
        const unsigned char byte = static_cast<unsigned char>(s[--idx]);
        if (byte < 0x80) {
            // 0xxxxxxx: single-byte character, nothing to cut.
            return total;
        }
        if (byte < 0xC0) {
            // 10xxxxxx: continuation byte, keep walking backwards.
            continue;
        }
        // 11xxxxxx: lead byte; derive the sequence length it announces.
        size_t needed = 0;
        if (byte < 0xE0) {
            needed = 2;
        } else if (byte < 0xF0) {
            needed = 3;
        } else if (byte < 0xF8) {
            needed = 4;
        } else {
            // 11111xxx is not a valid lead byte: cut in front of it.
            return idx;
        }
        return (total - idx >= needed) ? total : idx;
    }
    return total - std::min(total, size_t(3));
}
|
|
51
|
+
|
|
52
|
+
// Shrinks `s` in place to the length reported by utf8_truncate_safe(), dropping a
// trailing incomplete UTF-8 sequence if there is one.
inline void utf8_truncate_safe_resize(std::string &s) {
    const size_t safe_len = utf8_truncate_safe(s);
    s.resize(safe_len);
}
|
|
55
|
+
|
|
56
|
+
inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
|
|
57
|
+
return s.substr(0, utf8_truncate_safe(s));
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Searches forward from the current parser position for `literal1`, optionally followed by
// whitespace, followed by `literal2` (or by as much of `literal2` as the remaining input holds).
//
// On success the returned result has its prelude measured from the position at entry, its
// groups[0] spanning from the start of `literal1` to the end of the matched part of
// `literal2`, and the parser is left at that end. On failure the parser position is
// restored and std::nullopt is returned. An empty `literal1` degrades to a plain search
// for `literal2`.
static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
    if (literal1.empty()) {
        return builder.try_find_literal(literal2);
    }
    const auto origin = builder.pos();
    while (auto hit = builder.try_find_literal(literal1)) {
        auto & whole = hit->groups[0];
        builder.consume_spaces();
        // Compare only what is available: a truncated input may hold just a prefix of literal2.
        const auto remaining = builder.input().size() - builder.pos();
        const auto cmp_len = std::min(literal2.size(), remaining);
        const bool second_matches = builder.input().compare(builder.pos(), cmp_len, literal2, 0, cmp_len) == 0;
        if (!second_matches) {
            // Retry one character past the start of this occurrence of literal1.
            builder.move_to(whole.begin + 1);
            continue;
        }
        // After a retry the prelude is relative to the retry position; rebuild it from the origin.
        if (hit->prelude.size() != whole.begin - origin) {
            hit->prelude = builder.str({origin, whole.begin});
        }
        builder.move_to(builder.pos() + cmp_len);
        whole.end = builder.pos();
        GGML_ASSERT(whole.begin != whole.end);
        return hit;
    }
    builder.move_to(origin);
    return std::nullopt;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Make a GBNF expression that accepts any string except those containing any of the forbidden strings.
 *
 * The forbidden strings are sorted and walked like a trie: at every node the expression allows
 * either a byte that does not continue any forbidden string, or a byte that does, followed by
 * the expression of the corresponding child node. A node at which a forbidden string ends
 * produces no expression at all, so the byte leading to it is never allowed to complete the match.
 *
 * @param forbids  strings that must not be completed; taken by value because it is sorted locally.
 * @return         a GBNF expression of the form "( ... )*"; "( . )*" when `forbids` is empty.
 */
std::string make_gbnf_excluding(std::vector<std::string> forbids) {
    // Render one byte for use inside a GBNF character class ("[...]").
    const auto charclass_escape = [](unsigned char c) -> std::string {
        if (c == '\\' || c == ']' || c == '^' || c == '-') {
            return std::string("\\") + static_cast<char>(c);
        }
        if (std::isprint(c)) {
            return std::string(1, static_cast<char>(c));
        }
        char buf[16];
        snprintf(buf, sizeof(buf), "\\x%02X", c);
        return std::string(buf);
    };
    // Render one byte as a quoted GBNF literal.
    const auto quote_char = [](unsigned char c) -> std::string {
        std::string quoted = "\"";
        if (c == '\\') {
            quoted += "\\\\";
        } else if (c == '"') {
            quoted += "\\\"";
        } else if (std::isprint(c)) {
            quoted.push_back(static_cast<char>(c));
        } else {
            char buf[16];
            snprintf(buf, sizeof(buf), "\\x%02X", c);
            quoted += buf;
        }
        quoted += "\"";
        return quoted;
    };
    // Build the expression for the trie node shared by sorted[l, r), all of which agree on
    // their first `depth` bytes. Returns "" when nothing may follow at this node.
    const auto build_expr = [charclass_escape, quote_char](auto self, const std::vector<std::string> & sorted, int l, int r, int depth) -> std::string {
        std::vector<std::pair<unsigned char, std::pair<int, int>>> children;
        int i = l;
        while (i < r) {
            const std::string & s = sorted[i];
            if ((int) s.size() == depth) {
                // A forbidden string ends exactly here, so this prefix is itself forbidden.
                // Longer strings sharing the prefix must not reopen the node, otherwise
                // e.g. {"ab", "abc"} would accept "abx".
                return "";
            }
            const unsigned char c = (unsigned char) s[depth];
            int j = i;
            while (j < r && (int) sorted[j].size() > depth && (unsigned char) sorted[j][depth] == c) {
                ++j;
            }
            children.push_back({c, {i, j}});
            i = j;
        }
        std::vector<std::string> alts;
        if (!children.empty()) {
            // Any byte that does not continue a forbidden string is always fine.
            std::string cls;
            for (const auto & ch : children) {
                cls += charclass_escape(ch.first);
            }
            alts.push_back(std::string("[^") + cls + "]");
        }
        for (const auto & ch : children) {
            const std::string child_expr = self(self, sorted, ch.second.first, ch.second.second, depth + 1);
            if (!child_expr.empty()) {
                alts.push_back(quote_char(ch.first) + std::string(" ") + child_expr);
            }
        }
        if (alts.empty()) {
            return "";
        }
        std::ostringstream oss;
        oss << "( ";
        for (size_t k = 0; k < alts.size(); ++k) {
            if (k) {
                oss << " | ";
            }
            oss << alts[k];
        }
        oss << " )";
        return oss.str();
    };
    if (forbids.empty()) {
        return "( . )*";
    }
    std::sort(forbids.begin(), forbids.end());
    std::string expr = build_expr(build_expr, forbids, 0, static_cast<int>(forbids.size()), 0);
    if (expr.empty()) {
        // Only reached when the root node yields nothing (an empty string is among the
        // inputs); fall back to excluding the first byte of every non-empty string.
        std::string cls;
        for (const auto & s : forbids) {
            if (!s.empty()) {
                cls += charclass_escape((unsigned char) s[0]);
            }
        }
        expr = std::string("( [^") + cls + "] )";
    }
    if (forbids.size() == 1) {
        return expr + "*";
    }
    return std::string("( ") + expr + " )*";
}
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Build grammar for xml-style tool call
|
|
165
|
+
* form.scope_start and form.scope_end can be empty.
|
|
166
|
+
* Requires data.format for model-specific hacks.
|
|
167
|
+
*/
|
|
168
|
+
void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
|
|
169
|
+
GGML_ASSERT(!form.tool_start.empty());
|
|
170
|
+
GGML_ASSERT(!form.tool_sep.empty());
|
|
171
|
+
GGML_ASSERT(!form.key_start.empty());
|
|
172
|
+
GGML_ASSERT(!form.val_end.empty());
|
|
173
|
+
GGML_ASSERT(!form.tool_end.empty());
|
|
174
|
+
|
|
175
|
+
std::string key_val_sep = form.key_val_sep;
|
|
176
|
+
if (form.key_val_sep2) {
|
|
177
|
+
key_val_sep += "\n";
|
|
178
|
+
key_val_sep += *form.key_val_sep2;
|
|
179
|
+
}
|
|
180
|
+
GGML_ASSERT(!key_val_sep.empty());
|
|
181
|
+
|
|
182
|
+
if (tools.is_array() && !tools.empty()) {
|
|
183
|
+
data.grammar = build_grammar([&](const common_grammar_builder &builder) {
|
|
184
|
+
auto string_arg_val = form.last_val_end ?
|
|
185
|
+
builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
|
|
186
|
+
builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
|
|
187
|
+
|
|
188
|
+
std::vector<std::string> tool_rules;
|
|
189
|
+
for (const auto & tool : tools) {
|
|
190
|
+
if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
|
|
191
|
+
LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
|
|
192
|
+
continue;
|
|
193
|
+
}
|
|
194
|
+
const auto & function = tool.at("function");
|
|
195
|
+
if (!function.contains("name") || !function.at("name").is_string()) {
|
|
196
|
+
LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
|
|
197
|
+
continue;
|
|
198
|
+
}
|
|
199
|
+
if (!function.contains("parameters") || !function.at("parameters").is_object()) {
|
|
200
|
+
LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
|
|
201
|
+
continue;
|
|
202
|
+
}
|
|
203
|
+
std::string name = function.at("name");
|
|
204
|
+
auto parameters = function.at("parameters");
|
|
205
|
+
builder.resolve_refs(parameters);
|
|
206
|
+
|
|
207
|
+
struct parameter_rule {
|
|
208
|
+
std::string symbol_name;
|
|
209
|
+
bool is_required;
|
|
210
|
+
};
|
|
211
|
+
std::vector<parameter_rule> arg_rules;
|
|
212
|
+
if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
|
|
213
|
+
LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
|
|
214
|
+
continue;
|
|
215
|
+
} else {
|
|
216
|
+
std::vector<std::string> requiredParameters;
|
|
217
|
+
if (parameters.contains("required")) {
|
|
218
|
+
try { parameters.at("required").get_to(requiredParameters); }
|
|
219
|
+
catch (const std::runtime_error&) {
|
|
220
|
+
LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
sort_uniq(requiredParameters);
|
|
224
|
+
for (const auto & [key, value] : parameters.at("properties").items()) {
|
|
225
|
+
std::string quoted_key = key;
|
|
226
|
+
bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
|
|
227
|
+
if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
|
|
228
|
+
quoted_key = gbnf_format_literal(key);
|
|
229
|
+
quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
|
|
230
|
+
}
|
|
231
|
+
arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
|
|
232
|
+
gbnf_format_literal(form.key_start) + " " +
|
|
233
|
+
gbnf_format_literal(quoted_key) + " " +
|
|
234
|
+
gbnf_format_literal(key_val_sep) + " " +
|
|
235
|
+
((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
|
|
236
|
+
(form.raw_argval ?
|
|
237
|
+
string_arg_val :
|
|
238
|
+
"( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
|
|
239
|
+
) :
|
|
240
|
+
builder.add_schema(name + "-arg-" + key, value)
|
|
241
|
+
)
|
|
242
|
+
), required});
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
|
|
247
|
+
decltype(next_arg_with_sep) next_arg = "\"\"";
|
|
248
|
+
for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
|
|
249
|
+
std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
|
|
250
|
+
next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
|
|
251
|
+
include_this_arg : "( " + include_this_arg + " ) | " + next_arg
|
|
252
|
+
);
|
|
253
|
+
include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
|
|
254
|
+
next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
|
|
255
|
+
include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
|
|
256
|
+
);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
std::string quoted_name = name;
|
|
260
|
+
if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
|
|
261
|
+
quoted_name = gbnf_format_literal(name);
|
|
262
|
+
quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
|
|
263
|
+
}
|
|
264
|
+
quoted_name = gbnf_format_literal(quoted_name);
|
|
265
|
+
// Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
|
|
266
|
+
if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
|
|
267
|
+
quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
|
|
268
|
+
}
|
|
269
|
+
tool_rules.push_back(builder.add_rule(name + "-call",
|
|
270
|
+
gbnf_format_literal(form.tool_start) + " " +
|
|
271
|
+
quoted_name + " " +
|
|
272
|
+
gbnf_format_literal(form.tool_sep) + " " +
|
|
273
|
+
next_arg
|
|
274
|
+
));
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
|
|
278
|
+
auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
|
|
279
|
+
auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
|
|
280
|
+
auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
|
|
281
|
+
builder.add_rule("root",
|
|
282
|
+
(form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
|
|
283
|
+
tool_call_multiple_with_end + "?" +
|
|
284
|
+
(form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
|
|
285
|
+
);
|
|
286
|
+
});
|
|
287
|
+
|
|
288
|
+
// grammar trigger for tool call
|
|
289
|
+
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
|
|
295
|
+
* Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
|
|
296
|
+
* form.scope_start, form.tool_sep and form.scope_end can be empty.
|
|
297
|
+
*/
|
|
298
|
+
inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
|
|
299
|
+
GGML_ASSERT(!form.tool_start.empty());
|
|
300
|
+
GGML_ASSERT(!form.key_start.empty());
|
|
301
|
+
GGML_ASSERT(!form.key_val_sep.empty());
|
|
302
|
+
GGML_ASSERT(!form.val_end.empty());
|
|
303
|
+
GGML_ASSERT(!form.tool_end.empty());
|
|
304
|
+
|
|
305
|
+
// Helper to choose return false or throw error
|
|
306
|
+
constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
|
|
307
|
+
LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
|
|
308
|
+
if (recovery) {
|
|
309
|
+
builder.move_to(start_pos);
|
|
310
|
+
return false;
|
|
311
|
+
} else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
|
|
312
|
+
};
|
|
313
|
+
// Drop substring from needle to end from a JSON
|
|
314
|
+
constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
|
|
315
|
+
auto pos = json_str.rfind(needle);
|
|
316
|
+
if (pos == std::string::npos) {
|
|
317
|
+
return false;
|
|
318
|
+
}
|
|
319
|
+
for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
|
|
320
|
+
unsigned char ch = static_cast<unsigned char>(json_str[i]);
|
|
321
|
+
if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
|
|
322
|
+
return false;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
if (pos != 0 && json_str[pos - 1] == '"') {
|
|
326
|
+
--pos;
|
|
327
|
+
}
|
|
328
|
+
json_str.resize(pos);
|
|
329
|
+
return true;
|
|
330
|
+
};
|
|
331
|
+
// Helper to generate a partial argument JSON
|
|
332
|
+
constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
|
|
333
|
+
auto rest = builder.consume_rest();
|
|
334
|
+
utf8_truncate_safe_resize(rest);
|
|
335
|
+
set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
|
|
336
|
+
auto tool_str = arguments.dump();
|
|
337
|
+
if (partial_json(tool_str)) {
|
|
338
|
+
if (builder.add_tool_call(function_name, "", tool_str)) {
|
|
339
|
+
return;
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
|
|
343
|
+
};
|
|
344
|
+
// Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
|
|
345
|
+
constexpr auto try_find_close = [](
|
|
346
|
+
common_chat_msg_parser & builder,
|
|
347
|
+
const std::string & end,
|
|
348
|
+
const std::optional<std::string> & alt_end,
|
|
349
|
+
const std::string & end_next,
|
|
350
|
+
const std::optional<std::string> & alt_end_next
|
|
351
|
+
) {
|
|
352
|
+
auto saved_pos = builder.pos();
|
|
353
|
+
auto tc = builder.try_find_literal(end);
|
|
354
|
+
auto val_end_size = end.size();
|
|
355
|
+
if (alt_end) {
|
|
356
|
+
auto pos_1 = builder.pos();
|
|
357
|
+
builder.move_to(saved_pos);
|
|
358
|
+
auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
|
|
359
|
+
if (alt_end_next) {
|
|
360
|
+
builder.move_to(saved_pos);
|
|
361
|
+
auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
|
|
362
|
+
if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
|
|
363
|
+
tc2 = tc3;
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
|
|
367
|
+
tc = tc2;
|
|
368
|
+
tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
|
|
369
|
+
builder.move_to(tc->groups[0].end);
|
|
370
|
+
val_end_size = alt_end->size();
|
|
371
|
+
} else {
|
|
372
|
+
builder.move_to(pos_1);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
return std::make_pair(val_end_size, tc);
|
|
376
|
+
};
|
|
377
|
+
// Helper to find a val_end or last_val_end, returns matched pattern size
|
|
378
|
+
const auto try_find_val_end = [try_find_close, &builder, &form]() {
|
|
379
|
+
return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
|
|
380
|
+
};
|
|
381
|
+
// Helper to find a tool_end or last_tool_end, returns matched pattern size
|
|
382
|
+
const auto try_find_tool_end = [try_find_close, &builder, &form]() {
|
|
383
|
+
return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
|
|
384
|
+
};
|
|
385
|
+
|
|
386
|
+
bool recovery = true;
|
|
387
|
+
const auto start_pos = builder.pos();
|
|
388
|
+
if (!all_space(form.scope_start)) {
|
|
389
|
+
if (auto tc = builder.try_find_literal(form.scope_start)) {
|
|
390
|
+
if (all_space(tc->prelude)) {
|
|
391
|
+
if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
|
|
392
|
+
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
|
|
393
|
+
} else {
|
|
394
|
+
builder.move_to(start_pos);
|
|
395
|
+
return false;
|
|
396
|
+
}
|
|
397
|
+
} else return false;
|
|
398
|
+
}
|
|
399
|
+
while (auto tc = builder.try_find_literal(form.tool_start)) {
|
|
400
|
+
if (!all_space(tc->prelude)) {
|
|
401
|
+
LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
|
|
402
|
+
gbnf_format_literal(form.tool_start).c_str(),
|
|
403
|
+
gbnf_format_literal(tc->prelude).c_str()
|
|
404
|
+
);
|
|
405
|
+
builder.move_to(tc->groups[0].begin - tc->prelude.size());
|
|
406
|
+
break;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// Find tool name
|
|
410
|
+
auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
|
|
411
|
+
if (!func_name) {
|
|
412
|
+
auto [sz, tc] = try_find_tool_end();
|
|
413
|
+
func_name = tc;
|
|
414
|
+
}
|
|
415
|
+
if (!func_name) {
|
|
416
|
+
// Partial tool name not supported
|
|
417
|
+
throw common_chat_msg_partial_exception("incomplete tool_call");
|
|
418
|
+
}
|
|
419
|
+
// If the model generate multiple tool call and the first tool call has no argument
|
|
420
|
+
if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
|
|
421
|
+
builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
|
|
422
|
+
auto [sz, tc] = try_find_tool_end();
|
|
423
|
+
func_name = tc;
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
// Parse tool name
|
|
427
|
+
builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
|
|
428
|
+
std::string function_name = string_strip(func_name->prelude);
|
|
429
|
+
// Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
|
|
430
|
+
if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
|
|
431
|
+
if (string_starts_with(function_name, "functions.")) {
|
|
432
|
+
static const std::regex re(":\\d+$");
|
|
433
|
+
if (std::regex_search(function_name, re)) {
|
|
434
|
+
function_name = function_name.substr(10, function_name.rfind(":") - 10);
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
// Argument JSON
|
|
440
|
+
json arguments = json::object();
|
|
441
|
+
|
|
442
|
+
// Helper to generate a partial argument JSON
|
|
443
|
+
const auto gen_partial_args = [&](auto set_partial_arg) {
|
|
444
|
+
gen_partial_json(set_partial_arg, arguments, builder, function_name);
|
|
445
|
+
};
|
|
446
|
+
|
|
447
|
+
// Parse all arg_key/arg_value pairs
|
|
448
|
+
while (auto tc = builder.try_find_literal(form.key_start)) {
|
|
449
|
+
if (!all_space(tc->prelude)) {
|
|
450
|
+
LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
|
|
451
|
+
gbnf_format_literal(form.key_start).c_str(),
|
|
452
|
+
gbnf_format_literal(tc->prelude).c_str()
|
|
453
|
+
);
|
|
454
|
+
builder.move_to(tc->groups[0].begin - tc->prelude.size());
|
|
455
|
+
break;
|
|
456
|
+
}
|
|
457
|
+
if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
|
|
458
|
+
auto tool_call_arg = arguments.dump();
|
|
459
|
+
if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
|
|
460
|
+
tool_call_arg.resize(tool_call_arg.size() - 1);
|
|
461
|
+
}
|
|
462
|
+
builder.add_tool_call(function_name, "", tool_call_arg);
|
|
463
|
+
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
// Parse arg_key
|
|
467
|
+
auto key_res = builder.try_find_literal(form.key_val_sep);
|
|
468
|
+
if (!key_res) {
|
|
469
|
+
gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
|
|
470
|
+
throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
|
|
471
|
+
}
|
|
472
|
+
if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
|
|
473
|
+
gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
|
|
474
|
+
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
|
|
475
|
+
}
|
|
476
|
+
auto &key = key_res->prelude;
|
|
477
|
+
recovery = false;
|
|
478
|
+
|
|
479
|
+
// Parse arg_value
|
|
480
|
+
if (form.key_val_sep2) {
|
|
481
|
+
if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
|
|
482
|
+
if (!all_space(tc->prelude)) {
|
|
483
|
+
LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
|
|
484
|
+
gbnf_format_literal(tc->prelude).c_str(),
|
|
485
|
+
gbnf_format_literal(form.key_val_sep).c_str(),
|
|
486
|
+
gbnf_format_literal(*form.key_val_sep2).c_str()
|
|
487
|
+
);
|
|
488
|
+
return return_error(builder, start_pos, false);
|
|
489
|
+
}
|
|
490
|
+
if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
|
|
491
|
+
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
|
492
|
+
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
|
|
493
|
+
}
|
|
494
|
+
} else {
|
|
495
|
+
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
|
496
|
+
throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
auto val_start = builder.pos();
|
|
500
|
+
|
|
501
|
+
// Test if arg_val is a partial JSON
|
|
502
|
+
std::optional<common_json> value_json = std::nullopt;
|
|
503
|
+
if (!form.raw_argval || !*form.raw_argval) {
|
|
504
|
+
try { value_json = builder.try_consume_json(); }
|
|
505
|
+
catch (const std::runtime_error&) { builder.move_to(val_start); }
|
|
506
|
+
// TODO: Delete this when json_partial adds top-level support for null/true/false
|
|
507
|
+
if (builder.pos() == val_start) {
|
|
508
|
+
const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
|
|
509
|
+
builder.consume_spaces();
|
|
510
|
+
std::string_view sv = utf8_truncate_safe_view(builder.input());
|
|
511
|
+
sv.remove_prefix(builder.pos());
|
|
512
|
+
std::string rest = "a";
|
|
513
|
+
if (sv.size() < 6) rest = sv;
|
|
514
|
+
if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
|
|
515
|
+
value_json = {123, {"123", "123"}};
|
|
516
|
+
builder.consume_rest();
|
|
517
|
+
} else {
|
|
518
|
+
builder.move_to(val_start);
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
// If it is a JSON and followed by </arg_value>, parse as json
|
|
524
|
+
// cannot support streaming because it may be a plain text starting with JSON
|
|
525
|
+
if (value_json) {
|
|
526
|
+
auto json_end = builder.pos();
|
|
527
|
+
builder.consume_spaces();
|
|
528
|
+
if (builder.pos() == builder.input().size()) {
|
|
529
|
+
if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
|
|
530
|
+
arguments[key] = value_json->json;
|
|
531
|
+
auto json_str = arguments.dump();
|
|
532
|
+
if (!value_json->healing_marker.json_dump_marker.empty()) {
|
|
533
|
+
GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
|
|
534
|
+
json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
|
|
535
|
+
} else {
|
|
536
|
+
GGML_ASSERT(json_str.back() == '}');
|
|
537
|
+
json_str.resize(json_str.size() - 1);
|
|
538
|
+
}
|
|
539
|
+
builder.add_tool_call(function_name, "", json_str);
|
|
540
|
+
} else {
|
|
541
|
+
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
|
542
|
+
}
|
|
543
|
+
LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
|
|
544
|
+
throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
|
|
545
|
+
}
|
|
546
|
+
builder.move_to(json_end);
|
|
547
|
+
auto [val_end_size, tc] = try_find_val_end();
|
|
548
|
+
if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
|
|
549
|
+
if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
|
|
550
|
+
gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
|
|
551
|
+
LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
|
|
552
|
+
throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
|
|
553
|
+
} else arguments[key] = value_json->json;
|
|
554
|
+
} else builder.move_to(val_start);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
// If not, parse as plain text
|
|
558
|
+
if (val_start == builder.pos()) {
|
|
559
|
+
if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
|
|
560
|
+
auto &value_str = value_plain->prelude;
|
|
561
|
+
if (form.trim_raw_argval) value_str = string_strip(value_str);
|
|
562
|
+
if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
|
|
563
|
+
gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
|
|
564
|
+
throw common_chat_msg_partial_exception(
|
|
565
|
+
"Expected " + gbnf_format_literal(form.val_end) +
|
|
566
|
+
" after " + gbnf_format_literal(form.key_val_sep) +
|
|
567
|
+
(form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
|
|
568
|
+
);
|
|
569
|
+
}
|
|
570
|
+
arguments[key] = value_str;
|
|
571
|
+
} else {
|
|
572
|
+
if (form.trim_raw_argval) {
|
|
573
|
+
gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
|
|
574
|
+
} else {
|
|
575
|
+
gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
|
|
576
|
+
}
|
|
577
|
+
throw common_chat_msg_partial_exception(
|
|
578
|
+
"Expected " + gbnf_format_literal(form.val_end) +
|
|
579
|
+
" after " + gbnf_format_literal(form.key_val_sep) +
|
|
580
|
+
(form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
|
|
581
|
+
);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// Consume closing tag
|
|
587
|
+
if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
|
|
588
|
+
if (!all_space(tc->prelude)) {
|
|
589
|
+
LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
|
|
590
|
+
gbnf_format_literal(form.tool_end).c_str(),
|
|
591
|
+
gbnf_format_literal(tc->prelude).c_str()
|
|
592
|
+
);
|
|
593
|
+
return return_error(builder, start_pos, recovery);
|
|
594
|
+
}
|
|
595
|
+
if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
|
|
596
|
+
// Add the parsed tool call
|
|
597
|
+
if (!builder.add_tool_call(function_name, "", arguments.dump())) {
|
|
598
|
+
throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
|
|
599
|
+
}
|
|
600
|
+
recovery = false;
|
|
601
|
+
continue;
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
auto tool_call_arg = arguments.dump();
|
|
606
|
+
if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
|
|
607
|
+
tool_call_arg.resize(tool_call_arg.size() - 1);
|
|
608
|
+
}
|
|
609
|
+
builder.add_tool_call(function_name, "", tool_call_arg);
|
|
610
|
+
throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
|
|
611
|
+
}
|
|
612
|
+
if (auto tc = builder.try_find_literal(form.scope_end)) {
|
|
613
|
+
if (!all_space(tc->prelude)) {
|
|
614
|
+
LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
|
|
615
|
+
gbnf_format_literal(form.scope_end).c_str(),
|
|
616
|
+
gbnf_format_literal(tc->prelude).c_str()
|
|
617
|
+
);
|
|
618
|
+
return return_error(builder, start_pos, recovery);
|
|
619
|
+
}
|
|
620
|
+
} else {
|
|
621
|
+
if (all_space(form.scope_end)) return true;
|
|
622
|
+
builder.consume_spaces();
|
|
623
|
+
if (builder.pos() == builder.input().size())
|
|
624
|
+
throw common_chat_msg_partial_exception("incomplete tool calls");
|
|
625
|
+
LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
|
|
626
|
+
gbnf_format_literal(form.scope_end).c_str(),
|
|
627
|
+
gbnf_format_literal(builder.consume_rest()).c_str()
|
|
628
|
+
);
|
|
629
|
+
return return_error(builder, start_pos, recovery);
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
return true;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
/**
|
|
636
|
+
* Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
|
|
637
|
+
* May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
|
|
638
|
+
* form.scope_start, form.tool_sep and form.scope_end can be empty.
|
|
639
|
+
*/
|
|
640
|
+
bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
|
|
641
|
+
auto pos = pos_;
|
|
642
|
+
auto tsize = result_.tool_calls.size();
|
|
643
|
+
try { return parse_xml_tool_calls(*this, form); }
|
|
644
|
+
catch (const xml_toolcall_syntax_exception&) {}
|
|
645
|
+
move_to(pos);
|
|
646
|
+
result_.tool_calls.resize(tsize);
|
|
647
|
+
return false;
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
/**
|
|
651
|
+
* Parse a message that mixes plain content, reasoning (think) blocks and XML-Style tool calls.
* Text between start_think and end_think is passed to builder.add_reasoning_content(), or kept inline in the
* content when reasoning_format is COMMON_REASONING_FORMAT_NONE or reasoning_in_content is set.
* Tool calls are parsed through builder.try_consume_xml_tool_calls(form); the remaining text is added as content.
* The think state starts as "open" when builder.syntax().thinking_forced_open is set.
|
|
652
|
+
* TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
|
|
653
|
+
*/
|
|
654
|
+
inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
|
|
655
|
+
// rstrip: drop, in place, the trailing characters for which std::isspace() is true.
constexpr auto rstrip = [](std::string &s) {
|
|
656
|
+
s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
|
|
657
|
+
};
|
|
658
|
+
// Erase substring from l to r (both inclusive), along with additional spaces nearby.
// When the erased span does not begin at index 0, two '\n' characters are left in its place.
// Returns the index at which the remaining text resumes (just after those newlines, or 0).
|
|
659
|
+
constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
|
|
660
|
+
// Walk l left over preceding whitespace; at index 0 the decrement wraps around (size_t),
// which fails the bounds check and ends the loop.
while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
|
|
661
|
+
// Step forward again onto the first character to erase (a wrapped-around l becomes 0).
++l;
|
|
662
|
+
// Walk r right to the first non-space character after the span (exclusive end of the erase).
while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
|
|
663
|
+
// Overwrite the first two characters of the span with newlines; they survive only when l != 0.
if (l < r) str[l] = '\n';
|
|
664
|
+
if (l + 1 < r) str[l + 1] = '\n';
|
|
665
|
+
// NOTE(review): if fewer than two characters lie in [l, r) here, r - l below wraps around.
// The calls in this function pass spans at least end_think.size() long — TODO confirm end_think is never shorter than 2 characters.
if (l != 0) l += 2;
|
|
666
|
+
str.erase(l, r - l);
|
|
667
|
+
return l;
|
|
668
|
+
};
|
|
669
|
+
// trim_suffix: erase the longest tail of content that equals the beginning (or the whole) of any pattern in list,
// i.e. a keyword that may still be incomplete at the end of the text.
constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
|
|
670
|
+
// Smallest start index of a matching tail found so far; content.size() means "no match".
auto best_match = content.size();
|
|
671
|
+
for (auto pattern: list) {
|
|
672
|
+
// Empty patterns (e.g. optional delimiters that are not set) are ignored.
if (pattern.size() == 0) continue;
|
|
673
|
+
// Only tails that are no longer than the pattern can match.
for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
|
|
674
|
+
auto match_len = content.size() - match_idx;
|
|
675
|
+
if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
|
|
676
|
+
best_match = match_idx;
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
if (content.size() > best_match) {
|
|
681
|
+
content.erase(best_match);
|
|
682
|
+
}
|
|
683
|
+
};
|
|
684
|
+
// trim_potential_partial_word: apply trim_suffix with the think tags and every delimiter of form;
// optional delimiters that are not set are passed as "".
const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
|
|
685
|
+
return trim_suffix(content, {
|
|
686
|
+
start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
|
|
687
|
+
form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
|
|
688
|
+
form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
|
|
689
|
+
form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
|
|
690
|
+
form.scope_end
|
|
691
|
+
});
|
|
692
|
+
};
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
// Trim leading spaces without affecting keyword matching:
// whitespace at the end of the consumed run that could be the beginning of a keyword is given back
// by moving the position backwards by the number of characters trim_potential_partial_word removed.
|
|
696
|
+
static const common_regex spaces_regex("\\s*");
|
|
697
|
+
{
|
|
698
|
+
auto tc = builder.consume_regex(spaces_regex);
|
|
699
|
+
auto spaces = builder.str(tc.groups[0]);
|
|
700
|
+
auto s1 = spaces.size();
|
|
701
|
+
trim_potential_partial_word(spaces);
|
|
702
|
+
auto s2 = spaces.size();
|
|
703
|
+
builder.move_to(builder.pos() - (s1 - s2));
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
// Parse content
|
|
707
|
+
// True while a think block has been opened but not closed yet.
bool reasoning_unclosed = builder.syntax().thinking_forced_open;
|
|
708
|
+
// Reasoning text collected so far for the think block that is still open.
std::string unclosed_reasoning_content("");
|
|
709
|
+
// Each iteration handles the text up to the next tool call start (or the rest of the input) and then that tool call.
for (;;) {
|
|
710
|
+
// Look for the next tool call start built from form.scope_start and form.tool_start
// (presumably the two literals separated by optional spaces, judging by the helper's name — confirm against its definition).
auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
|
|
711
|
+
std::string content;
|
|
712
|
+
std::string tool_call_start;
|
|
713
|
+
|
|
714
|
+
if (tc) {
|
|
715
|
+
// The text before the match is the candidate content; the matched text itself is remembered separately.
content = std::move(tc->prelude);
|
|
716
|
+
tool_call_start = builder.str(tc->groups[0]);
|
|
717
|
+
LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
|
|
718
|
+
} else {
|
|
719
|
+
// No tool call start: the rest of the input is the candidate content.
content = builder.consume_rest();
|
|
720
|
+
// NOTE(review): presumably cuts off an incomplete trailing UTF-8 sequence — confirm against the helper.
utf8_truncate_safe_resize(content);
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
// Handle unclosed think block
|
|
724
|
+
if (reasoning_unclosed) {
|
|
725
|
+
// No end_think in this piece and the input has not ended: everything seen so far is still reasoning.
if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
|
|
726
|
+
unclosed_reasoning_content += content;
|
|
727
|
+
if (form.allow_toolcall_in_think) {
|
|
728
|
+
// tc is dereferenced without a check; this relies on the no-match path above leaving the
// position at the end of the input — NOTE(review): confirm consume_rest() always does so.
// Retry from the start of the match; if no tool call parses there, keep the matched text
// as reasoning and continue after it.
builder.move_to(tc->groups[0].begin);
|
|
729
|
+
if (!builder.try_consume_xml_tool_calls(form)) {
|
|
730
|
+
unclosed_reasoning_content += tool_call_start;
|
|
731
|
+
builder.move_to(tc->groups[0].end);
|
|
732
|
+
}
|
|
733
|
+
} else {
|
|
734
|
+
// Tool calls are not parsed inside a think block: the matched start text is ordinary reasoning.
unclosed_reasoning_content += tool_call_start;
|
|
735
|
+
}
|
|
736
|
+
continue;
|
|
737
|
+
} else {
|
|
738
|
+
// Either end_think was found in this piece or the input ended: close the think block.
reasoning_unclosed = false;
|
|
739
|
+
std::string reasoning_content;
|
|
740
|
+
if (pos == std::string::npos) {
|
|
741
|
+
reasoning_content = std::move(content);
|
|
742
|
+
} else {
|
|
743
|
+
// Split at end_think: the part before it is reasoning, the part after it stays in content.
reasoning_content = content.substr(0, pos);
|
|
744
|
+
content.erase(0, pos + end_think.size());
|
|
745
|
+
}
|
|
746
|
+
// At the end of the input with only whitespace left: drop trailing spaces and a possibly incomplete keyword.
if (builder.pos() == builder.input().size() && all_space(content)) {
|
|
747
|
+
rstrip(reasoning_content);
|
|
748
|
+
trim_potential_partial_word(reasoning_content);
|
|
749
|
+
rstrip(reasoning_content);
|
|
750
|
+
if (reasoning_content.empty()) {
|
|
751
|
+
rstrip(unclosed_reasoning_content);
|
|
752
|
+
trim_potential_partial_word(unclosed_reasoning_content);
|
|
753
|
+
rstrip(unclosed_reasoning_content);
|
|
754
|
+
// Nothing left to report for this think block.
if (unclosed_reasoning_content.empty()) continue;
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
// Reasoning kept inline: emit it as content wrapped in the think tags.
if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
|
|
758
|
+
builder.add_content(start_think);
|
|
759
|
+
builder.add_content(unclosed_reasoning_content);
|
|
760
|
+
builder.add_content(reasoning_content);
|
|
761
|
+
// end_think is only emitted when input remains or non-space content follows.
if (builder.pos() != builder.input().size() || !all_space(content))
|
|
762
|
+
builder.add_content(end_think);
|
|
763
|
+
} else {
|
|
764
|
+
// Reasoning reported separately from the content.
builder.add_reasoning_content(unclosed_reasoning_content);
|
|
765
|
+
builder.add_reasoning_content(reasoning_content);
|
|
766
|
+
}
|
|
767
|
+
unclosed_reasoning_content.clear();
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
// Handle multiple think block
|
|
772
|
+
bool toolcall_in_think = false;
|
|
773
|
+
// Visit every start_think occurring in content.
for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
|
|
774
|
+
if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
|
|
775
|
+
// Closed think block: when reasoning is reported separately, move it out of content;
// otherwise leave it in place and continue searching from the last character of end_think.
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
|
|
776
|
+
auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
|
|
777
|
+
builder.add_reasoning_content(reasoning_content);
|
|
778
|
+
think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
|
|
779
|
+
} else {
|
|
780
|
+
think_start = think_end + end_think.size() - 1;
|
|
781
|
+
}
|
|
782
|
+
} else {
|
|
783
|
+
// This <tool_call> start is in thinking block, skip this tool call
// The text after start_think plus the matched tool call start becomes the pending reasoning,
// and content is cut off right before start_think.
|
|
784
|
+
auto pos = think_start + start_think.size();
|
|
785
|
+
unclosed_reasoning_content = content.substr(pos) + tool_call_start;
|
|
786
|
+
reasoning_unclosed = true;
|
|
787
|
+
content.resize(think_start);
|
|
788
|
+
toolcall_in_think = true;
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
// Clean-up that applies only when reasoning is reported separately from the content.
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
|
|
793
|
+
rstrip(content);
|
|
794
|
+
// Handle unclosed </think> token from content: delete all </think> token
|
|
795
|
+
if (auto pos = content.rfind(end_think); pos != std::string::npos) {
|
|
796
|
+
// Search backwards, erasing each end_think together with the surrounding whitespace.
while (pos != std::string::npos) {
|
|
797
|
+
pos = erase_spaces(content, pos, pos + end_think.size() - 1);
|
|
798
|
+
pos = content.rfind(end_think, pos);
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
// Strip if needed
|
|
802
|
+
if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
|
|
803
|
+
content = string_strip(content);
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
// remove potential partial suffix
// (only at the end of the input and when no reasoning is pending)
|
|
808
|
+
if (content.size() > 0 && builder.pos() == builder.input().size() && unclosed_reasoning_content.empty()) {
|
|
809
|
+
rstrip(content);
|
|
810
|
+
trim_potential_partial_word(content);
|
|
811
|
+
rstrip(content);
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
// Add content
|
|
815
|
+
if (content.size() != 0) {
|
|
816
|
+
// If there are multiple content blocks
// (separate them with a blank line; done only when reasoning is reported separately)
|
|
817
|
+
if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
|
|
818
|
+
builder.add_content("\n\n");
|
|
819
|
+
}
|
|
820
|
+
builder.add_content(content);
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
// This <tool_call> start is in thinking block, skip this tool call
|
|
824
|
+
if (toolcall_in_think && !form.allow_toolcall_in_think) {
|
|
825
|
+
continue;
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
// There is no tool call and all content is parsed
|
|
829
|
+
if (!tc) {
|
|
830
|
+
GGML_ASSERT(builder.pos() == builder.input().size());
|
|
831
|
+
// NOTE(review): with form.allow_toolcall_in_think set, an unclosed think block detected above reaches
// this point with pending reasoning and reasoning_unclosed == true, so these assertions look like
// they would fire — confirm (allow_toolcall_in_think is marked untested in the function's TODO).
GGML_ASSERT(unclosed_reasoning_content.empty());
|
|
832
|
+
GGML_ASSERT(!reasoning_unclosed);
|
|
833
|
+
break;
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
// Rewind to the start of the match and try to parse the tool call(s).
builder.move_to(tc->groups[0].begin);
|
|
837
|
+
if (builder.try_consume_xml_tool_calls(form)) {
|
|
838
|
+
// If only whitespace follows, the position stays after it; otherwise go back to the end of
// the tool call and separate the following content from the existing content with a blank line.
auto end_of_tool = builder.pos();
|
|
839
|
+
builder.consume_spaces();
|
|
840
|
+
if (builder.pos() != builder.input().size()) {
|
|
841
|
+
builder.move_to(end_of_tool);
|
|
842
|
+
if (!builder.result().content.empty()) {
|
|
843
|
+
builder.add_content("\n\n");
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
} else {
|
|
847
|
+
// Not a valid tool call: consume a single character, add it as content (rstrip turns a
// whitespace character into an empty string) and search again from the next position.
static const common_regex next_char_regex(".");
|
|
848
|
+
auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
|
|
849
|
+
rstrip(c);
|
|
850
|
+
builder.add_content(c);
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
/**
|
|
856
|
+
* Parse content uses reasoning and XML-Style tool call
|
|
857
|
+
* TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
|
|
858
|
+
*/
|
|
859
|
+
void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
|
|
860
|
+
parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
|
|
861
|
+
}
|