@fugood/llama.node 0.3.11 → 0.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -0
- package/lib/index.js +26 -20
- package/lib/index.ts +32 -28
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +13 -4
- package/src/llama.cpp/.github/workflows/build.yml +35 -3
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +20 -3
- package/src/llama.cpp/common/arg.cpp +180 -3
- package/src/llama.cpp/common/chat-template.hpp +21 -7
- package/src/llama.cpp/common/chat.cpp +220 -101
- package/src/llama.cpp/common/chat.hpp +3 -0
- package/src/llama.cpp/common/common.h +15 -7
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/minja.hpp +24 -9
- package/src/llama.cpp/common/sampling.cpp +52 -46
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/run/run.cpp +5 -12
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +58 -47
- package/src/llama.cpp/examples/server/utils.hpp +7 -5
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +6 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +852 -268
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +200 -107
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +2 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +26 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +6 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +812 -569
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +25 -1
- package/src/llama.cpp/ggml/src/ggml.c +1 -1
- package/src/llama.cpp/include/llama.h +14 -10
- package/src/llama.cpp/src/llama-grammar.cpp +1 -1
- package/src/llama.cpp/src/llama-grammar.h +1 -1
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +131 -57
- package/src/llama.cpp/src/llama.cpp +7 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +5 -5
- package/src/llama.cpp/tests/test-chat.cpp +237 -69
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
package/src/llama.cpp/common/chat.cpp:

@@ -12,11 +12,13 @@ std::string common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
         case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
         case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
+        case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)";
         case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
         case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
         case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
+        case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -105,7 +107,6 @@ static common_chat_msg parse_json_tool_calls(
         std::sregex_iterator rend;
         std::sregex_iterator rit(it, end, function_regex);
         if (rit == rend) {
-            fprintf(stderr, "No more tool calls found\n");
             result.content += std::string(it, end);
             break;
         }
@@ -115,14 +116,21 @@ static common_chat_msg parse_json_tool_calls(
 
         json arguments;
         if (!parse_json(it, end, arguments)) {
-            throw std::runtime_error("Failed to parse json tool call arguments");
+            throw std::runtime_error("Failed to parse json tool call arguments: " + input);
         }
         if (!std::regex_search(it, end, match, close_regex)) {
-            throw std::runtime_error("Malformed input, missing closing pattern");
+            throw std::runtime_error("Malformed input, missing closing pattern: " + input);
         }
         it = match.suffix().first;
         result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
     }
+
+    if (!result.tool_calls.empty()) {
+        if (!string_strip(result.content).empty()) {
+            LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
+        }
+        result.content = "";
+    }
     return result;
 }
 
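
For reference, a minimal standalone sketch (not code from the package; the message values and the simplified struct are hypothetical) of the content-clearing rule this hunk introduces: once tool calls have been parsed, any accompanying free-text content is logged as a warning and then dropped.

    // Sketch of the rule added to parse_json_tool_calls above (hypothetical values).
    #include <cstdio>
    #include <string>
    #include <vector>

    struct tool_call { std::string name, arguments, id; };

    int main() {
        std::string content = "Sure, calling the tool now.";    // free text emitted alongside the call
        std::vector<tool_call> tool_calls = {{"get_time", "{}", ""}};

        if (!tool_calls.empty()) {
            if (!content.empty()) {          // chat.cpp first strips whitespace via string_strip()
                std::fprintf(stderr, "Content found with tool calls: %s\n", content.c_str());
            }
            content.clear();                 // tool calls win; stray content is discarded
        }
        return 0;
    }
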
@@ -134,11 +142,11 @@ static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& in
     result.role = "assistant";
     const auto process_tool_calls = [&](const json & tool_calls) {
         for (const auto & tool_call : tool_calls) {
-            const auto & arguments = tool_call["arguments"];
+            const auto & arguments = tool_call.at("arguments");
             result.tool_calls.push_back({
-                tool_call["name"],
+                tool_call.at("name"),
                 arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
-                tool_call.contains("id") ? tool_call["id"] : "",
+                tool_call.contains("id") ? tool_call.at("id") : "",
             });
         }
     };
@@ -155,7 +163,7 @@ static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& in
 
 static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
     for (const auto & tool : tools) {
-        if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) {
+        if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
             LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
             continue;
         }
@@ -190,27 +198,27 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
 
     auto tool_call_schemas = json::array();
     foreach_function(inputs.tools, [&](const json & tool) {
-        const auto & function = tool["function"];
+        const auto & function = tool.at("function");
         auto tool_schema = json {
             {"type", "object"},
             {"properties", {
                 {"name", {
                     {"type", "string"},
-                    {"const", function["name"]},
+                    {"const", function.at("name")},
                 }},
-                {"arguments", function["parameters"]},
+                {"arguments", function.at("parameters")},
             }},
             {"required", json::array({"name", "arguments"})},
         };
         if (function.contains("description")) {
-            tool_schema["description"] = function["description"];
+            tool_schema["description"] = function.at("description");
         }
         if (inputs.parallel_tool_calls) {
-            tool_schema["properties"]["id"] = {
+            tool_schema.at("properties")["id"] = {
                 {"type", "string"},
                 {"minLength", 4},
             };
-            tool_schema["required"].push_back("id");
+            tool_schema.at("required").push_back("id");
         }
         tool_call_schemas.emplace_back(tool_schema);
     });
@@ -275,21 +283,21 @@ static common_chat_msg common_chat_parse_generic(const std::string & input) {
     common_chat_msg result;
     result.role = "assistant";
     if (data.contains("tool_calls")) {
-        for (const auto & tool_call : data["tool_calls"]) {
+        for (const auto & tool_call : data.at("tool_calls")) {
             result.tool_calls.push_back({
-                tool_call["name"],
-                tool_call["arguments"].dump(),
-                tool_call.contains("id") ? tool_call["id"] : "",
+                tool_call.at("name"),
+                tool_call.at("arguments").dump(),
+                tool_call.contains("id") ? tool_call.at("id") : "",
             });
         }
     } else if (data.contains("tool_call")) {
         result.tool_calls.push_back({
-            data["tool_call"]["name"],
-            data["tool_call"]["arguments"].dump(),
+            data.at("tool_call").at("name"),
+            data.at("tool_call").at("arguments").dump(),
             /* id= */ "",
         });
     } else if (data.contains("response")) {
-        const auto & response = data["response"];
+        const auto & response = data.at("response");
         result.content = response.is_string() ? response.get<std::string>() : response.dump(2);
     }
     return result;
@@ -301,7 +309,7 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
     data.grammar = build_grammar([&](const common_grammar_builder & builder) {
         auto schemas = json::array();
         foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool["function"];
+            const auto & function = tool.at("function");
             schemas.push_back({
                 {"type", "object"},
                 {"properties", {
@@ -309,9 +317,9 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
                     // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
                     {"name", {
                         {"type", "string"},
-                        {"const", function["name"]},
+                        {"const", function.at("name")},
                     }},
-                    {"arguments", function["parameters"]},
+                    {"arguments", function.at("parameters")},
                     {"id", {
                         {"type", "string"},
                         // Nemo's template expects a 9-character alphanumeric ID.
@@ -346,7 +354,7 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
     data.grammar = build_grammar([&](const common_grammar_builder & builder) {
         auto schemas = json::array();
         foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool["function"];
+            const auto & function = tool.at("function");
             schemas.push_back({
                 {"type", "object"},
                 {"properties", {
@@ -357,9 +365,9 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
                     }},
                     {"tool_name", {
                         {"type", "string"},
-                        {"const", function["name"]},
+                        {"const", function.at("name")},
                     }},
-                    {"parameters", function["parameters"]},
+                    {"parameters", function.at("parameters")},
                 }},
                 {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
             });
@@ -382,39 +390,65 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
         "<|END_THINKING|>",
         "<|END_ACTION|>",
     };
-    data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
-    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
+    auto adjusted_messages = json::array();
+    for (const auto & msg : inputs.messages) {
+        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+        if (has_reasoning_content && has_tool_calls) {
+            auto adjusted_message = msg;
+            adjusted_message["tool_plan"] = msg.at("reasoning_content");
+            adjusted_message.erase("reasoning_content");
+            adjusted_messages.push_back(adjusted_message);
+        } else {
+            adjusted_messages.push_back(msg);
+        }
+    }
+    data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
+    data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B;
     return data;
 }
-static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
-    static std::regex
-    static std::regex
+static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
+    static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|>)([\\s\\S\\n\\r]*)");
+    static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
+    static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
+
     std::smatch match;
 
     common_chat_msg result;
     result.role = "assistant";
-
-
-
-
-
+
+    std::string rest = input;
+
+    if (std::regex_match(rest, match, thought_regex)) {
+        if (extract_reasoning) {
+            result.reasoning_content = match[2].str();
+        } else if (!match[2].str().empty()) {
+            // Let the unparsed thinking tags through in content only if their insides aren't empty.
+            result.content = match[1].str();
+        }
+        rest = match[3].str();
+    }
+    if (std::regex_match(rest, match, action_regex)) {
+        auto actions_str = match[1].str();
         auto actions = json::parse(actions_str);
         for (const auto & action : actions) {
             result.tool_calls.push_back({
-                /* .name = */ action["tool_name"],
-                /* .arguments = */ action["parameters"].dump(),
-                /* .id = */ action["tool_call_id"],
+                /* .name = */ action.at("tool_name"),
+                /* .arguments = */ action.at("parameters").dump(),
+                /* .id = */ action.at("tool_call_id"),
             });
         }
+    } else if (std::regex_match(rest, match, response_regex)) {
+        auto response = match[1].str();
+        result.content += response;
     } else {
-
-        result.content = input;
+        result.content += rest;
     }
     return result;
 }
 
 static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
-    if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
+    if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
         throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
     }
     const auto & parameters_properties = parameters.at("properties");
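
To illustrate the new parsing flow, here is a minimal standalone sketch (the input string is hypothetical; the regexes are copied from the hunk above, and the real parser additionally runs the action JSON through json::parse to build tool calls):

    // Sketch of common_chat_parse_command_r7b's thinking/action/response splitting.
    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|>)([\\s\\S\\n\\r]*)");
        static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
        static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");

        std::string rest = "<|START_THINKING|>plan the call<|END_THINKING|>"
                           "<|START_RESPONSE|>All done.<|END_RESPONSE|>";   // hypothetical model output

        std::smatch match;
        if (std::regex_match(rest, match, thought_regex)) {
            std::cout << "reasoning_content: " << match[2].str() << "\n";   // kept separate when extract_reasoning is true
            rest = match[3].str();                                          // continue with whatever follows the thinking block
        }
        if (std::regex_match(rest, match, action_regex)) {
            std::cout << "tool call JSON: " << match[1].str() << "\n";
        } else if (std::regex_match(rest, match, response_regex)) {
            std::cout << "content: " << match[1].str() << "\n";
        }
        return 0;
    }
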
@@ -468,9 +502,9 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
     };
 
     foreach_function(inputs.tools, [&](const json & tool) {
-        const auto & function = tool["function"];
-        std::string name = function["name"];
-        auto parameters = function["parameters"];
+        const auto & function = tool.at("function");
+        std::string name = function.at("name");
+        auto parameters = function.at("parameters");
         builder.resolve_refs(parameters);
 
         // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
@@ -546,34 +580,90 @@ static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bo
 
 static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
     common_chat_params data;
-
-
-
-
-    const
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        data.grammar_lazy = inputs.tool_choice != "required" && inputs.json_schema.is_null();
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                auto args_rule = builder.add_schema(name + "-args", parameters);
+                tool_rules.push_back(builder.add_rule(name + "-call",
+                    "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
+                    "```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
+            });
+            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
+            // so we accept common variants (then it's all constrained)
+            builder.add_rule("root",
+                "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" ) "
+                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+                "\"<|tool▁calls▁end|>\""
+                " space");
+            data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
+            data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false});
+            data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false});
+            data.grammar_triggers.push_back({"<|tool\\_calls\\_begin|>", /* .at_start = */ false});
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<|tool▁sep|>",
+                "<|tool▁calls▁end|",
+                "<|tool▁call▁end|>",
+            };
+        }, grammar_options);
+    }
     auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+
+    // Hacks to fix the official (broken) prompt.
+    // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
+    // until the official template is fixed.
+    if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
+        // Don't leave the chat dangling after tool results
+        if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
+            prompt += "<|end▁of▁sentence|>";
+            if (inputs.add_generation_prompt) {
+                prompt += "<|Assistant|>";
+            }
+        }
+        // Fix up tool call delta example added by Minja
+        prompt = std::regex_replace(
+            prompt,
+            std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
+            "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
+    }
     data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
+    data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
     return data;
 }
-static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
-    static std::regex trigger_regex("<|tool▁calls▁begin|>");
+static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
     static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
-    static std::regex close_regex("
-
+    static std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
+    static std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
+    static std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
+    common_chat_msg msg;
+    msg.role = "assistant";
+    std::smatch match;
+    if (std::regex_match(input, match, reasoning_content_regex)) {
+        std::string rest;
+        if (extract_reasoning) {
+            msg.reasoning_content = string_strip(match[2].str());
+        } else {
+            msg.content = match[1].str();
+        }
+        rest = match[3].str();
+
+        if (std::regex_search(rest, match, tool_calls_regex)) {
+            auto tool_calls = match[1].str();
+            auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
+            msg.tool_calls = std::move(msg2.tool_calls);
+        } else {
+            msg.content += std::string(rest.begin() + rest.find_first_not_of(" \r\n"), rest.end());
+        }
+    } else {
+        msg.content = input;
+    }
+    return msg;
 }
 
 static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
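
The `<think>` handling that the new extract_reasoning flag controls can be seen in isolation in this minimal standalone sketch (the regex is copied from the hunk above; the input string is hypothetical):

    // Sketch: splitting DeepSeek R1 output into reasoning and visible content.
    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        static const std::regex reasoning_content_regex(
            "((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");

        std::string input = "<think>Check the weather API first.</think>It is sunny today.";
        std::smatch match;
        if (std::regex_match(input, match, reasoning_content_regex)) {
            std::cout << "reasoning_content: " << match[2].str() << "\n";   // with extract_reasoning == true
            std::cout << "content: " << match[3].str() << "\n";             // remainder; scanned for tool calls in the real parser
        }
        return 0;
    }
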
@@ -583,20 +673,20 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
         {"datetime", "Jan 29 2025 13:00:00 GMT"},
         {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
     });
-    if (
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
         data.grammar_lazy = inputs.tool_choice != "required";
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             auto schemas = json::array();
             foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool["function"];
+                const auto & function = tool.at("function");
                 schemas.push_back({
                     {"type", "object"},
                     {"properties", {
                         {"name", {
                             {"type", "string"},
-                            {"const", function["name"]},
+                            {"const", function.at("name")},
                         }},
-                        {"arguments", function["parameters"]},
+                        {"arguments", function.at("parameters")},
                     }},
                     {"required", json::array({"name", "arguments", "id"})},
                 });
@@ -628,15 +718,15 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
     common_chat_params data;
     data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
     data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
-    if (
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
         data.grammar_lazy = inputs.tool_choice != "required";
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             std::vector<std::string> first_tool_rules;
             std::vector<std::string> subsequent_tool_rules;
             foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool["function"];
-                std::string name = function["name"];
-                auto parameters = function["parameters"];
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
                 auto args_rule = builder.add_schema(name + "-args", parameters);
                 first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
                 subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
@@ -716,9 +806,9 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
     data.grammar = build_grammar([&](const common_grammar_builder & builder) {
         std::vector<std::string> tool_rules;
         foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool["function"];
-            const auto & parameters = function["parameters"];
-            std::string name = function["name"];
+            const auto & function = tool.at("function");
+            const auto & parameters = function.at("parameters");
+            std::string name = function.at("name");
             if (name == "python" || name == "ipython") {
                 if (!parameters.contains("type")) {
                     throw std::runtime_error("Missing type in python tool");
@@ -789,9 +879,9 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
     data.grammar = build_grammar([&](const common_grammar_builder & builder) {
         std::vector<std::string> tool_rules;
         foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool["function"];
-            std::string name = function["name"];
-            auto parameters = function["parameters"];
+            const auto & function = tool.at("function");
+            std::string name = function.at("name");
+            auto parameters = function.at("parameters");
             builder.resolve_refs(parameters);
             tool_rules.push_back(builder.add_schema(name + "-call", {
                 {"type", "object"},
@@ -839,9 +929,9 @@ static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input)
             if (!parse_json(it, end, call)) {
                 throw std::runtime_error("Failed to parse json tool call");
             }
-            const auto & arguments = call["arguments"];
+            const auto & arguments = call.at("arguments");
             result.tool_calls.push_back({
-                call["name"],
+                call.at("name"),
                 arguments.dump(),
                 // arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
                 /* id= */ "",
@@ -878,53 +968,78 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
         }
         data.grammar = json_schema_to_grammar(inputs.json_schema);
     } else {
-        data.grammar = inputs.grammar
+        data.grammar = inputs.grammar;
     }
     return data;
 }
 
 common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
-    auto
-
+    const auto & src = tmpl.source();
+    const auto & caps = tmpl.original_caps();
 
-    if (
-
+    if (inputs.tools.is_array()) {
+        if (inputs.tool_choice != "none" && !inputs.grammar.empty()) {
+            throw std::runtime_error("Cannot specify grammar with tools");
+        }
+        if (caps.supports_tool_calls && !caps.supports_tools) {
+            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
+        }
     }
 
-
+    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
+    if (src.find("<|tool▁calls▁begin|>") != std::string::npos && inputs.json_schema.is_null()) {
+        return common_chat_params_init_deepseek_r1(tmpl, inputs);
+    }
+
+    // Command R7B: : use handler in all cases except json schema (thinking / tools).
+    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && inputs.json_schema.is_null()) {
+        return common_chat_params_init_command_r7b(tmpl, inputs);
+    }
+
+    // Use generic handler when mixing tools + JSON schema.
+    // TODO: support that mix in handlers below.
+    if ((!inputs.tools.is_array() && inputs.json_schema.is_object())) {
+        return common_chat_params_init_generic(tmpl, inputs);
+    }
+
+    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
     if (src.find(">>>all") != std::string::npos) {
-        // Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when
         return common_chat_params_init_functionary_v3_2(tmpl, inputs);
     }
+
+    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
     if (src.find(" functools[") != std::string::npos) {
-        // Firefunction v2 requires datetime and functions in the context, even w/o tools.
         return common_chat_params_init_firefunction_v2(tmpl, inputs);
     }
 
-
+    // Plain handler (no tools)
+    if (inputs.tools.is_null() || inputs.tool_choice == "none") {
         return common_chat_params_init_without_tools(tmpl, inputs);
     }
 
+    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
     if (src.find("<tool_call>") != std::string::npos) {
        return common_chat_params_init_hermes_2_pro(tmpl, inputs);
    }
+
+    // Functionary v3.1 (w/ tools)
     if (src.find("<|start_header_id|>") != std::string::npos
         && src.find("<function=") != std::string::npos) {
         return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs);
     }
+
+    // Llama 3.1, 3.2, 3.3 (w/ tools)
     if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
         auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
         return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
     }
-
-
-    }
+
+    // Mistral Nemo (w/ tools)
     if (src.find("[TOOL_CALLS]") != std::string::npos) {
         return common_chat_params_init_mistral_nemo(tmpl, inputs);
     }
-
-
-    }
+
+    // Generic fallback
     return common_chat_params_init_generic(tmpl, inputs);
 }
 
@@ -949,7 +1064,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
         case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
             return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
         case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
-            return common_chat_parse_deepseek_r1(input);
+            return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ false);
+        case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING:
+            return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true);
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
             return common_chat_parse_functionary_v3_2(input);
         case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
@@ -959,7 +1076,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
         case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
             return common_chat_parse_firefunction_v2(input);
         case COMMON_CHAT_FORMAT_COMMAND_R7B:
-            return common_chat_parse_command_r7b(input);
+            return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false);
+        case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
+            return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
         default:
             throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
     }
package/src/llama.cpp/common/chat.hpp:

@@ -19,6 +19,7 @@ struct common_chat_inputs {
     bool stream;
     std::string grammar;
     bool add_generation_prompt = true;
+    bool extract_reasoning = true;
 };
 
 enum common_chat_format {
@@ -28,11 +29,13 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_LLAMA_3_X,
     COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
     COMMON_CHAT_FORMAT_DEEPSEEK_R1,
+    COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
     COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
     COMMON_CHAT_FORMAT_HERMES_2_PRO,
     COMMON_CHAT_FORMAT_COMMAND_R7B,
+    COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
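
Taken together, the new field and enum values let a caller opt into reasoning extraction. A hedged sketch of the wiring (the helper name is hypothetical; tmpl, the base inputs, and the raw model output are assumed to come from the surrounding llama.cpp plumbing):

    // Sketch only: enabling reasoning extraction via the APIs declared in common/chat.hpp.
    #include "chat.hpp"

    common_chat_msg parse_with_reasoning(const common_chat_template & tmpl,
                                         const common_chat_inputs & base,
                                         const std::string & output) {
        common_chat_inputs inputs = base;
        inputs.extract_reasoning = true;   // new field; defaults to true per this diff
        auto params = common_chat_params_init(tmpl, inputs);
        // For a DeepSeek R1 template, params.format is COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
        // so the <think>...</think> block lands in msg.reasoning_content instead of msg.content.
        return common_chat_parse(output, params.format);
    }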