@fugood/llama.node 0.3.11 → 0.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -0
- package/lib/index.js +26 -20
- package/lib/index.ts +32 -28
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +13 -4
- package/src/llama.cpp/.github/workflows/build.yml +35 -3
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +20 -3
- package/src/llama.cpp/common/arg.cpp +180 -3
- package/src/llama.cpp/common/chat-template.hpp +21 -7
- package/src/llama.cpp/common/chat.cpp +220 -101
- package/src/llama.cpp/common/chat.hpp +3 -0
- package/src/llama.cpp/common/common.h +15 -7
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/minja.hpp +24 -9
- package/src/llama.cpp/common/sampling.cpp +52 -46
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/run/run.cpp +5 -12
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +58 -47
- package/src/llama.cpp/examples/server/utils.hpp +7 -5
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +6 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +852 -268
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +200 -107
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +2 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +26 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +6 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +812 -569
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +25 -1
- package/src/llama.cpp/ggml/src/ggml.c +1 -1
- package/src/llama.cpp/include/llama.h +14 -10
- package/src/llama.cpp/src/llama-grammar.cpp +1 -1
- package/src/llama.cpp/src/llama-grammar.h +1 -1
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +131 -57
- package/src/llama.cpp/src/llama.cpp +7 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +5 -5
- package/src/llama.cpp/tests/test-chat.cpp +237 -69
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
package/src/llama.cpp/src/llama.cpp

@@ -8801,12 +8801,14 @@ static int llama_decode_impl(
     //llama_synchronize(&lctx);

     // decide if we need to defrag the kv cache
-    if (cparams.causal_attn && cparams.defrag_thold
-
+    if (cparams.causal_attn && cparams.defrag_thold > 0.0f) {
+        // - do not defrag small contexts (i.e. < 2048 tokens)
+        // - count the padding towards the number of used tokens
+        const float fragmentation = kv_self.n >= 2048 ? std::max(0.0f, 1.0f - float(kv_self.used + llama_kv_cache_get_padding(cparams))/float(kv_self.n)) : 0.0f;

         // queue defragmentation for next llama_kv_cache_update
         if (fragmentation > cparams.defrag_thold) {
-
+            LLAMA_LOG_DEBUG("%s: fragmentation: %.2f - requesting defrag\n", __func__, fragmentation);

             llama_kv_cache_defrag(kv_self);
         }
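A note on the defrag hunk above: the threshold test is now strictly positive (so defrag_thold <= 0 disables defragmentation entirely), contexts smaller than 2048 cells are never defragmented, and the KV-cache padding is counted as used so it no longer inflates the metric. A minimal standalone sketch of the computation, with made-up numbers standing in for the real cache state and for llama_kv_cache_get_padding(cparams):

    #include <algorithm>
    #include <cstdio>

    // Sketch of the new fragmentation metric (assumed values, not the real cache).
    static float kv_fragmentation(int n_cells, int n_used, int n_pad) {
        if (n_cells < 2048) {
            return 0.0f; // small contexts are never defragmented
        }
        // padding counts towards the used cells
        return std::max(0.0f, 1.0f - float(n_used + n_pad) / float(n_cells));
    }

    int main() {
        // 4096 cells, 3040 used, padding 32: 1 - 3072/4096 = 0.25,
        // so a defrag_thold of 0.1 would queue a defrag on the next update
        std::printf("fragmentation: %.2f\n", kv_fragmentation(4096, 3040, 32));
        return 0;
    }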
@@ -9428,8 +9430,6 @@ static struct llama_model * llama_model_load_from_file_impl(
         struct llama_model_params params) {
     ggml_time_init();

-    llama_model * model = new llama_model(params);
-
     unsigned cur_percentage = 0;
     if (params.progress_callback == NULL) {
         params.progress_callback_user_data = &cur_percentage;
@@ -9447,6 +9447,8 @@ static struct llama_model * llama_model_load_from_file_impl(
         };
     }

+    llama_model * model = new llama_model(params);
+
     // create list of devices to use with this model
     if (params.devices) {
         for (ggml_backend_dev_t * dev = params.devices; *dev; ++dev) {
package/src/llama.cpp/src/unicode.cpp

@@ -618,7 +618,14 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
     result.reserve(utf8.size());
     size_t offset = 0;
     while (offset < utf8.size()) {
-
+        try {
+            result.push_back(unicode_cpt_from_utf8(utf8, offset));
+        }
+        catch (const std::invalid_argument & /*ex*/) {
+            // Silently ignore invalid UTF-8 input to avoid leaking the exception beyond llama_tokenize
+            ++offset;
+            result.emplace_back(0xFFFD); // replacement character
+        }
     }
     return result;
 }
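The unicode.cpp hunk above makes codepoint decoding lossy instead of throwing: each invalid byte is skipped and U+FFFD is emitted in its place, so the exception can no longer escape through llama_tokenize. A rough illustration of the catch-and-substitute pattern, where decode_one is a hypothetical stand-in for unicode_cpt_from_utf8:

    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Hypothetical decoder: accepts ASCII, throws on any byte with the high bit
    // set, which is enough to trigger the recovery path below.
    static uint32_t decode_one(const std::string & s, size_t & offset) {
        unsigned char c = s[offset];
        if (c < 0x80) {
            ++offset;
            return c;
        }
        throw std::invalid_argument("invalid UTF-8");
    }

    static std::vector<uint32_t> cpts_lossy(const std::string & s) {
        std::vector<uint32_t> result;
        size_t offset = 0;
        while (offset < s.size()) {
            try {
                result.push_back(decode_one(s, offset));
            } catch (const std::invalid_argument &) {
                ++offset;                  // skip the offending byte
                result.push_back(0xFFFD);  // U+FFFD replacement character
            }
        }
        return result;
    }

    int main() {
        // "a\x80b" decodes to { 'a', 0xFFFD, 'b' } instead of throwing
        auto cpts = cpts_lossy(std::string("a\x80" "b", 3));
        return cpts.size() == 3 && cpts[1] == 0xFFFD ? 0 : 1;
    }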
@@ -701,7 +708,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
     const auto cpts = unicode_cpts_from_utf8(text);

     // generate a "collapsed" representation of the text, where all codepoints are replaced by a single byte
-    // ref: https://github.com/
+    // ref: https://github.com/ggml-org/llama.cpp/pull/6920#issuecomment-2081479935
     std::string text_collapsed;
     if (need_collapse) {
         // collapse all unicode categories
package/src/llama.cpp/tests/test-backend-ops.cpp

@@ -1254,7 +1254,7 @@ struct test_count_equal : public test_case {
         ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_name(b, "b");

-        ggml_tensor * b_argmax = ggml_argmax(ctx,
+        ggml_tensor * b_argmax = ggml_argmax(ctx, b);
         ggml_set_name(b_argmax, "b_argmax");

         ggml_tensor * out = ggml_count_equal(ctx, a_argmax, b_argmax);
@@ -1511,6 +1511,7 @@ struct test_cont : public test_case {
 };

 // GGML_OP_ADD
+// GGML_OP_SUB
 // GGML_OP_MUL
 // GGML_OP_DIV
 struct test_bin_bcast : public test_case {
@@ -3860,7 +3861,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
     test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));

-    test_cases.emplace_back(new test_count_equal());
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 500, 1, 1}));
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1}));

     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32, 1, 1, 1}));
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {100, 10, 1, 1}));
@@ -3885,8 +3887,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 2, 1, 1}, view));
         test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 2, 1}, view));
         test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 1, 2}, view));
-        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_I32, {8, 6, 4, 2}, {2, 1, 1, 1}, view));
-        test_cases.emplace_back(new test_repeat_back(GGML_TYPE_I16, {8, 6, 4, 2}, {1, 1, 1, 2}, view));
     }

     test_cases.emplace_back(new test_dup(GGML_TYPE_F32));
@@ -3938,7 +3938,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}));

     auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
-        for (auto op : {ggml_add, ggml_mul, ggml_div}) {
+        for (auto op : {ggml_add, ggml_sub, ggml_mul, ggml_div}) {
            test_cases.emplace_back(new test_bin_bcast(op, type, ne, nr));
        }
    };
package/src/llama.cpp/tests/test-chat.cpp

@@ -24,7 +24,10 @@ static common_chat_msg msg_from_json(const json & message) {
         ret.content = message.at("content");
     }
     if (message.contains("tool_plan")) {
-        ret.
+        ret.reasoning_content = message.at("tool_plan");
+    }
+    if (message.contains("reasoning_content")) {
+        ret.reasoning_content = message.at("reasoning_content");
     }
     auto has_tool_calls = message.contains("tool_calls");
     if (has_tool_calls) {
@@ -105,6 +108,7 @@ static std::string dump(const json & j) {
 static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual) {
     assert_equals(expected.role, actual.role);
     assert_equals(expected.content, actual.content);
+    assert_equals(expected.reasoning_content, actual.reasoning_content);
     assert_equals(expected.tool_calls.size(), actual.tool_calls.size());
     for (size_t i = 0; i < expected.tool_calls.size(); i++) {
         const auto & expected_tool_call = expected.tool_calls[i];
@@ -176,13 +180,15 @@ struct delta_data {

 static delta_data init_delta(const common_chat_template & tmpl, const std::vector<std::string> & end_tokens,
                              const json & user_message, const json & delta_message, const json & tools,
-                             const json & tool_choice
+                             const json & tool_choice,
+                             bool think = false) {
     common_chat_inputs inputs;
     inputs.parallel_tool_calls = true;
     inputs.messages = json::array();
     inputs.messages.push_back(user_message);
     inputs.tools = tools;
     inputs.tool_choice = tool_choice;
+    inputs.extract_reasoning = think;
     auto params_prefix = common_chat_params_init(tmpl, inputs);

     inputs.messages.push_back(delta_message);
@@ -192,17 +198,24 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto
     std::string prefix = params_prefix.prompt;
     std::string full = params_full.prompt;

-    // Check full starts with prefix
-    if (full.find(prefix) != 0) {
-        fprintf(stderr, "Full:\n%s\n\nPrefix:\n%s\n\n", full.c_str(), prefix.c_str());
-        throw std::runtime_error("Full message does not start with prefix");
-    }
-
     if (full == prefix) {
         throw std::runtime_error("Full message is the same as the prefix");
     }

-
+    size_t common_prefix_length = 0;
+    for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
+        if (prefix[i] != full[i]) {
+            break;
+        }
+        if (prefix[i] == '<') {
+            // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
+            // but it removes thinking tags for past messages.
+            // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
+            continue;
+        }
+        common_prefix_length = i + 1;
+    }
+    auto delta = full.substr(common_prefix_length);

     // Strip end tokens
     for (const auto & end_token : end_tokens) {
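The rewritten check above exists because the old full.find(prefix) != 0 guard broke on DeepSeek R1: with add_generation_prompt the prefix prompt ends in a trailing <think> that the full prompt does not repeat, so the prefix is no longer a strict prefix of the full prompt. The loop instead takes the longest common prefix while refusing to consume a matching '<', which keeps the whole diverging tag inside the delta. A reduced sketch of that logic:

    #include <cassert>
    #include <string>

    // Longest common prefix of `prefix` and `full`, except a matching '<' is
    // never consumed, so divergence inside a tag rolls back to the tag start.
    static std::string compute_delta(const std::string & prefix, const std::string & full) {
        size_t common_prefix_length = 0;
        for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
            if (prefix[i] != full[i]) {
                break;
            }
            if (prefix[i] == '<') {
                continue;
            }
            common_prefix_length = i + 1;
        }
        return full.substr(common_prefix_length);
    }

    int main() {
        // the strings diverge inside the tag; the shared '<' stays in the delta
        assert(compute_delta("Hi!<think>", "Hi!<|tool|>x") == "<|tool|>x");
        // plain case: the delta is simply the suffix of the full prompt
        assert(compute_delta("Hi!", "Hi! How are you?") == " How are you?");
        return 0;
    }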
@@ -223,7 +236,9 @@
 */
 static void test_template(const common_chat_template & tmpl, const std::vector<std::string> & end_tokens,
                           const json & test_message, const json & tools = {}, const std::string & expected_delta = "",
-                          bool expect_grammar_triggered = true
+                          bool expect_grammar_triggered = true,
+                          bool test_grammar_if_triggered = true,
+                          bool think = false) {
     common_chat_msg expected_msg = msg_from_json(test_message);

     auto user_message = json{
@@ -232,7 +247,7 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
     };

     for (const auto & tool_choice : json({ "auto", "required" })) {
-        auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice);
+        auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice, think);
         if (!expected_delta.empty()) {
             assert_equals(expected_delta, data.delta);
         }
@@ -274,7 +289,7 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
             assert_equals(expect_grammar_triggered, grammar_triggered);
         }

-        if (grammar_triggered && !match_string(constrained, grammar.get())) {
+        if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
             throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
                 "\n\nGrammar: " + data.params.grammar);
         }
@@ -283,16 +298,33 @@ static void test_template(const common_chat_template & tmpl, const std::vector<s
 }

 static void test_template_output_parsers() {
-    json
+    json message_user {
+        { "role", "user" },
+        { "content", "Hey there!" },
+    };
+    json message_assist {
+        { "role", "assistant" },
+        { "content", "Hello, world!\nWhat's up?" },
+    };
+    json message_assist_thoughts_unparsed_think {
+        { "role", "assistant" },
+        { "content", "<think>I'm thinking</think>Hello, world!\nWhat's up?" },
+    };
+    json message_assist_thoughts_unparsed_r7b {
+        { "role", "assistant" },
+        { "content", "<|START_THINKING|>I'm thinking<|END_THINKING|>Hello, world!\nWhat's up?" },
+    };
+    json message_assist_thoughts {
         { "role", "assistant" },
         { "content", "Hello, world!\nWhat's up?" },
+        { "reasoning_content", "I'm thinking" },
     };
     json tool_calls = json::array({{
         { "type", "function" },
         { "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } },
     }});

-    json
+    json message_assist_call {
         { "role", "assistant"},
         { "content", {}},
         { "tool_calls", {
@@ -305,7 +337,34 @@ static void test_template_output_parsers() {
             },
         }},
     };
-    json
+    json message_assist_call_thoughts = {
+        { "role", "assistant" },
+        { "content", nullptr },
+        { "reasoning_content", "I'm\nthinking" },
+        { "tool_calls", {
+            {
+                { "type", "function" },
+                { "function", {
+                    { "name", "special_function" },
+                    { "arguments", "{\"arg1\": 1}" },
+                }},
+            },
+        }},
+    };
+    json message_assist_call_thoughts_unparsed = {
+        { "role", "assistant" },
+        { "content", "<think>I'm\nthinking</think>" },
+        { "tool_calls", {
+            {
+                { "type", "function" },
+                { "function", {
+                    { "name", "special_function" },
+                    { "arguments", "{\"arg1\": 1}" },
+                }},
+            },
+        }},
+    };
+    json message_assist_call_id {
         { "role", "assistant"},
         { "content", {}},
         { "tool_calls", {
@@ -322,10 +381,9 @@ static void test_template_output_parsers() {
         { "content", {} },
         { "tool_calls", tool_calls }
     };
-    json
+    json message_assist_call_idx {
         { "role", "assistant"},
         { "content", {}},
-        { "tool_plan", "I'm not so sure"},
         { "tool_calls", {
             {
                 { "type", "function" },
@@ -341,8 +399,10 @@ static void test_template_output_parsers() {
         { "content", {} },
         { "tool_calls", tool_calls }
     };
+    json message_assist_call_tool_plan_idx = message_assist_call_idx;
+    message_assist_call_tool_plan_idx["tool_plan"] = "I'm thinking";

-    auto
+    auto python_message_assist_call = json{
         { "role", "assistant" },
         { "content", {} },
         { "tool_calls", json{ {
@@ -357,7 +417,7 @@ static void test_template_output_parsers() {
         } },
         } } }
     };
-    auto
+    auto code_interpreter_message_assist_call = json{
         { "role", "assistant" },
         { "content", {} },
         { "tool_calls", json{ {
@@ -374,17 +434,27 @@ static void test_template_output_parsers() {
     };

     common_chat_inputs inputs_no_tools;
-    inputs_no_tools.messages
-
-    };
+    inputs_no_tools.messages = json::array({message_user});
+    inputs_no_tools.extract_reasoning = false;

-    common_chat_inputs
-
-
+    common_chat_inputs inputs_no_tools_think;
+    inputs_no_tools_think.messages = json::array({message_user});
+    inputs_no_tools_think.extract_reasoning = true;

-    common_chat_inputs
-
-
+    common_chat_inputs inputs_tools;
+    inputs_tools.messages = json::array({message_user});
+    inputs_tools.tools = json::array({special_function_tool});
+    inputs_tools.extract_reasoning = false;
+
+    common_chat_inputs inputs_tools_think;
+    inputs_tools_think.messages = json::array({message_user});
+    inputs_tools_think.tools = json::array({special_function_tool});
+    inputs_tools_think.extract_reasoning = true;
+
+    common_chat_inputs inputs_tools_builtin;
+    inputs_tools_builtin.messages = json::array({message_user});
+    inputs_tools_builtin.tools = json::array({python_tool});
+    inputs_tools_builtin.extract_reasoning = false;

     {
         // Not supported yet
@@ -395,15 +465,53 @@ static void test_template_output_parsers() {
         const common_chat_template tmpl(read_file("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"), "<s>", "</s>");
         std::vector<std::string> end_tokens{ "<|END_OF_TURN_TOKEN|>" };

-        assert_equals(
-        assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B,
-
-
-
+        assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_no_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
+
+        assert_msg_equals(msg_from_json(message_assist),
+                          common_chat_parse(
+                              "Hello, world!\nWhat's up?",
+                              COMMON_CHAT_FORMAT_COMMAND_R7B));
+        assert_msg_equals(msg_from_json(message_assist),
+                          common_chat_parse(
+                              "Hello, world!\nWhat's up?<|END_RESPONSE|>",
+                              COMMON_CHAT_FORMAT_COMMAND_R7B));
+        assert_msg_equals(msg_from_json(message_assist),
+                          common_chat_parse(
+                              "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
+                              COMMON_CHAT_FORMAT_COMMAND_R7B));
+        assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_r7b),
+                          common_chat_parse(
+                              "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+                              "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
+                              COMMON_CHAT_FORMAT_COMMAND_R7B));
+        assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_r7b),
+                          common_chat_parse(
+                              "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+                              "Hello, world!\nWhat's up?<|END_RESPONSE|>",
+                              COMMON_CHAT_FORMAT_COMMAND_R7B));
+
+        assert_msg_equals(msg_from_json(message_assist_thoughts),
+                          common_chat_parse(
+                              "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+                              "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
+                              COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING));
+
+        test_template(tmpl, end_tokens, message_assist_call_idx, tools,
+                      "<|START_THINKING|><|END_THINKING|>"
                       "<|START_ACTION|>[\n"
                       "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
                       "]<|END_ACTION|>");
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist_call_tool_plan_idx, tools,
+                      "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+                      "<|START_ACTION|>[\n"
+                      "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+                      "]<|END_ACTION|>",
+                      /* expect_grammar_triggered= */ true,
+                      /* test_grammar_if_triggered= */ true,
+                      /* think= */ true);
+        test_template(tmpl, end_tokens, message_assist, tools,
                       "<|START_RESPONSE|>Hello, world!\n"
                       "What's up?<|END_RESPONSE|>",
                       /* expect_grammar_triggered= */ false);
@@ -423,12 +531,12 @@ static void test_template_output_parsers() {

         // Generic tool calls doesn't generate / parse content-only messages symmetrically.

-        assert_msg_equals(msg_from_json(
+        assert_msg_equals(msg_from_json(message_assist),
                           common_chat_parse("{\n"
                                             "  \"response\": \"Hello, world!\\nWhat's up?\"\n"
                                             "}",
                                             common_chat_params_init(tmpl, inputs_tools).format));
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist_call_id, tools,
                       "{\n"
                       "  \"tool_calls\": [\n"
                       "    {\n"
@@ -448,9 +556,9 @@ static void test_template_output_parsers() {

         assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format);

-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_template(
-            tmpl, end_tokens,
+            tmpl, end_tokens, message_assist_call_id, tools,
             "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
     }
     {
@@ -473,12 +581,12 @@ static void test_template_output_parsers() {
                                               inputs_tools)
                           .format);

-        test_template(tmpl, end_tokens,
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, message_assist_call, tools,
                       "<tool_call>\n"
                       "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
                       "</tool_call>");
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, python_message_assist_call, tools,
                       "<tool_call>\n"
                       "{\"name\": \"python\", \"arguments\": {\"code\": \"print('hey')\"}}\n"
                       "</tool_call>");
@@ -498,12 +606,12 @@ static void test_template_output_parsers() {
                                           inputs_tools_builtin)
                       .format);

-        // test_template(tmpl, end_tokens,
-        test_template(tmpl, end_tokens,
+        // test_template(tmpl, end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, code_interpreter_message_assist_call, llama_3_1_tools,
                       "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, python_message_assist_call, tools,
                       "<|python_tag|>python.call(code=\"print('hey')\")");
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist_call, tools,
                       "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
     }
     {
@@ -513,8 +621,8 @@ static void test_template_output_parsers() {

         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format);

-        test_template(tmpl, end_tokens,
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, message_assist_call, tools,
                       "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
     }
     {
@@ -525,8 +633,8 @@ static void test_template_output_parsers() {
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
                       common_chat_params_init(tmpl, inputs_tools).format);

-        test_template(tmpl, end_tokens,
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, message_assist_call, tools,
                       "<function=special_function>{\"arg1\": 1}</function>");
     }
     {
@@ -537,12 +645,12 @@ static void test_template_output_parsers() {
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_tools).format);

-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist, {},
                       "all\n"
                       "Hello, world!\n"
                       "What's up?",
                       /* expect_grammar_triggered= */ false);
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist_call, tools,
                       "special_function\n"
                       "{\"arg1\": 1}");
     }
@@ -553,23 +661,79 @@ static void test_template_output_parsers() {

         assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format);

-        test_template(tmpl, end_tokens,
-        test_template(tmpl, end_tokens,
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, message_assist_call, tools,
                       " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
     }
     {
+        // Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt.
         const common_chat_template tmpl(read_file("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja"),
                                         "<s>", "</s>");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };

-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
+
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_think),
+                          common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+                                            COMMON_CHAT_FORMAT_DEEPSEEK_R1));
+        assert_msg_equals(msg_from_json(message_assist_thoughts),
+                          common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+                                            COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+        assert_msg_equals(msg_from_json(message_assist_thoughts),
+                          // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
+                          common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
+                                            COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+        // test_template(tmpl, end_tokens, message_assist_call, tools,
+        //               "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+        //               "```json\n"
+        //               "{\"arg1\": 1}\n"
+        //               // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic)
+        //               "```<|tool▁call▁end|>",
+        //               /* expect_grammar_triggered= */ true,
+        //               /* test_grammar_if_triggered= */ false);
+    }
+    {
+        // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
+        const common_chat_template tmpl(read_file("models/templates/llama-cpp-deepseek-r1.jinja"),
+                                        "<s>", "</s>");
+        std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };

-
-
-
-
-
-
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
+
+        test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_template(tmpl, end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_think),
+                          common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+                                            COMMON_CHAT_FORMAT_DEEPSEEK_R1));
+        assert_msg_equals(msg_from_json(message_assist_thoughts),
+                          common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+                                            COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+
+        assert_msg_equals(msg_from_json(message_assist_call_thoughts_unparsed),
+                          common_chat_parse(
+                              "<think>I'm\nthinking</think>\n\n"
+                              "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+                              "```json\n"
+                              "{\"arg1\": 1}\n"
+                              "```<|tool▁call▁end|><|tool▁calls▁end|>",
+                              COMMON_CHAT_FORMAT_DEEPSEEK_R1));
+        assert_msg_equals(msg_from_json(message_assist_call_thoughts),
+                          common_chat_parse(
+                              "<think>I'm\nthinking</think>\n\n"
+                              "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+                              "```json\n"
+                              "{\"arg1\": 1}\n"
+                              "```<|tool▁call▁end|><|tool▁calls▁end|>",
+                              COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+        test_template(tmpl, end_tokens, message_assist_call, tools,
+                      "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+                      "```json\n"
+                      "{\"arg1\": 1}\n"
+                      "```<|tool▁call▁end|><|tool▁calls▁end|>");
     }
 }

@@ -586,16 +750,20 @@ int main(int argc, char ** argv) {
         std::cout << "|----------|--------|\n";

         for (int i = 1; i < argc; i++) {
-
-
-
-
+            try {
+                std::string path = argv[i];
+                if (path.rfind(".jinja") != path.size() - 6) {
+                    std::cerr << "Skipping non-jinja file: " << path << std::endl;
+                    continue;
+                }
+                common_chat_template tmpl(read_file(path), "", "");
+                auto parts = string_split(path, "/");
+                auto name = parts[parts.size() - 1];
+                auto format = common_chat_format_name(common_chat_params_init(tmpl, inputs).format);
+                std::cout << "| " << name << " | " << format << " |\n";
+            } catch (const std::exception & e) {
+                std::cerr << "Failed to process " << argv[i] << ": " << e.what() << std::endl;
             }
-            common_chat_template tmpl(read_file(path), "", "");
-            auto parts = string_split(path, "/");
-            auto name = parts[parts.size() - 1];
-            std::cout << "| " << name << " | " << common_chat_format_name(common_chat_params_init(tmpl, inputs).format)
-                      << " |\n";
         }
     } else
 #endif
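In the main() hunk above each template now gets a per-file try/catch, so one broken template no longer aborts the whole listing, and non-.jinja paths are skipped up front. The rfind test accepts a path only when ".jinja" occupies its final six characters. A tiny sketch of that suffix check (the explicit size guard is an extra safety net that the original omits):

    #include <cassert>
    #include <string>

    // rfind(".jinja") equals size() - 6 exactly when the last occurrence of the
    // suffix sits at the very end of the path.
    static bool is_jinja(const std::string & path) {
        return path.size() >= 6 && path.rfind(".jinja") == path.size() - 6;
    }

    int main() {
        assert(is_jinja("models/templates/llama-cpp-deepseek-r1.jinja"));
        assert(!is_jinja("README.md"));  // no match at all
        assert(!is_jinja(".jinja.bak")); // match, but not at the end
        return 0;
    }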
package/src/llama.cpp/tests/test-gguf.cpp

@@ -697,8 +697,8 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {

 #ifdef _WIN32
         if (!file) {
-            printf("
-            printf("
+            printf("failed to create tmpfile(), needs elevated privileges on Windows");
+            printf("skipping tests");
             continue;
         }
 #else
@@ -1086,8 +1086,8 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned

 #ifdef _WIN32
     if (!file) {
-        printf("
-        printf("
+        printf("failed to create tmpfile(), needs elevated privileges on Windows");
+        printf("skipping tests");
         return std::make_pair(0, 0);
     }
 #else