@fugood/llama.node 1.4.1 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -1
- package/lib/binding.js +3 -0
- package/lib/binding.ts +2 -0
- package/package.json +16 -16
- package/scripts/llama.cpp.patch +25 -11
- package/src/LlamaContext.cpp +2 -2
- package/src/llama.cpp/CMakeLists.txt +21 -6
- package/src/llama.cpp/common/CMakeLists.txt +6 -0
- package/src/llama.cpp/common/arg.cpp +65 -16
- package/src/llama.cpp/common/chat-parser.cpp +40 -0
- package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
- package/src/llama.cpp/common/chat-peg-parser.h +105 -0
- package/src/llama.cpp/common/chat.cpp +40 -29
- package/src/llama.cpp/common/chat.h +10 -1
- package/src/llama.cpp/common/common.cpp +24 -5
- package/src/llama.cpp/common/common.h +16 -5
- package/src/llama.cpp/common/download.cpp +18 -8
- package/src/llama.cpp/common/download.h +3 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +15 -1
- package/src/llama.cpp/common/log.h +19 -12
- package/src/llama.cpp/common/peg-parser.cpp +1712 -0
- package/src/llama.cpp/common/peg-parser.h +459 -0
- package/src/llama.cpp/common/unicode.cpp +64 -0
- package/src/llama.cpp/common/unicode.h +22 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -48
- package/src/llama.cpp/ggml/include/ggml.h +7 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +60 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +30 -1
- package/src/llama.cpp/src/llama-arch.h +3 -0
- package/src/llama.cpp/src/llama-graph.cpp +3 -6
- package/src/llama.cpp/src/llama-hparams.h +2 -2
- package/src/llama.cpp/src/llama-impl.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +50 -6
- package/src/llama.cpp/src/llama-vocab.cpp +1 -2
- package/src/llama.cpp/src/models/mistral3.cpp +160 -0
- package/src/llama.cpp/src/models/models.h +4 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "chat.h"
|
|
4
|
+
#include "peg-parser.h"
|
|
5
|
+
|
|
6
|
+
class common_chat_peg_builder : public common_peg_parser_builder {
|
|
7
|
+
public:
|
|
8
|
+
static constexpr const char * REASONING_BLOCK = "reasoning-block";
|
|
9
|
+
static constexpr const char * REASONING = "reasoning";
|
|
10
|
+
static constexpr const char * CONTENT = "content";
|
|
11
|
+
|
|
12
|
+
common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
|
|
13
|
+
common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
|
|
14
|
+
common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
|
|
18
|
+
common_chat_peg_builder builder;
|
|
19
|
+
builder.set_root(fn(builder));
|
|
20
|
+
return builder.build();
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
class common_chat_peg_mapper {
|
|
24
|
+
public:
|
|
25
|
+
common_chat_msg & result;
|
|
26
|
+
|
|
27
|
+
common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
|
|
28
|
+
|
|
29
|
+
virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
|
|
30
|
+
virtual void map(const common_peg_ast_node & node);
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
class common_chat_peg_native_builder : public common_chat_peg_builder {
|
|
34
|
+
public:
|
|
35
|
+
static constexpr const char * TOOL = "tool";
|
|
36
|
+
static constexpr const char * TOOL_OPEN = "tool-open";
|
|
37
|
+
static constexpr const char * TOOL_CLOSE = "tool-close";
|
|
38
|
+
static constexpr const char * TOOL_ID = "tool-id";
|
|
39
|
+
static constexpr const char * TOOL_NAME = "tool-name";
|
|
40
|
+
static constexpr const char * TOOL_ARGS = "tool-args";
|
|
41
|
+
|
|
42
|
+
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
|
|
43
|
+
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
|
|
44
|
+
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
|
|
45
|
+
common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
|
|
46
|
+
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
|
|
47
|
+
common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
class common_chat_peg_native_mapper : public common_chat_peg_mapper {
|
|
51
|
+
common_chat_tool_call * current_tool;
|
|
52
|
+
|
|
53
|
+
public:
|
|
54
|
+
common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
|
55
|
+
|
|
56
|
+
void map(const common_peg_ast_node & node) override;
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
|
|
60
|
+
common_chat_peg_native_builder builder;
|
|
61
|
+
builder.set_root(fn(builder));
|
|
62
|
+
return builder.build();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
class common_chat_peg_constructed_builder : public common_chat_peg_builder {
|
|
66
|
+
public:
|
|
67
|
+
static constexpr const char * TOOL = "tool";
|
|
68
|
+
static constexpr const char * TOOL_OPEN = "tool-open";
|
|
69
|
+
static constexpr const char * TOOL_CLOSE = "tool-close";
|
|
70
|
+
static constexpr const char * TOOL_NAME = "tool-name";
|
|
71
|
+
static constexpr const char * TOOL_ARG = "tool-arg";
|
|
72
|
+
static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
|
|
73
|
+
static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
|
|
74
|
+
static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
|
|
75
|
+
static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
|
|
76
|
+
static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
|
|
77
|
+
|
|
78
|
+
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
|
|
79
|
+
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
|
|
80
|
+
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
|
|
81
|
+
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
|
|
82
|
+
common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
|
|
83
|
+
common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
|
|
84
|
+
common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
|
|
85
|
+
common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
|
|
86
|
+
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
|
|
87
|
+
common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
|
|
91
|
+
common_chat_tool_call * current_tool;
|
|
92
|
+
int arg_count = 0;
|
|
93
|
+
bool needs_closing_quote = false;
|
|
94
|
+
|
|
95
|
+
public:
|
|
96
|
+
common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
|
97
|
+
|
|
98
|
+
void map(const common_peg_ast_node & node) override;
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
|
|
102
|
+
common_chat_peg_constructed_builder builder;
|
|
103
|
+
builder.set_root(fn(builder));
|
|
104
|
+
return builder.build();
|
|
105
|
+
}
|
|
@@ -82,29 +82,36 @@ json common_chat_msg::to_json_oaicompat() const
|
|
|
82
82
|
return message;
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg &
|
|
85
|
+
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
|
|
86
86
|
std::vector<common_chat_msg_diff> diffs;
|
|
87
|
-
if (
|
|
87
|
+
if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
|
|
88
|
+
diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
|
|
89
|
+
} else {
|
|
90
|
+
diffs.reserve(3);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// TODO: these can become expensive for long messages - how to optimize?
|
|
94
|
+
if (msg_prv.reasoning_content != msg_new.reasoning_content) {
|
|
88
95
|
auto & diff = diffs.emplace_back();
|
|
89
|
-
diff.reasoning_content_delta = string_diff(
|
|
96
|
+
diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
|
|
90
97
|
}
|
|
91
|
-
if (
|
|
98
|
+
if (msg_prv.content != msg_new.content) {
|
|
92
99
|
auto & diff = diffs.emplace_back();
|
|
93
|
-
diff.content_delta = string_diff(
|
|
100
|
+
diff.content_delta = string_diff(msg_prv.content, msg_new.content);
|
|
94
101
|
}
|
|
95
102
|
|
|
96
|
-
if (
|
|
103
|
+
if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
|
|
97
104
|
throw std::runtime_error("Invalid diff: now finding less tool calls!");
|
|
98
105
|
}
|
|
99
106
|
|
|
100
|
-
if (!
|
|
101
|
-
auto idx =
|
|
102
|
-
const auto & pref =
|
|
103
|
-
const auto & newf =
|
|
107
|
+
if (!msg_prv.tool_calls.empty()) {
|
|
108
|
+
const auto idx = msg_prv.tool_calls.size() - 1;
|
|
109
|
+
const auto & pref = msg_prv.tool_calls[idx];
|
|
110
|
+
const auto & newf = msg_new.tool_calls[idx];
|
|
104
111
|
if (pref.name != newf.name) {
|
|
105
112
|
throw std::runtime_error("Invalid diff: tool call mismatch!");
|
|
106
113
|
}
|
|
107
|
-
auto args_diff = string_diff(pref.arguments, newf.arguments);
|
|
114
|
+
const auto args_diff = string_diff(pref.arguments, newf.arguments);
|
|
108
115
|
if (!args_diff.empty() || pref.id != newf.id) {
|
|
109
116
|
auto & diff = diffs.emplace_back();
|
|
110
117
|
diff.tool_call_index = idx;
|
|
@@ -115,11 +122,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
|
|
|
115
122
|
diff.tool_call_delta.arguments = args_diff;
|
|
116
123
|
}
|
|
117
124
|
}
|
|
118
|
-
for (size_t idx =
|
|
125
|
+
for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
|
|
119
126
|
auto & diff = diffs.emplace_back();
|
|
120
127
|
diff.tool_call_index = idx;
|
|
121
|
-
diff.tool_call_delta =
|
|
128
|
+
diff.tool_call_delta = msg_new.tool_calls[idx];
|
|
122
129
|
}
|
|
130
|
+
|
|
123
131
|
return diffs;
|
|
124
132
|
}
|
|
125
133
|
|
|
@@ -150,7 +158,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
|
150
158
|
if (tool_choice == "required") {
|
|
151
159
|
return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
152
160
|
}
|
|
153
|
-
throw std::
|
|
161
|
+
throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
|
|
154
162
|
}
|
|
155
163
|
|
|
156
164
|
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
|
|
@@ -173,17 +181,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
173
181
|
try {
|
|
174
182
|
|
|
175
183
|
if (!messages.is_array()) {
|
|
176
|
-
throw std::
|
|
184
|
+
throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
|
|
177
185
|
}
|
|
178
186
|
|
|
179
187
|
for (const auto & message : messages) {
|
|
180
188
|
if (!message.is_object()) {
|
|
181
|
-
throw std::
|
|
189
|
+
throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
|
|
182
190
|
}
|
|
183
191
|
|
|
184
192
|
common_chat_msg msg;
|
|
185
193
|
if (!message.contains("role")) {
|
|
186
|
-
throw std::
|
|
194
|
+
throw std::invalid_argument("Missing 'role' in message: " + message.dump());
|
|
187
195
|
}
|
|
188
196
|
msg.role = message.at("role");
|
|
189
197
|
|
|
@@ -196,11 +204,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
196
204
|
} else if (content.is_array()) {
|
|
197
205
|
for (const auto & part : content) {
|
|
198
206
|
if (!part.contains("type")) {
|
|
199
|
-
throw std::
|
|
207
|
+
throw std::invalid_argument("Missing content part type: " + part.dump());
|
|
200
208
|
}
|
|
201
209
|
const auto & type = part.at("type");
|
|
202
210
|
if (type != "text") {
|
|
203
|
-
throw std::
|
|
211
|
+
throw std::invalid_argument("Unsupported content part type: " + type.dump());
|
|
204
212
|
}
|
|
205
213
|
common_chat_msg_content_part msg_part;
|
|
206
214
|
msg_part.type = type;
|
|
@@ -208,25 +216,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
208
216
|
msg.content_parts.push_back(msg_part);
|
|
209
217
|
}
|
|
210
218
|
} else if (!content.is_null()) {
|
|
211
|
-
throw std::
|
|
219
|
+
throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
|
|
212
220
|
}
|
|
213
221
|
}
|
|
214
222
|
if (has_tool_calls) {
|
|
215
223
|
for (const auto & tool_call : message.at("tool_calls")) {
|
|
216
224
|
common_chat_tool_call tc;
|
|
217
225
|
if (!tool_call.contains("type")) {
|
|
218
|
-
throw std::
|
|
226
|
+
throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
|
|
219
227
|
}
|
|
220
228
|
const auto & type = tool_call.at("type");
|
|
221
229
|
if (type != "function") {
|
|
222
|
-
throw std::
|
|
230
|
+
throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
|
|
223
231
|
}
|
|
224
232
|
if (!tool_call.contains("function")) {
|
|
225
|
-
throw std::
|
|
233
|
+
throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
|
|
226
234
|
}
|
|
227
235
|
const auto & fc = tool_call.at("function");
|
|
228
236
|
if (!fc.contains("name")) {
|
|
229
|
-
throw std::
|
|
237
|
+
throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
|
|
230
238
|
}
|
|
231
239
|
tc.name = fc.at("name");
|
|
232
240
|
tc.arguments = fc.at("arguments");
|
|
@@ -237,7 +245,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
|
|
237
245
|
}
|
|
238
246
|
}
|
|
239
247
|
if (!has_content && !has_tool_calls) {
|
|
240
|
-
throw std::
|
|
248
|
+
throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
|
|
241
249
|
}
|
|
242
250
|
if (message.contains("reasoning_content")) {
|
|
243
251
|
msg.reasoning_content = message.at("reasoning_content");
|
|
@@ -340,18 +348,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
|
|
|
340
348
|
try {
|
|
341
349
|
if (!tools.is_null()) {
|
|
342
350
|
if (!tools.is_array()) {
|
|
343
|
-
throw std::
|
|
351
|
+
throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
|
|
344
352
|
}
|
|
345
353
|
for (const auto & tool : tools) {
|
|
346
354
|
if (!tool.contains("type")) {
|
|
347
|
-
throw std::
|
|
355
|
+
throw std::invalid_argument("Missing tool type: " + tool.dump());
|
|
348
356
|
}
|
|
349
357
|
const auto & type = tool.at("type");
|
|
350
358
|
if (!type.is_string() || type != "function") {
|
|
351
|
-
throw std::
|
|
359
|
+
throw std::invalid_argument("Unsupported tool type: " + tool.dump());
|
|
352
360
|
}
|
|
353
361
|
if (!tool.contains("function")) {
|
|
354
|
-
throw std::
|
|
362
|
+
throw std::invalid_argument("Missing tool function: " + tool.dump());
|
|
355
363
|
}
|
|
356
364
|
|
|
357
365
|
const auto & function = tool.at("function");
|
|
@@ -636,6 +644,9 @@ const char * common_chat_format_name(common_chat_format format) {
|
|
|
636
644
|
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
|
|
637
645
|
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
|
|
638
646
|
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
|
647
|
+
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
|
|
648
|
+
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
|
|
649
|
+
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
|
|
639
650
|
default:
|
|
640
651
|
throw std::runtime_error("Unknown chat format");
|
|
641
652
|
}
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
#pragma once
|
|
4
4
|
|
|
5
5
|
#include "common.h"
|
|
6
|
+
#include "peg-parser.h"
|
|
6
7
|
#include <functional>
|
|
7
8
|
#include <chrono>
|
|
8
9
|
#include <string>
|
|
@@ -87,7 +88,7 @@ struct common_chat_msg_diff {
|
|
|
87
88
|
size_t tool_call_index = std::string::npos;
|
|
88
89
|
common_chat_tool_call tool_call_delta;
|
|
89
90
|
|
|
90
|
-
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg &
|
|
91
|
+
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
|
|
91
92
|
|
|
92
93
|
bool operator==(const common_chat_msg_diff & other) const {
|
|
93
94
|
return content_delta == other.content_delta
|
|
@@ -135,6 +136,11 @@ enum common_chat_format {
|
|
|
135
136
|
COMMON_CHAT_FORMAT_APRIEL_1_5,
|
|
136
137
|
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
|
137
138
|
|
|
139
|
+
// These are intended to be parsed by the PEG parser
|
|
140
|
+
COMMON_CHAT_FORMAT_PEG_SIMPLE,
|
|
141
|
+
COMMON_CHAT_FORMAT_PEG_NATIVE,
|
|
142
|
+
COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
|
|
143
|
+
|
|
138
144
|
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
|
139
145
|
};
|
|
140
146
|
|
|
@@ -165,6 +171,7 @@ struct common_chat_params {
|
|
|
165
171
|
std::vector<common_grammar_trigger> grammar_triggers;
|
|
166
172
|
std::vector<std::string> preserved_tokens;
|
|
167
173
|
std::vector<std::string> additional_stops;
|
|
174
|
+
std::string parser;
|
|
168
175
|
};
|
|
169
176
|
|
|
170
177
|
struct common_chat_syntax {
|
|
@@ -174,6 +181,7 @@ struct common_chat_syntax {
|
|
|
174
181
|
bool reasoning_in_content = false;
|
|
175
182
|
bool thinking_forced_open = false;
|
|
176
183
|
bool parse_tool_calls = true;
|
|
184
|
+
common_peg_arena parser = {};
|
|
177
185
|
};
|
|
178
186
|
|
|
179
187
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
|
@@ -217,6 +225,7 @@ const char* common_chat_format_name(common_chat_format format);
|
|
|
217
225
|
const char* common_reasoning_format_name(common_reasoning_format format);
|
|
218
226
|
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
|
|
219
227
|
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
228
|
+
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
220
229
|
|
|
221
230
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
|
222
231
|
|
|
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
|
|
|
694
694
|
|
|
695
695
|
// Validate if a filename is safe to use
|
|
696
696
|
// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
|
|
697
|
-
bool fs_validate_filename(const std::string & filename) {
|
|
697
|
+
bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
|
|
698
698
|
if (!filename.length()) {
|
|
699
699
|
// Empty filename invalid
|
|
700
700
|
return false;
|
|
@@ -754,10 +754,14 @@ bool fs_validate_filename(const std::string & filename) {
|
|
|
754
754
|
|| (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|
|
755
755
|
|| c == 0xFFFD // Replacement Character (UTF-8)
|
|
756
756
|
|| c == 0xFEFF // Byte Order Mark (BOM)
|
|
757
|
-
|| c == '
|
|
757
|
+
|| c == ':' || c == '*' // Illegal characters
|
|
758
758
|
|| c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
|
|
759
759
|
return false;
|
|
760
760
|
}
|
|
761
|
+
if (!allow_subdirs && (c == '/' || c == '\\')) {
|
|
762
|
+
// Subdirectories not allowed, reject path separators
|
|
763
|
+
return false;
|
|
764
|
+
}
|
|
761
765
|
}
|
|
762
766
|
|
|
763
767
|
// Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
|
|
@@ -859,6 +863,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
|
|
|
859
863
|
#endif // _WIN32
|
|
860
864
|
}
|
|
861
865
|
|
|
866
|
+
bool fs_is_directory(const std::string & path) {
|
|
867
|
+
std::filesystem::path dir(path);
|
|
868
|
+
return std::filesystem::exists(dir) && std::filesystem::is_directory(dir);
|
|
869
|
+
}
|
|
870
|
+
|
|
862
871
|
std::string fs_get_cache_directory() {
|
|
863
872
|
std::string cache_directory = "";
|
|
864
873
|
auto ensure_trailing_slash = [](std::string p) {
|
|
@@ -893,6 +902,8 @@ std::string fs_get_cache_directory() {
|
|
|
893
902
|
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
|
|
894
903
|
#elif defined(_WIN32)
|
|
895
904
|
cache_directory = std::getenv("LOCALAPPDATA");
|
|
905
|
+
#elif defined(__EMSCRIPTEN__)
|
|
906
|
+
GGML_ABORT("not implemented on this platform");
|
|
896
907
|
#else
|
|
897
908
|
# error Unknown architecture
|
|
898
909
|
#endif
|
|
@@ -912,7 +923,7 @@ std::string fs_get_cache_file(const std::string & filename) {
|
|
|
912
923
|
return cache_directory + filename;
|
|
913
924
|
}
|
|
914
925
|
|
|
915
|
-
std::vector<common_file_info>
|
|
926
|
+
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
|
|
916
927
|
std::vector<common_file_info> files;
|
|
917
928
|
if (path.empty()) return files;
|
|
918
929
|
|
|
@@ -927,14 +938,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
|
|
927
938
|
const auto & p = entry.path();
|
|
928
939
|
if (std::filesystem::is_regular_file(p)) {
|
|
929
940
|
common_file_info info;
|
|
930
|
-
info.path
|
|
931
|
-
info.name
|
|
941
|
+
info.path = p.string();
|
|
942
|
+
info.name = p.filename().string();
|
|
943
|
+
info.is_dir = false;
|
|
932
944
|
try {
|
|
933
945
|
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
|
934
946
|
} catch (const std::filesystem::filesystem_error &) {
|
|
935
947
|
info.size = 0;
|
|
936
948
|
}
|
|
937
949
|
files.push_back(std::move(info));
|
|
950
|
+
} else if (include_directories && std::filesystem::is_directory(p)) {
|
|
951
|
+
common_file_info info;
|
|
952
|
+
info.path = p.string();
|
|
953
|
+
info.name = p.filename().string();
|
|
954
|
+
info.size = 0; // Directories have no size
|
|
955
|
+
info.is_dir = true;
|
|
956
|
+
files.push_back(std::move(info));
|
|
938
957
|
}
|
|
939
958
|
} catch (const std::filesystem::filesystem_error &) {
|
|
940
959
|
// skip entries we cannot inspect
|
|
@@ -12,6 +12,10 @@
|
|
|
12
12
|
#include <vector>
|
|
13
13
|
#include <map>
|
|
14
14
|
|
|
15
|
+
#if defined(_WIN32) && !defined(_WIN32_WINNT)
|
|
16
|
+
#define _WIN32_WINNT 0x0A00
|
|
17
|
+
#endif
|
|
18
|
+
|
|
15
19
|
#ifdef _WIN32
|
|
16
20
|
#define DIRECTORY_SEPARATOR '\\'
|
|
17
21
|
#else
|
|
@@ -26,8 +30,6 @@
|
|
|
26
30
|
fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
|
|
27
31
|
} while(0)
|
|
28
32
|
|
|
29
|
-
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
|
30
|
-
|
|
31
33
|
struct common_time_meas {
|
|
32
34
|
common_time_meas(int64_t & t_acc, bool disable = false);
|
|
33
35
|
~common_time_meas();
|
|
@@ -223,6 +225,7 @@ struct common_params_model {
|
|
|
223
225
|
std::string hf_repo = ""; // HF repo // NOLINT
|
|
224
226
|
std::string hf_file = ""; // HF file // NOLINT
|
|
225
227
|
std::string docker_repo = ""; // Docker repo // NOLINT
|
|
228
|
+
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
|
|
226
229
|
};
|
|
227
230
|
|
|
228
231
|
struct common_params_speculative {
|
|
@@ -370,7 +373,7 @@ struct common_params {
|
|
|
370
373
|
|
|
371
374
|
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
|
372
375
|
|
|
373
|
-
int32_t verbosity =
|
|
376
|
+
int32_t verbosity = 3; // LOG_LEVEL_INFO
|
|
374
377
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
|
375
378
|
int32_t control_vector_layer_end = -1; // layer range for control vector
|
|
376
379
|
bool offline = false;
|
|
@@ -479,9 +482,15 @@ struct common_params {
|
|
|
479
482
|
bool endpoint_props = false; // only control POST requests, not GET
|
|
480
483
|
bool endpoint_metrics = false;
|
|
481
484
|
|
|
485
|
+
// router server configs
|
|
486
|
+
std::string models_dir = ""; // directory containing models for the router server
|
|
487
|
+
int models_max = 4; // maximum number of models to load simultaneously
|
|
488
|
+
bool models_autoload = true; // automatically load models when requested via the router server
|
|
489
|
+
|
|
482
490
|
bool log_json = false;
|
|
483
491
|
|
|
484
492
|
std::string slot_save_path;
|
|
493
|
+
std::string media_path; // path to directory for loading media files
|
|
485
494
|
|
|
486
495
|
float slot_prompt_similarity = 0.1f;
|
|
487
496
|
|
|
@@ -632,8 +641,9 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
|
|
|
632
641
|
// Filesystem utils
|
|
633
642
|
//
|
|
634
643
|
|
|
635
|
-
bool fs_validate_filename(const std::string & filename);
|
|
644
|
+
bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
|
|
636
645
|
bool fs_create_directory_with_parents(const std::string & path);
|
|
646
|
+
bool fs_is_directory(const std::string & path);
|
|
637
647
|
|
|
638
648
|
std::string fs_get_cache_directory();
|
|
639
649
|
std::string fs_get_cache_file(const std::string & filename);
|
|
@@ -642,8 +652,9 @@ struct common_file_info {
|
|
|
642
652
|
std::string path;
|
|
643
653
|
std::string name;
|
|
644
654
|
size_t size = 0; // in bytes
|
|
655
|
+
bool is_dir = false;
|
|
645
656
|
};
|
|
646
|
-
std::vector<common_file_info>
|
|
657
|
+
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
|
647
658
|
|
|
648
659
|
//
|
|
649
660
|
// Model utils
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
#include "http.h"
|
|
25
25
|
#endif
|
|
26
26
|
|
|
27
|
+
#ifndef __EMSCRIPTEN__
|
|
27
28
|
#ifdef __linux__
|
|
28
29
|
#include <linux/limits.h>
|
|
29
30
|
#elif defined(_WIN32)
|
|
@@ -35,6 +36,8 @@
|
|
|
35
36
|
#else
|
|
36
37
|
#include <sys/syslimits.h>
|
|
37
38
|
#endif
|
|
39
|
+
#endif
|
|
40
|
+
|
|
38
41
|
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
39
42
|
|
|
40
43
|
// isatty
|
|
@@ -430,7 +433,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
|
|
430
433
|
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
431
434
|
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
432
435
|
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
433
|
-
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE,
|
|
436
|
+
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
|
|
434
437
|
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
435
438
|
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
436
439
|
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
@@ -517,16 +520,18 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
517
520
|
headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
|
|
518
521
|
}
|
|
519
522
|
|
|
520
|
-
|
|
523
|
+
const char * func = __func__; // avoid __func__ inside a lambda
|
|
524
|
+
size_t downloaded = existing_size;
|
|
525
|
+
size_t progress_step = 0;
|
|
521
526
|
|
|
522
527
|
auto res = cli.Get(resolve_path, headers,
|
|
523
528
|
[&](const httplib::Response &response) {
|
|
524
529
|
if (existing_size > 0 && response.status != 206) {
|
|
525
|
-
LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n",
|
|
530
|
+
LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
|
|
526
531
|
return false;
|
|
527
532
|
}
|
|
528
533
|
if (existing_size == 0 && response.status != 200) {
|
|
529
|
-
LOG_WRN("%s: download received non-successful status code: %d\n",
|
|
534
|
+
LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
|
|
530
535
|
return false;
|
|
531
536
|
}
|
|
532
537
|
if (total_size == 0 && response.has_header("Content-Length")) {
|
|
@@ -534,7 +539,7 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
534
539
|
size_t content_length = std::stoull(response.get_header_value("Content-Length"));
|
|
535
540
|
total_size = existing_size + content_length;
|
|
536
541
|
} catch (const std::exception &e) {
|
|
537
|
-
LOG_WRN("%s: invalid Content-Length header: %s\n",
|
|
542
|
+
LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
|
|
538
543
|
}
|
|
539
544
|
}
|
|
540
545
|
return true;
|
|
@@ -542,11 +547,16 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
542
547
|
[&](const char *data, size_t len) {
|
|
543
548
|
ofs.write(data, len);
|
|
544
549
|
if (!ofs) {
|
|
545
|
-
LOG_ERR("%s: error writing to file: %s\n",
|
|
550
|
+
LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
|
|
546
551
|
return false;
|
|
547
552
|
}
|
|
548
553
|
downloaded += len;
|
|
549
|
-
|
|
554
|
+
progress_step += len;
|
|
555
|
+
|
|
556
|
+
if (progress_step >= total_size / 1000 || downloaded == total_size) {
|
|
557
|
+
print_progress(downloaded, total_size);
|
|
558
|
+
progress_step = 0;
|
|
559
|
+
}
|
|
550
560
|
return true;
|
|
551
561
|
},
|
|
552
562
|
nullptr
|
|
@@ -1047,7 +1057,7 @@ std::string common_docker_resolve_model(const std::string &) {
|
|
|
1047
1057
|
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
1048
1058
|
std::vector<common_cached_model_info> models;
|
|
1049
1059
|
const std::string cache_dir = fs_get_cache_directory();
|
|
1050
|
-
const std::vector<common_file_info> files =
|
|
1060
|
+
const std::vector<common_file_info> files = fs_list(cache_dir, false);
|
|
1051
1061
|
for (const auto & file : files) {
|
|
1052
1062
|
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
|
1053
1063
|
common_cached_model_info model_info;
|
|
@@ -14,8 +14,10 @@ struct common_cached_model_info {
|
|
|
14
14
|
std::string model;
|
|
15
15
|
std::string tag;
|
|
16
16
|
size_t size = 0; // GGUF size in bytes
|
|
17
|
+
// return string representation like "user/model:tag"
|
|
18
|
+
// if tag is "latest", it will be omitted
|
|
17
19
|
std::string to_string() const {
|
|
18
|
-
return user + "/" + model + ":" + tag;
|
|
20
|
+
return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
|
|
19
21
|
}
|
|
20
22
|
};
|
|
21
23
|
|
|
@@ -974,7 +974,7 @@ public:
|
|
|
974
974
|
|
|
975
975
|
void check_errors() {
|
|
976
976
|
if (!_errors.empty()) {
|
|
977
|
-
throw std::
|
|
977
|
+
throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
|
|
978
978
|
}
|
|
979
979
|
if (!_warnings.empty()) {
|
|
980
980
|
fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
|
|
@@ -443,8 +443,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
|
|
443
443
|
log->set_timestamps(timestamps);
|
|
444
444
|
}
|
|
445
445
|
|
|
446
|
+
static int common_get_verbosity(enum ggml_log_level level) {
|
|
447
|
+
switch (level) {
|
|
448
|
+
case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
|
|
449
|
+
case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
|
|
450
|
+
case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
|
|
451
|
+
case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
|
|
452
|
+
case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
|
|
453
|
+
case GGML_LOG_LEVEL_NONE:
|
|
454
|
+
default:
|
|
455
|
+
return LOG_LEVEL_OUTPUT;
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
446
459
|
void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
|
|
447
|
-
|
|
460
|
+
auto verbosity = common_get_verbosity(level);
|
|
461
|
+
if (verbosity <= common_log_verbosity_thold) {
|
|
448
462
|
common_log_add(common_log_main(), level, "%s", text);
|
|
449
463
|
}
|
|
450
464
|
}
|