@fugood/llama.node 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CMakeLists.txt +1 -1
  2. package/lib/binding.js +3 -0
  3. package/lib/binding.ts +10 -0
  4. package/lib/index.js +9 -0
  5. package/lib/index.ts +10 -0
  6. package/package.json +15 -15
  7. package/scripts/llama.cpp.patch +25 -11
  8. package/src/LlamaContext.cpp +24 -0
  9. package/src/LlamaContext.h +3 -0
  10. package/src/llama.cpp/CMakeLists.txt +21 -6
  11. package/src/llama.cpp/common/CMakeLists.txt +6 -0
  12. package/src/llama.cpp/common/arg.cpp +83 -22
  13. package/src/llama.cpp/common/chat-parser.cpp +40 -0
  14. package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
  15. package/src/llama.cpp/common/chat-peg-parser.h +105 -0
  16. package/src/llama.cpp/common/chat.cpp +40 -29
  17. package/src/llama.cpp/common/chat.h +10 -1
  18. package/src/llama.cpp/common/common.cpp +70 -7
  19. package/src/llama.cpp/common/common.h +23 -5
  20. package/src/llama.cpp/common/download.cpp +18 -8
  21. package/src/llama.cpp/common/download.h +3 -1
  22. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  23. package/src/llama.cpp/common/log.cpp +18 -27
  24. package/src/llama.cpp/common/log.h +19 -12
  25. package/src/llama.cpp/common/peg-parser.cpp +1712 -0
  26. package/src/llama.cpp/common/peg-parser.h +459 -0
  27. package/src/llama.cpp/common/unicode.cpp +64 -0
  28. package/src/llama.cpp/common/unicode.h +22 -0
  29. package/src/llama.cpp/ggml/CMakeLists.txt +52 -48
  30. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -2
  31. package/src/llama.cpp/ggml/include/ggml-zendnn.h +22 -0
  32. package/src/llama.cpp/ggml/include/ggml.h +29 -2
  33. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -4
  34. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  35. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +0 -2
  36. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
  37. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +51 -125
  39. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
  40. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +98 -12
  41. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  42. package/src/llama.cpp/src/llama-arch.cpp +30 -1
  43. package/src/llama.cpp/src/llama-arch.h +3 -0
  44. package/src/llama.cpp/src/llama-graph.cpp +3 -6
  45. package/src/llama.cpp/src/llama-hparams.h +2 -2
  46. package/src/llama.cpp/src/llama-impl.h +1 -1
  47. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  48. package/src/llama.cpp/src/llama-model.cpp +54 -6
  49. package/src/llama.cpp/src/llama-quant.cpp +0 -29
  50. package/src/llama.cpp/src/llama-vocab.cpp +1 -2
  51. package/src/llama.cpp/src/models/deepseek2.cpp +18 -0
  52. package/src/llama.cpp/src/models/mistral3.cpp +160 -0
  53. package/src/llama.cpp/src/models/models.h +4 -0
  54. package/src/llama.cpp/src/unicode.cpp +2 -2
@@ -0,0 +1,110 @@
1
+ #include "chat-peg-parser.h"
2
+
3
+ static std::string_view trim_trailing_space(std::string_view sv) { // returns sv with trailing whitespace (as classified by std::isspace) dropped; only the view is narrowed, nothing is copied
4
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) { // cast to unsigned char so std::isspace never receives a negative char value
5
+ sv.remove_suffix(1);
6
+ }
7
+ return sv;
8
+ }
9
+
10
+ void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) { // passes every node that arena.visit() yields for the parse result to map(); note the parameter `result` shadows the member `result` inside this function
11
+ arena.visit(result, [this](const common_peg_ast_node & node) {
12
+ map(node); // map() is virtual, so a derived mapper's override is the one invoked
13
+ });
14
+ }
15
+
16
+ void common_chat_peg_mapper::map(const common_peg_ast_node & node) { // copies the text of REASONING / CONTENT tagged nodes into the message referenced by `result`
17
+ bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
18
+ bool is_content = node.tag == common_chat_peg_builder::CONTENT;
19
+
20
+ if (is_reasoning) {
21
+ result.reasoning_content = std::string(trim_trailing_space(node.text)); // assigned, not appended: a later REASONING node replaces the earlier value
22
+ }
23
+
24
+ if (is_content) {
25
+ result.content = std::string(trim_trailing_space(node.text)); // assigned, not appended; trailing whitespace is stripped
26
+ }
27
+ }
28
+
29
+ void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) { // extends the base mapping with tool calls whose id, name and arguments come from individually tagged nodes
30
+ common_chat_peg_mapper::map(node); // reasoning / content handling is delegated to the base class
31
+
32
+ bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
33
+ bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
34
+ bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
35
+ bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
36
+
37
+ if (is_tool_open) {
38
+ result.tool_calls.emplace_back(); // a TOOL_OPEN node starts a new, empty tool call
39
+ current_tool = &result.tool_calls.back(); // refreshed after every emplace_back, so it always addresses the newest element
40
+ }
41
+
42
+ if (is_tool_id && current_tool) {
43
+ current_tool->id = std::string(trim_trailing_space(node.text));
44
+ }
45
+
46
+ if (is_tool_name && current_tool) {
47
+ current_tool->name = std::string(trim_trailing_space(node.text));
48
+ }
49
+
50
+ if (is_tool_args && current_tool) {
51
+ current_tool->arguments = std::string(trim_trailing_space(node.text)); // node text is stored as-is (minus trailing whitespace); no JSON re-serialization happens here
52
+ }
53
+ }
54
+
55
+ void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) { // extends the base mapping by assembling each tool call's `arguments` JSON object text piece by piece from tagged argument nodes
56
+ common_chat_peg_mapper::map(node); // reasoning / content handling is delegated to the base class
57
+
58
+ bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
59
+ bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
60
+ bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
61
+ bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
62
+ bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
63
+ bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
64
+ bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
65
+ bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
66
+
67
+ if (is_tool_open) {
68
+ result.tool_calls.emplace_back(); // a TOOL_OPEN node starts a new, empty tool call
69
+ current_tool = &result.tool_calls.back(); // refreshed after every emplace_back, so it always addresses the newest element
70
+ arg_count = 0; // argument counter restarts for each tool call
71
+ }
72
+
73
+ if (is_tool_name && current_tool) { // guard added: a TOOL_NAME node seen before any TOOL_OPEN must not dereference current_tool (all other branches already check it)
74
+ current_tool->name = std::string(node.text);
75
+ current_tool->arguments = "{"; // opens the JSON object; the matching "}" is appended on TOOL_CLOSE
76
+ }
77
+
78
+ if (is_arg_open) {
79
+ needs_closing_quote = false; // reset per argument; only a string value sets it again
80
+ }
81
+
82
+ if (is_arg_name && current_tool) {
83
+ if (arg_count > 0) {
84
+ current_tool->arguments += ","; // separator before every argument except the first
85
+ }
86
+ current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":"; // key is emitted as a JSON string via dump()
87
+ ++arg_count;
88
+ }
89
+
90
+ if (is_arg_string && current_tool) {
91
+ // Serialize to JSON, but exclude the end quote
92
+ std::string dumped = json(node.text).dump();
93
+ current_tool->arguments += dumped.substr(0, dumped.size() - 1);
94
+ needs_closing_quote = true; // the withheld quote is appended when the TOOL_ARG_CLOSE node arrives
95
+ }
96
+
97
+ if (is_arg_close && current_tool) {
98
+ if (needs_closing_quote) {
99
+ current_tool->arguments += "\"";
100
+ }
101
+ }
102
+
103
+ if (is_arg_json && current_tool) {
104
+ current_tool->arguments += std::string(trim_trailing_space(node.text)); // JSON-valued arguments are appended verbatim, without re-serialization
105
+ }
106
+
107
+ if (is_tool_close && current_tool) {
108
+ current_tool->arguments += "}";
109
+ }
110
+ }
@@ -0,0 +1,105 @@
1
+ #pragma once
2
+
3
+ #include "chat.h"
4
+ #include "peg-parser.h"
5
+
6
+ class common_chat_peg_builder : public common_peg_parser_builder { // parser builder with helpers that tag reasoning and content spans
7
+ public:
8
+ static constexpr const char * REASONING_BLOCK = "reasoning-block"; // tag names passed to tag(); common_chat_peg_mapper::map() compares node.tag against REASONING and CONTENT
9
+ static constexpr const char * REASONING = "reasoning";
10
+ static constexpr const char * CONTENT = "content";
11
+
12
+ common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); } // each helper below returns p wrapped with the corresponding tag
13
+ common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
14
+ common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
15
+ };
16
+
17
+ inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) { // builds an arena from the parser that fn constructs using a fresh common_chat_peg_builder
18
+ common_chat_peg_builder builder;
19
+ builder.set_root(fn(builder)); // the parser returned by fn becomes the root
20
+ return builder.build();
21
+ }
22
+
23
+ class common_chat_peg_mapper { // base mapper: walks a parsed AST and fills a common_chat_msg; derived classes override map()
24
+ public:
25
+ common_chat_msg & result; // message being populated; held by reference, so it must outlive the mapper
26
+
27
+ common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
28
+ virtual ~common_chat_peg_mapper() = default; // polymorphic base: virtual destructor makes destruction through a base pointer well-defined
29
+ virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result); // visits the AST and calls map() on every node
30
+ virtual void map(const common_peg_ast_node & node); // handles a single node; the base version fills reasoning_content / content
31
+ };
32
+
33
+ class common_chat_peg_native_builder : public common_chat_peg_builder { // adds tool-call tagging helpers on top of the reasoning/content ones
34
+ public:
35
+ static constexpr const char * TOOL = "tool"; // tag names; common_chat_peg_native_mapper::map() compares node.tag against TOOL_OPEN, TOOL_ID, TOOL_NAME and TOOL_ARGS
36
+ static constexpr const char * TOOL_OPEN = "tool-open";
37
+ static constexpr const char * TOOL_CLOSE = "tool-close";
38
+ static constexpr const char * TOOL_ID = "tool-id";
39
+ static constexpr const char * TOOL_NAME = "tool-name";
40
+ static constexpr const char * TOOL_ARGS = "tool-args";
41
+
42
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } // plain tag wrapper
43
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } // open/close/id/name are additionally wrapped in atomic()
44
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
45
+ common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
46
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
47
+ common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); } // arguments are tagged without atomic()
48
+ };
49
+
50
+ class common_chat_peg_native_mapper : public common_chat_peg_mapper { // mapper for tool calls whose id, name and arguments arrive as separately tagged nodes
51
+ common_chat_tool_call * current_tool = nullptr; // tool call currently being filled; null until the first TOOL_OPEN node so the `&& current_tool` checks in map() never read an indeterminate pointer
52
+
53
+ public:
54
+ common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
55
+
56
+ void map(const common_peg_ast_node & node) override; // handles tool-open / tool-id / tool-name / tool-args nodes after the base mapping
57
+ };
58
+
59
+ inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) { // builds an arena from the parser that fn constructs using a fresh common_chat_peg_native_builder
60
+ common_chat_peg_native_builder builder;
61
+ builder.set_root(fn(builder)); // the parser returned by fn becomes the root
62
+ return builder.build();
63
+ }
64
+
65
+ class common_chat_peg_constructed_builder : public common_chat_peg_builder { // adds tagging helpers for tool calls whose arguments are tagged one by one (name plus string or JSON value)
66
+ public:
67
+ static constexpr const char * TOOL = "tool"; // tag names; common_chat_peg_constructed_mapper::map() compares node.tag against all of these except TOOL and TOOL_ARG
68
+ static constexpr const char * TOOL_OPEN = "tool-open";
69
+ static constexpr const char * TOOL_CLOSE = "tool-close";
70
+ static constexpr const char * TOOL_NAME = "tool-name";
71
+ static constexpr const char * TOOL_ARG = "tool-arg";
72
+ static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
73
+ static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
74
+ static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
75
+ static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
76
+ static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
77
+
78
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); } // plain tag wrapper
79
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); } // open/close/name markers are additionally wrapped in atomic()
80
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
81
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
82
+ common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
83
+ common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
84
+ common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
85
+ common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
86
+ common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); } // argument values are tagged without atomic()
87
+ common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
88
+ };
89
+
90
+ class common_chat_peg_constructed_mapper : public common_chat_peg_mapper { // mapper that assembles each tool call's JSON `arguments` text from per-argument nodes
91
+ common_chat_tool_call * current_tool = nullptr; // tool call currently being filled; null until the first TOOL_OPEN node so the `current_tool` checks in map() never read an indeterminate pointer
92
+ int arg_count = 0; // arguments emitted so far for the current tool call; decides whether a "," separator is needed
93
+ bool needs_closing_quote = false; // true after a string value was appended without its closing quote
94
+
95
+ public:
96
+ common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
97
+
98
+ void map(const common_peg_ast_node & node) override; // handles tool / argument nodes after the base mapping
99
+ };
100
+
101
+ inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) { // builds an arena from the parser that fn constructs using a fresh common_chat_peg_constructed_builder
102
+ common_chat_peg_constructed_builder builder;
103
+ builder.set_root(fn(builder)); // the parser returned by fn becomes the root
104
+ return builder.build();
105
+ }
@@ -82,29 +82,36 @@ json common_chat_msg::to_json_oaicompat() const
82
82
  return message;
83
83
  }
84
84
 
85
- std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
85
+ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
86
86
  std::vector<common_chat_msg_diff> diffs;
87
- if (previous_msg.reasoning_content != new_msg.reasoning_content) {
87
+ if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
88
+ diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
89
+ } else {
90
+ diffs.reserve(3);
91
+ }
92
+
93
+ // TODO: these can become expensive for long messages - how to optimize?
94
+ if (msg_prv.reasoning_content != msg_new.reasoning_content) {
88
95
  auto & diff = diffs.emplace_back();
89
- diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
96
+ diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
90
97
  }
91
- if (previous_msg.content != new_msg.content) {
98
+ if (msg_prv.content != msg_new.content) {
92
99
  auto & diff = diffs.emplace_back();
93
- diff.content_delta = string_diff(previous_msg.content, new_msg.content);
100
+ diff.content_delta = string_diff(msg_prv.content, msg_new.content);
94
101
  }
95
102
 
96
- if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
103
+ if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
97
104
  throw std::runtime_error("Invalid diff: now finding less tool calls!");
98
105
  }
99
106
 
100
- if (!previous_msg.tool_calls.empty()) {
101
- auto idx = previous_msg.tool_calls.size() - 1;
102
- const auto & pref = previous_msg.tool_calls[idx];
103
- const auto & newf = new_msg.tool_calls[idx];
107
+ if (!msg_prv.tool_calls.empty()) {
108
+ const auto idx = msg_prv.tool_calls.size() - 1;
109
+ const auto & pref = msg_prv.tool_calls[idx];
110
+ const auto & newf = msg_new.tool_calls[idx];
104
111
  if (pref.name != newf.name) {
105
112
  throw std::runtime_error("Invalid diff: tool call mismatch!");
106
113
  }
107
- auto args_diff = string_diff(pref.arguments, newf.arguments);
114
+ const auto args_diff = string_diff(pref.arguments, newf.arguments);
108
115
  if (!args_diff.empty() || pref.id != newf.id) {
109
116
  auto & diff = diffs.emplace_back();
110
117
  diff.tool_call_index = idx;
@@ -115,11 +122,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
115
122
  diff.tool_call_delta.arguments = args_diff;
116
123
  }
117
124
  }
118
- for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
125
+ for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
119
126
  auto & diff = diffs.emplace_back();
120
127
  diff.tool_call_index = idx;
121
- diff.tool_call_delta = new_msg.tool_calls[idx];
128
+ diff.tool_call_delta = msg_new.tool_calls[idx];
122
129
  }
130
+
123
131
  return diffs;
124
132
  }
125
133
 
@@ -150,7 +158,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
150
158
  if (tool_choice == "required") {
151
159
  return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
152
160
  }
153
- throw std::runtime_error("Invalid tool_choice: " + tool_choice);
161
+ throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
154
162
  }
155
163
 
156
164
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -173,17 +181,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
173
181
  try {
174
182
 
175
183
  if (!messages.is_array()) {
176
- throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
184
+ throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
177
185
  }
178
186
 
179
187
  for (const auto & message : messages) {
180
188
  if (!message.is_object()) {
181
- throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
189
+ throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
182
190
  }
183
191
 
184
192
  common_chat_msg msg;
185
193
  if (!message.contains("role")) {
186
- throw std::runtime_error("Missing 'role' in message: " + message.dump());
194
+ throw std::invalid_argument("Missing 'role' in message: " + message.dump());
187
195
  }
188
196
  msg.role = message.at("role");
189
197
 
@@ -196,11 +204,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
196
204
  } else if (content.is_array()) {
197
205
  for (const auto & part : content) {
198
206
  if (!part.contains("type")) {
199
- throw std::runtime_error("Missing content part type: " + part.dump());
207
+ throw std::invalid_argument("Missing content part type: " + part.dump());
200
208
  }
201
209
  const auto & type = part.at("type");
202
210
  if (type != "text") {
203
- throw std::runtime_error("Unsupported content part type: " + type.dump());
211
+ throw std::invalid_argument("Unsupported content part type: " + type.dump());
204
212
  }
205
213
  common_chat_msg_content_part msg_part;
206
214
  msg_part.type = type;
@@ -208,25 +216,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
208
216
  msg.content_parts.push_back(msg_part);
209
217
  }
210
218
  } else if (!content.is_null()) {
211
- throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
219
+ throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
212
220
  }
213
221
  }
214
222
  if (has_tool_calls) {
215
223
  for (const auto & tool_call : message.at("tool_calls")) {
216
224
  common_chat_tool_call tc;
217
225
  if (!tool_call.contains("type")) {
218
- throw std::runtime_error("Missing tool call type: " + tool_call.dump());
226
+ throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
219
227
  }
220
228
  const auto & type = tool_call.at("type");
221
229
  if (type != "function") {
222
- throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
230
+ throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
223
231
  }
224
232
  if (!tool_call.contains("function")) {
225
- throw std::runtime_error("Missing tool call function: " + tool_call.dump());
233
+ throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
226
234
  }
227
235
  const auto & fc = tool_call.at("function");
228
236
  if (!fc.contains("name")) {
229
- throw std::runtime_error("Missing tool call name: " + tool_call.dump());
237
+ throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
230
238
  }
231
239
  tc.name = fc.at("name");
232
240
  tc.arguments = fc.at("arguments");
@@ -237,7 +245,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
237
245
  }
238
246
  }
239
247
  if (!has_content && !has_tool_calls) {
240
- throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
248
+ throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
241
249
  }
242
250
  if (message.contains("reasoning_content")) {
243
251
  msg.reasoning_content = message.at("reasoning_content");
@@ -340,18 +348,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
340
348
  try {
341
349
  if (!tools.is_null()) {
342
350
  if (!tools.is_array()) {
343
- throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
351
+ throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
344
352
  }
345
353
  for (const auto & tool : tools) {
346
354
  if (!tool.contains("type")) {
347
- throw std::runtime_error("Missing tool type: " + tool.dump());
355
+ throw std::invalid_argument("Missing tool type: " + tool.dump());
348
356
  }
349
357
  const auto & type = tool.at("type");
350
358
  if (!type.is_string() || type != "function") {
351
- throw std::runtime_error("Unsupported tool type: " + tool.dump());
359
+ throw std::invalid_argument("Unsupported tool type: " + tool.dump());
352
360
  }
353
361
  if (!tool.contains("function")) {
354
- throw std::runtime_error("Missing tool function: " + tool.dump());
362
+ throw std::invalid_argument("Missing tool function: " + tool.dump());
355
363
  }
356
364
 
357
365
  const auto & function = tool.at("function");
@@ -636,6 +644,9 @@ const char * common_chat_format_name(common_chat_format format) {
636
644
  case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
637
645
  case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
638
646
  case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
647
+ case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
648
+ case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
649
+ case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
639
650
  default:
640
651
  throw std::runtime_error("Unknown chat format");
641
652
  }
@@ -3,6 +3,7 @@
3
3
  #pragma once
4
4
 
5
5
  #include "common.h"
6
+ #include "peg-parser.h"
6
7
  #include <functional>
7
8
  #include <chrono>
8
9
  #include <string>
@@ -87,7 +88,7 @@ struct common_chat_msg_diff {
87
88
  size_t tool_call_index = std::string::npos;
88
89
  common_chat_tool_call tool_call_delta;
89
90
 
90
- static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
91
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
91
92
 
92
93
  bool operator==(const common_chat_msg_diff & other) const {
93
94
  return content_delta == other.content_delta
@@ -135,6 +136,11 @@ enum common_chat_format {
135
136
  COMMON_CHAT_FORMAT_APRIEL_1_5,
136
137
  COMMON_CHAT_FORMAT_XIAOMI_MIMO,
137
138
 
139
+ // These are intended to be parsed by the PEG parser
140
+ COMMON_CHAT_FORMAT_PEG_SIMPLE,
141
+ COMMON_CHAT_FORMAT_PEG_NATIVE,
142
+ COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
143
+
138
144
  COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
139
145
  };
140
146
 
@@ -165,6 +171,7 @@ struct common_chat_params {
165
171
  std::vector<common_grammar_trigger> grammar_triggers;
166
172
  std::vector<std::string> preserved_tokens;
167
173
  std::vector<std::string> additional_stops;
174
+ std::string parser;
168
175
  };
169
176
 
170
177
  struct common_chat_syntax {
@@ -174,6 +181,7 @@ struct common_chat_syntax {
174
181
  bool reasoning_in_content = false;
175
182
  bool thinking_forced_open = false;
176
183
  bool parse_tool_calls = true;
184
+ common_peg_arena parser = {};
177
185
  };
178
186
 
179
187
  // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -217,6 +225,7 @@ const char* common_chat_format_name(common_chat_format format);
217
225
  const char* common_reasoning_format_name(common_reasoning_format format);
218
226
  common_reasoning_format common_reasoning_format_from_name(const std::string & format);
219
227
  common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
228
+ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
220
229
 
221
230
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
222
231
 
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
694
694
 
695
695
  // Validate if a filename is safe to use
696
696
  // To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
697
- bool fs_validate_filename(const std::string & filename) {
697
+ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
698
698
  if (!filename.length()) {
699
699
  // Empty filename invalid
700
700
  return false;
@@ -754,10 +754,14 @@ bool fs_validate_filename(const std::string & filename) {
754
754
  || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
755
755
  || c == 0xFFFD // Replacement Character (UTF-8)
756
756
  || c == 0xFEFF // Byte Order Mark (BOM)
757
- || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
757
+ || c == ':' || c == '*' // Illegal characters
758
758
  || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
759
759
  return false;
760
760
  }
761
+ if (!allow_subdirs && (c == '/' || c == '\\')) {
762
+ // Subdirectories not allowed, reject path separators
763
+ return false;
764
+ }
761
765
  }
762
766
 
763
767
  // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
@@ -782,11 +786,29 @@ bool fs_validate_filename(const std::string & filename) {
782
786
  #include <iostream>
783
787
 
784
788
 
789
+ #ifdef _WIN32
790
+ static std::wstring utf8_to_wstring(const std::string & str) { // converts a UTF-8 std::string to std::wstring via MultiByteToWideChar; returns an empty wstring for empty input or when the size query yields <= 0
791
+ if (str.empty()) {
792
+ return std::wstring();
793
+ }
794
+
795
+ int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0); // first call passes no output buffer (NULL, 0) to obtain the required length in wide characters
796
+
797
+ if (size <= 0) {
798
+ return std::wstring(); // conversion failure is reported to the caller only as an empty result
799
+ }
800
+
801
+ std::wstring wstr(size, 0);
802
+ MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size); // second call performs the conversion into wstr; its return value is not checked
803
+
804
+ return wstr;
805
+ }
806
+ #endif
807
+
785
808
  // returns true if successful, false otherwise
786
809
  bool fs_create_directory_with_parents(const std::string & path) {
787
810
  #ifdef _WIN32
788
- std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
789
- std::wstring wpath = converter.from_bytes(path);
811
+ std::wstring wpath = utf8_to_wstring(path);
790
812
 
791
813
  // if the path already exists, check whether it's a directory
792
814
  const DWORD attributes = GetFileAttributesW(wpath.c_str());
@@ -859,6 +881,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
859
881
  #endif // _WIN32
860
882
  }
861
883
 
884
+ bool fs_is_directory(const std::string & path) { // returns true only if `path` exists and is a directory
885
+ std::filesystem::path dir(path);
886
+ return std::filesystem::exists(dir) && std::filesystem::is_directory(dir); // uses the overloads without std::error_code, which report OS errors by throwing std::filesystem::filesystem_error
887
+ }
888
+
862
889
  std::string fs_get_cache_directory() {
863
890
  std::string cache_directory = "";
864
891
  auto ensure_trailing_slash = [](std::string p) {
@@ -893,6 +920,8 @@ std::string fs_get_cache_directory() {
893
920
  cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
894
921
  #elif defined(_WIN32)
895
922
  cache_directory = std::getenv("LOCALAPPDATA");
923
+ #elif defined(__EMSCRIPTEN__)
924
+ GGML_ABORT("not implemented on this platform");
896
925
  #else
897
926
  # error Unknown architecture
898
927
  #endif
@@ -912,7 +941,7 @@ std::string fs_get_cache_file(const std::string & filename) {
912
941
  return cache_directory + filename;
913
942
  }
914
943
 
915
- std::vector<common_file_info> fs_list_files(const std::string & path) {
944
+ std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
916
945
  std::vector<common_file_info> files;
917
946
  if (path.empty()) return files;
918
947
 
@@ -927,14 +956,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
927
956
  const auto & p = entry.path();
928
957
  if (std::filesystem::is_regular_file(p)) {
929
958
  common_file_info info;
930
- info.path = p.string();
931
- info.name = p.filename().string();
959
+ info.path = p.string();
960
+ info.name = p.filename().string();
961
+ info.is_dir = false;
932
962
  try {
933
963
  info.size = static_cast<size_t>(std::filesystem::file_size(p));
934
964
  } catch (const std::filesystem::filesystem_error &) {
935
965
  info.size = 0;
936
966
  }
937
967
  files.push_back(std::move(info));
968
+ } else if (include_directories && std::filesystem::is_directory(p)) {
969
+ common_file_info info;
970
+ info.path = p.string();
971
+ info.name = p.filename().string();
972
+ info.size = 0; // Directories have no size
973
+ info.is_dir = true;
974
+ files.push_back(std::move(info));
938
975
  }
939
976
  } catch (const std::filesystem::filesystem_error &) {
940
977
  // skip entries we cannot inspect
@@ -945,6 +982,32 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
945
982
  return files;
946
983
  }
947
984
 
985
+ //
986
+ // TTY utils
987
+ //
988
+
989
+ bool tty_can_use_colors() { // returns false if NO_COLOR is set to a non-empty value or TERM is "dumb"; otherwise true when stdout or stderr is a terminal
990
+ // Check NO_COLOR environment variable (https://no-color.org/)
991
+ if (const char * no_color = std::getenv("NO_COLOR")) {
992
+ if (no_color[0] != '\0') { // an empty NO_COLOR value does not disable colors
993
+ return false;
994
+ }
995
+ }
996
+
997
+ // Check TERM environment variable
998
+ if (const char * term = std::getenv("TERM")) {
999
+ if (std::strcmp(term, "dumb") == 0) {
1000
+ return false;
1001
+ }
1002
+ }
1003
+
1004
+ // Check if stdout and stderr are connected to a terminal
1005
+ // We check both because log messages can go to either
1006
+ bool stdout_is_tty = isatty(fileno(stdout));
1007
+ bool stderr_is_tty = isatty(fileno(stderr));
1008
+
1009
+ return stdout_is_tty || stderr_is_tty; // one terminal stream is enough to report color support
1010
+ }
948
1011
 
949
1012
  //
950
1013
  // Model utils
@@ -12,6 +12,10 @@
12
12
  #include <vector>
13
13
  #include <map>
14
14
 
15
+ #if defined(_WIN32) && !defined(_WIN32_WINNT)
16
+ #define _WIN32_WINNT 0x0A00
17
+ #endif
18
+
15
19
  #ifdef _WIN32
16
20
  #define DIRECTORY_SEPARATOR '\\'
17
21
  #else
@@ -26,8 +30,6 @@
26
30
  fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
27
31
  } while(0)
28
32
 
29
- #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
30
-
31
33
  struct common_time_meas {
32
34
  common_time_meas(int64_t & t_acc, bool disable = false);
33
35
  ~common_time_meas();
@@ -223,6 +225,7 @@ struct common_params_model {
223
225
  std::string hf_repo = ""; // HF repo // NOLINT
224
226
  std::string hf_file = ""; // HF file // NOLINT
225
227
  std::string docker_repo = ""; // Docker repo // NOLINT
228
+ std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
226
229
  };
227
230
 
228
231
  struct common_params_speculative {
@@ -370,7 +373,7 @@ struct common_params {
370
373
 
371
374
  std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
372
375
 
373
- int32_t verbosity = 0;
376
+ int32_t verbosity = 3; // LOG_LEVEL_INFO
374
377
  int32_t control_vector_layer_start = -1; // layer range for control vector
375
378
  int32_t control_vector_layer_end = -1; // layer range for control vector
376
379
  bool offline = false;
@@ -479,9 +482,15 @@ struct common_params {
479
482
  bool endpoint_props = false; // only control POST requests, not GET
480
483
  bool endpoint_metrics = false;
481
484
 
485
+ // router server configs
486
+ std::string models_dir = ""; // directory containing models for the router server
487
+ int models_max = 4; // maximum number of models to load simultaneously
488
+ bool models_autoload = true; // automatically load models when requested via the router server
489
+
482
490
  bool log_json = false;
483
491
 
484
492
  std::string slot_save_path;
493
+ std::string media_path; // path to directory for loading media files
485
494
 
486
495
  float slot_prompt_similarity = 0.1f;
487
496
 
@@ -632,8 +641,9 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
632
641
  // Filesystem utils
633
642
  //
634
643
 
635
- bool fs_validate_filename(const std::string & filename);
644
+ bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
636
645
  bool fs_create_directory_with_parents(const std::string & path);
646
+ bool fs_is_directory(const std::string & path);
637
647
 
638
648
  std::string fs_get_cache_directory();
639
649
  std::string fs_get_cache_file(const std::string & filename);
@@ -642,8 +652,16 @@ struct common_file_info {
642
652
  std::string path;
643
653
  std::string name;
644
654
  size_t size = 0; // in bytes
655
+ bool is_dir = false;
645
656
  };
646
- std::vector<common_file_info> fs_list_files(const std::string & path);
657
+ std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
658
+
659
+ //
660
+ // TTY utils
661
+ //
662
+
663
+ // Auto-detect if colors can be enabled based on terminal and environment
664
+ bool tty_can_use_colors();
647
665
 
648
666
  //
649
667
  // Model utils