@fugood/llama.node 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/CMakeLists.txt +1 -1
  2. package/lib/binding.js +3 -0
  3. package/lib/binding.ts +2 -0
  4. package/package.json +16 -16
  5. package/scripts/llama.cpp.patch +25 -11
  6. package/src/LlamaContext.cpp +2 -2
  7. package/src/llama.cpp/CMakeLists.txt +21 -6
  8. package/src/llama.cpp/common/CMakeLists.txt +6 -0
  9. package/src/llama.cpp/common/arg.cpp +65 -16
  10. package/src/llama.cpp/common/chat-parser.cpp +40 -0
  11. package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
  12. package/src/llama.cpp/common/chat-peg-parser.h +105 -0
  13. package/src/llama.cpp/common/chat.cpp +40 -29
  14. package/src/llama.cpp/common/chat.h +10 -1
  15. package/src/llama.cpp/common/common.cpp +24 -5
  16. package/src/llama.cpp/common/common.h +16 -5
  17. package/src/llama.cpp/common/download.cpp +18 -8
  18. package/src/llama.cpp/common/download.h +3 -1
  19. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  20. package/src/llama.cpp/common/log.cpp +15 -1
  21. package/src/llama.cpp/common/log.h +19 -12
  22. package/src/llama.cpp/common/peg-parser.cpp +1712 -0
  23. package/src/llama.cpp/common/peg-parser.h +459 -0
  24. package/src/llama.cpp/common/unicode.cpp +64 -0
  25. package/src/llama.cpp/common/unicode.h +22 -0
  26. package/src/llama.cpp/ggml/CMakeLists.txt +48 -48
  27. package/src/llama.cpp/ggml/include/ggml.h +7 -2
  28. package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -4
  29. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  30. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
  31. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +60 -1
  32. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  33. package/src/llama.cpp/src/llama-arch.cpp +30 -1
  34. package/src/llama.cpp/src/llama-arch.h +3 -0
  35. package/src/llama.cpp/src/llama-graph.cpp +3 -6
  36. package/src/llama.cpp/src/llama-hparams.h +2 -2
  37. package/src/llama.cpp/src/llama-impl.h +1 -1
  38. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  39. package/src/llama.cpp/src/llama-model.cpp +50 -6
  40. package/src/llama.cpp/src/llama-vocab.cpp +1 -2
  41. package/src/llama.cpp/src/models/mistral3.cpp +160 -0
  42. package/src/llama.cpp/src/models/models.h +4 -0
package/src/llama.cpp/common/chat-peg-parser.h
@@ -0,0 +1,105 @@
+ #pragma once
+
+ #include "chat.h"
+ #include "peg-parser.h"
+
+ class common_chat_peg_builder : public common_peg_parser_builder {
+ public:
+ static constexpr const char * REASONING_BLOCK = "reasoning-block";
+ static constexpr const char * REASONING = "reasoning";
+ static constexpr const char * CONTENT = "content";
+
+ common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
+ common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
+ common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+ };
+
+ inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+ common_chat_peg_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+ }
+
+ class common_chat_peg_mapper {
+ public:
+ common_chat_msg & result;
+
+ common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
+
+ virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
+ virtual void map(const common_peg_ast_node & node);
+ };
+
+ class common_chat_peg_native_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_ID = "tool-id";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARGS = "tool-args";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
+ };
+
+ class common_chat_peg_native_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool;
+
+ public:
+ common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+ };
+
+ inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
+ common_chat_peg_native_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+ }
+
+ class common_chat_peg_constructed_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARG = "tool-arg";
+ static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+ static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+ static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
+ static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
+ static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
+ common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
+ common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
+ common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+ common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
+ common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+ };
+
+ class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool;
+ int arg_count = 0;
+ bool needs_closing_quote = false;
+
+ public:
+ common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+ };
+
+ inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
+ common_chat_peg_constructed_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+ }
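
For orientation (editorial note, not part of the published diff): the header above adds a small builder/mapper layer on top of the generic PEG parser. A rough sketch of how it is meant to be wired together is below; the sub-rules (open_rule, name_rule, args_rule, close_rule) and the sequencing combinator are placeholders for facilities declared in peg-parser.h, which this extract does not show.

    // Sketch only: tag the pieces of a model-emitted tool call so the mapper
    // can later recover the tool name and arguments from the AST.
    common_peg_arena arena = build_chat_peg_native_parser(
        [&](common_chat_peg_native_builder & b) {
            return b.tool(/* sequence of */ b.tool_open(open_rule)
                   /* , b.tool_name(name_rule), b.tool_args(args_rule), b.tool_close(close_rule) */);
        });

    // Folding a parse result back into a chat message:
    common_chat_msg msg;
    common_chat_peg_native_mapper mapper(msg);
    // mapper.from_ast(ast_arena, parse_result);  // signatures as declared above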
package/src/llama.cpp/common/chat.cpp
@@ -82,29 +82,36 @@ json common_chat_msg::to_json_oaicompat() const
  return message;
  }
 
- std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
+ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
  std::vector<common_chat_msg_diff> diffs;
- if (previous_msg.reasoning_content != new_msg.reasoning_content) {
+ if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
+ diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
+ } else {
+ diffs.reserve(3);
+ }
+
+ // TODO: these can become expensive for long messages - how to optimize?
+ if (msg_prv.reasoning_content != msg_new.reasoning_content) {
  auto & diff = diffs.emplace_back();
- diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
+ diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
  }
- if (previous_msg.content != new_msg.content) {
+ if (msg_prv.content != msg_new.content) {
  auto & diff = diffs.emplace_back();
- diff.content_delta = string_diff(previous_msg.content, new_msg.content);
+ diff.content_delta = string_diff(msg_prv.content, msg_new.content);
  }
 
- if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
+ if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
  throw std::runtime_error("Invalid diff: now finding less tool calls!");
  }
 
- if (!previous_msg.tool_calls.empty()) {
- auto idx = previous_msg.tool_calls.size() - 1;
- const auto & pref = previous_msg.tool_calls[idx];
- const auto & newf = new_msg.tool_calls[idx];
+ if (!msg_prv.tool_calls.empty()) {
+ const auto idx = msg_prv.tool_calls.size() - 1;
+ const auto & pref = msg_prv.tool_calls[idx];
+ const auto & newf = msg_new.tool_calls[idx];
  if (pref.name != newf.name) {
  throw std::runtime_error("Invalid diff: tool call mismatch!");
  }
- auto args_diff = string_diff(pref.arguments, newf.arguments);
+ const auto args_diff = string_diff(pref.arguments, newf.arguments);
  if (!args_diff.empty() || pref.id != newf.id) {
  auto & diff = diffs.emplace_back();
  diff.tool_call_index = idx;
@@ -115,11 +122,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  diff.tool_call_delta.arguments = args_diff;
  }
  }
- for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
+ for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
  auto & diff = diffs.emplace_back();
  diff.tool_call_index = idx;
- diff.tool_call_delta = new_msg.tool_calls[idx];
+ diff.tool_call_delta = msg_new.tool_calls[idx];
  }
+
  return diffs;
  }
 
@@ -150,7 +158,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
  if (tool_choice == "required") {
  return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
  }
- throw std::runtime_error("Invalid tool_choice: " + tool_choice);
+ throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
  }
 
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -173,17 +181,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  try {
 
  if (!messages.is_array()) {
- throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
+ throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
  }
 
  for (const auto & message : messages) {
  if (!message.is_object()) {
- throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
+ throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
  }
 
  common_chat_msg msg;
  if (!message.contains("role")) {
- throw std::runtime_error("Missing 'role' in message: " + message.dump());
+ throw std::invalid_argument("Missing 'role' in message: " + message.dump());
  }
  msg.role = message.at("role");
 
@@ -196,11 +204,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  } else if (content.is_array()) {
  for (const auto & part : content) {
  if (!part.contains("type")) {
- throw std::runtime_error("Missing content part type: " + part.dump());
+ throw std::invalid_argument("Missing content part type: " + part.dump());
  }
  const auto & type = part.at("type");
  if (type != "text") {
- throw std::runtime_error("Unsupported content part type: " + type.dump());
+ throw std::invalid_argument("Unsupported content part type: " + type.dump());
  }
  common_chat_msg_content_part msg_part;
  msg_part.type = type;
@@ -208,25 +216,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  msg.content_parts.push_back(msg_part);
  }
  } else if (!content.is_null()) {
- throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+ throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
  }
  }
  if (has_tool_calls) {
  for (const auto & tool_call : message.at("tool_calls")) {
  common_chat_tool_call tc;
  if (!tool_call.contains("type")) {
- throw std::runtime_error("Missing tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
  }
  const auto & type = tool_call.at("type");
  if (type != "function") {
- throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
  }
  if (!tool_call.contains("function")) {
- throw std::runtime_error("Missing tool call function: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
  }
  const auto & fc = tool_call.at("function");
  if (!fc.contains("name")) {
- throw std::runtime_error("Missing tool call name: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
  }
  tc.name = fc.at("name");
  tc.arguments = fc.at("arguments");
@@ -237,7 +245,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  }
  }
  if (!has_content && !has_tool_calls) {
- throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+ throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
  }
  if (message.contains("reasoning_content")) {
  msg.reasoning_content = message.at("reasoning_content");
@@ -340,18 +348,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
  try {
  if (!tools.is_null()) {
  if (!tools.is_array()) {
- throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
+ throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
  }
  for (const auto & tool : tools) {
  if (!tool.contains("type")) {
- throw std::runtime_error("Missing tool type: " + tool.dump());
+ throw std::invalid_argument("Missing tool type: " + tool.dump());
  }
  const auto & type = tool.at("type");
  if (!type.is_string() || type != "function") {
- throw std::runtime_error("Unsupported tool type: " + tool.dump());
+ throw std::invalid_argument("Unsupported tool type: " + tool.dump());
  }
  if (!tool.contains("function")) {
- throw std::runtime_error("Missing tool function: " + tool.dump());
+ throw std::invalid_argument("Missing tool function: " + tool.dump());
  }
 
  const auto & function = tool.at("function");
@@ -636,6 +644,9 @@ const char * common_chat_format_name(common_chat_format format) {
  case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
  case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
  case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+ case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
+ case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
+ case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
  default:
  throw std::runtime_error("Unknown chat format");
  }
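
Editorial illustration (not part of the published diff) of how a streaming caller typically uses compute_diffs with the renamed msg_prv/msg_new parameters: the previous partial message is compared against the newer one and only the deltas are emitted.

    common_chat_msg prev, cur;
    prev.content = "Hello";
    cur.content  = "Hello, world";   // the newer partial parse of the same message

    // Yields one diff whose content_delta carries the newly added suffix
    // (here ", world"); extra tool calls would be reported with tool_call_index set.
    const auto deltas = common_chat_msg_diff::compute_diffs(prev, cur);
    prev = cur;                      // carry the state forward for the next increment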
package/src/llama.cpp/common/chat.h
@@ -3,6 +3,7 @@
  #pragma once
 
  #include "common.h"
+ #include "peg-parser.h"
  #include <functional>
  #include <chrono>
  #include <string>
@@ -87,7 +88,7 @@ struct common_chat_msg_diff {
  size_t tool_call_index = std::string::npos;
  common_chat_tool_call tool_call_delta;
 
- static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
 
  bool operator==(const common_chat_msg_diff & other) const {
  return content_delta == other.content_delta
@@ -135,6 +136,11 @@ enum common_chat_format {
  COMMON_CHAT_FORMAT_APRIEL_1_5,
  COMMON_CHAT_FORMAT_XIAOMI_MIMO,
 
+ // These are intended to be parsed by the PEG parser
+ COMMON_CHAT_FORMAT_PEG_SIMPLE,
+ COMMON_CHAT_FORMAT_PEG_NATIVE,
+ COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
+
  COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
  };
 
@@ -165,6 +171,7 @@ struct common_chat_params {
  std::vector<common_grammar_trigger> grammar_triggers;
  std::vector<std::string> preserved_tokens;
  std::vector<std::string> additional_stops;
+ std::string parser;
  };
 
  struct common_chat_syntax {
@@ -174,6 +181,7 @@ struct common_chat_syntax {
  bool reasoning_in_content = false;
  bool thinking_forced_open = false;
  bool parse_tool_calls = true;
+ common_peg_arena parser = {};
  };
 
  // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -217,6 +225,7 @@ const char* common_chat_format_name(common_chat_format format);
  const char* common_reasoning_format_name(common_reasoning_format format);
  common_reasoning_format common_reasoning_format_from_name(const std::string & format);
  common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
 
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
 
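
Editorial sketch (not part of the published diff) of how the new declarations fit together; `parser` stands for a common_peg_arena built elsewhere (for example via build_chat_peg_native_parser from chat-peg-parser.h) and `raw_output` for text produced by the model.

    common_chat_syntax syntax;
    syntax.parser = parser;  // the new common_peg_arena field above

    // Parse a possibly partial response into a structured message:
    common_chat_msg msg = common_chat_peg_parse(syntax.parser, raw_output, /*is_partial=*/true, syntax);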
package/src/llama.cpp/common/common.cpp
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
 
  // Validate if a filename is safe to use
  // To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
- bool fs_validate_filename(const std::string & filename) {
+ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
  if (!filename.length()) {
  // Empty filename invalid
  return false;
@@ -754,10 +754,14 @@ bool fs_validate_filename(const std::string & filename) {
  || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
  || c == 0xFFFD // Replacement Character (UTF-8)
  || c == 0xFEFF // Byte Order Mark (BOM)
- || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+ || c == ':' || c == '*' // Illegal characters
  || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
  return false;
  }
+ if (!allow_subdirs && (c == '/' || c == '\\')) {
+ // Subdirectories not allowed, reject path separators
+ return false;
+ }
  }
 
  // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
@@ -859,6 +863,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
  #endif // _WIN32
  }
 
+ bool fs_is_directory(const std::string & path) {
+ std::filesystem::path dir(path);
+ return std::filesystem::exists(dir) && std::filesystem::is_directory(dir);
+ }
+
  std::string fs_get_cache_directory() {
  std::string cache_directory = "";
  auto ensure_trailing_slash = [](std::string p) {
@@ -893,6 +902,8 @@ std::string fs_get_cache_directory() {
  cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
  #elif defined(_WIN32)
  cache_directory = std::getenv("LOCALAPPDATA");
+ #elif defined(__EMSCRIPTEN__)
+ GGML_ABORT("not implemented on this platform");
  #else
  # error Unknown architecture
  #endif
@@ -912,7 +923,7 @@ std::string fs_get_cache_file(const std::string & filename) {
  return cache_directory + filename;
  }
 
- std::vector<common_file_info> fs_list_files(const std::string & path) {
+ std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
  std::vector<common_file_info> files;
  if (path.empty()) return files;
 
@@ -927,14 +938,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
  const auto & p = entry.path();
  if (std::filesystem::is_regular_file(p)) {
  common_file_info info;
- info.path = p.string();
- info.name = p.filename().string();
+ info.path = p.string();
+ info.name = p.filename().string();
+ info.is_dir = false;
  try {
  info.size = static_cast<size_t>(std::filesystem::file_size(p));
  } catch (const std::filesystem::filesystem_error &) {
  info.size = 0;
  }
  files.push_back(std::move(info));
+ } else if (include_directories && std::filesystem::is_directory(p)) {
+ common_file_info info;
+ info.path = p.string();
+ info.name = p.filename().string();
+ info.size = 0; // Directories have no size
+ info.is_dir = true;
+ files.push_back(std::move(info));
  }
  } catch (const std::filesystem::filesystem_error &) {
  // skip entries we cannot inspect
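
A short editorial example of the widened filesystem helpers (paths are placeholders, not package defaults):

    // Validate a user-supplied relative path that may contain subdirectories,
    // then enumerate a models directory including its subfolders.
    if (fs_validate_filename("some-org/model.gguf", /*allow_subdirs=*/true)) {
        for (const auto & e : fs_list("/path/to/models", /*include_directories=*/true)) {
            // e.is_dir distinguishes subfolders from regular files; e.size is 0 for directories
        }
    }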
package/src/llama.cpp/common/common.h
@@ -12,6 +12,10 @@
  #include <vector>
  #include <map>
 
+ #if defined(_WIN32) && !defined(_WIN32_WINNT)
+ #define _WIN32_WINNT 0x0A00
+ #endif
+
  #ifdef _WIN32
  #define DIRECTORY_SEPARATOR '\\'
  #else
@@ -26,8 +30,6 @@
  fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
  } while(0)
 
- #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
-
  struct common_time_meas {
  common_time_meas(int64_t & t_acc, bool disable = false);
  ~common_time_meas();
@@ -223,6 +225,7 @@ struct common_params_model {
  std::string hf_repo = ""; // HF repo // NOLINT
  std::string hf_file = ""; // HF file // NOLINT
  std::string docker_repo = ""; // Docker repo // NOLINT
+ std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
  };
 
  struct common_params_speculative {
@@ -370,7 +373,7 @@ struct common_params {
 
  std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
 
- int32_t verbosity = 0;
+ int32_t verbosity = 3; // LOG_LEVEL_INFO
  int32_t control_vector_layer_start = -1; // layer range for control vector
  int32_t control_vector_layer_end = -1; // layer range for control vector
  bool offline = false;
@@ -479,9 +482,15 @@ struct common_params {
  bool endpoint_props = false; // only control POST requests, not GET
  bool endpoint_metrics = false;
 
+ // router server configs
+ std::string models_dir = ""; // directory containing models for the router server
+ int models_max = 4; // maximum number of models to load simultaneously
+ bool models_autoload = true; // automatically load models when requested via the router server
+
  bool log_json = false;
 
  std::string slot_save_path;
+ std::string media_path; // path to directory for loading media files
 
  float slot_prompt_similarity = 0.1f;
 
@@ -632,8 +641,9 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
  // Filesystem utils
  //
 
- bool fs_validate_filename(const std::string & filename);
+ bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
  bool fs_create_directory_with_parents(const std::string & path);
+ bool fs_is_directory(const std::string & path);
 
  std::string fs_get_cache_directory();
  std::string fs_get_cache_file(const std::string & filename);
@@ -642,8 +652,9 @@ struct common_file_info {
  std::string path;
  std::string name;
  size_t size = 0; // in bytes
+ bool is_dir = false;
  };
- std::vector<common_file_info> fs_list_files(const std::string & path);
+ std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
 
  //
  // Model utils
package/src/llama.cpp/common/download.cpp
@@ -24,6 +24,7 @@
  #include "http.h"
  #endif
 
+ #ifndef __EMSCRIPTEN__
  #ifdef __linux__
  #include <linux/limits.h>
  #elif defined(_WIN32)
@@ -35,6 +36,8 @@
  #else
  #include <sys/syslimits.h>
  #endif
+ #endif
+
  #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 
  // isatty
@@ -430,7 +433,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
  curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
  curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
- curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
+ curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
  typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
  auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
  auto data_vec = static_cast<std::vector<char> *>(data);
@@ -517,16 +520,18 @@ static bool common_pull_file(httplib::Client & cli,
  headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
  }
 
- std::atomic<size_t> downloaded{existing_size};
+ const char * func = __func__; // avoid __func__ inside a lambda
+ size_t downloaded = existing_size;
+ size_t progress_step = 0;
 
  auto res = cli.Get(resolve_path, headers,
  [&](const httplib::Response &response) {
  if (existing_size > 0 && response.status != 206) {
- LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
+ LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
  return false;
  }
  if (existing_size == 0 && response.status != 200) {
- LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
+ LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
  return false;
  }
  if (total_size == 0 && response.has_header("Content-Length")) {
@@ -534,7 +539,7 @@ static bool common_pull_file(httplib::Client & cli,
  size_t content_length = std::stoull(response.get_header_value("Content-Length"));
  total_size = existing_size + content_length;
  } catch (const std::exception &e) {
- LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
+ LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
  }
  }
  return true;
@@ -542,11 +547,16 @@ static bool common_pull_file(httplib::Client & cli,
  [&](const char *data, size_t len) {
  ofs.write(data, len);
  if (!ofs) {
- LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
+ LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
  return false;
  }
  downloaded += len;
- print_progress(downloaded, total_size);
+ progress_step += len;
+
+ if (progress_step >= total_size / 1000 || downloaded == total_size) {
+ print_progress(downloaded, total_size);
+ progress_step = 0;
+ }
  return true;
  },
  nullptr
@@ -1047,7 +1057,7 @@ std::string common_docker_resolve_model(const std::string &) {
  std::vector<common_cached_model_info> common_list_cached_models() {
  std::vector<common_cached_model_info> models;
  const std::string cache_dir = fs_get_cache_directory();
- const std::vector<common_file_info> files = fs_list_files(cache_dir);
+ const std::vector<common_file_info> files = fs_list(cache_dir, false);
  for (const auto & file : files) {
  if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
  common_cached_model_info model_info;
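
The write callback above now throttles progress output to roughly one update per 0.1% of the file instead of one per received chunk. A standalone editorial illustration of the same pattern (sizes are examples only):

    size_t downloaded = 0, progress_step = 0;
    const size_t total_size = 4ull * 1024 * 1024 * 1024;  // e.g. a 4 GiB GGUF
    auto on_chunk = [&](size_t len) {
        downloaded    += len;
        progress_step += len;
        if (progress_step >= total_size / 1000 || downloaded == total_size) {
            // print_progress(downloaded, total_size);  // at most ~1000 calls, ~every 4 MiB here
            progress_step = 0;
        }
    };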
package/src/llama.cpp/common/download.h
@@ -14,8 +14,10 @@ struct common_cached_model_info {
  std::string model;
  std::string tag;
  size_t size = 0; // GGUF size in bytes
+ // return string representation like "user/model:tag"
+ // if tag is "latest", it will be omitted
  std::string to_string() const {
- return user + "/" + model + ":" + tag;
+ return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
  }
  };
 
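
Effect of the new to_string() formatting, with hypothetical field values:

    common_cached_model_info info;
    info.user  = "someuser";
    info.model = "somemodel";
    info.tag   = "latest";
    // info.to_string() == "someuser/somemodel"          (":latest" is omitted)
    info.tag   = "Q4_K_M";
    // info.to_string() == "someuser/somemodel:Q4_K_M"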
package/src/llama.cpp/common/json-schema-to-grammar.cpp
@@ -974,7 +974,7 @@ public:
 
  void check_errors() {
  if (!_errors.empty()) {
- throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
+ throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
  }
  if (!_warnings.empty()) {
  fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
package/src/llama.cpp/common/log.cpp
@@ -443,8 +443,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
  log->set_timestamps(timestamps);
  }
 
+ static int common_get_verbosity(enum ggml_log_level level) {
+ switch (level) {
+ case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
+ case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
+ case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
+ case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
+ case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
+ case GGML_LOG_LEVEL_NONE:
+ default:
+ return LOG_LEVEL_OUTPUT;
+ }
+ }
+
  void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
- if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
+ auto verbosity = common_get_verbosity(level);
+ if (verbosity <= common_log_verbosity_thold) {
  common_log_add(common_log_main(), level, "%s", text);
  }
  }
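
Editorial note: with this mapping, ggml log messages are filtered per level against common_log_verbosity_thold instead of all sharing LOG_DEFAULT_LLAMA. The LOG_LEVEL_* constants live in log.h, whose changes are not shown in this extract, so their exact values are assumptions; the apparent intent, given the new default verbosity of 3 (LOG_LEVEL_INFO, see the common.h hunk above), is that DEBUG-level ggml output is dropped by default while INFO/WARN/ERROR still reach the log.

    // Illustration only:
    common_log_default_callback(GGML_LOG_LEVEL_DEBUG, "dropped at the default threshold\n", nullptr);
    common_log_default_callback(GGML_LOG_LEVEL_INFO,  "still logged\n",                     nullptr);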