@fugood/llama.node 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/CMakeLists.txt +1 -1
  2. package/lib/binding.js +3 -0
  3. package/lib/binding.ts +2 -0
  4. package/package.json +16 -16
  5. package/scripts/llama.cpp.patch +25 -11
  6. package/src/LlamaContext.cpp +2 -2
  7. package/src/llama.cpp/CMakeLists.txt +21 -6
  8. package/src/llama.cpp/common/CMakeLists.txt +6 -0
  9. package/src/llama.cpp/common/arg.cpp +65 -16
  10. package/src/llama.cpp/common/chat-parser.cpp +40 -0
  11. package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
  12. package/src/llama.cpp/common/chat-peg-parser.h +105 -0
  13. package/src/llama.cpp/common/chat.cpp +40 -29
  14. package/src/llama.cpp/common/chat.h +10 -1
  15. package/src/llama.cpp/common/common.cpp +24 -5
  16. package/src/llama.cpp/common/common.h +16 -5
  17. package/src/llama.cpp/common/download.cpp +18 -8
  18. package/src/llama.cpp/common/download.h +3 -1
  19. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  20. package/src/llama.cpp/common/log.cpp +15 -1
  21. package/src/llama.cpp/common/log.h +19 -12
  22. package/src/llama.cpp/common/peg-parser.cpp +1712 -0
  23. package/src/llama.cpp/common/peg-parser.h +459 -0
  24. package/src/llama.cpp/common/unicode.cpp +64 -0
  25. package/src/llama.cpp/common/unicode.h +22 -0
  26. package/src/llama.cpp/ggml/CMakeLists.txt +48 -48
  27. package/src/llama.cpp/ggml/include/ggml.h +7 -2
  28. package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -4
  29. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  30. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
  31. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +60 -1
  32. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  33. package/src/llama.cpp/src/llama-arch.cpp +30 -1
  34. package/src/llama.cpp/src/llama-arch.h +3 -0
  35. package/src/llama.cpp/src/llama-graph.cpp +3 -6
  36. package/src/llama.cpp/src/llama-hparams.h +2 -2
  37. package/src/llama.cpp/src/llama-impl.h +1 -1
  38. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  39. package/src/llama.cpp/src/llama-model.cpp +50 -6
  40. package/src/llama.cpp/src/llama-vocab.cpp +1 -2
  41. package/src/llama.cpp/src/models/mistral3.cpp +160 -0
  42. package/src/llama.cpp/src/models/models.h +4 -0
package/src/llama.cpp/common/chat-peg-parser.h
@@ -0,0 +1,105 @@
+ #pragma once
+
+ #include "chat.h"
+ #include "peg-parser.h"
+
+ class common_chat_peg_builder : public common_peg_parser_builder {
+ public:
+ static constexpr const char * REASONING_BLOCK = "reasoning-block";
+ static constexpr const char * REASONING = "reasoning";
+ static constexpr const char * CONTENT = "content";
+
+ common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
+ common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
+ common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+ };
+
+ inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+ common_chat_peg_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+ }
+
+ class common_chat_peg_mapper {
+ public:
+ common_chat_msg & result;
+
+ common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
+
+ virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
+ virtual void map(const common_peg_ast_node & node);
+ };
+
+ class common_chat_peg_native_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_ID = "tool-id";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARGS = "tool-args";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
+ };
+
+ class common_chat_peg_native_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool;
+
+ public:
+ common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+ };
+
+ inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
+ common_chat_peg_native_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+ }
+
+ class common_chat_peg_constructed_builder : public common_chat_peg_builder {
+ public:
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARG = "tool-arg";
+ static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+ static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+ static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
+ static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
+ static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+
+ common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
+ common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
+ common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
+ common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
+ common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
+ common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+ common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
+ common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+ };
+
+ class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
+ common_chat_tool_call * current_tool;
+ int arg_count = 0;
+ bool needs_closing_quote = false;
+
+ public:
+ common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+
+ void map(const common_peg_ast_node & node) override;
+ };
+
+ inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
+ common_chat_peg_constructed_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+ }
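
For orientation (editorial note, not part of the published diff): the header above adds a small builder/mapper layer on top of the generic PEG parser. A rough sketch of how it is meant to be wired together is below; the sub-rules (open_rule, name_rule, args_rule, close_rule) and the sequencing combinator are placeholders for facilities declared in peg-parser.h, which this extract does not show.

    // Sketch only: tag the pieces of a model-emitted tool call so the mapper
    // can later recover the tool name and arguments from the AST.
    common_peg_arena arena = build_chat_peg_native_parser(
        [&](common_chat_peg_native_builder & b) {
            return b.tool(/* sequence of */ b.tool_open(open_rule)
                   /* , b.tool_name(name_rule), b.tool_args(args_rule), b.tool_close(close_rule) */);
        });

    // Folding a parse result back into a chat message:
    common_chat_msg msg;
    common_chat_peg_native_mapper mapper(msg);
    // mapper.from_ast(ast_arena, parse_result);  // signatures as declared above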
package/src/llama.cpp/common/chat.cpp
@@ -82,29 +82,36 @@ json common_chat_msg::to_json_oaicompat() const
  return message;
  }
 
- std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
+ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
  std::vector<common_chat_msg_diff> diffs;
- if (previous_msg.reasoning_content != new_msg.reasoning_content) {
+ if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
+ diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
+ } else {
+ diffs.reserve(3);
+ }
+
+ // TODO: these can become expensive for long messages - how to optimize?
+ if (msg_prv.reasoning_content != msg_new.reasoning_content) {
  auto & diff = diffs.emplace_back();
- diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
+ diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
  }
- if (previous_msg.content != new_msg.content) {
+ if (msg_prv.content != msg_new.content) {
  auto & diff = diffs.emplace_back();
- diff.content_delta = string_diff(previous_msg.content, new_msg.content);
+ diff.content_delta = string_diff(msg_prv.content, msg_new.content);
  }
 
- if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
+ if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
  throw std::runtime_error("Invalid diff: now finding less tool calls!");
  }
 
- if (!previous_msg.tool_calls.empty()) {
- auto idx = previous_msg.tool_calls.size() - 1;
- const auto & pref = previous_msg.tool_calls[idx];
- const auto & newf = new_msg.tool_calls[idx];
+ if (!msg_prv.tool_calls.empty()) {
+ const auto idx = msg_prv.tool_calls.size() - 1;
+ const auto & pref = msg_prv.tool_calls[idx];
+ const auto & newf = msg_new.tool_calls[idx];
  if (pref.name != newf.name) {
  throw std::runtime_error("Invalid diff: tool call mismatch!");
  }
- auto args_diff = string_diff(pref.arguments, newf.arguments);
+ const auto args_diff = string_diff(pref.arguments, newf.arguments);
  if (!args_diff.empty() || pref.id != newf.id) {
  auto & diff = diffs.emplace_back();
  diff.tool_call_index = idx;
@@ -115,11 +122,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  diff.tool_call_delta.arguments = args_diff;
  }
  }
- for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
+ for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
  auto & diff = diffs.emplace_back();
  diff.tool_call_index = idx;
- diff.tool_call_delta = new_msg.tool_calls[idx];
+ diff.tool_call_delta = msg_new.tool_calls[idx];
  }
+
  return diffs;
  }
 
@@ -150,7 +158,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
  if (tool_choice == "required") {
  return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
  }
- throw std::runtime_error("Invalid tool_choice: " + tool_choice);
+ throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
  }
 
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -173,17 +181,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  try {
 
  if (!messages.is_array()) {
- throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
+ throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
  }
 
  for (const auto & message : messages) {
  if (!message.is_object()) {
- throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
+ throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
  }
 
  common_chat_msg msg;
  if (!message.contains("role")) {
- throw std::runtime_error("Missing 'role' in message: " + message.dump());
+ throw std::invalid_argument("Missing 'role' in message: " + message.dump());
  }
  msg.role = message.at("role");
 
@@ -196,11 +204,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  } else if (content.is_array()) {
  for (const auto & part : content) {
  if (!part.contains("type")) {
- throw std::runtime_error("Missing content part type: " + part.dump());
+ throw std::invalid_argument("Missing content part type: " + part.dump());
  }
  const auto & type = part.at("type");
  if (type != "text") {
- throw std::runtime_error("Unsupported content part type: " + type.dump());
+ throw std::invalid_argument("Unsupported content part type: " + type.dump());
  }
  common_chat_msg_content_part msg_part;
  msg_part.type = type;
@@ -208,25 +216,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  msg.content_parts.push_back(msg_part);
  }
  } else if (!content.is_null()) {
- throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+ throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
  }
  }
  if (has_tool_calls) {
  for (const auto & tool_call : message.at("tool_calls")) {
  common_chat_tool_call tc;
  if (!tool_call.contains("type")) {
- throw std::runtime_error("Missing tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
  }
  const auto & type = tool_call.at("type");
  if (type != "function") {
- throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
+ throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
  }
  if (!tool_call.contains("function")) {
- throw std::runtime_error("Missing tool call function: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
  }
  const auto & fc = tool_call.at("function");
  if (!fc.contains("name")) {
- throw std::runtime_error("Missing tool call name: " + tool_call.dump());
+ throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
  }
  tc.name = fc.at("name");
  tc.arguments = fc.at("arguments");
@@ -237,7 +245,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
  }
  }
  if (!has_content && !has_tool_calls) {
- throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+ throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
  }
  if (message.contains("reasoning_content")) {
  msg.reasoning_content = message.at("reasoning_content");
@@ -340,18 +348,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
  try {
  if (!tools.is_null()) {
  if (!tools.is_array()) {
- throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
+ throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
  }
  for (const auto & tool : tools) {
  if (!tool.contains("type")) {
- throw std::runtime_error("Missing tool type: " + tool.dump());
+ throw std::invalid_argument("Missing tool type: " + tool.dump());
  }
  const auto & type = tool.at("type");
  if (!type.is_string() || type != "function") {
- throw std::runtime_error("Unsupported tool type: " + tool.dump());
+ throw std::invalid_argument("Unsupported tool type: " + tool.dump());
  }
  if (!tool.contains("function")) {
- throw std::runtime_error("Missing tool function: " + tool.dump());
+ throw std::invalid_argument("Missing tool function: " + tool.dump());
  }
 
  const auto & function = tool.at("function");
@@ -636,6 +644,9 @@ const char * common_chat_format_name(common_chat_format format) {
  case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
  case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
  case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
+ case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
+ case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
+ case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
  default:
  throw std::runtime_error("Unknown chat format");
  }
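
Editorial illustration (not part of the published diff) of how a streaming caller typically uses compute_diffs with the renamed msg_prv/msg_new parameters: the previous partial message is compared against the newer one and only the deltas are emitted.

    common_chat_msg prev, cur;
    prev.content = "Hello";
    cur.content  = "Hello, world";   // the newer partial parse of the same message

    // Yields one diff whose content_delta carries the newly added suffix
    // (here ", world"); extra tool calls would be reported with tool_call_index set.
    const auto deltas = common_chat_msg_diff::compute_diffs(prev, cur);
    prev = cur;                      // carry the state forward for the next increment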
package/src/llama.cpp/common/chat.h
@@ -3,6 +3,7 @@
  #pragma once
 
  #include "common.h"
+ #include "peg-parser.h"
  #include <functional>
  #include <chrono>
  #include <string>
@@ -87,7 +88,7 @@ struct common_chat_msg_diff {
  size_t tool_call_index = std::string::npos;
  common_chat_tool_call tool_call_delta;
 
- static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
 
  bool operator==(const common_chat_msg_diff & other) const {
  return content_delta == other.content_delta
@@ -135,6 +136,11 @@ enum common_chat_format {
  COMMON_CHAT_FORMAT_APRIEL_1_5,
  COMMON_CHAT_FORMAT_XIAOMI_MIMO,
 
+ // These are intended to be parsed by the PEG parser
+ COMMON_CHAT_FORMAT_PEG_SIMPLE,
+ COMMON_CHAT_FORMAT_PEG_NATIVE,
+ COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
+
  COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
  };
 
@@ -165,6 +171,7 @@ struct common_chat_params {
  std::vector<common_grammar_trigger> grammar_triggers;
  std::vector<std::string> preserved_tokens;
  std::vector<std::string> additional_stops;
+ std::string parser;
  };
 
  struct common_chat_syntax {
@@ -174,6 +181,7 @@ struct common_chat_syntax {
  bool reasoning_in_content = false;
  bool thinking_forced_open = false;
  bool parse_tool_calls = true;
+ common_peg_arena parser = {};
  };
 
  // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -217,6 +225,7 @@ const char* common_chat_format_name(common_chat_format format);
  const char* common_reasoning_format_name(common_reasoning_format format);
  common_reasoning_format common_reasoning_format_from_name(const std::string & format);
  common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
 
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
 
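
Editorial sketch (not part of the published diff) of how the new declarations fit together; `parser` stands for a common_peg_arena built elsewhere (for example via build_chat_peg_native_parser from chat-peg-parser.h) and `raw_output` for text produced by the model.

    common_chat_syntax syntax;
    syntax.parser = parser;  // the new common_peg_arena field above

    // Parse a possibly partial response into a structured message:
    common_chat_msg msg = common_chat_peg_parse(syntax.parser, raw_output, /*is_partial=*/true, syntax);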
package/src/llama.cpp/common/common.cpp
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
 
  // Validate if a filename is safe to use
  // To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
- bool fs_validate_filename(const std::string & filename) {
+ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
  if (!filename.length()) {
  // Empty filename invalid
  return false;
@@ -754,10 +754,14 @@ bool fs_validate_filename(const std::string & filename) {
  || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
  || c == 0xFFFD // Replacement Character (UTF-8)
  || c == 0xFEFF // Byte Order Mark (BOM)
- || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+ || c == ':' || c == '*' // Illegal characters
  || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
  return false;
  }
+ if (!allow_subdirs && (c == '/' || c == '\\')) {
+ // Subdirectories not allowed, reject path separators
+ return false;
+ }
  }
 
  // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
@@ -859,6 +863,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
  #endif // _WIN32
  }
 
+ bool fs_is_directory(const std::string & path) {
+ std::filesystem::path dir(path);
+ return std::filesystem::exists(dir) && std::filesystem::is_directory(dir);
+ }
+
  std::string fs_get_cache_directory() {
  std::string cache_directory = "";
  auto ensure_trailing_slash = [](std::string p) {
@@ -893,6 +902,8 @@ std::string fs_get_cache_directory() {
  cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
  #elif defined(_WIN32)
  cache_directory = std::getenv("LOCALAPPDATA");
+ #elif defined(__EMSCRIPTEN__)
+ GGML_ABORT("not implemented on this platform");
  #else
  # error Unknown architecture
  #endif
@@ -912,7 +923,7 @@ std::string fs_get_cache_file(const std::string & filename) {
  return cache_directory + filename;
  }
 
- std::vector<common_file_info> fs_list_files(const std::string & path) {
+ std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
  std::vector<common_file_info> files;
  if (path.empty()) return files;
 
@@ -927,14 +938,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
  const auto & p = entry.path();
  if (std::filesystem::is_regular_file(p)) {
  common_file_info info;
- info.path = p.string();
- info.name = p.filename().string();
+ info.path = p.string();
+ info.name = p.filename().string();
+ info.is_dir = false;
  try {
  info.size = static_cast<size_t>(std::filesystem::file_size(p));
  } catch (const std::filesystem::filesystem_error &) {
  info.size = 0;
  }
  files.push_back(std::move(info));
+ } else if (include_directories && std::filesystem::is_directory(p)) {
+ common_file_info info;
+ info.path = p.string();
+ info.name = p.filename().string();
+ info.size = 0; // Directories have no size
+ info.is_dir = true;
+ files.push_back(std::move(info));
  }
  } catch (const std::filesystem::filesystem_error &) {
  // skip entries we cannot inspect
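
A short editorial example of the widened filesystem helpers (paths are placeholders, not package defaults):

    // Validate a user-supplied relative path that may contain subdirectories,
    // then enumerate a models directory including its subfolders.
    if (fs_validate_filename("some-org/model.gguf", /*allow_subdirs=*/true)) {
        for (const auto & e : fs_list("/path/to/models", /*include_directories=*/true)) {
            // e.is_dir distinguishes subfolders from regular files; e.size is 0 for directories
        }
    }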
package/src/llama.cpp/common/common.h
@@ -12,6 +12,10 @@
  #include <vector>
  #include <map>
 
+ #if defined(_WIN32) && !defined(_WIN32_WINNT)
+ #define _WIN32_WINNT 0x0A00
+ #endif
+
  #ifdef _WIN32
  #define DIRECTORY_SEPARATOR '\\'
  #else
@@ -26,8 +30,6 @@
  fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
  } while(0)
 
- #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
-
  struct common_time_meas {
  common_time_meas(int64_t & t_acc, bool disable = false);
  ~common_time_meas();
@@ -223,6 +225,7 @@ struct common_params_model {
  std::string hf_repo = ""; // HF repo // NOLINT
  std::string hf_file = ""; // HF file // NOLINT
  std::string docker_repo = ""; // Docker repo // NOLINT
+ std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
  };
 
  struct common_params_speculative {
@@ -370,7 +373,7 @@ struct common_params {
 
  std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
 
- int32_t verbosity = 0;
+ int32_t verbosity = 3; // LOG_LEVEL_INFO
  int32_t control_vector_layer_start = -1; // layer range for control vector
  int32_t control_vector_layer_end = -1; // layer range for control vector
  bool offline = false;
@@ -479,9 +482,15 @@ struct common_params {
  bool endpoint_props = false; // only control POST requests, not GET
  bool endpoint_metrics = false;
 
+ // router server configs
+ std::string models_dir = ""; // directory containing models for the router server
+ int models_max = 4; // maximum number of models to load simultaneously
+ bool models_autoload = true; // automatically load models when requested via the router server
+
  bool log_json = false;
 
  std::string slot_save_path;
+ std::string media_path; // path to directory for loading media files
 
  float slot_prompt_similarity = 0.1f;
 
@@ -632,8 +641,9 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
  // Filesystem utils
  //
 
- bool fs_validate_filename(const std::string & filename);
+ bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
  bool fs_create_directory_with_parents(const std::string & path);
+ bool fs_is_directory(const std::string & path);
 
  std::string fs_get_cache_directory();
  std::string fs_get_cache_file(const std::string & filename);
@@ -642,8 +652,9 @@ struct common_file_info {
  std::string path;
  std::string name;
  size_t size = 0; // in bytes
+ bool is_dir = false;
  };
- std::vector<common_file_info> fs_list_files(const std::string & path);
+ std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
 
  //
  // Model utils
package/src/llama.cpp/common/download.cpp
@@ -24,6 +24,7 @@
  #include "http.h"
  #endif
 
+ #ifndef __EMSCRIPTEN__
  #ifdef __linux__
  #include <linux/limits.h>
  #elif defined(_WIN32)
@@ -35,6 +36,8 @@
  #else
  #include <sys/syslimits.h>
  #endif
+ #endif
+
  #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 
  // isatty
@@ -430,7 +433,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
  curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
  curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
- curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
+ curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
  typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
  auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
  auto data_vec = static_cast<std::vector<char> *>(data);
@@ -517,16 +520,18 @@ static bool common_pull_file(httplib::Client & cli,
  headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
  }
 
- std::atomic<size_t> downloaded{existing_size};
+ const char * func = __func__; // avoid __func__ inside a lambda
+ size_t downloaded = existing_size;
+ size_t progress_step = 0;
 
  auto res = cli.Get(resolve_path, headers,
  [&](const httplib::Response &response) {
  if (existing_size > 0 && response.status != 206) {
- LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
+ LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
  return false;
  }
  if (existing_size == 0 && response.status != 200) {
- LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
+ LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
  return false;
  }
  if (total_size == 0 && response.has_header("Content-Length")) {
@@ -534,7 +539,7 @@ static bool common_pull_file(httplib::Client & cli,
  size_t content_length = std::stoull(response.get_header_value("Content-Length"));
  total_size = existing_size + content_length;
  } catch (const std::exception &e) {
- LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
+ LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
  }
  }
  return true;
@@ -542,11 +547,16 @@ static bool common_pull_file(httplib::Client & cli,
  [&](const char *data, size_t len) {
  ofs.write(data, len);
  if (!ofs) {
- LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
+ LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
  return false;
  }
  downloaded += len;
- print_progress(downloaded, total_size);
+ progress_step += len;
+
+ if (progress_step >= total_size / 1000 || downloaded == total_size) {
+ print_progress(downloaded, total_size);
+ progress_step = 0;
+ }
  return true;
  },
  nullptr
@@ -1047,7 +1057,7 @@ std::string common_docker_resolve_model(const std::string &) {
  std::vector<common_cached_model_info> common_list_cached_models() {
  std::vector<common_cached_model_info> models;
  const std::string cache_dir = fs_get_cache_directory();
- const std::vector<common_file_info> files = fs_list_files(cache_dir);
+ const std::vector<common_file_info> files = fs_list(cache_dir, false);
  for (const auto & file : files) {
  if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
  common_cached_model_info model_info;
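
The write callback above now throttles progress output to roughly one update per 0.1% of the file instead of one per received chunk. A standalone editorial illustration of the same pattern (sizes are examples only):

    size_t downloaded = 0, progress_step = 0;
    const size_t total_size = 4ull * 1024 * 1024 * 1024;  // e.g. a 4 GiB GGUF
    auto on_chunk = [&](size_t len) {
        downloaded    += len;
        progress_step += len;
        if (progress_step >= total_size / 1000 || downloaded == total_size) {
            // print_progress(downloaded, total_size);  // at most ~1000 calls, ~every 4 MiB here
            progress_step = 0;
        }
    };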
package/src/llama.cpp/common/download.h
@@ -14,8 +14,10 @@ struct common_cached_model_info {
  std::string model;
  std::string tag;
  size_t size = 0; // GGUF size in bytes
+ // return string representation like "user/model:tag"
+ // if tag is "latest", it will be omitted
  std::string to_string() const {
- return user + "/" + model + ":" + tag;
+ return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
  }
  };
 
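
Effect of the new to_string() formatting, with hypothetical field values:

    common_cached_model_info info;
    info.user  = "someuser";
    info.model = "somemodel";
    info.tag   = "latest";
    // info.to_string() == "someuser/somemodel"          (":latest" is omitted)
    info.tag   = "Q4_K_M";
    // info.to_string() == "someuser/somemodel:Q4_K_M"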
package/src/llama.cpp/common/json-schema-to-grammar.cpp
@@ -974,7 +974,7 @@ public:
 
  void check_errors() {
  if (!_errors.empty()) {
- throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
+ throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
  }
  if (!_warnings.empty()) {
  fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
package/src/llama.cpp/common/log.cpp
@@ -443,8 +443,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
  log->set_timestamps(timestamps);
  }
 
+ static int common_get_verbosity(enum ggml_log_level level) {
+ switch (level) {
+ case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
+ case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
+ case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
+ case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
+ case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
+ case GGML_LOG_LEVEL_NONE:
+ default:
+ return LOG_LEVEL_OUTPUT;
+ }
+ }
+
  void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
- if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
+ auto verbosity = common_get_verbosity(level);
+ if (verbosity <= common_log_verbosity_thold) {
  common_log_add(common_log_main(), level, "%s", text);
  }
  }
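
Editorial note: with this mapping, ggml log messages are filtered per level against common_log_verbosity_thold instead of all sharing LOG_DEFAULT_LLAMA. The LOG_LEVEL_* constants live in log.h, whose changes are not shown in this extract, so their exact values are assumptions; the apparent intent, given the new default verbosity of 3 (LOG_LEVEL_INFO, see the common.h hunk above), is that DEBUG-level ggml output is dropped by default while INFO/WARN/ERROR still reach the log.

    // Illustration only:
    common_log_default_callback(GGML_LOG_LEVEL_DEBUG, "dropped at the default threshold\n", nullptr);
    common_log_default_callback(GGML_LOG_LEVEL_INFO,  "still logged\n",                     nullptr);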