npm - @fugood/llama.node - Versions diffs - 1.5.0-rc.0 → 1.6.0-rc.0 - Mend

@fugood/llama.node 1.5.0-rc.0 → 1.6.0-rc.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +15 -15
package/scripts/llama.cpp.patch +4 -4
package/src/llama.cpp/common/chat-parser.cpp +3 -3
package/src/llama.cpp/common/chat-parser.h +4 -4
package/src/llama.cpp/common/chat.h +16 -7
package/src/llama.cpp/common/common.h +3 -0
package/src/llama.cpp/common/download.cpp +19 -14
package/src/llama.cpp/common/json-partial.h +1 -0

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.5.0-rc.0",
+  "version": "1.6.0-rc.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.5.0-rc.0",
-    "@fugood/node-llama-darwin-x64": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-arm64": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-arm64-cuda": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-x64": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-x64-cuda": "1.5.0-rc.0",
-    "@fugood/node-llama-linux-x64-vulkan": "1.5.0-rc.0",
-    "@fugood/node-llama-win32-arm64": "1.5.0-rc.0",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.5.0-rc.0",
-    "@fugood/node-llama-win32-x64": "1.5.0-rc.0",
-    "@fugood/node-llama-win32-x64-cuda": "1.5.0-rc.0",
-    "@fugood/node-llama-win32-x64-vulkan": "1.5.0-rc.0"
+    "@fugood/node-llama-darwin-arm64": "1.6.0-rc.0",
+    "@fugood/node-llama-darwin-x64": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-arm64": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-x64": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.0",
+    "@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.0",
+    "@fugood/node-llama-win32-arm64": "1.6.0-rc.0",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.0",
+    "@fugood/node-llama-win32-x64": "1.6.0-rc.0",
+    "@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.0",
+    "@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.0"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",

package/scripts/llama.cpp.patch CHANGED Viewed

@@ -83,10 +83,10 @@ index b29544dac..5fa2c6c17 100644
      jinja::global_from_json(ctx, inp, inputs.mark_input);
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index 148801738..0317f1ab1 100644
+index ac19348ec..f6f9f612f 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
-@@ -222,6 +222,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
+@@ -231,6 +231,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
@@ -120,10 +120,10 @@ index 26250abb6..72ceddcc7 100644
      mparams.main_gpu        = params.main_gpu;
      mparams.split_mode      = params.split_mode;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index b9566df62..c9425ad2f 100644
+index 96c990c05..c0b0b3093 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -314,6 +314,7 @@ struct lr_opt {
+@@ -317,6 +317,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
  struct common_params {

package/src/llama.cpp/common/chat-parser.cpp CHANGED Viewed

@@ -129,7 +129,7 @@ static void parse_json_tool_calls(
     }
 }
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
+common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
     : input_(input), is_partial_(is_partial), syntax_(syntax)
 {
     result_.role = "assistant";
@@ -1611,7 +1611,7 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
     builder.finish();
 }
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
     if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
         syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
         syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
@@ -1635,7 +1635,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
     return msg;
 }
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
     if (parser.empty()) {
         throw std::runtime_error("Failed to parse due to missing parser definition.");
     }

package/src/llama.cpp/common/chat-parser.h CHANGED Viewed

@@ -5,7 +5,7 @@
 #include "json-partial.h"
 #include "regex-partial.h"
-#include <nlohmann/json.hpp>
+#include <nlohmann/json_fwd.hpp>
 #include <optional>
 #include <string>
@@ -19,20 +19,20 @@ class common_chat_msg_partial_exception : public std::runtime_error {
 class common_chat_msg_parser {
     std::string input_;
     bool is_partial_;
-    common_chat_syntax syntax_;
+    common_chat_parser_params syntax_; // TODO: rename to params
     std::string healing_marker_;
     size_t pos_ = 0;
     common_chat_msg result_;
   public:
-    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
     const std::string & input() const { return input_; }
     size_t pos() const { return pos_; }
     const std::string & healing_marker() const { return healing_marker_; }
     const bool & is_partial() const { return is_partial_; }
     const common_chat_msg & result() const { return result_; }
-    const common_chat_syntax & syntax() const { return syntax_; }
+    const common_chat_parser_params & syntax() const { return syntax_; }
     void move_to(size_t pos) {
         if (pos > input_.size()) {

package/src/llama.cpp/common/chat.h CHANGED Viewed

@@ -145,7 +145,7 @@ struct common_chat_templates_inputs {
     std::vector<common_chat_tool> tools;
     common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
     bool parallel_tool_calls = false;
-    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
+    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
     bool enable_thinking = true;
     std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
     std::map<std::string, std::string> chat_template_kwargs;
@@ -165,14 +165,21 @@ struct common_chat_params {
     std::string                         parser;
 };
-struct common_chat_syntax {
+// per-message parsing syntax
+// should be derived from common_chat_params
+struct common_chat_parser_params {
     common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE;
+    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
     // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
     bool                     reasoning_in_content  = false;
     bool                     thinking_forced_open  = false;
     bool                     parse_tool_calls      = true;
     common_peg_arena         parser                = {};
+    common_chat_parser_params() = default;
+    common_chat_parser_params(const common_chat_params & chat_params) {
+        format               = chat_params.format;
+        thinking_forced_open = chat_params.thinking_forced_open;
+    }
 };
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -213,10 +220,12 @@ std::string common_chat_format_example(
     const std::map<std::string, std::string> & chat_template_kwargs);
 const char*               common_chat_format_name(common_chat_format format);
-const char*               common_reasoning_format_name(common_reasoning_format format);
-common_reasoning_format   common_reasoning_format_from_name(const std::string & format);
-common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
-common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
+common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
+// used by arg and server
+const char *             common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format  common_reasoning_format_from_name(const std::string & format);
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

package/src/llama.cpp/common/common.h CHANGED Viewed

@@ -57,6 +57,8 @@ extern const char * LLAMA_COMMIT;
 extern const char * LLAMA_COMPILER;
 extern const char * LLAMA_BUILD_TARGET;
+const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
 struct common_control_vector_load_info;
 //
@@ -284,6 +286,7 @@ struct common_params_diffusion {
 };
 // reasoning API response format (not to be confused as chat template's reasoning format)
+// only used by server
 enum common_reasoning_format {
     COMMON_REASONING_FORMAT_NONE,
     COMMON_REASONING_FORMAT_AUTO,            // Same as deepseek, using `message.reasoning_content`

package/src/llama.cpp/common/download.cpp CHANGED Viewed

@@ -314,23 +314,26 @@ static bool common_pull_file(httplib::Client & cli,
 // download one single file from remote URL to local path
 // returns status code or -1 on error
-static int common_download_file_single_online(const std::string & url,
-                                               const std::string & path,
-                                               const std::string & bearer_token,
-                                               const common_header_list & custom_headers) {
+static int common_download_file_single_online(const std::string        & url,
+                                              const std::string        & path,
+                                              const std::string        & bearer_token,
+                                              const common_header_list & custom_headers) {
     static const int max_attempts        = 3;
     static const int retry_delay_seconds = 2;
     auto [cli, parts] = common_http_client(url);
-    httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
-    if (!bearer_token.empty()) {
-        default_headers.insert({"Authorization", "Bearer " + bearer_token});
-    }
+    httplib::Headers headers;
     for (const auto & h : custom_headers) {
-        default_headers.emplace(h.first, h.second);
+        headers.emplace(h.first, h.second);
     }
-    cli.set_default_headers(default_headers);
+    if (headers.find("User-Agent") == headers.end()) {
+        headers.emplace("User-Agent", "llama-cpp/" + build_info);
+    }
+    if (!bearer_token.empty()) {
+        headers.emplace("Authorization", "Bearer " + bearer_token);
+    }
+    cli.set_default_headers(headers);
     const bool file_exists = std::filesystem::exists(path);
@@ -437,10 +440,12 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
                                                              const common_remote_params & params) {
     auto [cli, parts] = common_http_client(url);
-    httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
-    for (const auto & header : params.headers) {
-        headers.emplace(header.first, header.second);
+    httplib::Headers headers;
+    for (const auto & h : params.headers) {
+        headers.emplace(h.first, h.second);
+    }
+    if (headers.find("User-Agent") == headers.end()) {
+        headers.emplace("User-Agent", "llama-cpp/" + build_info);
     }
     if (params.timeout > 0) {

package/src/llama.cpp/common/json-partial.h CHANGED Viewed

@@ -1,5 +1,6 @@
 #pragma once
+// TODO: use json_fwd.hpp when possible
 #include <nlohmann/json.hpp>
 // Healing marker (empty if the JSON was fully parsed / wasn't healed).