@fugood/llama.node 1.5.0-rc.0 → 1.6.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fugood/llama.node",
3
3
  "access": "public",
4
- "version": "1.5.0-rc.0",
4
+ "version": "1.6.0-rc.0",
5
5
  "description": "An another Node binding of llama.cpp",
6
6
  "main": "lib/index.js",
7
7
  "scripts": {
@@ -72,20 +72,20 @@
72
72
  "CMakeLists.txt"
73
73
  ],
74
74
  "optionalDependencies": {
75
- "@fugood/node-llama-darwin-arm64": "1.5.0-rc.0",
76
- "@fugood/node-llama-darwin-x64": "1.5.0-rc.0",
77
- "@fugood/node-llama-linux-arm64": "1.5.0-rc.0",
78
- "@fugood/node-llama-linux-arm64-cuda": "1.5.0-rc.0",
79
- "@fugood/node-llama-linux-arm64-snapdragon": "1.5.0-rc.0",
80
- "@fugood/node-llama-linux-arm64-vulkan": "1.5.0-rc.0",
81
- "@fugood/node-llama-linux-x64": "1.5.0-rc.0",
82
- "@fugood/node-llama-linux-x64-cuda": "1.5.0-rc.0",
83
- "@fugood/node-llama-linux-x64-vulkan": "1.5.0-rc.0",
84
- "@fugood/node-llama-win32-arm64": "1.5.0-rc.0",
85
- "@fugood/node-llama-win32-arm64-vulkan": "1.5.0-rc.0",
86
- "@fugood/node-llama-win32-x64": "1.5.0-rc.0",
87
- "@fugood/node-llama-win32-x64-cuda": "1.5.0-rc.0",
88
- "@fugood/node-llama-win32-x64-vulkan": "1.5.0-rc.0"
75
+ "@fugood/node-llama-darwin-arm64": "1.6.0-rc.0",
76
+ "@fugood/node-llama-darwin-x64": "1.6.0-rc.0",
77
+ "@fugood/node-llama-linux-arm64": "1.6.0-rc.0",
78
+ "@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.0",
79
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.0",
80
+ "@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.0",
81
+ "@fugood/node-llama-linux-x64": "1.6.0-rc.0",
82
+ "@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.0",
83
+ "@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.0",
84
+ "@fugood/node-llama-win32-arm64": "1.6.0-rc.0",
85
+ "@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.0",
86
+ "@fugood/node-llama-win32-x64": "1.6.0-rc.0",
87
+ "@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.0",
88
+ "@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.0"
89
89
  },
90
90
  "devDependencies": {
91
91
  "@babel/preset-env": "^7.24.4",
@@ -83,10 +83,10 @@ index b29544dac..5fa2c6c17 100644
83
83
 
84
84
  jinja::global_from_json(ctx, inp, inputs.mark_input);
85
85
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
86
- index 148801738..0317f1ab1 100644
86
+ index ac19348ec..f6f9f612f 100644
87
87
  --- a/src/llama.cpp/common/chat.h
88
88
  +++ b/src/llama.cpp/common/chat.h
89
- @@ -222,6 +222,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
89
+ @@ -231,6 +231,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
90
90
 
91
91
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
92
92
 
@@ -120,10 +120,10 @@ index 26250abb6..72ceddcc7 100644
120
120
  mparams.main_gpu = params.main_gpu;
121
121
  mparams.split_mode = params.split_mode;
122
122
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
123
- index b9566df62..c9425ad2f 100644
123
+ index 96c990c05..c0b0b3093 100644
124
124
  --- a/src/llama.cpp/common/common.h
125
125
  +++ b/src/llama.cpp/common/common.h
126
- @@ -314,6 +314,7 @@ struct lr_opt {
126
+ @@ -317,6 +317,7 @@ struct lr_opt {
127
127
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
128
128
 
129
129
  struct common_params {
@@ -129,7 +129,7 @@ static void parse_json_tool_calls(
129
129
  }
130
130
  }
131
131
 
132
- common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
132
+ common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
133
133
  : input_(input), is_partial_(is_partial), syntax_(syntax)
134
134
  {
135
135
  result_.role = "assistant";
@@ -1611,7 +1611,7 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
1611
1611
  builder.finish();
1612
1612
  }
1613
1613
 
1614
- common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1614
+ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
1615
1615
  if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
1616
1616
  syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
1617
1617
  syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
@@ -1635,7 +1635,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
1635
1635
  return msg;
1636
1636
  }
1637
1637
 
1638
- common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
1638
+ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
1639
1639
  if (parser.empty()) {
1640
1640
  throw std::runtime_error("Failed to parse due to missing parser definition.");
1641
1641
  }
@@ -5,7 +5,7 @@
5
5
  #include "json-partial.h"
6
6
  #include "regex-partial.h"
7
7
 
8
- #include <nlohmann/json.hpp>
8
+ #include <nlohmann/json_fwd.hpp>
9
9
 
10
10
  #include <optional>
11
11
  #include <string>
@@ -19,20 +19,20 @@ class common_chat_msg_partial_exception : public std::runtime_error {
19
19
  class common_chat_msg_parser {
20
20
  std::string input_;
21
21
  bool is_partial_;
22
- common_chat_syntax syntax_;
22
+ common_chat_parser_params syntax_; // TODO: rename to params
23
23
  std::string healing_marker_;
24
24
 
25
25
  size_t pos_ = 0;
26
26
  common_chat_msg result_;
27
27
 
28
28
  public:
29
- common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
29
+ common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
30
30
  const std::string & input() const { return input_; }
31
31
  size_t pos() const { return pos_; }
32
32
  const std::string & healing_marker() const { return healing_marker_; }
33
33
  const bool & is_partial() const { return is_partial_; }
34
34
  const common_chat_msg & result() const { return result_; }
35
- const common_chat_syntax & syntax() const { return syntax_; }
35
+ const common_chat_parser_params & syntax() const { return syntax_; }
36
36
 
37
37
  void move_to(size_t pos) {
38
38
  if (pos > input_.size()) {
@@ -145,7 +145,7 @@ struct common_chat_templates_inputs {
145
145
  std::vector<common_chat_tool> tools;
146
146
  common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
147
147
  bool parallel_tool_calls = false;
148
- common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
148
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
149
149
  bool enable_thinking = true;
150
150
  std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
151
151
  std::map<std::string, std::string> chat_template_kwargs;
@@ -165,14 +165,21 @@ struct common_chat_params {
165
165
  std::string parser;
166
166
  };
167
167
 
168
- struct common_chat_syntax {
168
+ // per-message parsing syntax
169
+ // should be derived from common_chat_params
170
+ struct common_chat_parser_params {
169
171
  common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
170
- common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
172
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
171
173
  // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
172
174
  bool reasoning_in_content = false;
173
175
  bool thinking_forced_open = false;
174
176
  bool parse_tool_calls = true;
175
177
  common_peg_arena parser = {};
178
+ common_chat_parser_params() = default;
179
+ common_chat_parser_params(const common_chat_params & chat_params) {
180
+ format = chat_params.format;
181
+ thinking_forced_open = chat_params.thinking_forced_open;
182
+ }
176
183
  };
177
184
 
178
185
  // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -213,10 +220,12 @@ std::string common_chat_format_example(
213
220
  const std::map<std::string, std::string> & chat_template_kwargs);
214
221
 
215
222
  const char* common_chat_format_name(common_chat_format format);
216
- const char* common_reasoning_format_name(common_reasoning_format format);
217
- common_reasoning_format common_reasoning_format_from_name(const std::string & format);
218
- common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
219
- common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
223
+ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
224
+ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
225
+
226
+ // used by arg and server
227
+ const char * common_reasoning_format_name(common_reasoning_format format);
228
+ common_reasoning_format common_reasoning_format_from_name(const std::string & format);
220
229
 
221
230
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
222
231
 
@@ -57,6 +57,8 @@ extern const char * LLAMA_COMMIT;
57
57
  extern const char * LLAMA_COMPILER;
58
58
  extern const char * LLAMA_BUILD_TARGET;
59
59
 
60
+ const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
61
+
60
62
  struct common_control_vector_load_info;
61
63
 
62
64
  //
@@ -284,6 +286,7 @@ struct common_params_diffusion {
284
286
  };
285
287
 
286
288
  // reasoning API response format (not to be confused as chat template's reasoning format)
289
+ // only used by server
287
290
  enum common_reasoning_format {
288
291
  COMMON_REASONING_FORMAT_NONE,
289
292
  COMMON_REASONING_FORMAT_AUTO, // Same as deepseek, using `message.reasoning_content`
@@ -314,23 +314,26 @@ static bool common_pull_file(httplib::Client & cli,
314
314
 
315
315
  // download one single file from remote URL to local path
316
316
  // returns status code or -1 on error
317
- static int common_download_file_single_online(const std::string & url,
318
- const std::string & path,
319
- const std::string & bearer_token,
320
- const common_header_list & custom_headers) {
317
+ static int common_download_file_single_online(const std::string & url,
318
+ const std::string & path,
319
+ const std::string & bearer_token,
320
+ const common_header_list & custom_headers) {
321
321
  static const int max_attempts = 3;
322
322
  static const int retry_delay_seconds = 2;
323
323
 
324
324
  auto [cli, parts] = common_http_client(url);
325
325
 
326
- httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
327
- if (!bearer_token.empty()) {
328
- default_headers.insert({"Authorization", "Bearer " + bearer_token});
329
- }
326
+ httplib::Headers headers;
330
327
  for (const auto & h : custom_headers) {
331
- default_headers.emplace(h.first, h.second);
328
+ headers.emplace(h.first, h.second);
332
329
  }
333
- cli.set_default_headers(default_headers);
330
+ if (headers.find("User-Agent") == headers.end()) {
331
+ headers.emplace("User-Agent", "llama-cpp/" + build_info);
332
+ }
333
+ if (!bearer_token.empty()) {
334
+ headers.emplace("Authorization", "Bearer " + bearer_token);
335
+ }
336
+ cli.set_default_headers(headers);
334
337
 
335
338
  const bool file_exists = std::filesystem::exists(path);
336
339
 
@@ -437,10 +440,12 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
437
440
  const common_remote_params & params) {
438
441
  auto [cli, parts] = common_http_client(url);
439
442
 
440
- httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
441
-
442
- for (const auto & header : params.headers) {
443
- headers.emplace(header.first, header.second);
443
+ httplib::Headers headers;
444
+ for (const auto & h : params.headers) {
445
+ headers.emplace(h.first, h.second);
446
+ }
447
+ if (headers.find("User-Agent") == headers.end()) {
448
+ headers.emplace("User-Agent", "llama-cpp/" + build_info);
444
449
  }
445
450
 
446
451
  if (params.timeout > 0) {
@@ -1,5 +1,6 @@
1
1
  #pragma once
2
2
 
3
+ // TODO: use json_fwd.hpp when possible
3
4
  #include <nlohmann/json.hpp>
4
5
 
5
6
  // Healing marker (empty if the JSON was fully parsed / wasn't healed).