@fugood/llama.node 1.5.0-rc.0 → 1.6.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +4 -4
- package/src/llama.cpp/common/chat-parser.cpp +3 -3
- package/src/llama.cpp/common/chat-parser.h +4 -4
- package/src/llama.cpp/common/chat.h +16 -7
- package/src/llama.cpp/common/common.h +3 -0
- package/src/llama.cpp/common/download.cpp +19 -14
- package/src/llama.cpp/common/json-partial.h +1 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.5.0-rc.0",
|
|
4
|
+
"version": "1.6.0-rc.0",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,20 +72,20 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-darwin-arm64": "1.
|
|
76
|
-
"@fugood/node-llama-darwin-x64": "1.
|
|
77
|
-
"@fugood/node-llama-linux-arm64": "1.
|
|
78
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.
|
|
79
|
-
"@fugood/node-llama-linux-arm64-snapdragon": "1.
|
|
80
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.
|
|
81
|
-
"@fugood/node-llama-linux-x64": "1.
|
|
82
|
-
"@fugood/node-llama-linux-x64-cuda": "1.
|
|
83
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.
|
|
86
|
-
"@fugood/node-llama-win32-x64": "1.
|
|
87
|
-
"@fugood/node-llama-win32-x64-cuda": "1.
|
|
88
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.
|
|
75
|
+
"@fugood/node-llama-darwin-arm64": "1.6.0-rc.0",
|
|
76
|
+
"@fugood/node-llama-darwin-x64": "1.6.0-rc.0",
|
|
77
|
+
"@fugood/node-llama-linux-arm64": "1.6.0-rc.0",
|
|
78
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.0",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.0",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.0",
|
|
81
|
+
"@fugood/node-llama-linux-x64": "1.6.0-rc.0",
|
|
82
|
+
"@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.0",
|
|
83
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.0",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.6.0-rc.0",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.0",
|
|
86
|
+
"@fugood/node-llama-win32-x64": "1.6.0-rc.0",
|
|
87
|
+
"@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.0",
|
|
88
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.0"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
91
91
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -83,10 +83,10 @@ index b29544dac..5fa2c6c17 100644
|
|
|
83
83
|
|
|
84
84
|
jinja::global_from_json(ctx, inp, inputs.mark_input);
|
|
85
85
|
diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
|
|
86
|
-
index
|
|
86
|
+
index ac19348ec..f6f9f612f 100644
|
|
87
87
|
--- a/src/llama.cpp/common/chat.h
|
|
88
88
|
+++ b/src/llama.cpp/common/chat.h
|
|
89
|
-
@@ -
|
|
89
|
+
@@ -231,6 +231,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
90
90
|
|
|
91
91
|
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
|
92
92
|
|
|
@@ -120,10 +120,10 @@ index 26250abb6..72ceddcc7 100644
|
|
|
120
120
|
mparams.main_gpu = params.main_gpu;
|
|
121
121
|
mparams.split_mode = params.split_mode;
|
|
122
122
|
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
|
|
123
|
-
index
|
|
123
|
+
index 96c990c05..c0b0b3093 100644
|
|
124
124
|
--- a/src/llama.cpp/common/common.h
|
|
125
125
|
+++ b/src/llama.cpp/common/common.h
|
|
126
|
-
@@ -
|
|
126
|
+
@@ -317,6 +317,7 @@ struct lr_opt {
|
|
127
127
|
struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
|
|
128
128
|
|
|
129
129
|
struct common_params {
|
|
package/src/llama.cpp/common/chat-parser.cpp
CHANGED
|
@@ -129,7 +129,7 @@ static void parse_json_tool_calls(
|
|
|
129
129
|
}
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
-
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
|
|
132
|
+
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
|
|
133
133
|
: input_(input), is_partial_(is_partial), syntax_(syntax)
|
|
134
134
|
{
|
|
135
135
|
result_.role = "assistant";
|
|
@@ -1611,7 +1611,7 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
|
|
1611
1611
|
builder.finish();
|
|
1612
1612
|
}
|
|
1613
1613
|
|
|
1614
|
-
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
|
|
1614
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
|
|
1615
1615
|
if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
|
|
1616
1616
|
syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
|
|
1617
1617
|
syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
|
|
@@ -1635,7 +1635,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
|
|
|
1635
1635
|
return msg;
|
|
1636
1636
|
}
|
|
1637
1637
|
|
|
1638
|
-
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
|
|
1638
|
+
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
|
|
1639
1639
|
if (parser.empty()) {
|
|
1640
1640
|
throw std::runtime_error("Failed to parse due to missing parser definition.");
|
|
1641
1641
|
}
|
|
package/src/llama.cpp/common/chat-parser.h
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
#include "json-partial.h"
|
|
6
6
|
#include "regex-partial.h"
|
|
7
7
|
|
|
8
|
-
#include <nlohmann/json.hpp>
|
|
8
|
+
#include <nlohmann/json_fwd.hpp>
|
|
9
9
|
|
|
10
10
|
#include <optional>
|
|
11
11
|
#include <string>
|
|
@@ -19,20 +19,20 @@ class common_chat_msg_partial_exception : public std::runtime_error {
|
|
|
19
19
|
class common_chat_msg_parser {
|
|
20
20
|
std::string input_;
|
|
21
21
|
bool is_partial_;
|
|
22
|
-
|
|
22
|
+
common_chat_parser_params syntax_; // TODO: rename to params
|
|
23
23
|
std::string healing_marker_;
|
|
24
24
|
|
|
25
25
|
size_t pos_ = 0;
|
|
26
26
|
common_chat_msg result_;
|
|
27
27
|
|
|
28
28
|
public:
|
|
29
|
-
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
29
|
+
common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
|
|
30
30
|
const std::string & input() const { return input_; }
|
|
31
31
|
size_t pos() const { return pos_; }
|
|
32
32
|
const std::string & healing_marker() const { return healing_marker_; }
|
|
33
33
|
const bool & is_partial() const { return is_partial_; }
|
|
34
34
|
const common_chat_msg & result() const { return result_; }
|
|
35
|
-
const common_chat_syntax & syntax() const { return syntax_; }
|
|
35
|
+
const common_chat_parser_params & syntax() const { return syntax_; }
|
|
36
36
|
|
|
37
37
|
void move_to(size_t pos) {
|
|
38
38
|
if (pos > input_.size()) {
|
|
package/src/llama.cpp/common/chat.h
CHANGED
|
@@ -145,7 +145,7 @@ struct common_chat_templates_inputs {
|
|
|
145
145
|
std::vector<common_chat_tool> tools;
|
|
146
146
|
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
|
147
147
|
bool parallel_tool_calls = false;
|
|
148
|
-
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
148
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
|
|
149
149
|
bool enable_thinking = true;
|
|
150
150
|
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
151
151
|
std::map<std::string, std::string> chat_template_kwargs;
|
|
@@ -165,14 +165,21 @@ struct common_chat_params {
|
|
|
165
165
|
std::string parser;
|
|
166
166
|
};
|
|
167
167
|
|
|
168
|
-
|
|
168
|
+
// per-message parsing syntax
|
|
169
|
+
// should be derived from common_chat_params
|
|
170
|
+
struct common_chat_parser_params {
|
|
169
171
|
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
170
|
-
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
172
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
|
|
171
173
|
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
|
|
172
174
|
bool reasoning_in_content = false;
|
|
173
175
|
bool thinking_forced_open = false;
|
|
174
176
|
bool parse_tool_calls = true;
|
|
175
177
|
common_peg_arena parser = {};
|
|
178
|
+
common_chat_parser_params() = default;
|
|
179
|
+
common_chat_parser_params(const common_chat_params & chat_params) {
|
|
180
|
+
format = chat_params.format;
|
|
181
|
+
thinking_forced_open = chat_params.thinking_forced_open;
|
|
182
|
+
}
|
|
176
183
|
};
|
|
177
184
|
|
|
178
185
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
|
@@ -213,10 +220,12 @@ std::string common_chat_format_example(
|
|
|
213
220
|
const std::map<std::string, std::string> & chat_template_kwargs);
|
|
214
221
|
|
|
215
222
|
const char* common_chat_format_name(common_chat_format format);
|
|
216
|
-
const
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
223
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
|
|
224
|
+
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
|
|
225
|
+
|
|
226
|
+
// used by arg and server
|
|
227
|
+
const char * common_reasoning_format_name(common_reasoning_format format);
|
|
228
|
+
common_reasoning_format common_reasoning_format_from_name(const std::string & format);
|
|
220
229
|
|
|
221
230
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
|
222
231
|
|
|
package/src/llama.cpp/common/common.h
CHANGED
|
@@ -57,6 +57,8 @@ extern const char * LLAMA_COMMIT;
|
|
|
57
57
|
extern const char * LLAMA_COMPILER;
|
|
58
58
|
extern const char * LLAMA_BUILD_TARGET;
|
|
59
59
|
|
|
60
|
+
const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
|
|
61
|
+
|
|
60
62
|
struct common_control_vector_load_info;
|
|
61
63
|
|
|
62
64
|
//
|
|
@@ -284,6 +286,7 @@ struct common_params_diffusion {
|
|
|
284
286
|
};
|
|
285
287
|
|
|
286
288
|
// reasoning API response format (not to be confused as chat template's reasoning format)
|
|
289
|
+
// only used by server
|
|
287
290
|
enum common_reasoning_format {
|
|
288
291
|
COMMON_REASONING_FORMAT_NONE,
|
|
289
292
|
COMMON_REASONING_FORMAT_AUTO, // Same as deepseek, using `message.reasoning_content`
|
|
package/src/llama.cpp/common/download.cpp
CHANGED
|
@@ -314,23 +314,26 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
314
314
|
|
|
315
315
|
// download one single file from remote URL to local path
|
|
316
316
|
// returns status code or -1 on error
|
|
317
|
-
static int common_download_file_single_online(const std::string
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
317
|
+
static int common_download_file_single_online(const std::string & url,
|
|
318
|
+
const std::string & path,
|
|
319
|
+
const std::string & bearer_token,
|
|
320
|
+
const common_header_list & custom_headers) {
|
|
321
321
|
static const int max_attempts = 3;
|
|
322
322
|
static const int retry_delay_seconds = 2;
|
|
323
323
|
|
|
324
324
|
auto [cli, parts] = common_http_client(url);
|
|
325
325
|
|
|
326
|
-
httplib::Headers
|
|
327
|
-
if (!bearer_token.empty()) {
|
|
328
|
-
default_headers.insert({"Authorization", "Bearer " + bearer_token});
|
|
329
|
-
}
|
|
326
|
+
httplib::Headers headers;
|
|
330
327
|
for (const auto & h : custom_headers) {
|
|
331
|
-
|
|
328
|
+
headers.emplace(h.first, h.second);
|
|
332
329
|
}
|
|
333
|
-
|
|
330
|
+
if (headers.find("User-Agent") == headers.end()) {
|
|
331
|
+
headers.emplace("User-Agent", "llama-cpp/" + build_info);
|
|
332
|
+
}
|
|
333
|
+
if (!bearer_token.empty()) {
|
|
334
|
+
headers.emplace("Authorization", "Bearer " + bearer_token);
|
|
335
|
+
}
|
|
336
|
+
cli.set_default_headers(headers);
|
|
334
337
|
|
|
335
338
|
const bool file_exists = std::filesystem::exists(path);
|
|
336
339
|
|
|
@@ -437,10 +440,12 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|
|
437
440
|
const common_remote_params & params) {
|
|
438
441
|
auto [cli, parts] = common_http_client(url);
|
|
439
442
|
|
|
440
|
-
httplib::Headers headers
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
443
|
+
httplib::Headers headers;
|
|
444
|
+
for (const auto & h : params.headers) {
|
|
445
|
+
headers.emplace(h.first, h.second);
|
|
446
|
+
}
|
|
447
|
+
if (headers.find("User-Agent") == headers.end()) {
|
|
448
|
+
headers.emplace("User-Agent", "llama-cpp/" + build_info);
|
|
444
449
|
}
|
|
445
450
|
|
|
446
451
|
if (params.timeout > 0) {
|