@fugood/llama.node 1.4.6 → 1.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +8 -0
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +25 -26
- package/src/LlamaContext.cpp +2 -2
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +364 -193
- package/src/llama.cpp/common/arg.h +43 -2
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +36 -18
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +1 -1
- package/src/llama.cpp/common/chat-parser.cpp +3 -2
- package/src/llama.cpp/common/chat-peg-parser.cpp +16 -2
- package/src/llama.cpp/common/chat.cpp +272 -0
- package/src/llama.cpp/common/common.cpp +130 -67
- package/src/llama.cpp/common/common.h +40 -16
- package/src/llama.cpp/common/console.cpp +680 -47
- package/src/llama.cpp/common/console.h +30 -8
- package/src/llama.cpp/common/download.cpp +69 -25
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +132 -3
- package/src/llama.cpp/common/json-schema-to-grammar.h +20 -0
- package/src/llama.cpp/common/log.cpp +5 -0
- package/src/llama.cpp/common/log.h +1 -0
- package/src/llama.cpp/common/peg-parser.cpp +1 -1
- package/src/llama.cpp/common/preset.cpp +206 -0
- package/src/llama.cpp/common/preset.h +32 -0
- package/src/llama.cpp/common/sampling.cpp +91 -92
- package/src/llama.cpp/common/sampling.h +11 -6
- package/src/llama.cpp/common/speculative.cpp +1 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -0
- package/src/llama.cpp/ggml/include/ggml-alloc.h +9 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +7 -8
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +69 -39
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +2 -1
- package/src/llama.cpp/include/llama.h +18 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-arch.cpp +1890 -2248
- package/src/llama.cpp/src/llama-arch.h +9 -2
- package/src/llama.cpp/src/llama-batch.cpp +12 -2
- package/src/llama.cpp/src/llama-batch.h +4 -2
- package/src/llama.cpp/src/llama-context.cpp +99 -29
- package/src/llama.cpp/src/llama-context.h +9 -3
- package/src/llama.cpp/src/llama-grammar.cpp +233 -33
- package/src/llama.cpp/src/llama-grammar.h +20 -1
- package/src/llama.cpp/src/llama-graph.cpp +85 -17
- package/src/llama.cpp/src/llama-graph.h +17 -4
- package/src/llama.cpp/src/llama-hparams.cpp +6 -0
- package/src/llama.cpp/src/llama-hparams.h +5 -1
- package/src/llama.cpp/src/llama-impl.cpp +4 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +90 -42
- package/src/llama.cpp/src/llama-kv-cache.h +19 -2
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model.cpp +123 -52
- package/src/llama.cpp/src/llama-model.h +1 -0
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +2 -1
- package/src/llama.cpp/src/llama.cpp +675 -1
- package/src/llama.cpp/src/models/deepseek2.cpp +9 -5
- package/src/llama.cpp/src/models/{gemma3-iswa.cpp → gemma3.cpp} +30 -5
- package/src/llama.cpp/src/models/glm4-moe.cpp +28 -11
- package/src/llama.cpp/src/models/glm4.cpp +27 -4
- package/src/llama.cpp/src/models/models.h +8 -7
- package/src/llama.cpp/src/models/nemotron-h.cpp +35 -6
- package/src/llama.cpp/src/models/qwen2.cpp +12 -3
- package/src/llama.cpp/src/models/qwen3next.cpp +81 -266
package/lib/binding.ts
CHANGED
@@ -198,6 +198,14 @@ export type LlamaParallelCompletionOptions = LlamaCompletionOptions & {
    */
   save_state_path?: string
 
+  /**
+   * Number of tokens to load when loading state.
+   * If not specified or <= 0, all tokens from the state file will be loaded.
+   * Use this to limit how much of a saved state is restored.
+   * Example: `512` to load only the first 512 tokens from the state file
+   */
+  load_state_size?: number
+
   /**
    * Number of tokens to save when saving session state.
    * If not specified or <= 0, all tokens will be saved.
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.6",
+  "version": "1.4.8",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.6",
-    "@fugood/node-llama-darwin-x64": "1.4.6",
-    "@fugood/node-llama-linux-arm64": "1.4.6",
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.6",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.6",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.6",
-    "@fugood/node-llama-linux-x64": "1.4.6",
-    "@fugood/node-llama-linux-x64-cuda": "1.4.6",
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.6",
-    "@fugood/node-llama-win32-arm64": "1.4.6",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.6",
-    "@fugood/node-llama-win32-x64": "1.4.6",
-    "@fugood/node-llama-win32-x64-cuda": "1.4.6",
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.6"
+    "@fugood/node-llama-darwin-arm64": "1.4.8",
+    "@fugood/node-llama-darwin-x64": "1.4.8",
+    "@fugood/node-llama-linux-arm64": "1.4.8",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.8",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.8",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.8",
+    "@fugood/node-llama-linux-x64": "1.4.8",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.8",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.8",
+    "@fugood/node-llama-win32-arm64": "1.4.8",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.8",
+    "@fugood/node-llama-win32-x64": "1.4.8",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.8",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.8"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
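All fourteen prebuilt platform packages are bumped in lockstep with the main package, so whichever optional dependency npm manages to install for the host always matches the version the JavaScript entry point expects. Selecting the right prebuild typically keys off process.platform and process.arch; the helper below is an illustrative TypeScript sketch of that convention, not this package's actual loader:

    // Illustrative sketch: the package names mirror the optionalDependencies list
    // above; the selection logic itself is an assumption, not the real loader.
    function prebuiltPackageName(variant?: 'cuda' | 'vulkan' | 'snapdragon'): string {
      const base = `@fugood/node-llama-${process.platform}-${process.arch}`
      return variant ? `${base}-${variant}` : base
    }

    // e.g. '@fugood/node-llama-linux-x64-cuda' on a Linux x64 host
    console.log(prebuiltPackageName('cuda'))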
package/scripts/llama.cpp.patch
CHANGED
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index 
+index 0182767c2..f8c4a4f63 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -
+@@ -151,9 +151,16 @@ if (LLAMA_LLGUIDANCE)
 set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
@@ -21,24 +21,23 @@ index 377b26846..1873b5206 100644
 
 #
 diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
-index 
+index 1bcba9cd8..b7cd68734 100644
 --- a/src/llama.cpp/common/chat-peg-parser.cpp
 +++ b/src/llama.cpp/common/chat-peg-parser.cpp
-@@ -
-
+@@ -2,7 +2,7 @@
+ 
+ #include <nlohmann/json.hpp>
 
--#include <nlohmann/json.hpp>
--
 -using json = nlohmann::json;
-
-
-
-
++using json = nlohmann::ordered_json;
+ 
+ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
+ int count = 0;
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index 
+index 0a426f447..ab02be247 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
-@@ -
+@@ -7,9 +7,6 @@
 #include "log.h"
 #include "regex-partial.h"
 
@@ -48,7 +47,7 @@ index 41a5bb42d..da5cf4b94 100644
 #include <algorithm>
 #include <cstdio>
 #include <cctype>
-@@ -
+@@ -135,16 +132,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 return diffs;
 }
 
@@ -65,7 +64,7 @@ index 41a5bb42d..da5cf4b94 100644
 struct templates_params {
 json messages;
 json tools;
-@@ -
+@@ -751,7 +738,7 @@ static std::string apply(
 tmpl_inputs.extra_context.merge_patch(*additional_context);
 }
 // TODO: add flag to control date/time, if only for testing purposes.
@@ -99,10 +98,10 @@ index 6085510a4..263076ce2 100644
 struct common_chat_tool_call {
 std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index 
+index 5a8cf5248..8010a990e 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -
+@@ -1343,6 +1343,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
 mparams.n_gpu_layers = params.n_gpu_layers;
 }
 
@@ -111,19 +110,19 @@ index f07af1d86..1b10c7b13 100644
 mparams.split_mode = params.split_mode;
 mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index 
+index d70744840..dea8c4546 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -307,6 +307,7 @@ struct lr_opt {
 struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
 
 struct common_params {
 + bool vocab_only = false;
-int32_t n_predict = -1; // new tokens to predict
-int32_t n_ctx =
+int32_t n_predict = -1; // max. number of new tokens to predict, -1 == no limit
+int32_t n_ctx = 0; // context size, 0 == context the model was trained with
 int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index 
+index fc31089f3..aa9befe4c 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
@@ -136,10 +135,10 @@ index 7e53a57b7..a328d4db4 100644
 check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
 if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-index 
+index 514f086f6..792abaa58 100644
 --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
 +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-@@ -
+@@ -3213,11 +3213,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
 GGML_UNUSED(dev);
 }
 
@@ -169,7 +168,7 @@ index 72a82a891..1b681f4dd 100644
 GGML_UNUSED(dev);
 }
 
-@@ -
+@@ -3398,10 +3413,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
 }
 }
 
@@ -188,7 +187,7 @@ index 72a82a891..1b681f4dd 100644
 
 GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
 
-@@ -
+@@ -3414,6 +3436,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
 } catch (std::exception const &exc) {
 GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
 devices[i].context = nullptr;
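Within the rebased patch, the common.h hunk reflects an upstream llama.cpp change of defaults: n_predict = -1 is now documented as "no limit", and n_ctx = 0 means "use the context length the model was trained with". If the Node binding forwards these parameters under the same names, which is an assumption not confirmed by this diff, the zero default would be used as in this TypeScript sketch:

    // Assumption: loadModel() and completion() accept n_ctx / n_predict under these
    // names; only the llama.cpp common.h defaults are confirmed by the patch above.
    import { loadModel } from '@fugood/llama.node'

    const context = await loadModel({
      model: './model.gguf',
      n_ctx: 0, // 0 == context size the model was trained with (per the common.h hunk)
    })

    await context.completion({
      prompt: 'Hello',
      n_predict: -1, // -1 == no limit on generated tokens (per the common.h hunk)
    })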
package/src/LlamaContext.cpp
CHANGED
@@ -416,8 +416,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   _rn_ctx->attachThreadpoolsIfAvailable();
 
   // Collect used devices from the loaded model
-  if (_rn_ctx->llama_init
-  const auto &model_devices = _rn_ctx->llama_init
+  if (_rn_ctx->llama_init->model()) {
+    const auto &model_devices = _rn_ctx->llama_init->model()->devices;
   for (auto dev : model_devices) {
     const char *dev_name = ggml_backend_dev_name(dev);
     if (dev_name != nullptr) {