@fugood/llama.node 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -0
- package/package.json +14 -14
- package/src/LlamaCompletionWorker.cpp +45 -5
package/CMakeLists.txt
CHANGED

@@ -114,6 +114,9 @@ set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
 set(LLAMA_CURL OFF CACHE BOOL "Build curl")
 
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
+
+add_definitions(-DGGML_MAX_NAME=80)
+
 add_subdirectory("src/llama.cpp")
 add_subdirectory("src/llama.cpp/tools/mtmd")
 
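A note on the new define: GGML_MAX_NAME sizes the fixed name buffer inside ggml tensors, and upstream ggml.h defaults it to 64, so defining it to 80 before add_subdirectory("src/llama.cpp") raises the per-tensor name limit for the whole bundled build. Roughly, assuming the upstream ggml layout (abridged excerpt, not part of this package):

    // Abridged from upstream ggml.h: tensor names live in a fixed-size
    // buffer, so -DGGML_MAX_NAME=80 raises the name-length limit at
    // compile time (useful for models with long tensor names).
    #ifndef GGML_MAX_NAME
    #define GGML_MAX_NAME 64   // upstream default
    #endif

    struct ggml_tensor {
        // ... other fields elided ...
        char name[GGML_MAX_NAME];
    };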
package/package.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.1.3",
+  "version": "1.1.4",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -71,19 +71,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.1.3",
-    "@fugood/node-llama-linux-x64-vulkan": "1.1.3",
-    "@fugood/node-llama-linux-x64-cuda": "1.1.3",
-    "@fugood/node-llama-linux-arm64": "1.1.3",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.1.3",
-    "@fugood/node-llama-linux-arm64-cuda": "1.1.3",
-    "@fugood/node-llama-win32-x64": "1.1.3",
-    "@fugood/node-llama-win32-x64-vulkan": "1.1.3",
-    "@fugood/node-llama-win32-x64-cuda": "1.1.3",
-    "@fugood/node-llama-win32-arm64": "1.1.3",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.1.3",
-    "@fugood/node-llama-darwin-x64": "1.1.3",
-    "@fugood/node-llama-darwin-arm64": "1.1.3"
+    "@fugood/node-llama-linux-x64": "1.1.4",
+    "@fugood/node-llama-linux-x64-vulkan": "1.1.4",
+    "@fugood/node-llama-linux-x64-cuda": "1.1.4",
+    "@fugood/node-llama-linux-arm64": "1.1.4",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.1.4",
+    "@fugood/node-llama-linux-arm64-cuda": "1.1.4",
+    "@fugood/node-llama-win32-x64": "1.1.4",
+    "@fugood/node-llama-win32-x64-vulkan": "1.1.4",
+    "@fugood/node-llama-win32-x64-cuda": "1.1.4",
+    "@fugood/node-llama-win32-arm64": "1.1.4",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.1.4",
+    "@fugood/node-llama-darwin-x64": "1.1.4",
+    "@fugood/node-llama-darwin-arm64": "1.1.4"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -64,6 +64,7 @@ void LlamaCompletionWorker::Execute() {
   size_t n_input = 0;
   const auto model = _sess->model();
   auto vocab = llama_model_get_vocab(model);
+  const bool is_enc_dec = llama_model_has_encoder(model);
 
   const bool add_bos = llama_vocab_get_add_bos(vocab);
   auto ctx = _sess->context();
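For context, llama_model_has_encoder is the llama.cpp API that reports whether the loaded model has an encoder stage (e.g., the T5 family). A minimal detection sketch, with the model path as a placeholder rather than anything this package ships:

    #include "llama.h"

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (model != nullptr && llama_model_has_encoder(model)) {
        // Encoder-decoder architecture: the prompt must go through
        // llama_encode() before the usual llama_decode() loop.
    }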
@@ -110,7 +111,7 @@ void LlamaCompletionWorker::Execute() {
   } else {
     // Text-only path
     std::vector<llama_token> prompt_tokens =
-        ::common_tokenize(ctx, _params.prompt, add_bos, true);
+        ::common_tokenize(ctx, _params.prompt, add_bos || is_enc_dec, true);
     n_input = prompt_tokens.size();
 
     if (_sess->tokens_ptr()->size() > 0) {
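The third argument of common_tokenize is add_special, which asks the vocab to insert its configured special tokens. Encoder-decoder GGUFs such as T5 typically set add_bos to false, yet the encoder prompt still needs its special tokens (for T5, a trailing EOS), which is presumably why the hunk ORs in is_enc_dec. A sketch of the call against the upstream common helper, with prompt_text as a stand-in name:

    #include "common.h"  // llama.cpp common helpers

    // add_special = add_bos || is_enc_dec: keep special-token insertion
    // on for encoder-decoder models even when the vocab's add_bos is false.
    std::vector<llama_token> prompt_tokens =
        common_tokenize(ctx, prompt_text,
                        /*add_special=*/add_bos || is_enc_dec,
                        /*parse_special=*/true);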
@@ -126,9 +127,47 @@ void LlamaCompletionWorker::Execute() {
   }
 
   const int max_len = _params.n_predict < 0 ? std::numeric_limits<int>::max() : _params.n_predict;
-  _sess->tokens_ptr()->reserve(_sess->tokens_ptr()->size() + max_len);
-
   auto embd = _sess->tokens_ptr();
+  embd->reserve(embd->size() + max_len);
+
+  if (is_enc_dec) {
+    if (n_input > 0) {
+      // Encode prompt tokens in batches using n_batch as chunk size
+      int n_past_batch = n_cur;
+      int n_remaining = n_input;
+
+      while (n_remaining > 0) {
+        int n_eval = n_remaining;
+        if (n_eval > _params.n_batch) {
+          n_eval = _params.n_batch;
+        }
+
+        int ret = llama_encode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
+        if (ret < 0) {
+          SetError("Failed to encode token batch, code: " + std::to_string(ret) +
+                   ", n_eval: " + std::to_string(n_eval) +
+                   ", n_past_batch: " + std::to_string(n_past_batch));
+          _sess->get_mutex().unlock();
+          return;
+        }
+
+        n_past_batch += n_eval;
+        n_remaining -= n_eval;
+        n_cur += n_eval;
+      }
+    }
+    _result.tokens_evaluated += n_input;
+
+    llama_token decode_bos = llama_model_decoder_start_token(model);
+    if (decode_bos == LLAMA_TOKEN_NULL) {
+      decode_bos = llama_vocab_bos(vocab);
+    }
+
+    embd->emplace_back(decode_bos);
+    common_sampler_accept(sampling.get(), decode_bos, false);
+    n_input = 1;
+  }
+
   for (int i = 0; (i < max_len || _interrupted) && !_params.vocab_only; i++) {
     // check if we need to remove some tokens
     if (embd->size() >= _params.n_ctx) {
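For readers new to the encoder-decoder path in llama.cpp, the hunk above follows the API's intended flow: run the whole prompt through llama_encode, then seed the decoder with the model's decoder-start token, falling back to BOS when the GGUF does not define one. A condensed sketch of that flow, with the n_batch chunking and error reporting of the worker elided:

    #include <vector>
    #include "llama.h"

    // Sketch only: prefill for an encoder-decoder model, condensed from
    // the hunk above. Returns false on encode/decode failure.
    static bool encdec_prefill(llama_context * ctx, const llama_model * model,
                               std::vector<llama_token> & prompt) {
        // 1) Encoder pass over the tokenized prompt.
        if (llama_encode(ctx, llama_batch_get_one(prompt.data(),
                                                  (int32_t) prompt.size())) < 0) {
            return false;
        }
        // 2) Seed the decoder, preferring the model's dedicated start token.
        llama_token start = llama_model_decoder_start_token(model);
        if (start == LLAMA_TOKEN_NULL) {
            start = llama_vocab_bos(llama_model_get_vocab(model));  // fallback, as above
        }
        // 3) Generation then continues with llama_decode() on `start` and
        //    on each token the sampler subsequently accepts.
        return llama_decode(ctx, llama_batch_get_one(&start, 1)) == 0;
    }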
@@ -166,13 +205,14 @@ void LlamaCompletionWorker::Execute() {
       if (n_eval > _params.n_batch) {
         n_eval = _params.n_batch;
       }
-
+
       int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
       if (ret < 0) {
         SetError("Failed to decode token batch, code: " + std::to_string(ret) +
                  ", n_eval: " + std::to_string(n_eval) +
                  ", n_past_batch: " + std::to_string(n_past_batch));
-
+        _sess->get_mutex().unlock();
+        return;
       }
 
       n_past_batch += n_eval;