@fugood/llama.node 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +32 -1
- package/src/LoadSessionWorker.cpp +7 -0
- package/src/SaveSessionWorker.cpp +7 -0
- package/src/common.hpp +9 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/bin/win32/arm64/node.lib
CHANGED
|
Binary file
|
|
Binary file
|
package/bin/win32/x64/node.lib
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -107,6 +107,8 @@ llama_pos processImage(
|
|
|
107
107
|
// Prepare bitmaps array for all images
|
|
108
108
|
mtmd::bitmaps bitmaps;
|
|
109
109
|
|
|
110
|
+
std::vector<std::string> bitmap_hashes;
|
|
111
|
+
|
|
110
112
|
// Load all images
|
|
111
113
|
for (const auto& image_path : image_paths) {
|
|
112
114
|
fprintf(stdout, "[DEBUG] Loading image: %s\n",
|
|
@@ -147,6 +149,7 @@ llama_pos processImage(
|
|
|
147
149
|
std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
|
|
148
150
|
bmp.set_id(hash.c_str());
|
|
149
151
|
bitmaps.entries.push_back(std::move(bmp));
|
|
152
|
+
bitmap_hashes.push_back(hash.c_str());
|
|
150
153
|
} catch (const std::exception& e) {
|
|
151
154
|
bitmaps.entries.clear();
|
|
152
155
|
return false;
|
|
@@ -180,6 +183,7 @@ llama_pos processImage(
|
|
|
180
183
|
std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
|
|
181
184
|
bmp.set_id(hash.c_str());
|
|
182
185
|
bitmaps.entries.push_back(std::move(bmp));
|
|
186
|
+
bitmap_hashes.push_back(hash.c_str());
|
|
183
187
|
}
|
|
184
188
|
}
|
|
185
189
|
|
|
@@ -229,6 +233,7 @@ llama_pos processImage(
|
|
|
229
233
|
|
|
230
234
|
// chunk pos
|
|
231
235
|
std::vector<size_t> chunk_pos;
|
|
236
|
+
std::vector<size_t> chunk_pos_images;
|
|
232
237
|
for (size_t i = 0; i < num_chunks; i++) {
|
|
233
238
|
chunk_pos.push_back(total_token_count);
|
|
234
239
|
|
|
@@ -244,6 +249,8 @@ llama_pos processImage(
|
|
|
244
249
|
all_tokens.insert(all_tokens.end(), tokens, tokens + n_tokens);
|
|
245
250
|
total_token_count += n_tokens;
|
|
246
251
|
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
|
252
|
+
chunk_pos_images.push_back(total_token_count);
|
|
253
|
+
|
|
247
254
|
const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
|
|
248
255
|
size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
|
|
249
256
|
size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
|
|
@@ -259,6 +266,28 @@ llama_pos processImage(
|
|
|
259
266
|
|
|
260
267
|
llama_pos new_n_past = n_past;
|
|
261
268
|
|
|
269
|
+
// Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
|
|
270
|
+
auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
|
|
271
|
+
if (mtmd_bitmap_past_hashes->size() > 0) {
|
|
272
|
+
for (size_t i = 0; i < bitmap_hashes.size(); i++) {
|
|
273
|
+
auto pos = chunk_pos_images[i];
|
|
274
|
+
if (n_past < pos) {
|
|
275
|
+
break;
|
|
276
|
+
}
|
|
277
|
+
if (i >= mtmd_bitmap_past_hashes->size()) {
|
|
278
|
+
break;
|
|
279
|
+
}
|
|
280
|
+
if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
|
|
281
|
+
n_past = chunk_pos_images[i];
|
|
282
|
+
new_n_past = n_past;
|
|
283
|
+
break;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// Clear all KV cache entries after position n_past
|
|
289
|
+
llama_kv_self_seq_rm(ctx, 0, n_past, -1);
|
|
290
|
+
|
|
262
291
|
for (size_t i = 0; i < chunk_pos.size(); i++) {
|
|
263
292
|
fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
|
|
264
293
|
|
|
@@ -288,7 +317,7 @@ llama_pos processImage(
|
|
|
288
317
|
}
|
|
289
318
|
}
|
|
290
319
|
|
|
291
|
-
if (n_past == total_token_count) {
|
|
320
|
+
if (n_past == total_token_count && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
|
|
292
321
|
// we have to evaluate at least 1 token to generate logits.
|
|
293
322
|
n_past--;
|
|
294
323
|
}
|
|
@@ -302,6 +331,8 @@ llama_pos processImage(
|
|
|
302
331
|
// Set the tokens
|
|
303
332
|
sess->set_tokens(std::move(all_tokens));
|
|
304
333
|
|
|
334
|
+
sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
|
|
335
|
+
|
|
305
336
|
// Clean up image resources
|
|
306
337
|
mtmd_input_chunks_free(chunks);
|
|
307
338
|
bitmaps.entries.clear();
|
|
@@ -11,6 +11,13 @@ void LoadSessionWorker::Execute() {
|
|
|
11
11
|
// reserve the maximum number of tokens for capacity
|
|
12
12
|
std::vector<llama_token> tokens;
|
|
13
13
|
tokens.reserve(_sess->params().n_ctx);
|
|
14
|
+
|
|
15
|
+
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
|
|
16
|
+
auto null_token_iter = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
|
|
17
|
+
if (null_token_iter != tokens.end()) {
|
|
18
|
+
tokens.resize(std::distance(tokens.begin(), null_token_iter));
|
|
19
|
+
}
|
|
20
|
+
|
|
14
21
|
if (!llama_state_load_file(_sess->context(), _path.c_str(), tokens.data(),
|
|
15
22
|
tokens.capacity(), &count)) {
|
|
16
23
|
SetError("Failed to load session");
|
|
@@ -9,6 +9,13 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
|
|
|
9
9
|
void SaveSessionWorker::Execute() {
|
|
10
10
|
_sess->get_mutex().lock();
|
|
11
11
|
auto tokens = _sess->tokens_ptr();
|
|
12
|
+
|
|
13
|
+
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
|
|
14
|
+
auto null_token_iter = std::find(tokens->begin(), tokens->end(), LLAMA_TOKEN_NULL);
|
|
15
|
+
if (null_token_iter != tokens->end()) {
|
|
16
|
+
tokens->resize(std::distance(tokens->begin(), null_token_iter));
|
|
17
|
+
}
|
|
18
|
+
|
|
12
19
|
if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
|
|
13
20
|
tokens->size())) {
|
|
14
21
|
SetError("Failed to save session");
|
package/src/common.hpp
CHANGED
|
@@ -80,6 +80,14 @@ public:
|
|
|
80
80
|
tokens_ = std::move(tokens);
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
inline std::vector<std::string> *mtmd_bitmap_past_hashes_ptr() {
|
|
84
|
+
return &mtmd_bitmap_past_hashes_;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
inline void set_mtmd_bitmap_past_hashes(std::vector<std::string> hashes) {
|
|
88
|
+
mtmd_bitmap_past_hashes_ = std::move(hashes);
|
|
89
|
+
}
|
|
90
|
+
|
|
83
91
|
inline const common_params &params() const { return params_; }
|
|
84
92
|
|
|
85
93
|
inline std::mutex &get_mutex() { return mutex; }
|
|
@@ -106,6 +114,7 @@ private:
|
|
|
106
114
|
common_init_result llama_init_;
|
|
107
115
|
const common_params params_;
|
|
108
116
|
std::vector<llama_token> tokens_{};
|
|
117
|
+
std::vector<std::string> mtmd_bitmap_past_hashes_{};
|
|
109
118
|
std::mutex mutex;
|
|
110
119
|
mtmd_context* _mtmd_ctx = nullptr;
|
|
111
120
|
};
|