@fugood/llama.node 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary files changed: 11 files (contents not shown)
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.1",
+  "version": "0.4.2",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -107,6 +107,8 @@ llama_pos processImage(
   // Prepare bitmaps array for all images
   mtmd::bitmaps bitmaps;
 
+  std::vector<std::string> bitmap_hashes;
+
   // Load all images
   for (const auto& image_path : image_paths) {
     fprintf(stdout, "[DEBUG] Loading image: %s\n",
@@ -147,6 +149,7 @@ llama_pos processImage(
       std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
       bmp.set_id(hash.c_str());
       bitmaps.entries.push_back(std::move(bmp));
+      bitmap_hashes.push_back(hash.c_str());
     } catch (const std::exception& e) {
       bitmaps.entries.clear();
       return false;
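
The hash stored as each bitmap's id here comes from fnv_hash over the raw RGB pixel buffer (nx*ny*3 bytes). As a point of reference only, a 64-bit FNV-1a digest of that shape could look like the sketch below; the name fnv_hash_sketch and the decimal-string output are illustrative, and the package's real fnv_hash helper may differ in constants or formatting.

#include <cstddef>
#include <cstdint>
#include <string>

// Illustrative only: 64-bit FNV-1a over a byte buffer, rendered as a decimal
// string so it can serve as a stable bitmap id.
static std::string fnv_hash_sketch(const unsigned char *data, size_t len) {
  uint64_t hash = 0xcbf29ce484222325ULL;   // FNV offset basis
  for (size_t i = 0; i < len; i++) {
    hash ^= data[i];
    hash *= 0x100000001b3ULL;              // FNV prime
  }
  return std::to_string(hash);
}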
@@ -180,6 +183,7 @@ llama_pos processImage(
       std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
       bmp.set_id(hash.c_str());
       bitmaps.entries.push_back(std::move(bmp));
+      bitmap_hashes.push_back(hash.c_str());
     }
   }
 
@@ -229,6 +233,7 @@ llama_pos processImage(
 
   // chunk pos
   std::vector<size_t> chunk_pos;
+  std::vector<size_t> chunk_pos_images;
   for (size_t i = 0; i < num_chunks; i++) {
     chunk_pos.push_back(total_token_count);
 
@@ -244,6 +249,8 @@ llama_pos processImage(
       all_tokens.insert(all_tokens.end(), tokens, tokens + n_tokens);
       total_token_count += n_tokens;
     } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+      chunk_pos_images.push_back(total_token_count);
+
       const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
       size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
       size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
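
chunk_pos_images records, for each image chunk, the token position at which that chunk starts; this is the position the later hash comparison rewinds to. A toy illustration of how those positions accumulate as chunks are walked in order (the chunk sizes below are made up):

#include <cstddef>
#include <cstdio>
#include <vector>

// Toy illustration: chunks of sizes {5 text, 576 image, 7 text, 576 image}
// give image start positions 5 and 588.
int main() {
  std::vector<size_t> chunk_sizes = {5, 576, 7, 576};
  std::vector<bool>   is_image    = {false, true, false, true};
  std::vector<size_t> chunk_pos_images;
  size_t total_token_count = 0;
  for (size_t i = 0; i < chunk_sizes.size(); i++) {
    if (is_image[i]) chunk_pos_images.push_back(total_token_count);
    total_token_count += chunk_sizes[i];
  }
  for (size_t p : chunk_pos_images) printf("image chunk starts at token %zu\n", p);
  return 0;
}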
@@ -259,6 +266,28 @@ llama_pos processImage(
 
   llama_pos new_n_past = n_past;
 
+  // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
+  auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
+  if (mtmd_bitmap_past_hashes->size() > 0) {
+    for (size_t i = 0; i < bitmap_hashes.size(); i++) {
+      auto pos = chunk_pos_images[i];
+      if (n_past < pos) {
+        break;
+      }
+      if (i >= mtmd_bitmap_past_hashes->size()) {
+        break;
+      }
+      if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
+        n_past = chunk_pos_images[i];
+        new_n_past = n_past;
+        break;
+      }
+    }
+  }
+
+  // Clear all KV cache entries after position n_past
+  llama_kv_self_seq_rm(ctx, 0, n_past, -1);
+
   for (size_t i = 0; i < chunk_pos.size(); i++) {
     fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
 
@@ -288,7 +317,7 @@ llama_pos processImage(
     }
   }
 
-  if (n_past == total_token_count) {
+  if (n_past == total_token_count && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
     // we have to evaluate at least 1 token to generate logits.
     n_past--;
   }
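
The backtracking block added above lets the KV cache be reused across multimodal calls: images are walked in prompt order and, as soon as one inside the cached prefix has a different hash than on the previous call, n_past is rewound to that image's starting token position and every cache entry from there on is dropped via llama_kv_self_seq_rm. The widened guard in this hunk keeps the existing trick of stepping back one token so llama_decode can produce fresh logits when the whole prompt is already cached, but only when that position holds a real text token; positions covered by image chunks appear to be recorded as LLAMA_TOKEN_NULL placeholders in all_tokens. A self-contained sketch of the rewind rule follows; the helper name rewind_on_image_change and its parameters are illustrative and not part of the package.

#include <cstddef>
#include <string>
#include <vector>

// Illustrative helper mirroring the backtracking logic: compare current image
// hashes against the ones remembered from the previous call and return the
// position evaluation should restart from.
static size_t rewind_on_image_change(size_t n_past,
                                     const std::vector<std::string> &prev_hashes,
                                     const std::vector<std::string> &cur_hashes,
                                     const std::vector<size_t> &image_positions) {
  for (size_t i = 0; i < cur_hashes.size() && i < prev_hashes.size(); i++) {
    size_t pos = image_positions[i];
    if (n_past < pos) {
      break;                    // this image lies beyond the cached prefix
    }
    if (cur_hashes[i] != prev_hashes[i]) {
      return pos;               // first changed image: re-evaluate from here
    }
  }
  return n_past;                // no mismatch inside the cached prefix
}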
@@ -302,6 +331,8 @@ llama_pos processImage(
   // Set the tokens
   sess->set_tokens(std::move(all_tokens));
 
+  sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
+
   // Clean up image resources
   mtmd_input_chunks_free(chunks);
   bitmaps.entries.clear();
@@ -11,6 +11,13 @@ void LoadSessionWorker::Execute() {
   // reserve the maximum number of tokens for capacity
   std::vector<llama_token> tokens;
   tokens.reserve(_sess->params().n_ctx);
+
+  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+  auto null_token_iter = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != tokens.end()) {
+    tokens.resize(std::distance(tokens.begin(), null_token_iter));
+  }
+
   if (!llama_state_load_file(_sess->context(), _path.c_str(), tokens.data(),
                              tokens.capacity(), &count)) {
     SetError("Failed to load session");
@@ -9,6 +9,13 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
 void SaveSessionWorker::Execute() {
   _sess->get_mutex().lock();
   auto tokens = _sess->tokens_ptr();
+
+  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+  auto null_token_iter = std::find(tokens->begin(), tokens->end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != tokens->end()) {
+    tokens->resize(std::distance(tokens->begin(), null_token_iter));
+  }
+
   if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
                              tokens->size())) {
     SetError("Failed to save session");
package/src/common.hpp CHANGED
@@ -80,6 +80,14 @@ public:
     tokens_ = std::move(tokens);
   }
 
+  inline std::vector<std::string> *mtmd_bitmap_past_hashes_ptr() {
+    return &mtmd_bitmap_past_hashes_;
+  }
+
+  inline void set_mtmd_bitmap_past_hashes(std::vector<std::string> hashes) {
+    mtmd_bitmap_past_hashes_ = std::move(hashes);
+  }
+
   inline const common_params &params() const { return params_; }
 
   inline std::mutex &get_mutex() { return mutex; }
@@ -106,6 +114,7 @@ private:
   common_init_result llama_init_;
   const common_params params_;
   std::vector<llama_token> tokens_{};
+  std::vector<std::string> mtmd_bitmap_past_hashes_{};
   std::mutex mutex;
   mtmd_context* _mtmd_ctx = nullptr;
 };
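
Taken together, the new member and accessors let a session remember which image hashes were baked into the KV cache so the next call can decide whether the cached prefix is still valid. A rough usage sketch under the assumption of a session type exposing exactly these two methods (the surrounding function names and flow are illustrative, not part of the package):

#include <string>
#include <utility>
#include <vector>

// Illustrative: after evaluating a multimodal prompt, store the image hashes;
// on the next call, compare them to decide whether evaluation can resume from
// the cached prefix or must rewind to the first changed image.
template <typename Session>
void remember_image_hashes(Session *sess, std::vector<std::string> hashes) {
  sess->set_mtmd_bitmap_past_hashes(std::move(hashes));
}

template <typename Session>
bool image_hashes_unchanged(Session *sess, const std::vector<std::string> &hashes) {
  return *sess->mtmd_bitmap_past_hashes_ptr() == hashes;
}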