@fugood/llama.node 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
package/lib/binding.ts CHANGED
@@ -113,6 +113,8 @@ export type LlamaCompletionOptions = {
113
113
 
114
114
  export type LlamaCompletionResult = {
115
115
  text: string
116
+ reasoning_content?: string
117
+ content?: string
116
118
  tokens_predicted: number
117
119
  tokens_evaluated: number
118
120
  truncated: boolean
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fugood/llama.node",
3
3
  "access": "public",
4
- "version": "0.4.1",
4
+ "version": "0.4.3",
5
5
  "description": "An another Node binding of llama.cpp",
6
6
  "main": "lib/index.js",
7
7
  "scripts": {
@@ -107,6 +107,8 @@ llama_pos processImage(
107
107
  // Prepare bitmaps array for all images
108
108
  mtmd::bitmaps bitmaps;
109
109
 
110
+ std::vector<std::string> bitmap_hashes;
111
+
110
112
  // Load all images
111
113
  for (const auto& image_path : image_paths) {
112
114
  fprintf(stdout, "[DEBUG] Loading image: %s\n",
@@ -147,6 +149,7 @@ llama_pos processImage(
147
149
  std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
148
150
  bmp.set_id(hash.c_str());
149
151
  bitmaps.entries.push_back(std::move(bmp));
152
+ bitmap_hashes.push_back(hash.c_str());
150
153
  } catch (const std::exception& e) {
151
154
  bitmaps.entries.clear();
152
155
  return false;
@@ -180,6 +183,7 @@ llama_pos processImage(
180
183
  std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
181
184
  bmp.set_id(hash.c_str());
182
185
  bitmaps.entries.push_back(std::move(bmp));
186
+ bitmap_hashes.push_back(hash.c_str());
183
187
  }
184
188
  }
185
189
 
@@ -229,6 +233,7 @@ llama_pos processImage(
229
233
 
230
234
  // chunk pos
231
235
  std::vector<size_t> chunk_pos;
236
+ std::vector<size_t> chunk_pos_images;
232
237
  for (size_t i = 0; i < num_chunks; i++) {
233
238
  chunk_pos.push_back(total_token_count);
234
239
 
@@ -244,6 +249,8 @@ llama_pos processImage(
244
249
  all_tokens.insert(all_tokens.end(), tokens, tokens + n_tokens);
245
250
  total_token_count += n_tokens;
246
251
  } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
252
+ chunk_pos_images.push_back(total_token_count);
253
+
247
254
  const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
248
255
  size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
249
256
  size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
@@ -259,6 +266,28 @@ llama_pos processImage(
259
266
 
260
267
  llama_pos new_n_past = n_past;
261
268
 
269
+ // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
270
+ auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
271
+ if (mtmd_bitmap_past_hashes->size() > 0) {
272
+ for (size_t i = 0; i < bitmap_hashes.size(); i++) {
273
+ auto pos = chunk_pos_images[i];
274
+ if (n_past < pos) {
275
+ break;
276
+ }
277
+ if (i >= mtmd_bitmap_past_hashes->size()) {
278
+ break;
279
+ }
280
+ if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
281
+ n_past = chunk_pos_images[i];
282
+ new_n_past = n_past;
283
+ break;
284
+ }
285
+ }
286
+ }
287
+
288
+ // Clear all KV cache entries after position n_past
289
+ llama_kv_self_seq_rm(ctx, 0, n_past, -1);
290
+
262
291
  for (size_t i = 0; i < chunk_pos.size(); i++) {
263
292
  fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
264
293
 
@@ -288,7 +317,7 @@ llama_pos processImage(
288
317
  }
289
318
  }
290
319
 
291
- if (n_past == total_token_count) {
320
+ if (n_past == total_token_count && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
292
321
  // we have to evaluate at least 1 token to generate logits.
293
322
  n_past--;
294
323
  }
@@ -302,6 +331,8 @@ llama_pos processImage(
302
331
  // Set the tokens
303
332
  sess->set_tokens(std::move(all_tokens));
304
333
 
334
+ sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
335
+
305
336
  // Clean up image resources
306
337
  mtmd_input_chunks_free(chunks);
307
338
  bitmaps.entries.clear();
@@ -520,16 +551,16 @@ void LlamaCompletionWorker::OnOK() {
520
551
  Napi::String::New(env, _result.text.c_str()));
521
552
 
522
553
  Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
523
- std::string * reasoning_content = nullptr;
524
- std::string * content = nullptr;
554
+ std::string reasoning_content = "";
555
+ std::string content;
525
556
  if (!_stop) {
526
557
  try {
527
558
  common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
528
559
  if (!message.reasoning_content.empty()) {
529
- reasoning_content = &message.reasoning_content;
560
+ reasoning_content = message.reasoning_content;
530
561
  }
531
562
  if (!message.content.empty()) {
532
- content = &message.content;
563
+ content = message.content;
533
564
  }
534
565
  for (size_t i = 0; i < message.tool_calls.size(); i++) {
535
566
  const auto &tc = message.tool_calls[i];
@@ -551,11 +582,11 @@ void LlamaCompletionWorker::OnOK() {
551
582
  if (tool_calls.Length() > 0) {
552
583
  result.Set("tool_calls", tool_calls);
553
584
  }
554
- if (reasoning_content) {
555
- result.Set("reasoning_content", Napi::String::New(env, reasoning_content->c_str()));
585
+ if (!reasoning_content.empty()) {
586
+ result.Set("reasoning_content", Napi::String::New(env, reasoning_content.c_str()));
556
587
  }
557
- if (content) {
558
- result.Set("content", Napi::String::New(env, content->c_str()));
588
+ if (!content.empty()) {
589
+ result.Set("content", Napi::String::New(env, content.c_str()));
559
590
  }
560
591
 
561
592
  auto ctx = _sess->context();
@@ -11,6 +11,13 @@ void LoadSessionWorker::Execute() {
11
11
  // reserve the maximum number of tokens for capacity
12
12
  std::vector<llama_token> tokens;
13
13
  tokens.reserve(_sess->params().n_ctx);
14
+
15
+ // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
16
+ auto null_token_iter = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
17
+ if (null_token_iter != tokens.end()) {
18
+ tokens.resize(std::distance(tokens.begin(), null_token_iter));
19
+ }
20
+
14
21
  if (!llama_state_load_file(_sess->context(), _path.c_str(), tokens.data(),
15
22
  tokens.capacity(), &count)) {
16
23
  SetError("Failed to load session");
@@ -9,6 +9,13 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
9
9
  void SaveSessionWorker::Execute() {
10
10
  _sess->get_mutex().lock();
11
11
  auto tokens = _sess->tokens_ptr();
12
+
13
+ // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
14
+ auto null_token_iter = std::find(tokens->begin(), tokens->end(), LLAMA_TOKEN_NULL);
15
+ if (null_token_iter != tokens->end()) {
16
+ tokens->resize(std::distance(tokens->begin(), null_token_iter));
17
+ }
18
+
12
19
  if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
13
20
  tokens->size())) {
14
21
  SetError("Failed to save session");
package/src/common.hpp CHANGED
@@ -80,6 +80,14 @@ public:
80
80
  tokens_ = std::move(tokens);
81
81
  }
82
82
 
83
+ inline std::vector<std::string> *mtmd_bitmap_past_hashes_ptr() {
84
+ return &mtmd_bitmap_past_hashes_;
85
+ }
86
+
87
+ inline void set_mtmd_bitmap_past_hashes(std::vector<std::string> hashes) {
88
+ mtmd_bitmap_past_hashes_ = std::move(hashes);
89
+ }
90
+
83
91
  inline const common_params &params() const { return params_; }
84
92
 
85
93
  inline std::mutex &get_mutex() { return mutex; }
@@ -106,6 +114,7 @@ private:
106
114
  common_init_result llama_init_;
107
115
  const common_params params_;
108
116
  std::vector<llama_token> tokens_{};
117
+ std::vector<std::string> mtmd_bitmap_past_hashes_{};
109
118
  std::mutex mutex;
110
119
  mtmd_context* _mtmd_ctx = nullptr;
111
120
  };