npm - @fugood/llama.node - Versions diffs - 0.4.1 → 0.4.3 - Mend

@fugood/llama.node 0.4.1 → 0.4.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22)

package/bin/darwin/arm64/llama-node.node +0 -0
package/bin/darwin/x64/llama-node.node +0 -0
package/bin/linux/arm64/llama-node.node +0 -0
package/bin/linux/x64/llama-node.node +0 -0
package/bin/linux-cuda/arm64/llama-node.node +0 -0
package/bin/linux-cuda/x64/llama-node.node +0 -0
package/bin/linux-vulkan/arm64/llama-node.node +0 -0
package/bin/linux-vulkan/x64/llama-node.node +0 -0
package/bin/win32/arm64/llama-node.node +0 -0
package/bin/win32/arm64/node.lib +0 -0
package/bin/win32/x64/llama-node.node +0 -0
package/bin/win32/x64/node.lib +0 -0
package/bin/win32-vulkan/arm64/llama-node.node +0 -0
package/bin/win32-vulkan/arm64/node.lib +0 -0
package/bin/win32-vulkan/x64/llama-node.node +0 -0
package/bin/win32-vulkan/x64/node.lib +0 -0
package/lib/binding.ts +2 -0
package/package.json +1 -1
package/src/LlamaCompletionWorker.cpp +40 -9
package/src/LoadSessionWorker.cpp +7 -0
package/src/SaveSessionWorker.cpp +7 -0
package/src/common.hpp +9 -0

package/bin/darwin/arm64/llama-node.node CHANGED Viewed

Binary file

package/bin/darwin/x64/llama-node.node CHANGED Viewed

Binary file

package/bin/linux/arm64/llama-node.node CHANGED Viewed

Binary file

package/bin/linux/x64/llama-node.node CHANGED Viewed

Binary file

package/bin/linux-cuda/arm64/llama-node.node CHANGED Viewed

Binary file

package/bin/linux-cuda/x64/llama-node.node CHANGED Viewed

Binary file

package/bin/linux-vulkan/arm64/llama-node.node CHANGED Viewed

Binary file

package/bin/linux-vulkan/x64/llama-node.node CHANGED Viewed

Binary file

package/bin/win32/arm64/llama-node.node CHANGED Viewed

Binary file

package/bin/win32/arm64/node.lib CHANGED Viewed

Binary file

package/bin/win32/x64/llama-node.node CHANGED Viewed

Binary file

package/bin/win32/x64/node.lib CHANGED Viewed

Binary file

package/bin/win32-vulkan/arm64/llama-node.node CHANGED Viewed

Binary file

package/bin/win32-vulkan/arm64/node.lib CHANGED Viewed

Binary file

package/bin/win32-vulkan/x64/llama-node.node CHANGED Viewed

Binary file

package/bin/win32-vulkan/x64/node.lib CHANGED Viewed

Binary file

package/lib/binding.ts CHANGED Viewed

@@ -113,6 +113,8 @@ export type LlamaCompletionOptions = {
 export type LlamaCompletionResult = {
   text: string
+  reasoning_content?: string
+  content?: string
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.1",
+  "version": "0.4.3",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {

package/src/LlamaCompletionWorker.cpp CHANGED Viewed

@@ -107,6 +107,8 @@ llama_pos processImage(
   // Prepare bitmaps array for all images
   mtmd::bitmaps bitmaps;
+  std::vector<std::string> bitmap_hashes;
   // Load all images
   for (const auto& image_path : image_paths) {
     fprintf(stdout, "[DEBUG] Loading image: %s\n",
@@ -147,6 +149,7 @@ llama_pos processImage(
         std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
         bmp.set_id(hash.c_str());
         bitmaps.entries.push_back(std::move(bmp));
+        bitmap_hashes.push_back(hash.c_str());
       } catch (const std::exception& e) {
         bitmaps.entries.clear();
         return false;
@@ -180,6 +183,7 @@ llama_pos processImage(
       std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
       bmp.set_id(hash.c_str());
       bitmaps.entries.push_back(std::move(bmp));
+      bitmap_hashes.push_back(hash.c_str());
     }
   }
@@ -229,6 +233,7 @@ llama_pos processImage(
   // chunk pos
   std::vector<size_t> chunk_pos;
+  std::vector<size_t> chunk_pos_images;
   for (size_t i = 0; i < num_chunks; i++) {
     chunk_pos.push_back(total_token_count);
@@ -244,6 +249,8 @@ llama_pos processImage(
       all_tokens.insert(all_tokens.end(), tokens, tokens + n_tokens);
       total_token_count += n_tokens;
     } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+      chunk_pos_images.push_back(total_token_count);
       const mtmd_image_tokens* img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
       size_t n_tokens = mtmd_image_tokens_get_n_tokens(img_tokens);
       size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
@@ -259,6 +266,28 @@ llama_pos processImage(
   llama_pos new_n_past = n_past;
+  // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
+  auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
+  if (mtmd_bitmap_past_hashes->size() > 0) {
+    for (size_t i = 0; i < bitmap_hashes.size(); i++) {
+      auto pos = chunk_pos_images[i];
+      if (n_past < pos) {
+        break;
+      }
+      if (i >= mtmd_bitmap_past_hashes->size()) {
+        break;
+      }
+      if (bitmap_hashes[i] != (*mtmd_bitmap_past_hashes)[i]) {
+        n_past = chunk_pos_images[i];
+        new_n_past = n_past;
+        break;
+      }
+    }
+  }
+  // Clear all KV cache entries after position n_past
+  llama_kv_self_seq_rm(ctx, 0, n_past, -1);
   for (size_t i = 0; i < chunk_pos.size(); i++) {
     fprintf(stdout, "[DEBUG] Evaluating chunk %zu: n_past=%d, chunk_pos=%zu\n", i, n_past, chunk_pos[i]);
@@ -288,7 +317,7 @@ llama_pos processImage(
     }
   }
-  if (n_past == total_token_count) {
+  if (n_past == total_token_count && n_past > 0 && all_tokens[n_past - 1] != LLAMA_TOKEN_NULL) {
     // we have to evaluate at least 1 token to generate logits.
     n_past--;
   }
@@ -302,6 +331,8 @@ llama_pos processImage(
   // Set the tokens
   sess->set_tokens(std::move(all_tokens));
+  sess->set_mtmd_bitmap_past_hashes(bitmap_hashes);
   // Clean up image resources
   mtmd_input_chunks_free(chunks);
   bitmaps.entries.clear();
@@ -520,16 +551,16 @@ void LlamaCompletionWorker::OnOK() {
              Napi::String::New(env, _result.text.c_str()));
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
-  std::string * reasoning_content = nullptr;
-  std::string * content = nullptr;
+  std::string reasoning_content = "";
+  std::string content;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
       if (!message.reasoning_content.empty()) {
-        reasoning_content = &message.reasoning_content;
+        reasoning_content = message.reasoning_content;
       }
       if (!message.content.empty()) {
-        content = &message.content;
+        content = message.content;
       }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
@@ -551,11 +582,11 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
-  if (reasoning_content) {
-    result.Set("reasoning_content", Napi::String::New(env, reasoning_content->c_str()));
+  if (!reasoning_content.empty()) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content.c_str()));
   }
-  if (content) {
-    result.Set("content", Napi::String::New(env, content->c_str()));
+  if (!content.empty()) {
+    result.Set("content", Napi::String::New(env, content.c_str()));
   }
   auto ctx = _sess->context();

package/src/LoadSessionWorker.cpp CHANGED Viewed

@@ -11,6 +11,13 @@ void LoadSessionWorker::Execute() {
   // reserve the maximum number of tokens for capacity
   std::vector<llama_token> tokens;
   tokens.reserve(_sess->params().n_ctx);
+  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+  auto null_token_iter = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != tokens.end()) {
+    tokens.resize(std::distance(tokens.begin(), null_token_iter));
+  }
   if (!llama_state_load_file(_sess->context(), _path.c_str(), tokens.data(),
                              tokens.capacity(), &count)) {
     SetError("Failed to load session");

package/src/SaveSessionWorker.cpp CHANGED Viewed

@@ -9,6 +9,13 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
 void SaveSessionWorker::Execute() {
   _sess->get_mutex().lock();
   auto tokens = _sess->tokens_ptr();
+  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+  auto null_token_iter = std::find(tokens->begin(), tokens->end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != tokens->end()) {
+    tokens->resize(std::distance(tokens->begin(), null_token_iter));
+  }
   if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
                              tokens->size())) {
     SetError("Failed to save session");

package/src/common.hpp CHANGED Viewed

@@ -80,6 +80,14 @@ public:
     tokens_ = std::move(tokens);
   }
+  inline std::vector<std::string> *mtmd_bitmap_past_hashes_ptr() {
+    return &mtmd_bitmap_past_hashes_;
+  }
+  inline void set_mtmd_bitmap_past_hashes(std::vector<std::string> hashes) {
+    mtmd_bitmap_past_hashes_ = std::move(hashes);
+  }
   inline const common_params &params() const { return params_; }
   inline std::mutex &get_mutex() { return mutex; }
@@ -106,6 +114,7 @@ private:
   common_init_result llama_init_;
   const common_params params_;
   std::vector<llama_token> tokens_{};
+  std::vector<std::string> mtmd_bitmap_past_hashes_{};
   std::mutex mutex;
   mtmd_context* _mtmd_ctx = nullptr;
 };