@fugood/llama.node 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
11 binary files changed (contents not shown)
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.5",
+  "version": "0.4.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -266,6 +266,30 @@ llama_pos processImage(
 
   llama_pos new_n_past = n_past;
 
+  // Adjust n_past to position of the text chunk
+  // TODO: Edit the text chunk to remove the tokens before n_past to speed up
+  // need to update the mtmd api
+  auto adjusted_n_past = -1;
+  for (size_t i = 0; i < chunk_pos.size(); i++) {
+    if (n_past < chunk_pos[i]) {
+      break;
+    }
+    bool is_end = i + 1 == chunk_pos.size();
+    if (
+      chunk_pos[i] < n_past &&
+      (!is_end && chunk_pos[i + 1] > n_past)
+      // is_end & n_past < total_token_count:
+      // don't need to adjust and it will skip eval_chunk_single, let nextToken() to finish the job
+    ) {
+      adjusted_n_past = chunk_pos[i];
+    }
+  }
+  if (adjusted_n_past != -1) {
+    n_past = adjusted_n_past;
+    new_n_past = n_past;
+    fprintf(stdout, "[DEBUG] Adjusted n_past to %d\n", n_past);
+  }
+
   // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
   auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
   if (mtmd_bitmap_past_hashes->size() > 0) {
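
Note: for clarity, here is the added loop extracted into a standalone function. This is a minimal sketch, not code from the package: adjust_n_past, the use of int32_t for positions (llama_pos is an int32_t typedef in llama.cpp), and the sample chunk layout are illustrative assumptions; in the hunk above the same logic runs inline inside processImage.

    // Hypothetical standalone version of the loop above (sketch only).
    #include <cstdio>
    #include <vector>

    // chunk_pos holds the starting token position of each mtmd chunk;
    // n_past is the number of tokens already evaluated into the KV cache.
    // Returns the start of the chunk that n_past falls inside, or -1 when
    // no rewind is needed (n_past sits exactly on a chunk boundary, or it
    // falls in the final chunk, which nextToken() finishes on its own).
    int32_t adjust_n_past(int32_t n_past, const std::vector<int32_t> &chunk_pos) {
      int32_t adjusted = -1;
      for (size_t i = 0; i < chunk_pos.size(); i++) {
        if (n_past < chunk_pos[i]) {
          break;  // all later chunks start after n_past
        }
        bool is_end = i + 1 == chunk_pos.size();
        if (chunk_pos[i] < n_past && (!is_end && chunk_pos[i + 1] > n_past)) {
          adjusted = chunk_pos[i];  // n_past lands mid-chunk: rewind to its start
        }
      }
      return adjusted;
    }

    int main() {
      std::vector<int32_t> chunk_pos = {0, 10, 50};  // chunks start at 0, 10, 50
      printf("%d\n", adjust_n_past(12, chunk_pos));  // 10: inside chunk [10, 50)
      printf("%d\n", adjust_n_past(10, chunk_pos));  // -1: exactly on a boundary
      printf("%d\n", adjust_n_past(60, chunk_pos));  // -1: inside the last chunk
    }

The rationale, as the in-diff comments describe it: a chunk cannot be partially re-evaluated through the current mtmd API, so when the cached n_past lands mid-chunk, evaluation rewinds to that chunk's start; a position inside the final chunk is left alone and nextToken() completes it.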
@@ -9,15 +9,16 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
 void SaveSessionWorker::Execute() {
   _sess->get_mutex().lock();
   auto tokens = _sess->tokens_ptr();
+  auto tokens_to_save = std::vector<llama_token>(tokens->begin(), tokens->end());
 
   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
-  auto null_token_iter = std::find(tokens->begin(), tokens->end(), LLAMA_TOKEN_NULL);
-  if (null_token_iter != tokens->end()) {
-    tokens->resize(std::distance(tokens->begin(), null_token_iter));
+  auto null_token_iter = std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != tokens_to_save.end()) {
+    tokens_to_save.resize(std::distance(tokens_to_save.begin(), null_token_iter));
   }
 
-  if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
-                             tokens->size())) {
+  if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens_to_save.data(),
+                             tokens_to_save.size())) {
     SetError("Failed to save session");
   }
   _sess->get_mutex().unlock();
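
Note: this hunk fixes a state-mutation bug. Execute() previously called resize() on the vector returned by _sess->tokens_ptr(), truncating the session's live token buffer at the first LLAMA_TOKEN_NULL, so merely saving a session could drop tokens from the in-memory state. Copying into tokens_to_save keeps the truncation local to the save path. A minimal sketch of the copy-then-truncate pattern follows; token, TOKEN_NULL, and the tokens_to_save() helper are stand-ins for illustration, not the package's API.

    // Sketch of the copy-then-truncate pattern (stand-in types only).
    #include <algorithm>
    #include <cassert>
    #include <vector>

    using token = int;
    constexpr token TOKEN_NULL = -1;  // stand-in for LLAMA_TOKEN_NULL

    // Copy the session's tokens, then truncate the copy at the first null
    // token; the caller's vector (the live session state) stays untouched.
    std::vector<token> tokens_to_save(const std::vector<token> &tokens) {
      std::vector<token> out(tokens.begin(), tokens.end());
      auto it = std::find(out.begin(), out.end(), TOKEN_NULL);
      if (it != out.end()) {
        out.resize(std::distance(out.begin(), it));
      }
      return out;
    }

    int main() {
      std::vector<token> session_tokens = {5, 7, TOKEN_NULL, 9};
      auto saved = tokens_to_save(session_tokens);
      assert(saved.size() == 2);           // the truncated copy goes to disk
      assert(session_tokens.size() == 4);  // in-memory session is unchanged
    }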