@fugood/llama.node 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
11 binary files changed (contents not shown)
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.5",
+  "version": "0.4.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -266,6 +266,30 @@ llama_pos processImage(
 
   llama_pos new_n_past = n_past;
 
+  // Adjust n_past to position of the text chunk
+  // TODO: Edit the text chunk to remove the tokens before n_past to speed up
+  // need to update the mtmd api
+  auto adjusted_n_past = -1;
+  for (size_t i = 0; i < chunk_pos.size(); i++) {
+    if (n_past < chunk_pos[i]) {
+      break;
+    }
+    bool is_end = i + 1 == chunk_pos.size();
+    if (
+      chunk_pos[i] < n_past &&
+      (!is_end && chunk_pos[i + 1] > n_past)
+      // is_end & n_past < total_token_count:
+      // don't need to adjust and it will skip eval_chunk_single, let nextToken() to finish the job
+    ) {
+      adjusted_n_past = chunk_pos[i];
+    }
+  }
+  if (adjusted_n_past != -1) {
+    n_past = adjusted_n_past;
+    new_n_past = n_past;
+    fprintf(stdout, "[DEBUG] Adjusted n_past to %d\n", n_past);
+  }
+
   // Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
   auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
   if (mtmd_bitmap_past_hashes->size() > 0) {
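
Note: for clarity, here is the added loop extracted into a standalone function. This is a minimal sketch, not code from the package: adjust_n_past, the use of int32_t for positions (llama_pos is an int32_t typedef in llama.cpp), and the sample chunk layout are illustrative assumptions; in the hunk above the same logic runs inline inside processImage.

    // Hypothetical standalone version of the loop above (sketch only).
    #include <cstdio>
    #include <vector>

    // chunk_pos holds the starting token position of each mtmd chunk;
    // n_past is the number of tokens already evaluated into the KV cache.
    // Returns the start of the chunk that n_past falls inside, or -1 when
    // no rewind is needed (n_past sits exactly on a chunk boundary, or it
    // falls in the final chunk, which nextToken() finishes on its own).
    int32_t adjust_n_past(int32_t n_past, const std::vector<int32_t> &chunk_pos) {
      int32_t adjusted = -1;
      for (size_t i = 0; i < chunk_pos.size(); i++) {
        if (n_past < chunk_pos[i]) {
          break;  // all later chunks start after n_past
        }
        bool is_end = i + 1 == chunk_pos.size();
        if (chunk_pos[i] < n_past && (!is_end && chunk_pos[i + 1] > n_past)) {
          adjusted = chunk_pos[i];  // n_past lands mid-chunk: rewind to its start
        }
      }
      return adjusted;
    }

    int main() {
      std::vector<int32_t> chunk_pos = {0, 10, 50};  // chunks start at 0, 10, 50
      printf("%d\n", adjust_n_past(12, chunk_pos));  // 10: inside chunk [10, 50)
      printf("%d\n", adjust_n_past(10, chunk_pos));  // -1: exactly on a boundary
      printf("%d\n", adjust_n_past(60, chunk_pos));  // -1: inside the last chunk
    }

The rationale, as the in-diff comments describe it: a chunk cannot be partially re-evaluated through the current mtmd API, so when the cached n_past lands mid-chunk, evaluation rewinds to that chunk's start; a position inside the final chunk is left alone and nextToken() completes it.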
@@ -9,15 +9,16 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
 void SaveSessionWorker::Execute() {
   _sess->get_mutex().lock();
   auto tokens = _sess->tokens_ptr();
+  auto tokens_to_save = std::vector<llama_token>(tokens->begin(), tokens->end());
 
   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
-  auto null_token_iter = std::find(tokens->begin(), tokens->end(), LLAMA_TOKEN_NULL);
-  if (null_token_iter != tokens->end()) {
-    tokens->resize(std::distance(tokens->begin(), null_token_iter));
+  auto null_token_iter = std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != tokens_to_save.end()) {
+    tokens_to_save.resize(std::distance(tokens_to_save.begin(), null_token_iter));
   }
 
-  if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
-                             tokens->size())) {
+  if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens_to_save.data(),
+                             tokens_to_save.size())) {
     SetError("Failed to save session");
   }
   _sess->get_mutex().unlock();
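
Note: this hunk fixes a state-mutation bug. Execute() previously called resize() on the vector returned by _sess->tokens_ptr(), truncating the session's live token buffer at the first LLAMA_TOKEN_NULL, so merely saving a session could drop tokens from the in-memory state. Copying into tokens_to_save keeps the truncation local to the save path. A minimal sketch of the copy-then-truncate pattern follows; token, TOKEN_NULL, and the tokens_to_save() helper are stand-ins for illustration, not the package's API.

    // Sketch of the copy-then-truncate pattern (stand-in types only).
    #include <algorithm>
    #include <cassert>
    #include <vector>

    using token = int;
    constexpr token TOKEN_NULL = -1;  // stand-in for LLAMA_TOKEN_NULL

    // Copy the session's tokens, then truncate the copy at the first null
    // token; the caller's vector (the live session state) stays untouched.
    std::vector<token> tokens_to_save(const std::vector<token> &tokens) {
      std::vector<token> out(tokens.begin(), tokens.end());
      auto it = std::find(out.begin(), out.end(), TOKEN_NULL);
      if (it != out.end()) {
        out.resize(std::distance(out.begin(), it));
      }
      return out;
    }

    int main() {
      std::vector<token> session_tokens = {5, 7, TOKEN_NULL, 9};
      auto saved = tokens_to_save(session_tokens);
      assert(saved.size() == 2);           // the truncated copy goes to disk
      assert(session_tokens.size() == 4);  // in-memory session is unchanged
    }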