@fugood/llama.node 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +24 -0
- package/src/SaveSessionWorker.cpp +6 -5
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/bin/win32/arm64/node.lib
CHANGED
|
Binary file
|
|
Binary file
|
package/bin/win32/x64/node.lib
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -266,6 +266,30 @@ llama_pos processImage(
|
|
|
266
266
|
|
|
267
267
|
llama_pos new_n_past = n_past;
|
|
268
268
|
|
|
269
|
+
// Adjust n_past to position of the text chunk
|
|
270
|
+
// TODO: Edit the text chunk to remove the tokens before n_past to speed up
|
|
271
|
+
// This requires an update to the mtmd API
|
|
272
|
+
auto adjusted_n_past = -1;
|
|
273
|
+
for (size_t i = 0; i < chunk_pos.size(); i++) {
|
|
274
|
+
if (n_past < chunk_pos[i]) {
|
|
275
|
+
break;
|
|
276
|
+
}
|
|
277
|
+
bool is_end = i + 1 == chunk_pos.size();
|
|
278
|
+
if (
|
|
279
|
+
chunk_pos[i] < n_past &&
|
|
280
|
+
(!is_end && chunk_pos[i + 1] > n_past)
|
|
281
|
+
// is_end & n_past < total_token_count:
|
|
282
|
+
// don't need to adjust; it will skip eval_chunk_single and let nextToken() finish the job
|
|
283
|
+
) {
|
|
284
|
+
adjusted_n_past = chunk_pos[i];
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
if (adjusted_n_past != -1) {
|
|
288
|
+
n_past = adjusted_n_past;
|
|
289
|
+
new_n_past = n_past;
|
|
290
|
+
fprintf(stdout, "[DEBUG] Adjusted n_past to %d\n", n_past);
|
|
291
|
+
}
|
|
292
|
+
|
|
269
293
|
// Compare bitmap hashes, if they are not the same, backtrack n_past to the position of the first mismatch
|
|
270
294
|
auto mtmd_bitmap_past_hashes = sess->mtmd_bitmap_past_hashes_ptr();
|
|
271
295
|
if (mtmd_bitmap_past_hashes->size() > 0) {
|
|
@@ -9,15 +9,16 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
|
|
|
9
9
|
void SaveSessionWorker::Execute() {
|
|
10
10
|
_sess->get_mutex().lock();
|
|
11
11
|
auto tokens = _sess->tokens_ptr();
|
|
12
|
+
auto tokens_to_save = std::vector<llama_token>(tokens->begin(), tokens->end());
|
|
12
13
|
|
|
13
14
|
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
|
|
14
|
-
auto null_token_iter = std::find(
|
|
15
|
-
if (null_token_iter !=
|
|
16
|
-
|
|
15
|
+
auto null_token_iter = std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
|
|
16
|
+
if (null_token_iter != tokens_to_save.end()) {
|
|
17
|
+
tokens_to_save.resize(std::distance(tokens_to_save.begin(), null_token_iter));
|
|
17
18
|
}
|
|
18
19
|
|
|
19
|
-
if (!llama_state_save_file(_sess->context(), _path.c_str(),
|
|
20
|
-
|
|
20
|
+
if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens_to_save.data(),
|
|
21
|
+
tokens_to_save.size())) {
|
|
21
22
|
SetError("Failed to save session");
|
|
22
23
|
}
|
|
23
24
|
_sess->get_mutex().unlock();
|