@fugood/llama.node 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.js +17 -7
- package/lib/binding.ts +1 -0
- package/package.json +14 -14
- package/src/LlamaCompletionWorker.cpp +22 -4
- package/src/LlamaContext.cpp +1 -1
package/lib/binding.js
CHANGED
@@ -15,13 +15,23 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
 }) : function(o, v) {
     o["default"] = v;
 });
-var __importStar = (this && this.__importStar) || function (mod) {
-    if (mod && mod.__esModule) return mod;
-    var result = {};
-    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
-    __setModuleDefault(result, mod);
-    return result;
-};
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
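Nothing in the hunk above was hand-edited: the new `__importStar` body matches the helper emitted by newer TypeScript compilers (the 5.5-era emit), which builds the key list through a lazily-initialized `ownKeys` function that prefers `Object.getOwnPropertyNames` over a `for...in` walk. The 1.1.1 build most likely just used an updated toolchain. As a minimal sketch (file name hypothetical), any namespace import compiled for CommonJS reproduces this helper:

    // binding-consumer.ts — hypothetical file; compiling any namespace
    // import like this with tsc (module: commonjs) emits the
    // __importStar helper shown in the hunk above.
    import * as path from 'node:path'

    export function baseName(p: string): string {
      return path.basename(p)
    }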
package/lib/binding.ts
CHANGED
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.1.0",
+  "version": "1.1.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -71,19 +71,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.1.0",
-    "@fugood/node-llama-linux-x64-vulkan": "1.1.0",
-    "@fugood/node-llama-linux-x64-cuda": "1.1.0",
-    "@fugood/node-llama-linux-arm64": "1.1.0",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.1.0",
-    "@fugood/node-llama-linux-arm64-cuda": "1.1.0",
-    "@fugood/node-llama-win32-x64": "1.1.0",
-    "@fugood/node-llama-win32-x64-vulkan": "1.1.0",
-    "@fugood/node-llama-win32-x64-cuda": "1.1.0",
-    "@fugood/node-llama-win32-arm64": "1.1.0",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.1.0",
-    "@fugood/node-llama-darwin-x64": "1.1.0",
-    "@fugood/node-llama-darwin-arm64": "1.1.0"
+    "@fugood/node-llama-linux-x64": "1.1.1",
+    "@fugood/node-llama-linux-x64-vulkan": "1.1.1",
+    "@fugood/node-llama-linux-x64-cuda": "1.1.1",
+    "@fugood/node-llama-linux-arm64": "1.1.1",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.1.1",
+    "@fugood/node-llama-linux-arm64-cuda": "1.1.1",
+    "@fugood/node-llama-win32-x64": "1.1.1",
+    "@fugood/node-llama-win32-x64-vulkan": "1.1.1",
+    "@fugood/node-llama-win32-x64-cuda": "1.1.1",
+    "@fugood/node-llama-win32-arm64": "1.1.1",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.1.1",
+    "@fugood/node-llama-darwin-x64": "1.1.1",
+    "@fugood/node-llama-darwin-arm64": "1.1.1"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
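This hunk is a pure lockstep bump: the wrapper package and all thirteen per-platform prebuilt binaries under `optionalDependencies` move from 1.1.0 to 1.1.1 together, so whichever binary npm manages to install always matches the JS layer. A hypothetical sketch of how a loader such as `lib/binding.js` typically selects one of these packages; the `variant` parameter and the name composition are assumptions for illustration, not the package's actual code:

    import { arch, platform } from 'node:os'

    // Illustrative only: compose a package name matching the
    // optionalDependencies listed above from the current platform.
    function prebuiltName(variant?: 'vulkan' | 'cuda'): string {
      const suffix = variant ? `-${variant}` : ''
      return `@fugood/node-llama-${platform()}-${arch()}${suffix}`
    }

    // e.g. on Apple Silicon this yields '@fugood/node-llama-darwin-arm64'
    console.log(prebuiltName())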
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -157,10 +157,26 @@ void LlamaCompletionWorker::Execute() {
   // For multimodal input, n_past might already be set
   // Only decode text tokens if we have any input left
   if (n_input > 0) {
-    …
-    …
-    …
-    …
+    // Decode tokens in batches using n_batch as chunk size
+    int n_past_batch = n_cur;
+    int n_remaining = n_input;
+
+    while (n_remaining > 0) {
+      int n_eval = n_remaining;
+      if (n_eval > _params.n_batch) {
+        n_eval = _params.n_batch;
+      }
+
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
+      if (ret < 0) {
+        SetError("Failed to decode token batch, code: " + std::to_string(ret) +
+                 ", n_eval: " + std::to_string(n_eval) +
+                 ", n_past_batch: " + std::to_string(n_past_batch));
+        break;
+      }
+
+      n_past_batch += n_eval;
+      n_remaining -= n_eval;
     }
   }
 
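This loop is the substantive fix in 1.1.1: prompt tokens are now fed to `llama_decode` in chunks of at most `_params.n_batch`, with `n_past_batch` tracking the position in the token buffer, replacing what was presumably a single whole-prompt decode (the removed lines are truncated in this diff source). In practice, prompts that tokenize to more than `n_batch` tokens should no longer fail at decode time. A usage sketch from the JS side, assuming the package's `loadModel`/`completion` API surface:

    import { loadModel } from '@fugood/llama.node'

    async function main(): Promise<void> {
      const context = await loadModel({
        model: './model.gguf',
        n_ctx: 4096,
        // With 1.1.1, prompts longer than n_batch tokens are decoded in
        // n_batch-sized chunks by the worker loop above.
        n_batch: 512,
      })

      const longPrompt = 'Summarize this text: ' + 'lorem ipsum '.repeat(400)
      const result = await context.completion({ prompt: longPrompt, n_predict: 64 })
      console.log(result.text)
    }

    main()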
@@ -255,6 +271,8 @@ void LlamaCompletionWorker::OnOK() {
   try {
     common_chat_syntax chat_syntax;
     chat_syntax.format = static_cast<common_chat_format>(_chat_format);
+    result.Set("chat_format", Napi::Number::New(env, _chat_format));
+
     chat_syntax.thinking_forced_open = _thinking_forced_open;
 
     if (_reasoning_format == "deepseek") {
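The second hunk exposes which chat format was used: the numeric `common_chat_format` value that drives output parsing is now copied onto the JS completion result as `chat_format` (plausibly what the one-line `lib/binding.ts` change above declares in the result type). A sketch of reading it, reusing the `context` from the previous example; the `messages` shape is an assumption about the chat API:

    const res = await context.completion({
      messages: [{ role: 'user', content: 'Hello!' }],
      n_predict: 32,
    })
    // chat_format carries the numeric common_chat_format enum value
    console.log(res.chat_format)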
package/src/LlamaContext.cpp
CHANGED
@@ -247,7 +247,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.cache_type_v = kv_cache_type_from_str(
       get_option<std::string>(options, "cache_type_v", "f16").c_str());
   params.ctx_shift = get_option<bool>(options, "ctx_shift", true);
-  params.kv_unified = get_option<bool>(options, "kv_unified", …
+  params.kv_unified = get_option<bool>(options, "kv_unified", false);
 
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
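The 1.1.0 line is truncated in the diff source, so the old default is not visible, but the replacement makes it explicit: `kv_unified` now falls back to `false` (matching llama.cpp's usual default) unless the caller opts in. From JS it remains an ordinary boolean context option, continuing the sketch above:

    // Opt in to a unified KV cache for one context; omitting the flag
    // now yields the explicit default of false.
    const unifiedCtx = await loadModel({
      model: './model.gguf',
      kv_unified: true,
    })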