@fugood/llama.node 1.3.0-rc.4 → 1.3.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +1 -1
- package/lib/index.js +2 -2
- package/lib/index.ts +2 -1
- package/package.json +14 -14
- package/src/LlamaContext.cpp +59 -1
- package/src/LlamaContext.h +3 -0
package/lib/binding.ts
CHANGED
@@ -375,7 +375,7 @@ export type ToolCall = {
 }
 
 export interface LlamaContext {
-  new (options: LlamaModelOptions): LlamaContext
+  new (options: LlamaModelOptions, onProgress?: (progress: number) => void): LlamaContext
   getSystemInfo(): string
   getModelInfo(): ModelInfo
   getFormattedChat(

package/lib/index.js
CHANGED
@@ -193,12 +193,12 @@ class LlamaContextWrapper {
         return this.ctx.decodeAudioTokens(tokens);
     }
 }
-const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
+const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    const nativeCtx = new mods[variant].LlamaContext(options);
+    const nativeCtx = new mods[variant].LlamaContext(options, onProgress);
     return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;

package/lib/index.ts
CHANGED
@@ -299,12 +299,13 @@ class LlamaContextWrapper {
 
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
+  onProgress?: (progress: number) => void,
 ): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
 
-  const nativeCtx = new mods[variant].LlamaContext(options)
+  const nativeCtx = new mods[variant].LlamaContext(options, onProgress)
   return new LlamaContextWrapper(nativeCtx)
 }
 
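Taken together, the binding, compiled, and TypeScript changes above add an optional onProgress callback that loadModel forwards to the native LlamaContext constructor. A minimal usage sketch, with a placeholder model path that is not part of this diff:

import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // The second argument is the optional progress callback introduced in this release.
  const ctx = await loadModel(
    { model: './model.gguf' }, // placeholder path, for illustration only
    (progress) => {
      console.log(`loading model: ${progress}%`)
    },
  )
  return ctx
}

main()
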
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.0-rc.4",
+  "version": "1.3.0-rc.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-arm64": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-darwin-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.4"
+    "@fugood/node-llama-linux-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-arm64": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-darwin-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.5"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",

package/src/LlamaContext.cpp
CHANGED
@@ -221,7 +221,7 @@ static int32_t pooling_type_from_str(const std::string &s) {
 }
 
 // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
-// use_mlock, use_mmap }): LlamaContext throws error
+// use_mlock, use_mmap }, onProgress?: (progress: number) => void): LlamaContext throws error
 LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     : Napi::ObjectWrap<LlamaContext>(info) {
   Napi::Env env = info.Env();
@@ -230,6 +230,16 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   auto options = info[0].As<Napi::Object>();
 
+  // Check if progress callback is provided
+  bool has_progress_callback = info.Length() >= 2 && info[1].IsFunction();
+  if (has_progress_callback) {
+    _progress_tsfn = Napi::ThreadSafeFunction::New(
+        env, info[1].As<Napi::Function>(), "Model Loading Progress", 0, 1,
+        [](Napi::Env) {
+          // Finalizer callback
+        });
+  }
+
   common_params params;
   params.model.path = get_option<std::string>(options, "model", "");
   if (params.model.path.empty()) {
@@ -323,12 +333,55 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   // Use rn-llama context instead of direct session
   _rn_ctx = new llama_rn_context();
+  _rn_ctx->is_load_interrupted = false;
+  _rn_ctx->loading_progress = 0;
+
+  // Set up progress callback if provided
+  if (has_progress_callback) {
+    params.load_progress_callback = [](float progress, void *user_data) {
+      LlamaContext *self = static_cast<LlamaContext *>(user_data);
+      unsigned int percentage = static_cast<unsigned int>(100 * progress);
+
+      // Only call callback if progress increased
+      if (percentage > self->_rn_ctx->loading_progress) {
+        self->_rn_ctx->loading_progress = percentage;
+
+        // Create a heap-allocated copy of the percentage
+        auto *data = new unsigned int(percentage);
+
+        // Queue callback to be executed on the JavaScript thread
+        auto status = self->_progress_tsfn.NonBlockingCall(
+            data, [](Napi::Env env, Napi::Function jsCallback, unsigned int *data) {
+              jsCallback.Call({Napi::Number::New(env, *data)});
+              delete data;
+            });
+
+        // If the call failed, clean up the data
+        if (status != napi_ok) {
+          delete data;
+        }
+      }
+
+      // Return true to continue loading, false to interrupt
+      return !self->_rn_ctx->is_load_interrupted;
+    };
+    params.load_progress_callback_user_data = this;
+  }
+
   if (!_rn_ctx->loadModel(params)) {
+    if (has_progress_callback) {
+      _progress_tsfn.Release();
+    }
     delete _rn_ctx;
     _rn_ctx = nullptr;
     Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
   }
 
+  // Release progress callback after model is loaded
+  if (has_progress_callback) {
+    _progress_tsfn.Release();
+  }
+
   // Handle LoRA adapters through rn-llama
   if (!lora.empty()) {
     _rn_ctx->applyLoraAdapters(lora);
@@ -343,6 +396,11 @@ LlamaContext::~LlamaContext() {
     _context_valid->store(false);
   }
 
+  // Interrupt model loading if in progress
+  if (_rn_ctx) {
+    _rn_ctx->is_load_interrupted = true;
+  }
+
   // The DisposeWorker is responsible for cleanup of _rn_ctx
   // If _rn_ctx is still not null here, it means disposal was not properly initiated
   if (_rn_ctx) {

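On the native side, the constructor converts the float progress into whole percentages, queues the JavaScript callback through a ThreadSafeFunction only when the value increases, and lets the destructor interrupt an in-flight load via is_load_interrupted. A sketch of an onProgress handler written against those semantics; the progress-bar rendering is illustrative and not part of the package:

// Receives increasing integer percentages, delivered asynchronously on the JS event loop.
const onProgress = (progress: number) => {
  const width = 20
  const filled = Math.min(width, Math.round((progress / 100) * width))
  process.stdout.write(`\r[${'#'.repeat(filled)}${'.'.repeat(width - filled)}] ${progress}%`)
  if (progress >= 100) process.stdout.write('\n')
}
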
package/src/LlamaContext.h
CHANGED
@@ -78,4 +78,7 @@ private:
   // Validity flag for async callbacks to prevent use-after-free
   // Shared pointer ensures callbacks can safely check if context is still alive
   std::shared_ptr<std::atomic<bool>> _context_valid;
+
+  // Progress callback support for model loading
+  Napi::ThreadSafeFunction _progress_tsfn;
 };