@fugood/llama.node 1.3.0-rc.4 → 1.3.0-rc.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/binding.ts CHANGED
@@ -375,7 +375,7 @@ export type ToolCall = {
 }
 
 export interface LlamaContext {
-  new (options: LlamaModelOptions): LlamaContext
+  new (options: LlamaModelOptions, onProgress?: (progress: number) => void): LlamaContext
   getSystemInfo(): string
   getModelInfo(): ModelInfo
   getFormattedChat(
package/lib/index.js CHANGED
@@ -193,12 +193,12 @@ class LlamaContextWrapper {
         return this.ctx.decodeAudioTokens(tokens);
     }
 }
-const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
+const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    const nativeCtx = new mods[variant].LlamaContext(options);
+    const nativeCtx = new mods[variant].LlamaContext(options, onProgress);
    return new LlamaContextWrapper(nativeCtx);
});
exports.loadModel = loadModel;
package/lib/index.ts CHANGED
@@ -299,12 +299,13 @@ class LlamaContextWrapper {
 
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
+  onProgress?: (progress: number) => void,
 ): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
 
-  const nativeCtx = new mods[variant].LlamaContext(options)
+  const nativeCtx = new mods[variant].LlamaContext(options, onProgress)
   return new LlamaContextWrapper(nativeCtx)
 }
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.0-rc.4",
+  "version": "1.3.0-rc.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-arm64": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-darwin-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.4"
+    "@fugood/node-llama-linux-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-arm64": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-darwin-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.5"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -221,7 +221,7 @@ static int32_t pooling_type_from_str(const std::string &s) {
 }
 
 // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
-// use_mlock, use_mmap }): LlamaContext throws error
+// use_mlock, use_mmap }, onProgress?: (progress: number) => void): LlamaContext throws error
 LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     : Napi::ObjectWrap<LlamaContext>(info) {
   Napi::Env env = info.Env();
@@ -230,6 +230,16 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   auto options = info[0].As<Napi::Object>();
 
+  // Check if progress callback is provided
+  bool has_progress_callback = info.Length() >= 2 && info[1].IsFunction();
+  if (has_progress_callback) {
+    _progress_tsfn = Napi::ThreadSafeFunction::New(
+        env, info[1].As<Napi::Function>(), "Model Loading Progress", 0, 1,
+        [](Napi::Env) {
+          // Finalizer callback
+        });
+  }
+
   common_params params;
   params.model.path = get_option<std::string>(options, "model", "");
   if (params.model.path.empty()) {
@@ -323,12 +333,55 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   // Use rn-llama context instead of direct session
   _rn_ctx = new llama_rn_context();
+  _rn_ctx->is_load_interrupted = false;
+  _rn_ctx->loading_progress = 0;
+
+  // Set up progress callback if provided
+  if (has_progress_callback) {
+    params.load_progress_callback = [](float progress, void *user_data) {
+      LlamaContext *self = static_cast<LlamaContext *>(user_data);
+      unsigned int percentage = static_cast<unsigned int>(100 * progress);
+
+      // Only call callback if progress increased
+      if (percentage > self->_rn_ctx->loading_progress) {
+        self->_rn_ctx->loading_progress = percentage;
+
+        // Create a heap-allocated copy of the percentage
+        auto *data = new unsigned int(percentage);
+
+        // Queue callback to be executed on the JavaScript thread
+        auto status = self->_progress_tsfn.NonBlockingCall(
+            data, [](Napi::Env env, Napi::Function jsCallback, unsigned int *data) {
+              jsCallback.Call({Napi::Number::New(env, *data)});
+              delete data;
+            });
+
+        // If the call failed, clean up the data
+        if (status != napi_ok) {
+          delete data;
+        }
+      }
+
+      // Return true to continue loading, false to interrupt
+      return !self->_rn_ctx->is_load_interrupted;
+    };
+    params.load_progress_callback_user_data = this;
+  }
+
   if (!_rn_ctx->loadModel(params)) {
+    if (has_progress_callback) {
+      _progress_tsfn.Release();
+    }
     delete _rn_ctx;
     _rn_ctx = nullptr;
     Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
   }
 
+  // Release progress callback after model is loaded
+  if (has_progress_callback) {
+    _progress_tsfn.Release();
+  }
+
   // Handle LoRA adapters through rn-llama
   if (!lora.empty()) {
     _rn_ctx->applyLoraAdapters(lora);
@@ -343,6 +396,11 @@ LlamaContext::~LlamaContext() {
    _context_valid->store(false);
  }

+  // Interrupt model loading if in progress
+  if (_rn_ctx) {
+    _rn_ctx->is_load_interrupted = true;
+  }
+
   // The DisposeWorker is responsible for cleanup of _rn_ctx
   // If _rn_ctx is still not null here, it means disposal was not properly initiated
   if (_rn_ctx) {
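package/src/LlamaContext.h CHANGED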
@@ -78,4 +78,7 @@ private:
   // Validity flag for async callbacks to prevent use-after-free
   // Shared pointer ensures callbacks can safely check if context is still alive
   std::shared_ptr<std::atomic<bool>> _context_valid;
+
+  // Progress callback support for model loading
+  Napi::ThreadSafeFunction _progress_tsfn;
 };