@fugood/llama.node 1.3.0-rc.4 → 1.3.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +1 -1
- package/lib/index.js +2 -2
- package/lib/index.ts +2 -1
- package/package.json +14 -14
- package/src/LlamaContext.cpp +59 -1
- package/src/LlamaContext.h +3 -0
package/lib/binding.ts
CHANGED
@@ -375,7 +375,7 @@ export type ToolCall = {
 }
 
 export interface LlamaContext {
-  new (options: LlamaModelOptions): LlamaContext
+  new (options: LlamaModelOptions, onProgress?: (progress: number) => void): LlamaContext
   getSystemInfo(): string
   getModelInfo(): ModelInfo
   getFormattedChat(

package/lib/index.js
CHANGED
@@ -193,12 +193,12 @@ class LlamaContextWrapper {
         return this.ctx.decodeAudioTokens(tokens);
     }
 }
-const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
+const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    const nativeCtx = new mods[variant].LlamaContext(options);
+    const nativeCtx = new mods[variant].LlamaContext(options, onProgress);
     return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;

package/lib/index.ts
CHANGED
@@ -299,12 +299,13 @@ class LlamaContextWrapper {
 
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
+  onProgress?: (progress: number) => void,
 ): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
 
-  const nativeCtx = new mods[variant].LlamaContext(options)
+  const nativeCtx = new mods[variant].LlamaContext(options, onProgress)
   return new LlamaContextWrapper(nativeCtx)
 }
 
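Taken together, the binding, compiled, and TypeScript changes above add an optional onProgress callback that loadModel forwards to the native LlamaContext constructor. A minimal usage sketch, with a placeholder model path that is not part of this diff:

import { loadModel } from '@fugood/llama.node'

const main = async () => {
  // The second argument is the optional progress callback introduced in this release.
  const ctx = await loadModel(
    { model: './model.gguf' }, // placeholder path, for illustration only
    (progress) => {
      console.log(`loading model: ${progress}%`)
    },
  )
  return ctx
}

main()
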
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.0-rc.4",
+  "version": "1.3.0-rc.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-arm64": "1.3.0-rc.4",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.4",
-    "@fugood/node-llama-darwin-x64": "1.3.0-rc.4",
-    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.4"
+    "@fugood/node-llama-linux-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-arm64": "1.3.0-rc.5",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.0-rc.5",
+    "@fugood/node-llama-darwin-x64": "1.3.0-rc.5",
+    "@fugood/node-llama-darwin-arm64": "1.3.0-rc.5"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",

package/src/LlamaContext.cpp
CHANGED
@@ -221,7 +221,7 @@ static int32_t pooling_type_from_str(const std::string &s) {
 }
 
 // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
-// use_mlock, use_mmap }): LlamaContext throws error
+// use_mlock, use_mmap }, onProgress?: (progress: number) => void): LlamaContext throws error
 LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     : Napi::ObjectWrap<LlamaContext>(info) {
   Napi::Env env = info.Env();
@@ -230,6 +230,16 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   auto options = info[0].As<Napi::Object>();
 
+  // Check if progress callback is provided
+  bool has_progress_callback = info.Length() >= 2 && info[1].IsFunction();
+  if (has_progress_callback) {
+    _progress_tsfn = Napi::ThreadSafeFunction::New(
+        env, info[1].As<Napi::Function>(), "Model Loading Progress", 0, 1,
+        [](Napi::Env) {
+          // Finalizer callback
+        });
+  }
+
   common_params params;
   params.model.path = get_option<std::string>(options, "model", "");
   if (params.model.path.empty()) {
@@ -323,12 +333,55 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   // Use rn-llama context instead of direct session
   _rn_ctx = new llama_rn_context();
+  _rn_ctx->is_load_interrupted = false;
+  _rn_ctx->loading_progress = 0;
+
+  // Set up progress callback if provided
+  if (has_progress_callback) {
+    params.load_progress_callback = [](float progress, void *user_data) {
+      LlamaContext *self = static_cast<LlamaContext *>(user_data);
+      unsigned int percentage = static_cast<unsigned int>(100 * progress);
+
+      // Only call callback if progress increased
+      if (percentage > self->_rn_ctx->loading_progress) {
+        self->_rn_ctx->loading_progress = percentage;
+
+        // Create a heap-allocated copy of the percentage
+        auto *data = new unsigned int(percentage);
+
+        // Queue callback to be executed on the JavaScript thread
+        auto status = self->_progress_tsfn.NonBlockingCall(
+            data, [](Napi::Env env, Napi::Function jsCallback, unsigned int *data) {
+              jsCallback.Call({Napi::Number::New(env, *data)});
+              delete data;
+            });
+
+        // If the call failed, clean up the data
+        if (status != napi_ok) {
+          delete data;
+        }
+      }
+
+      // Return true to continue loading, false to interrupt
+      return !self->_rn_ctx->is_load_interrupted;
+    };
+    params.load_progress_callback_user_data = this;
+  }
+
   if (!_rn_ctx->loadModel(params)) {
+    if (has_progress_callback) {
+      _progress_tsfn.Release();
+    }
     delete _rn_ctx;
     _rn_ctx = nullptr;
     Napi::TypeError::New(env, "Failed to load model").ThrowAsJavaScriptException();
   }
 
+  // Release progress callback after model is loaded
+  if (has_progress_callback) {
+    _progress_tsfn.Release();
+  }
+
   // Handle LoRA adapters through rn-llama
   if (!lora.empty()) {
     _rn_ctx->applyLoraAdapters(lora);
@@ -343,6 +396,11 @@ LlamaContext::~LlamaContext() {
     _context_valid->store(false);
   }
 
+  // Interrupt model loading if in progress
+  if (_rn_ctx) {
+    _rn_ctx->is_load_interrupted = true;
+  }
+
   // The DisposeWorker is responsible for cleanup of _rn_ctx
   // If _rn_ctx is still not null here, it means disposal was not properly initiated
   if (_rn_ctx) {

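On the native side, the constructor converts the float progress into whole percentages, queues the JavaScript callback through a ThreadSafeFunction only when the value increases, and lets the destructor interrupt an in-flight load via is_load_interrupted. A sketch of an onProgress handler written against those semantics; the progress-bar rendering is illustrative and not part of the package:

// Receives increasing integer percentages, delivered asynchronously on the JS event loop.
const onProgress = (progress: number) => {
  const width = 20
  const filled = Math.min(width, Math.round((progress / 100) * width))
  process.stdout.write(`\r[${'#'.repeat(filled)}${'.'.repeat(width - filled)}] ${progress}%`)
  if (progress >= 100) process.stdout.write('\n')
}
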
package/src/LlamaContext.h
CHANGED
@@ -78,4 +78,7 @@ private:
   // Validity flag for async callbacks to prevent use-after-free
   // Shared pointer ensures callbacks can safely check if context is still alive
   std::shared_ptr<std::atomic<bool>> _context_valid;
+
+  // Progress callback support for model loading
+  Napi::ThreadSafeFunction _progress_tsfn;
 };