npm - @novastera-oss/llamarn - Versions diffs - 0.6.3 → 0.6.7 - Mend

@novastera-oss/llamarn 0.6.3 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/android/CMakeLists.txt CHANGED Viewed

@@ -235,14 +235,36 @@ add_custom_command(TARGET RNLlamaCpp POST_BUILD
     COMMENT "Copying dependency libraries to build output directory"
 )
-# libggml-cpu.so is REQUIRED when GGML_BACKEND_DL=ON (CPU backend is dynamically loaded)
-# Copy it so it gets packaged into the APK
-add_custom_command(TARGET RNLlamaCpp POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different
-        ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
-        $<TARGET_FILE_DIR:RNLlamaCpp>/libggml-cpu.so
-    COMMENT "Copying libggml-cpu.so (REQUIRED for CPU backend when GGML_BACKEND_DL=ON)"
-)
+# CPU backend libraries: With GGML_CPU_ALL_VARIANTS, multiple variant libraries are built
+# (e.g., libggml-cpu-android_armv8.0_1.so, libggml-cpu-android_armv8.2_1.so, etc.)
+# The runtime loader will select the best variant based on CPU capabilities
+# If variants don't exist, fall back to single libggml-cpu.so (backward compatibility)
+file(GLOB CPU_VARIANT_LIBS "${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu-*.so")
+if(CPU_VARIANT_LIBS)
+    # Copy all CPU variant libraries (GGML_CPU_ALL_VARIANTS enabled)
+    foreach(CPU_VARIANT_LIB ${CPU_VARIANT_LIBS})
+        get_filename_component(CPU_VARIANT_LIB_NAME ${CPU_VARIANT_LIB} NAME)
+        add_custom_command(TARGET RNLlamaCpp POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+                ${CPU_VARIANT_LIB}
+                $<TARGET_FILE_DIR:RNLlamaCpp>/${CPU_VARIANT_LIB_NAME}
+            COMMENT "Copying CPU variant library ${CPU_VARIANT_LIB_NAME} to build output directory"
+        )
+    endforeach()
+    list(LENGTH CPU_VARIANT_LIBS CPU_VARIANT_COUNT)
+    message(STATUS "Found ${CPU_VARIANT_COUNT} CPU variant libraries (GGML_CPU_ALL_VARIANTS enabled)")
+elseif(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so)
+    # Fallback: Copy single libggml-cpu.so (backward compatibility)
+    add_custom_command(TARGET RNLlamaCpp POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
+            $<TARGET_FILE_DIR:RNLlamaCpp>/libggml-cpu.so
+        COMMENT "Copying libggml-cpu.so (single CPU backend - backward compatibility)"
+    )
+    message(STATUS "Found single libggml-cpu.so (backward compatibility mode)")
+else()
+    message(WARNING "No CPU backend libraries found in ${JNI_LIBS_DIR}/${ANDROID_ABI}/ - CPU backend will not work!")
+endif()
 # Also copy any optional GPU libraries if they exist
 if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-vulkan.so)

package/android/src/main/jniLibs/arm64-v8a/libggml-base.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml-hexagon.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml-htp-v73.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml-htp-v75.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml-htp-v79.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml-htp-v81.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libggml.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/arm64-v8a/libllama.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/armeabi-v7a/libggml.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/armeabi-v7a/libllama.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86/libggml-base.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86/libggml-cpu.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86/libggml.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86/libllama.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86_64/libggml-base.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86_64/libggml-cpu.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86_64/libggml.so CHANGED Viewed

Binary file

package/android/src/main/jniLibs/x86_64/libllama.so CHANGED Viewed

Binary file

package/cpp/PureCppImpl.cpp CHANGED Viewed

@@ -45,6 +45,129 @@
 namespace facebook::react {
+// Helper function to load CPU variant libraries on Android
+// On Android, ggml_backend_load_best() uses filesystem iteration which doesn't work
+// with APK-packaged libraries. This function manually loads CPU variant libraries
+// using dlopen() with just the library name - Android's linker finds them in the APK.
+// We score each variant and only register the best compatible one (score > 0).
+static void load_android_cpu_backends() {
+#ifdef __ANDROID__
+  // Skip if CPU backend is already registered
+  if (ggml_backend_reg_by_name("CPU")) {
+    return;
+  }
+  // Try loading all CPU variant libraries (from most advanced to baseline)
+  // Score each one and register only the best compatible variant
+  static const char* cpu_variants[] = {
+    "libggml-cpu-android_armv8.6_1.so",  // DOTPROD + FP16 + MATMUL_INT8
+    "libggml-cpu-android_armv8.2_2.so",  // DOTPROD + FP16
+    "libggml-cpu-android_armv8.2_1.so",  // DOTPROD
+    "libggml-cpu-android_armv8.0_1.so",  // Baseline (emulator compatible)
+    nullptr
+  };
+  typedef ggml_backend_reg_t (*backend_init_fn_t)();
+  typedef int (*backend_score_t)();
+  int best_score = 0;
+  void* best_handle = nullptr;
+  backend_init_fn_t best_init = nullptr;
+  // Score all variants and find the best one
+  for (int i = 0; cpu_variants[i] != nullptr; i++) {
+    void* cpu_handle = dlopen(cpu_variants[i], RTLD_LAZY | RTLD_LOCAL);
+    if (cpu_handle) {
+      backend_score_t score_fn = (backend_score_t)dlsym(cpu_handle, "ggml_backend_score");
+      if (score_fn) {
+        int score = score_fn();
+        if (score > best_score) {
+          // Close previous best handle if we had one
+          if (best_handle) {
+            dlclose(best_handle);
+          }
+          best_score = score;
+          best_handle = cpu_handle;
+          best_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
+        } else {
+          // This variant is not better, close it
+          dlclose(cpu_handle);
+        }
+      } else {
+        // No score function, close it
+        dlclose(cpu_handle);
+      }
+    }
+  }
+  // Register the best variant if we found one
+  if (best_handle && best_init && best_score > 0) {
+    ggml_backend_reg_t cpu_backend = best_init();
+    if (cpu_backend) {
+      ggml_backend_register(cpu_backend);
+      // Keep the handle open - it will be cleaned up when the backend is unloaded
+    } else {
+      dlclose(best_handle);
+    }
+  }
+#endif
+}
+// Helper function to load all Android backends manually
+// On Android, ggml_backend_load_best() uses filesystem iteration which doesn't work
+// with APK-packaged libraries. This function manually loads all backend libraries
+// using dlopen() with just the library name - Android's linker finds them in the APK.
+static void load_android_backends() {
+#ifdef __ANDROID__
+  typedef ggml_backend_reg_t (*backend_init_fn_t)();
+  // Load Hexagon backend first (Snapdragon DSP) - more performant than Vulkan on Snapdragon devices
+  if (!ggml_backend_reg_by_name("HTP")) {
+    void* hexagon_handle = dlopen("libggml-hexagon.so", RTLD_LAZY | RTLD_LOCAL);
+    if (hexagon_handle) {
+      backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(hexagon_handle, "ggml_backend_init");
+      if (backend_init) {
+        ggml_backend_reg_t hexagon_backend = backend_init();
+        if (hexagon_backend) {
+          ggml_backend_register(hexagon_backend);
+        }
+      }
+    }
+  }
+  // Load OpenCL backend
+  if (!ggml_backend_reg_by_name("OpenCL")) {
+    void* opencl_handle = dlopen("libggml-opencl.so", RTLD_LAZY | RTLD_LOCAL);
+    if (opencl_handle) {
+      backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(opencl_handle, "ggml_backend_init");
+      if (backend_init) {
+        ggml_backend_reg_t opencl_backend = backend_init();
+        if (opencl_backend) {
+          ggml_backend_register(opencl_backend);
+        }
+      }
+    }
+  }
+  // Load Vulkan backend (disabled by default on Android due to emulator crashes, but try anyway)
+  if (!ggml_backend_reg_by_name("Vulkan")) {
+    void* vulkan_handle = dlopen("libggml-vulkan.so", RTLD_LAZY | RTLD_LOCAL);
+    if (vulkan_handle) {
+      backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(vulkan_handle, "ggml_backend_init");
+      if (backend_init) {
+        ggml_backend_reg_t vulkan_backend = backend_init();
+        if (vulkan_backend) {
+          ggml_backend_register(vulkan_backend);
+        }
+      }
+    }
+  }
+  // Load CPU variant libraries (scoring system selects best compatible one)
+  load_android_cpu_backends();
+#endif
+}
 // Factory method implementation
 std::shared_ptr<TurboModule> PureCppImpl::create(std::shared_ptr<CallInvoker> jsInvoker) {
   return std::make_shared<PureCppImpl>(std::move(jsInvoker));
@@ -97,48 +220,20 @@ jsi::Value PureCppImpl::loadLlamaModelInfo(jsi::Runtime &runtime, jsi::String mo
           // Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
           // With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
-          // CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so, libggml-hexagon.so
-          // On Android, dlopen() can load libraries by name even from inside APKs
+          // When GGML_CPU_ALL_VARIANTS is enabled, CPU backend variants are:
+          //   libggml-cpu-android_armv8.0_1.so (baseline - emulator compatible)
+          //   libggml-cpu-android_armv8.2_1.so (DOTPROD)
+          //   libggml-cpu-android_armv8.2_2.so (DOTPROD + FP16_VECTOR_ARITHMETIC)
+          //   libggml-cpu-android_armv8.6_1.so (DOTPROD + FP16_VECTOR_ARITHMETIC + MATMUL_INT8)
+          // GPU backends are in libggml-opencl.so, libggml-vulkan.so, libggml-hexagon.so
+          // On Android, manually load all backends since filesystem iteration doesn't work
+          // with APK-packaged libraries. ggml_backend_load_all() will skip already loaded backends.
           #ifdef __ANDROID__
-          // Load CPU backend directly - Android's linker will find it in the same directory
-          // Check if already registered to avoid duplicate registration
-          if (!ggml_backend_reg_by_name("CPU")) {
-            void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
-            if (cpu_handle) {
-              typedef ggml_backend_reg_t (*backend_init_fn_t)();
-              backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
-              if (backend_init) {
-                ggml_backend_reg_t cpu_backend = backend_init();
-                if (cpu_backend) {
-                  ggml_backend_register(cpu_backend);
-                }
-              }
-            }
-          }
-          // Load Hexagon backend (Snapdragon DSP) - more performant than Vulkan on Snapdragon devices
-          // Load before other GPU backends to give it priority
-          // Check if already registered to avoid duplicate registration
-          if (!ggml_backend_reg_by_name("HTP")) {
-            void* hexagon_handle = dlopen("libggml-hexagon.so", RTLD_LAZY | RTLD_LOCAL);
-            if (hexagon_handle) {
-              typedef ggml_backend_reg_t (*backend_init_fn_t)();
-              backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(hexagon_handle, "ggml_backend_init");
-              if (backend_init) {
-                ggml_backend_reg_t hexagon_backend = backend_init();
-                if (hexagon_backend) {
-                  ggml_backend_register(hexagon_backend);
-                }
-              }
-            }
-          }
+          load_android_backends();
+          #endif
-          // Load other GPU backends (OpenCL, Vulkan) if present - they will be found by name
-          // ggml_backend_load_all() will skip backends that are already loaded
-          ggml_backend_load_all();
-          #else
+          // Load any remaining backends (ggml_backend_load_all will skip already loaded ones)
           ggml_backend_load_all();
-          #endif
           // Verify at least CPU backend was loaded
           if (ggml_backend_reg_count() == 0) {
@@ -389,26 +484,20 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
           // Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
           // With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
-          // CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
+          // When GGML_CPU_ALL_VARIANTS is enabled, CPU backend variants are:
+          //   libggml-cpu-android_armv8.0_1.so (baseline - emulator compatible)
+          //   libggml-cpu-android_armv8.2_1.so (DOTPROD)
+          //   libggml-cpu-android_armv8.2_2.so (DOTPROD + FP16_VECTOR_ARITHMETIC)
+          //   libggml-cpu-android_armv8.6_1.so (DOTPROD + FP16_VECTOR_ARITHMETIC + MATMUL_INT8)
+          // GPU backends are in libggml-opencl.so, libggml-vulkan.so, libggml-hexagon.so
+          // On Android, manually load all backends since filesystem iteration doesn't work
+          // with APK-packaged libraries. ggml_backend_load_all() will skip already loaded backends.
           #ifdef __ANDROID__
-          // Load CPU backend directly - Android's linker will find it in the same directory
-          void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
-          if (cpu_handle) {
-            typedef ggml_backend_reg_t (*backend_init_fn_t)();
-            backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
-            if (backend_init) {
-              ggml_backend_reg_t cpu_backend = backend_init();
-              if (cpu_backend) {
-                ggml_backend_register(cpu_backend);
-              }
-            }
-          }
+          load_android_backends();
+          #endif
-          // Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
-          ggml_backend_load_all();
-          #else
+          // Load other backends (OpenCL, Vulkan, etc.) - ggml_backend_load_all will skip already loaded backends
           ggml_backend_load_all();
-          #endif
           // Verify at least CPU backend was loaded
           if (ggml_backend_reg_count() == 0) {
@@ -549,12 +638,30 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
           // Now assign to the context
           selfPtr->rn_ctx_->params = rn_params;
-          selfPtr->rn_ctx_->chat_templates = common_chat_templates_init(selfPtr->rn_ctx_->model, params.chat_template);
+          // Initialize chat templates (matches server.cpp approach)
+          // common_chat_templates_init already has try-catch internally for template parsing errors,
+          // but exceptions can escape from chat_template constructor during capability detection.
+          // We catch all exceptions (not just std::exception) to handle any edge cases.
           try {
+            selfPtr->rn_ctx_->chat_templates = common_chat_templates_init(selfPtr->rn_ctx_->model, params.chat_template);
+            // Validate template by trying to format an example (catches runtime errors like null lstrip)
+            // This is optional - if it fails, we still use the template anyway (it might work in practice)
+            try {
               common_chat_format_example(selfPtr->rn_ctx_->chat_templates.get(), params.use_jinja, params.default_template_kwargs);
-          } catch (const std::exception & e) {
-              // Fallback to chatml if the original template parsing fails
+            } catch (...) {
+              // Template validation failed, but continue anyway - the template might work in practice
+              // This preserves backward compatibility for models that were working before
+            }
+          } catch (...) {
+            // Template initialization failed - fallback to chatml (matches server.cpp behavior)
+            // Catch all exceptions (not just std::exception) to handle any edge cases
+            try {
               selfPtr->rn_ctx_->chat_templates = common_chat_templates_init(selfPtr->rn_ctx_->model, "chatml");
+            } catch (...) {
+              // Even chatml failed - this should never happen, but handle it gracefully
+              // The model will still load, but chat templates won't work
+            }
           }
           // Schedule success callback on JS thread to create JSI objects

package/cpp/rn-completion.cpp CHANGED Viewed

@@ -379,19 +379,46 @@ CompletionResult run_chat_completion(
             chat_msgs = common_chat_msgs_parse_oaicompat(data["messages"]);
         }
-        // Apply template
+        // Apply template (matches server.cpp oaicompat_chat_params_parse approach)
         common_chat_templates_inputs template_inputs;
         template_inputs.messages = chat_msgs;
         template_inputs.add_generation_prompt = true;
         template_inputs.use_jinja = rn_ctx->params.use_jinja;
-        // Note: extract_reasoning field doesn't exist in current llama.cpp version
-        // template_inputs.extract_reasoning = true; // Default to true to extract reasoning content if available
+        template_inputs.reasoning_format = rn_ctx->params.reasoning_format;
+        // Set chat_template_kwargs from params (matches server.cpp line 712)
+        template_inputs.chat_template_kwargs = rn_ctx->params.default_template_kwargs;
+        // Merge any chat_template_kwargs from request body (if present in future)
+        // For now, we use the defaults from params
+        // Parse enable_thinking from chat_template_kwargs (matches server.cpp lines 718-725)
+        auto enable_thinking_kwarg = template_inputs.chat_template_kwargs.find("enable_thinking");
+        if (enable_thinking_kwarg != template_inputs.chat_template_kwargs.end()) {
+            const std::string& value = enable_thinking_kwarg->second;
+            if (value == "true") {
+                template_inputs.enable_thinking = true;
+            } else if (value == "false") {
+                template_inputs.enable_thinking = false;
+            }
+            // else: use default (true)
+        }
         // Add grammar if present in options
         if (!options.grammar.empty()) {
             template_inputs.grammar = options.grammar;
         }
+        // Parse json_schema if present (matches server.cpp line 696)
+        if (data.contains("json_schema") && !data["json_schema"].is_null()) {
+            template_inputs.json_schema = data["json_schema"].dump();
+        }
+        // Check for conflicting grammar and json_schema (matches server.cpp lines 570-572)
+        if (!template_inputs.json_schema.empty() && !template_inputs.grammar.empty()) {
+            throw std::runtime_error("Cannot use both json_schema and grammar");
+        }
         // Parse tools if present
         if (data.contains("tools") && !data["tools"].empty()) {
             template_inputs.tools = common_chat_tools_parse_oaicompat(data["tools"]);
@@ -407,8 +434,20 @@ CompletionResult run_chat_completion(
                 ? data["tool_choice"].get<std::string>()
                 : data["tool_choice"].dump());
         }
+        // Parse parallel_tool_calls if present (matches server.cpp line 699)
+        if (data.contains("parallel_tool_calls")) {
+            template_inputs.parallel_tool_calls = data["parallel_tool_calls"].get<bool>();
+        }
+        // Check for conflicting tools and grammar (matches server.cpp lines 703-706)
+        if (!template_inputs.tools.empty() && template_inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
+            if (!template_inputs.grammar.empty()) {
+                throw std::runtime_error("Cannot use custom grammar constraints with tools.");
+            }
+        }
-        // Apply template
+        // Apply template (matches server.cpp approach - no try-catch, exceptions propagate to outer handler)
         const auto& chat_params = common_chat_templates_apply(rn_ctx->chat_templates.get(), template_inputs);
         CompletionOptions cmpl_options = options;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@novastera-oss/llamarn",
-  "version": "0.6.3",
+  "version": "0.6.7",
   "description": "An attempt at a pure cpp turbo module library",
   "source": "./src/index.tsx",
   "main": "./lib/module/index.js",