@novastera-oss/llamarn 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/android/CMakeLists.txt +47 -21
  2. package/android/src/main/AndroidManifest.xml +17 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  4. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  5. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  7. package/cpp/PureCppImpl.cpp +80 -6
  8. package/cpp/build-info.cpp +2 -2
  9. package/cpp/llama.cpp/convert_hf_to_gguf.py +15 -0
  10. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +1 -2
  11. package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +10 -0
  12. package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +99 -364
  13. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +7 -0
  14. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +14 -13
  15. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +15 -3
  16. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +36 -25
  17. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +12 -3
  18. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +80 -7
  19. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +6 -0
  20. package/cpp/llama.cpp/gguf-py/gguf/constants.py +19 -0
  21. package/cpp/llama.cpp/src/CMakeLists.txt +1 -0
  22. package/cpp/llama.cpp/src/llama-arch.cpp +22 -0
  23. package/cpp/llama.cpp/src/llama-arch.h +1 -0
  24. package/cpp/llama.cpp/src/llama-model.cpp +21 -1
  25. package/cpp/llama.cpp/src/models/models.h +4 -0
  26. package/cpp/llama.cpp/src/models/rnd1.cpp +126 -0
  27. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  28. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6403 -6395
  29. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  30. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  31. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6366 -6358
  32. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4815 -4809
  33. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  34. package/package.json +1 -1
  35. package/android/src/main/AndroidManifestNew.xml +0 -2
@@ -42,10 +42,17 @@ set_target_properties(ggml PROPERTIES
42
42
  IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml.so
43
43
  IMPORTED_NO_SONAME TRUE)
44
44
 
45
- add_library(ggml-cpu SHARED IMPORTED)
46
- set_target_properties(ggml-cpu PROPERTIES
47
- IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
48
- IMPORTED_NO_SONAME TRUE)
45
+ # ggml-cpu is optional - CPU backend is statically linked into libggml.so with GGML_USE_CPU=1
46
+ # Only import it if it exists (for backward compatibility)
47
+ if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so)
48
+ add_library(ggml-cpu SHARED IMPORTED)
49
+ set_target_properties(ggml-cpu PROPERTIES
50
+ IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
51
+ IMPORTED_NO_SONAME TRUE)
52
+ message(STATUS "libggml-cpu.so found (optional - CPU is statically linked)")
53
+ else()
54
+ message(STATUS "libggml-cpu.so not found (expected - CPU is statically linked into libggml.so)")
55
+ endif()
49
56
 
50
57
  # Create a minimal common library with only essential files that don't require missing GGML symbols
51
58
  add_library(
@@ -91,36 +98,49 @@ else()
91
98
  target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
92
99
  endif()
93
100
 
94
- # Check if Vulkan backend library is available
101
+ # Check if GPU backend libraries are present in jniLibs (compile-time check only)
102
+ # NOTE: This is NOT a runtime availability check - it only determines if we should
103
+ # compile OpenCL/Vulkan support into the code. Runtime availability is checked
104
+ # when ggml_backend_load_all() tries to load the backend libraries.
105
+ # Even if libggml-opencl.so exists, it will only work if the device has
106
+ # libOpenCL.so (system library) available at runtime.
107
+
108
+ # Check if Vulkan backend library is present (for compile-time feature enablement)
95
109
  set(VULKAN_BACKEND_AVAILABLE FALSE)
96
110
  if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-vulkan.so)
97
111
  set(VULKAN_BACKEND_AVAILABLE TRUE)
98
- message(STATUS "Vulkan backend library found for ${ANDROID_ABI}")
112
+ message(STATUS "Vulkan backend library found in jniLibs for ${ANDROID_ABI} (compile-time)")
113
+ message(STATUS " Note: Runtime availability depends on device Vulkan support")
99
114
  else()
100
- message(STATUS "Vulkan backend library not found for ${ANDROID_ABI}")
115
+ message(STATUS "Vulkan backend library not found in jniLibs for ${ANDROID_ABI}")
101
116
  endif()
102
117
 
103
- # Check if OpenCL backend library is available
118
+ # Check if OpenCL backend library is present (for compile-time feature enablement)
104
119
  set(OPENCL_BACKEND_AVAILABLE FALSE)
105
- # Check only for libggml-opencl.so - libOpenCL.so is a system library (not shipped)
106
120
  if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-opencl.so)
107
121
  set(OPENCL_BACKEND_AVAILABLE TRUE)
108
- message(STATUS "OpenCL backend library found for ${ANDROID_ABI}")
109
- message(STATUS "Note: libOpenCL.so will be provided by the system at runtime")
122
+ message(STATUS "OpenCL backend library found in jniLibs for ${ANDROID_ABI} (compile-time)")
123
+ message(STATUS " Note: Runtime availability requires system libOpenCL.so (not checked here)")
124
+ message(STATUS " Note: Backend will gracefully fail to load if device lacks OpenCL support")
110
125
  else()
111
- message(STATUS "OpenCL backend library not found for ${ANDROID_ABI}")
126
+ message(STATUS "OpenCL backend library not found in jniLibs for ${ANDROID_ABI}")
112
127
  endif()
113
128
 
114
- # Hybrid backend approach: CPU static (built into main libraries), GPU dynamic
115
- # CPU backend will be statically linked into main libraries (libggml.so, libllama.so)
116
- # GPU backends (OpenCL, Vulkan) will be dynamically loaded at runtime only if available
129
+ # Dynamic backend approach: ALL backends (CPU + GPU) are dynamically loaded
130
+ # With GGML_BACKEND_DL=ON, CPU backend is built as libggml-cpu.so (separate from libggml.so)
131
+ # GPU backends (OpenCL, Vulkan) are also built as separate .so files
132
+ # All backends are loaded dynamically via ggml_backend_load_all() at runtime
117
133
  target_compile_definitions(common PRIVATE
118
- -DGGML_BACKEND_DL=1 # Enable dynamic loading for GPU backends
119
- -DGGML_CPU=1 # CPU backend statically built into main libraries
134
+ -DGGML_BACKEND_DL=1 # Enable dynamic loading for ALL backends (CPU + GPU)
135
+ -DGGML_CPU=1 # CPU backend enabled (built as libggml-cpu.so when GGML_BACKEND_DL=ON)
136
+ # NOTE: Do NOT define GGML_USE_CPU=1 when GGML_BACKEND_DL=ON
137
+ # CPU backend is loaded dynamically via ggml_backend_load_all()
120
138
  )
121
139
  target_compile_definitions(RNLlamaCpp PRIVATE
122
- -DGGML_BACKEND_DL=1 # Enable dynamic loading for GPU backends
123
- -DGGML_CPU=1 # CPU backend statically built into main libraries
140
+ -DGGML_BACKEND_DL=1 # Enable dynamic loading for ALL backends (CPU + GPU)
141
+ -DGGML_CPU=1 # CPU backend enabled (built as libggml-cpu.so when GGML_BACKEND_DL=ON)
142
+ # NOTE: Do NOT define GGML_USE_CPU=1 when GGML_BACKEND_DL=ON
143
+ # CPU backend is loaded dynamically via ggml_backend_load_all()
124
144
  -DANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON # Support Android 15+ 16KB page sizes
125
145
  )
126
146
 
@@ -179,7 +199,7 @@ target_link_libraries(
179
199
  llama # Link against the imported prebuilt core llama library
180
200
  ggml-base # Link against the imported GGML base library
181
201
  ggml # Link against the imported GGML library
182
- ggml-cpu # Link against the imported GGML CPU library
202
+ # NOTE: ggml-cpu is NOT linked - CPU backend is dynamically loaded from libggml-cpu.so
183
203
  jsi
184
204
  reactnative
185
205
  fbjni
@@ -212,10 +232,16 @@ add_custom_command(TARGET RNLlamaCpp POST_BUILD
212
232
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
213
233
  ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml.so
214
234
  $<TARGET_FILE_DIR:RNLlamaCpp>/libggml.so
235
+ COMMENT "Copying dependency libraries to build output directory"
236
+ )
237
+
238
+ # libggml-cpu.so is REQUIRED when GGML_BACKEND_DL=ON (CPU backend is dynamically loaded)
239
+ # Copy it so it gets packaged into the APK
240
+ add_custom_command(TARGET RNLlamaCpp POST_BUILD
215
241
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
216
242
  ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
217
243
  $<TARGET_FILE_DIR:RNLlamaCpp>/libggml-cpu.so
218
- COMMENT "Copying dependency libraries to build output directory"
244
+ COMMENT "Copying libggml-cpu.so (REQUIRED for CPU backend when GGML_BACKEND_DL=ON)"
219
245
  )
220
246
 
221
247
  # Also copy any optional GPU libraries if they exist
@@ -1,3 +1,20 @@
1
1
  <manifest xmlns:android="http://schemas.android.com/apk/res/android"
2
2
  package="com.novastera.llamarn">
3
+
4
+ <!-- GPU backends we ship -->
5
+ <uses-native-library
6
+ android:name="libggml-opencl.so"
7
+ android:required="false" />
8
+ <uses-native-library
9
+ android:name="libggml-vulkan.so"
10
+ android:required="false" />
11
+
12
+ <!-- System loaders (only mapped if present on device) -->
13
+ <uses-native-library
14
+ android:name="libOpenCL.so"
15
+ android:required="false" />
16
+ <uses-native-library
17
+ android:name="libvulkan.so"
18
+ android:required="false" />
19
+
3
20
  </manifest>
@@ -8,6 +8,9 @@
8
8
  #include <unordered_map>
9
9
  #include <utility>
10
10
  #include <thread>
11
+ #include <cstdio>
12
+ #include <cstring>
13
+ #include <cerrno>
11
14
  #include "SystemUtils.h"
12
15
  // Include our custom headers - this was missing!
13
16
  #include "rn-llama.h"
@@ -17,6 +20,18 @@
17
20
 
18
21
  #if defined(__ANDROID__) || defined(__linux__)
19
22
  #include <unistd.h>
23
+ #include <dlfcn.h>
24
+ #include <android/log.h>
25
+ #define LOG_TAG "RNLlamaCpp"
26
+ #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
27
+ #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
28
+ #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
29
+ #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
30
+ #else
31
+ #define LOGI(...) fprintf(stderr, __VA_ARGS__)
32
+ #define LOGE(...) fprintf(stderr, __VA_ARGS__)
33
+ #define LOGW(...) fprintf(stderr, __VA_ARGS__)
34
+ #define LOGD(...) fprintf(stderr, __VA_ARGS__)
20
35
  #endif
21
36
 
22
37
  // Include the llama.cpp headers directly
@@ -72,6 +87,42 @@ jsi::Value PureCppImpl::loadLlamaModelInfo(jsi::Runtime &runtime, jsi::String mo
72
87
  // Launch background thread for model info loading
73
88
  std::thread([selfPtr, path, resolve, reject, runtimePtr, invoker]() {
74
89
  try {
90
+ // Set up logging callback to capture llama.cpp error messages
91
+ llama_log_set([](enum ggml_log_level level, const char * text, void * /* user_data */) {
92
+ if (level >= GGML_LOG_LEVEL_ERROR) {
93
+ LOGE("llama.cpp: %s", text);
94
+ }
95
+ }, nullptr);
96
+
97
+ // Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
98
+ // With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
99
+ // CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
100
+ // On Android, dlopen() can load libraries by name even from inside APKs
101
+ #ifdef __ANDROID__
102
+ // Load CPU backend directly - Android's linker will find it in the same directory
103
+ void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
104
+ if (cpu_handle) {
105
+ typedef ggml_backend_reg_t (*backend_init_fn_t)();
106
+ backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
107
+ if (backend_init) {
108
+ ggml_backend_reg_t cpu_backend = backend_init();
109
+ if (cpu_backend) {
110
+ ggml_backend_register(cpu_backend);
111
+ }
112
+ }
113
+ }
114
+
115
+ // Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
116
+ ggml_backend_load_all();
117
+ #else
118
+ ggml_backend_load_all();
119
+ #endif
120
+
121
+ // Verify at least CPU backend was loaded
122
+ if (ggml_backend_reg_count() == 0) {
123
+ throw std::runtime_error("No backends registered - CPU backend library not found");
124
+ }
125
+
75
126
  // Initialize llama backend
76
127
  llama_backend_init();
77
128
 
@@ -313,7 +364,35 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
313
364
  try {
314
365
  // Thread-safe access to member variables
315
366
  std::lock_guard<std::mutex> lock(selfPtr->mutex_);
316
-
367
+
368
+ // Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
369
+ // With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
370
+ // CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
371
+ #ifdef __ANDROID__
372
+ // Load CPU backend directly - Android's linker will find it in the same directory
373
+ void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
374
+ if (cpu_handle) {
375
+ typedef ggml_backend_reg_t (*backend_init_fn_t)();
376
+ backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
377
+ if (backend_init) {
378
+ ggml_backend_reg_t cpu_backend = backend_init();
379
+ if (cpu_backend) {
380
+ ggml_backend_register(cpu_backend);
381
+ }
382
+ }
383
+ }
384
+
385
+ // Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
386
+ ggml_backend_load_all();
387
+ #else
388
+ ggml_backend_load_all();
389
+ #endif
390
+
391
+ // Verify at least CPU backend was loaded
392
+ if (ggml_backend_reg_count() == 0) {
393
+ throw std::runtime_error("No backends registered - CPU backend library not found");
394
+ }
395
+
317
396
  // Initialize llama backend
318
397
  llama_backend_init();
319
398
 
@@ -374,8 +453,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
374
453
  } catch (const std::exception& e) {
375
454
  // If we were trying to use GPU and got an error, retry with CPU-only
376
455
  if (params.n_gpu_layers > 0) {
377
- fprintf(stderr, "GPU initialization failed (%s), retrying with CPU-only\n", e.what());
378
-
379
456
  params.n_gpu_layers = 0;
380
457
 
381
458
  try {
@@ -384,8 +461,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
384
461
  if (!result.model || !result.context) {
385
462
  throw std::runtime_error("Failed to initialize model and context even with CPU-only mode");
386
463
  }
387
-
388
- fprintf(stderr, "Successfully recovered with CPU-only mode after GPU failure\n");
389
464
  } catch (const std::exception& cpu_e) {
390
465
  throw std::runtime_error(std::string("Model initialization failed: ") + cpu_e.what());
391
466
  }
@@ -477,7 +552,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
477
552
  } catch (const std::exception& e) {
478
553
  // Schedule error callback on JS thread
479
554
  std::string errorMsg(e.what());
480
- fprintf(stderr, "initLlama error: %s\n", errorMsg.c_str());
481
555
  invoker->invokeAsync([reject, errorMsg, runtimePtr]() {
482
556
  try {
483
557
  reject->call(*runtimePtr, jsi::String::createFromUtf8(*runtimePtr, errorMsg));
@@ -1,4 +1,4 @@
1
- int LLAMA_BUILD_NUMBER = 7134;
2
- char const *LLAMA_COMMIT = "96ac5a232";
1
+ int LLAMA_BUILD_NUMBER = 7140;
2
+ char const *LLAMA_COMMIT = "4902eebe3";
3
3
  char const *LLAMA_COMPILER = "unknown";
4
4
  char const *LLAMA_BUILD_TARGET = "unknown";
@@ -4183,6 +4183,21 @@ class Qwen3MoeModel(Qwen2MoeModel):
4183
4183
  super().set_vocab()
4184
4184
 
4185
4185
 
4186
+ @ModelBase.register("RND1")
4187
+ class RND1Model(Qwen2MoeModel):
4188
+ model_arch = gguf.MODEL_ARCH.RND1
4189
+
4190
+ def set_gguf_parameters(self):
4191
+ super().set_gguf_parameters()
4192
+
4193
+ # RND1 specific parameters
4194
+ # RND1 uses bidirectional attention
4195
+ self.gguf_writer.add_causal_attention(False)
4196
+
4197
+ if (mask_token_id := self.hparams.get("mask_token_id")) is not None:
4198
+ self.gguf_writer.add_mask_token_id(mask_token_id)
4199
+
4200
+
4186
4201
  @ModelBase.register("Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration")
4187
4202
  class Qwen3VLVisionModel(MmprojModel):
4188
4203
  def __init__(self, *args, **kwargs):
@@ -2303,9 +2303,9 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
2303
2303
  // calculate rope cache for fist layer in current device.
2304
2304
  cann_ctx->rope_cache.cached = false;
2305
2305
 
2306
+ bool cann_graph_update_required = false;
2306
2307
  #ifdef USE_ACL_GRAPH
2307
2308
  bool use_cann_graph = true;
2308
- bool cann_graph_update_required = false;
2309
2309
 
2310
2310
  static bool prefill_use_graph = parse_bool(get_env("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
2311
2311
  if (!prefill_use_graph) {
@@ -2336,7 +2336,6 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
2336
2336
  }
2337
2337
  #else
2338
2338
  bool use_cann_graph = false;
2339
- bool cann_graph_update_required = false;
2340
2339
  #endif // USE_ACL_GRAPH
2341
2340
  evaluate_and_capture_cann_graph(cann_ctx, cgraph, use_cann_graph, cann_graph_update_required);
2342
2341
 
@@ -43,6 +43,14 @@ set(HTP_CMAKE_ARGS
43
43
  -DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT}
44
44
  -DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG})
45
45
 
46
+ ExternalProject_Add(htp-v68
47
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
48
+ CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68")
49
+
50
+ ExternalProject_Add(htp-v69
51
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
52
+ CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69")
53
+
46
54
  ExternalProject_Add(htp-v73
47
55
  SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
48
56
  CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73")
@@ -61,6 +69,8 @@ ExternalProject_Add(htp-v81
61
69
 
62
70
  # Install Hexagon skels required at runtime
63
71
  install(FILES
72
+ ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so
73
+ ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so
64
74
  ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so
65
75
  ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so
66
76
  ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so