@novastera-oss/llamarn 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/android/CMakeLists.txt +47 -21
  2. package/android/src/main/AndroidManifest.xml +17 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  4. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  5. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  7. package/cpp/PureCppImpl.cpp +80 -6
  8. package/cpp/build-info.cpp +2 -2
  9. package/cpp/llama.cpp/convert_hf_to_gguf.py +15 -0
  10. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +1 -2
  11. package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +10 -0
  12. package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +99 -364
  13. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +7 -0
  14. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +14 -13
  15. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +15 -3
  16. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +36 -25
  17. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +12 -3
  18. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +80 -7
  19. package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +6 -0
  20. package/cpp/llama.cpp/gguf-py/gguf/constants.py +19 -0
  21. package/cpp/llama.cpp/src/CMakeLists.txt +1 -0
  22. package/cpp/llama.cpp/src/llama-arch.cpp +22 -0
  23. package/cpp/llama.cpp/src/llama-arch.h +1 -0
  24. package/cpp/llama.cpp/src/llama-model.cpp +21 -1
  25. package/cpp/llama.cpp/src/models/models.h +4 -0
  26. package/cpp/llama.cpp/src/models/rnd1.cpp +126 -0
  27. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  28. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6403 -6395
  29. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  30. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  31. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6366 -6358
  32. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4815 -4809
  33. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  34. package/package.json +1 -1
  35. package/android/src/main/AndroidManifestNew.xml +0 -2
@@ -42,10 +42,17 @@ set_target_properties(ggml PROPERTIES
42
42
  IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml.so
43
43
  IMPORTED_NO_SONAME TRUE)
44
44
 
45
- add_library(ggml-cpu SHARED IMPORTED)
46
- set_target_properties(ggml-cpu PROPERTIES
47
- IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
48
- IMPORTED_NO_SONAME TRUE)
45
+ # ggml-cpu is optional - CPU backend is statically linked into libggml.so with GGML_USE_CPU=1
46
+ # Only import it if it exists (for backward compatibility)
47
+ if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so)
48
+ add_library(ggml-cpu SHARED IMPORTED)
49
+ set_target_properties(ggml-cpu PROPERTIES
50
+ IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
51
+ IMPORTED_NO_SONAME TRUE)
52
+ message(STATUS "libggml-cpu.so found (optional - CPU is statically linked)")
53
+ else()
54
+ message(STATUS "libggml-cpu.so not found (expected - CPU is statically linked into libggml.so)")
55
+ endif()
49
56
 
50
57
  # Create a minimal common library with only essential files that don't require missing GGML symbols
51
58
  add_library(
@@ -91,36 +98,49 @@ else()
91
98
  target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
92
99
  endif()
93
100
 
94
- # Check if Vulkan backend library is available
101
+ # Check if GPU backend libraries are present in jniLibs (compile-time check only)
102
+ # NOTE: This is NOT a runtime availability check - it only determines if we should
103
+ # compile OpenCL/Vulkan support into the code. Runtime availability is checked
104
+ # when ggml_backend_load_all() tries to load the backend libraries.
105
+ # Even if libggml-opencl.so exists, it will only work if the device has
106
+ # libOpenCL.so (system library) available at runtime.
107
+
108
+ # Check if Vulkan backend library is present (for compile-time feature enablement)
95
109
  set(VULKAN_BACKEND_AVAILABLE FALSE)
96
110
  if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-vulkan.so)
97
111
  set(VULKAN_BACKEND_AVAILABLE TRUE)
98
- message(STATUS "Vulkan backend library found for ${ANDROID_ABI}")
112
+ message(STATUS "Vulkan backend library found in jniLibs for ${ANDROID_ABI} (compile-time)")
113
+ message(STATUS " Note: Runtime availability depends on device Vulkan support")
99
114
  else()
100
- message(STATUS "Vulkan backend library not found for ${ANDROID_ABI}")
115
+ message(STATUS "Vulkan backend library not found in jniLibs for ${ANDROID_ABI}")
101
116
  endif()
102
117
 
103
- # Check if OpenCL backend library is available
118
+ # Check if OpenCL backend library is present (for compile-time feature enablement)
104
119
  set(OPENCL_BACKEND_AVAILABLE FALSE)
105
- # Check only for libggml-opencl.so - libOpenCL.so is a system library (not shipped)
106
120
  if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-opencl.so)
107
121
  set(OPENCL_BACKEND_AVAILABLE TRUE)
108
- message(STATUS "OpenCL backend library found for ${ANDROID_ABI}")
109
- message(STATUS "Note: libOpenCL.so will be provided by the system at runtime")
122
+ message(STATUS "OpenCL backend library found in jniLibs for ${ANDROID_ABI} (compile-time)")
123
+ message(STATUS " Note: Runtime availability requires system libOpenCL.so (not checked here)")
124
+ message(STATUS " Note: Backend will gracefully fail to load if device lacks OpenCL support")
110
125
  else()
111
- message(STATUS "OpenCL backend library not found for ${ANDROID_ABI}")
126
+ message(STATUS "OpenCL backend library not found in jniLibs for ${ANDROID_ABI}")
112
127
  endif()
113
128
 
114
- # Hybrid backend approach: CPU static (built into main libraries), GPU dynamic
115
- # CPU backend will be statically linked into main libraries (libggml.so, libllama.so)
116
- # GPU backends (OpenCL, Vulkan) will be dynamically loaded at runtime only if available
129
+ # Dynamic backend approach: ALL backends (CPU + GPU) are dynamically loaded
130
+ # With GGML_BACKEND_DL=ON, CPU backend is built as libggml-cpu.so (separate from libggml.so)
131
+ # GPU backends (OpenCL, Vulkan) are also built as separate .so files
132
+ # All backends are loaded dynamically via ggml_backend_load_all() at runtime
117
133
  target_compile_definitions(common PRIVATE
118
- -DGGML_BACKEND_DL=1 # Enable dynamic loading for GPU backends
119
- -DGGML_CPU=1 # CPU backend statically built into main libraries
134
+ -DGGML_BACKEND_DL=1 # Enable dynamic loading for ALL backends (CPU + GPU)
135
+ -DGGML_CPU=1 # CPU backend enabled (built as libggml-cpu.so when GGML_BACKEND_DL=ON)
136
+ # NOTE: Do NOT define GGML_USE_CPU=1 when GGML_BACKEND_DL=ON
137
+ # CPU backend is loaded dynamically via ggml_backend_load_all()
120
138
  )
121
139
  target_compile_definitions(RNLlamaCpp PRIVATE
122
- -DGGML_BACKEND_DL=1 # Enable dynamic loading for GPU backends
123
- -DGGML_CPU=1 # CPU backend statically built into main libraries
140
+ -DGGML_BACKEND_DL=1 # Enable dynamic loading for ALL backends (CPU + GPU)
141
+ -DGGML_CPU=1 # CPU backend enabled (built as libggml-cpu.so when GGML_BACKEND_DL=ON)
142
+ # NOTE: Do NOT define GGML_USE_CPU=1 when GGML_BACKEND_DL=ON
143
+ # CPU backend is loaded dynamically via ggml_backend_load_all()
124
144
  -DANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON # Support Android 15+ 16KB page sizes
125
145
  )
126
146
 
@@ -179,7 +199,7 @@ target_link_libraries(
179
199
  llama # Link against the imported prebuilt core llama library
180
200
  ggml-base # Link against the imported GGML base library
181
201
  ggml # Link against the imported GGML library
182
- ggml-cpu # Link against the imported GGML CPU library
202
+ # NOTE: ggml-cpu is NOT linked - CPU backend is dynamically loaded from libggml-cpu.so
183
203
  jsi
184
204
  reactnative
185
205
  fbjni
@@ -212,10 +232,16 @@ add_custom_command(TARGET RNLlamaCpp POST_BUILD
212
232
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
213
233
  ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml.so
214
234
  $<TARGET_FILE_DIR:RNLlamaCpp>/libggml.so
235
+ COMMENT "Copying dependency libraries to build output directory"
236
+ )
237
+
238
+ # libggml-cpu.so is REQUIRED when GGML_BACKEND_DL=ON (CPU backend is dynamically loaded)
239
+ # Copy it so it gets packaged into the APK
240
+ add_custom_command(TARGET RNLlamaCpp POST_BUILD
215
241
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
216
242
  ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
217
243
  $<TARGET_FILE_DIR:RNLlamaCpp>/libggml-cpu.so
218
- COMMENT "Copying dependency libraries to build output directory"
244
+ COMMENT "Copying libggml-cpu.so (REQUIRED for CPU backend when GGML_BACKEND_DL=ON)"
219
245
  )
220
246
 
221
247
  # Also copy any optional GPU libraries if they exist
@@ -1,3 +1,20 @@
1
1
  <manifest xmlns:android="http://schemas.android.com/apk/res/android"
2
2
  package="com.novastera.llamarn">
3
+
4
+ <!-- GPU backends we ship -->
5
+ <uses-native-library
6
+ android:name="libggml-opencl.so"
7
+ android:required="false" />
8
+ <uses-native-library
9
+ android:name="libggml-vulkan.so"
10
+ android:required="false" />
11
+
12
+ <!-- System loaders (only mapped if present on device) -->
13
+ <uses-native-library
14
+ android:name="libOpenCL.so"
15
+ android:required="false" />
16
+ <uses-native-library
17
+ android:name="libvulkan.so"
18
+ android:required="false" />
19
+
3
20
  </manifest>
@@ -8,6 +8,9 @@
8
8
  #include <unordered_map>
9
9
  #include <utility>
10
10
  #include <thread>
11
+ #include <cstdio>
12
+ #include <cstring>
13
+ #include <cerrno>
11
14
  #include "SystemUtils.h"
12
15
  // Include our custom headers - this was missing!
13
16
  #include "rn-llama.h"
@@ -17,6 +20,18 @@
17
20
 
18
21
  #if defined(__ANDROID__) || defined(__linux__)
19
22
  #include <unistd.h>
23
+ #include <dlfcn.h>
24
+ #include <android/log.h>
25
+ #define LOG_TAG "RNLlamaCpp"
26
+ #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
27
+ #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
28
+ #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
29
+ #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
30
+ #else
31
+ #define LOGI(...) fprintf(stderr, __VA_ARGS__)
32
+ #define LOGE(...) fprintf(stderr, __VA_ARGS__)
33
+ #define LOGW(...) fprintf(stderr, __VA_ARGS__)
34
+ #define LOGD(...) fprintf(stderr, __VA_ARGS__)
20
35
  #endif
21
36
 
22
37
  // Include the llama.cpp headers directly
@@ -72,6 +87,42 @@ jsi::Value PureCppImpl::loadLlamaModelInfo(jsi::Runtime &runtime, jsi::String mo
72
87
  // Launch background thread for model info loading
73
88
  std::thread([selfPtr, path, resolve, reject, runtimePtr, invoker]() {
74
89
  try {
90
+ // Set up logging callback to capture llama.cpp error messages
91
+ llama_log_set([](enum ggml_log_level level, const char * text, void * /* user_data */) {
92
+ if (level >= GGML_LOG_LEVEL_ERROR) {
93
+ LOGE("llama.cpp: %s", text);
94
+ }
95
+ }, nullptr);
96
+
97
+ // Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
98
+ // With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
99
+ // CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
100
+ // On Android, dlopen() can load libraries by name even from inside APKs
101
+ #ifdef __ANDROID__
102
+ // Load CPU backend directly - Android's linker will find it in the same directory
103
+ void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
104
+ if (cpu_handle) {
105
+ typedef ggml_backend_reg_t (*backend_init_fn_t)();
106
+ backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
107
+ if (backend_init) {
108
+ ggml_backend_reg_t cpu_backend = backend_init();
109
+ if (cpu_backend) {
110
+ ggml_backend_register(cpu_backend);
111
+ }
112
+ }
113
+ }
114
+
115
+ // Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
116
+ ggml_backend_load_all();
117
+ #else
118
+ ggml_backend_load_all();
119
+ #endif
120
+
121
+ // Verify at least CPU backend was loaded
122
+ if (ggml_backend_reg_count() == 0) {
123
+ throw std::runtime_error("No backends registered - CPU backend library not found");
124
+ }
125
+
75
126
  // Initialize llama backend
76
127
  llama_backend_init();
77
128
 
@@ -313,7 +364,35 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
313
364
  try {
314
365
  // Thread-safe access to member variables
315
366
  std::lock_guard<std::mutex> lock(selfPtr->mutex_);
316
-
367
+
368
+ // Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
369
+ // With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
370
+ // CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
371
+ #ifdef __ANDROID__
372
+ // Load CPU backend directly - Android's linker will find it in the same directory
373
+ void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
374
+ if (cpu_handle) {
375
+ typedef ggml_backend_reg_t (*backend_init_fn_t)();
376
+ backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
377
+ if (backend_init) {
378
+ ggml_backend_reg_t cpu_backend = backend_init();
379
+ if (cpu_backend) {
380
+ ggml_backend_register(cpu_backend);
381
+ }
382
+ }
383
+ }
384
+
385
+ // Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
386
+ ggml_backend_load_all();
387
+ #else
388
+ ggml_backend_load_all();
389
+ #endif
390
+
391
+ // Verify at least CPU backend was loaded
392
+ if (ggml_backend_reg_count() == 0) {
393
+ throw std::runtime_error("No backends registered - CPU backend library not found");
394
+ }
395
+
317
396
  // Initialize llama backend
318
397
  llama_backend_init();
319
398
 
@@ -374,8 +453,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
374
453
  } catch (const std::exception& e) {
375
454
  // If we were trying to use GPU and got an error, retry with CPU-only
376
455
  if (params.n_gpu_layers > 0) {
377
- fprintf(stderr, "GPU initialization failed (%s), retrying with CPU-only\n", e.what());
378
-
379
456
  params.n_gpu_layers = 0;
380
457
 
381
458
  try {
@@ -384,8 +461,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
384
461
  if (!result.model || !result.context) {
385
462
  throw std::runtime_error("Failed to initialize model and context even with CPU-only mode");
386
463
  }
387
-
388
- fprintf(stderr, "Successfully recovered with CPU-only mode after GPU failure\n");
389
464
  } catch (const std::exception& cpu_e) {
390
465
  throw std::runtime_error(std::string("Model initialization failed: ") + cpu_e.what());
391
466
  }
@@ -477,7 +552,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
477
552
  } catch (const std::exception& e) {
478
553
  // Schedule error callback on JS thread
479
554
  std::string errorMsg(e.what());
480
- fprintf(stderr, "initLlama error: %s\n", errorMsg.c_str());
481
555
  invoker->invokeAsync([reject, errorMsg, runtimePtr]() {
482
556
  try {
483
557
  reject->call(*runtimePtr, jsi::String::createFromUtf8(*runtimePtr, errorMsg));
@@ -1,4 +1,4 @@
1
- int LLAMA_BUILD_NUMBER = 7134;
2
- char const *LLAMA_COMMIT = "96ac5a232";
1
+ int LLAMA_BUILD_NUMBER = 7140;
2
+ char const *LLAMA_COMMIT = "4902eebe3";
3
3
  char const *LLAMA_COMPILER = "unknown";
4
4
  char const *LLAMA_BUILD_TARGET = "unknown";
@@ -4183,6 +4183,21 @@ class Qwen3MoeModel(Qwen2MoeModel):
4183
4183
  super().set_vocab()
4184
4184
 
4185
4185
 
4186
+ @ModelBase.register("RND1")
4187
+ class RND1Model(Qwen2MoeModel):
4188
+ model_arch = gguf.MODEL_ARCH.RND1
4189
+
4190
+ def set_gguf_parameters(self):
4191
+ super().set_gguf_parameters()
4192
+
4193
+ # RND1 specific parameters
4194
+ # RND1 uses bidirectional attention
4195
+ self.gguf_writer.add_causal_attention(False)
4196
+
4197
+ if (mask_token_id := self.hparams.get("mask_token_id")) is not None:
4198
+ self.gguf_writer.add_mask_token_id(mask_token_id)
4199
+
4200
+
4186
4201
  @ModelBase.register("Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration")
4187
4202
  class Qwen3VLVisionModel(MmprojModel):
4188
4203
  def __init__(self, *args, **kwargs):
@@ -2303,9 +2303,9 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
2303
2303
  // calculate rope cache for fist layer in current device.
2304
2304
  cann_ctx->rope_cache.cached = false;
2305
2305
 
2306
+ bool cann_graph_update_required = false;
2306
2307
  #ifdef USE_ACL_GRAPH
2307
2308
  bool use_cann_graph = true;
2308
- bool cann_graph_update_required = false;
2309
2309
 
2310
2310
  static bool prefill_use_graph = parse_bool(get_env("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
2311
2311
  if (!prefill_use_graph) {
@@ -2336,7 +2336,6 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
2336
2336
  }
2337
2337
  #else
2338
2338
  bool use_cann_graph = false;
2339
- bool cann_graph_update_required = false;
2340
2339
  #endif // USE_ACL_GRAPH
2341
2340
  evaluate_and_capture_cann_graph(cann_ctx, cgraph, use_cann_graph, cann_graph_update_required);
2342
2341
 
@@ -43,6 +43,14 @@ set(HTP_CMAKE_ARGS
43
43
  -DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT}
44
44
  -DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG})
45
45
 
46
+ ExternalProject_Add(htp-v68
47
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
48
+ CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68")
49
+
50
+ ExternalProject_Add(htp-v69
51
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
52
+ CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69")
53
+
46
54
  ExternalProject_Add(htp-v73
47
55
  SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
48
56
  CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73")
@@ -61,6 +69,8 @@ ExternalProject_Add(htp-v81
61
69
 
62
70
  # Install Hexagon skels required at runtime
63
71
  install(FILES
72
+ ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so
73
+ ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so
64
74
  ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so
65
75
  ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so
66
76
  ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so