@novastera-oss/llamarn 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -7
- package/android/CMakeLists.txt +21 -0
- package/android/gradle.properties +1 -1
- package/android/src/main/AndroidManifest.xml +29 -1
- package/android/src/main/jniLibs/arm64-v8a/libggml-opencl.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-vulkan.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-opencl.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-vulkan.so +0 -0
- package/cpp/PureCppImpl.cpp +48 -26
- package/package.json +1 -1
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
package/README.md
CHANGED
|
@@ -13,7 +13,8 @@
|
|
|
13
13
|
|
|
14
14
|
* Basic model loading and inference
|
|
15
15
|
* Metal support on iOS
|
|
16
|
-
* OpenCL/Vulkan
|
|
16
|
+
* OpenCL/Vulkan GPU acceleration on Android
|
|
17
|
+
* Snapdragon Hexagon NPU support on Android (arm64-v8a)
|
|
17
18
|
* Automatic CPU/GPU detection
|
|
18
19
|
* Chat completion with templates (including Jinja template support)
|
|
19
20
|
* Embeddings generation
|
|
@@ -26,15 +27,17 @@
|
|
|
26
27
|
|
|
27
28
|
We welcome contributions, especially in these areas:
|
|
28
29
|
|
|
29
|
-
1. **Android GPU
|
|
30
|
-
*
|
|
30
|
+
1. **Android GPU and NPU Testing**:
|
|
31
|
+
* **OpenCL/Vulkan GPU Libraries**: GPU acceleration libraries (OpenCL and Vulkan) have been built and integrated, but we need help testing them on various Android devices to ensure proper functionality and performance.
|
|
32
|
+
* **Snapdragon Hexagon NPU Support**: Hexagon NPU support has been added for Snapdragon devices (arm64-v8a), but we need community testing on actual Snapdragon devices to verify it works correctly.
|
|
33
|
+
* Development of a reliable GPU/NPU detection mechanism in React Native
|
|
31
34
|
* Implementation of proper backend initialization verification
|
|
32
|
-
* Creation of robust testing framework for GPU availability
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
+
* Creation of a robust testing framework for GPU/NPU availability
|
|
36
|
+
* Performance benchmarking and optimization for mobile GPUs and NPUs
|
|
37
|
+
* Real-world device testing across different manufacturers and chipset generations
|
|
35
38
|
|
|
36
39
|
2. **CI Improvements**:
|
|
37
|
-
* Adding automated Android GPU tests to CI pipeline
|
|
40
|
+
* Adding automated Android GPU/NPU tests to CI pipeline
|
|
38
41
|
* Implementing device-specific testing strategies
|
|
39
42
|
* Adding performance benchmarks to CI
|
|
40
43
|
|
package/android/CMakeLists.txt
CHANGED
|
@@ -269,6 +269,27 @@ if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-opencl.so)
|
|
|
269
269
|
)
|
|
270
270
|
endif()
|
|
271
271
|
|
|
272
|
+
# Copy Hexagon backend library if it exists (Snapdragon devices only - arm64-v8a)
|
|
273
|
+
if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-hexagon.so)
|
|
274
|
+
add_custom_command(TARGET RNLlamaCpp POST_BUILD
|
|
275
|
+
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
|
276
|
+
${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-hexagon.so
|
|
277
|
+
$<TARGET_FILE_DIR:RNLlamaCpp>/libggml-hexagon.so
|
|
278
|
+
COMMENT "Copying Hexagon backend library to build output directory"
|
|
279
|
+
)
|
|
280
|
+
# Also copy all HTP libraries (required for Hexagon backend)
|
|
281
|
+
file(GLOB HTP_LIBS "${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-htp-*.so")
|
|
282
|
+
foreach(HTP_LIB ${HTP_LIBS})
|
|
283
|
+
get_filename_component(HTP_LIB_NAME ${HTP_LIB} NAME)
|
|
284
|
+
add_custom_command(TARGET RNLlamaCpp POST_BUILD
|
|
285
|
+
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
|
286
|
+
${HTP_LIB}
|
|
287
|
+
$<TARGET_FILE_DIR:RNLlamaCpp>/${HTP_LIB_NAME}
|
|
288
|
+
COMMENT "Copying Hexagon HTP library ${HTP_LIB_NAME} to build output directory"
|
|
289
|
+
)
|
|
290
|
+
endforeach()
|
|
291
|
+
endif()
|
|
292
|
+
|
|
272
293
|
# NOTE: We do NOT copy libOpenCL.so or libvulkan.so - these are system libraries
|
|
273
294
|
# The ICD loader is built and installed to NDK sysroot for BUILD-TIME linking only.
|
|
274
295
|
# At runtime, the system will provide libOpenCL.so and libvulkan.so if the device supports them.
|
|
@@ -8,8 +8,36 @@
|
|
|
8
8
|
<uses-native-library
|
|
9
9
|
android:name="libggml-vulkan.so"
|
|
10
10
|
android:required="false" />
|
|
11
|
+
<uses-native-library
|
|
12
|
+
android:name="libggml-hexagon.so"
|
|
13
|
+
android:required="false" />
|
|
11
14
|
|
|
12
|
-
<!--
|
|
15
|
+
<!-- Hexagon HTP (Hexagon Tensor Processor) libraries for Snapdragon NPU -->
|
|
16
|
+
<!-- Unlike OpenCL/Vulkan which have system libraries (libOpenCL.so, libvulkan.so), -->
|
|
17
|
+
<!-- Hexagon has no system library - all HTP libraries must be bundled with the app. -->
|
|
18
|
+
<!-- At runtime, libggml-hexagon.so queries the device hardware via FastRPC to detect -->
|
|
19
|
+
<!-- which Hexagon version the device has, then loads the corresponding libggml-htp-vXX.so. -->
|
|
20
|
+
<!-- Since we don't know the device at build time, all versions are shipped. -->
|
|
21
|
+
<!-- v73, v75, v79, v81 are commonly used on modern Snapdragon devices. -->
|
|
22
|
+
<uses-native-library
|
|
23
|
+
android:name="libggml-htp-v73.so"
|
|
24
|
+
android:required="false" />
|
|
25
|
+
<uses-native-library
|
|
26
|
+
android:name="libggml-htp-v75.so"
|
|
27
|
+
android:required="false" />
|
|
28
|
+
<uses-native-library
|
|
29
|
+
android:name="libggml-htp-v79.so"
|
|
30
|
+
android:required="false" />
|
|
31
|
+
<uses-native-library
|
|
32
|
+
android:name="libggml-htp-v81.so"
|
|
33
|
+
android:required="false" />
|
|
34
|
+
|
|
35
|
+
<!-- System libraries (provided by device/Snapdragon, not shipped with app) -->
|
|
36
|
+
<!-- libcdsprpc.so: FastRPC library for communicating with CDSP (Compute DSP) domain on Snapdragon -->
|
|
37
|
+
<!-- Required for Hexagon backend to communicate with the NPU -->
|
|
38
|
+
<uses-native-library
|
|
39
|
+
android:name="libcdsprpc.so"
|
|
40
|
+
android:required="false" />
|
|
13
41
|
<uses-native-library
|
|
14
42
|
android:name="libOpenCL.so"
|
|
15
43
|
android:required="false" />
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/cpp/PureCppImpl.cpp
CHANGED
|
@@ -19,19 +19,20 @@
|
|
|
19
19
|
#include "chat.h"
|
|
20
20
|
|
|
21
21
|
#if defined(__ANDROID__) || defined(__linux__)
|
|
22
|
-
#include <unistd.h>
|
|
23
22
|
#include <dlfcn.h>
|
|
24
|
-
#include <android/log.h>
|
|
25
|
-
#
|
|
26
|
-
#define
|
|
27
|
-
#
|
|
28
|
-
#define
|
|
29
|
-
#define
|
|
23
|
+
// #include <android/log.h>
|
|
24
|
+
// #ifndef LOG_TAG
|
|
25
|
+
// #define LOG_TAG "RNLlamaCpp"
|
|
26
|
+
// #endif
|
|
27
|
+
// #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
|
|
28
|
+
// #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
|
|
29
|
+
// #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
|
|
30
|
+
// #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
|
|
30
31
|
#else
|
|
31
|
-
#define LOGI(...) fprintf(stderr, __VA_ARGS__)
|
|
32
|
-
#define LOGE(...) fprintf(stderr, __VA_ARGS__)
|
|
33
|
-
#define LOGW(...) fprintf(stderr, __VA_ARGS__)
|
|
34
|
-
#define LOGD(...) fprintf(stderr, __VA_ARGS__)
|
|
32
|
+
// #define LOGI(...) fprintf(stderr, __VA_ARGS__)
|
|
33
|
+
// #define LOGE(...) fprintf(stderr, __VA_ARGS__)
|
|
34
|
+
// #define LOGW(...) fprintf(stderr, __VA_ARGS__)
|
|
35
|
+
// #define LOGD(...) fprintf(stderr, __VA_ARGS__)
|
|
35
36
|
#endif
|
|
36
37
|
|
|
37
38
|
// Include the llama.cpp headers directly
|
|
@@ -88,31 +89,52 @@ jsi::Value PureCppImpl::loadLlamaModelInfo(jsi::Runtime &runtime, jsi::String mo
|
|
|
88
89
|
std::thread([selfPtr, path, resolve, reject, runtimePtr, invoker]() {
|
|
89
90
|
try {
|
|
90
91
|
// Set up logging callback to capture llama.cpp error messages
|
|
91
|
-
llama_log_set([](enum ggml_log_level level, const char * text, void * /* user_data */) {
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
}, nullptr);
|
|
92
|
+
// llama_log_set([](enum ggml_log_level level, const char * text, void * /* user_data */) {
|
|
93
|
+
// if (level >= GGML_LOG_LEVEL_ERROR) {
|
|
94
|
+
// LOGE("llama.cpp: %s", text);
|
|
95
|
+
// }
|
|
96
|
+
// }, nullptr);
|
|
96
97
|
|
|
97
98
|
// Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
|
|
98
99
|
// With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
|
|
99
|
-
// CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
|
|
100
|
+
// CPU backend is in libggml-cpu.so, GPU/NPU backends are in libggml-opencl.so, libggml-vulkan.so, libggml-hexagon.so
|
|
100
101
|
// On Android, dlopen() can load libraries by name even from inside APKs
|
|
101
102
|
#ifdef __ANDROID__
|
|
102
103
|
// Load CPU backend directly - Android's linker will find it in the same directory
|
|
103
|
-
|
|
104
|
-
if (
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
if (
|
|
110
|
-
|
|
104
|
+
// Check if already registered to avoid duplicate registration
|
|
105
|
+
if (!ggml_backend_reg_by_name("CPU")) {
|
|
106
|
+
void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
|
|
107
|
+
if (cpu_handle) {
|
|
108
|
+
typedef ggml_backend_reg_t (*backend_init_fn_t)();
|
|
109
|
+
backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
|
|
110
|
+
if (backend_init) {
|
|
111
|
+
ggml_backend_reg_t cpu_backend = backend_init();
|
|
112
|
+
if (cpu_backend) {
|
|
113
|
+
ggml_backend_register(cpu_backend);
|
|
114
|
+
}
|
|
111
115
|
}
|
|
112
116
|
}
|
|
113
117
|
}
|
|
114
118
|
|
|
115
|
-
// Load
|
|
119
|
+
// Load Hexagon backend (Snapdragon DSP) - more performant than Vulkan on Snapdragon devices
|
|
120
|
+
// Load before other GPU backends to give it priority
|
|
121
|
+
// Check if already registered to avoid duplicate registration
|
|
122
|
+
if (!ggml_backend_reg_by_name("HTP")) {
|
|
123
|
+
void* hexagon_handle = dlopen("libggml-hexagon.so", RTLD_LAZY | RTLD_LOCAL);
|
|
124
|
+
if (hexagon_handle) {
|
|
125
|
+
typedef ggml_backend_reg_t (*backend_init_fn_t)();
|
|
126
|
+
backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(hexagon_handle, "ggml_backend_init");
|
|
127
|
+
if (backend_init) {
|
|
128
|
+
ggml_backend_reg_t hexagon_backend = backend_init();
|
|
129
|
+
if (hexagon_backend) {
|
|
130
|
+
ggml_backend_register(hexagon_backend);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Load other GPU backends (OpenCL, Vulkan) if present - they will be found by name
|
|
137
|
+
// ggml_backend_load_all() will skip backends that are already loaded
|
|
116
138
|
ggml_backend_load_all();
|
|
117
139
|
#else
|
|
118
140
|
ggml_backend_load_all();
|
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|