@novastera-oss/llamarn 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/CMakeLists.txt +47 -21
- package/android/src/main/AndroidManifest.xml +17 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/PureCppImpl.cpp +80 -6
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/convert_hf_to_gguf.py +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/CMakeLists.txt +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +99 -364
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/htp-dma.h +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-exp.c +14 -13
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.c +15 -3
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +36 -25
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/main.c +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +80 -7
- package/cpp/llama.cpp/ggml/src/ggml-hexagon/htp-utils.c +6 -0
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +19 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +22 -0
- package/cpp/llama.cpp/src/llama-arch.h +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +21 -1
- package/cpp/llama.cpp/src/models/models.h +4 -0
- package/cpp/llama.cpp/src/models/rnd1.cpp +126 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6403 -6395
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +6366 -6358
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4815 -4809
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/android/src/main/AndroidManifestNew.xml +0 -2
package/android/CMakeLists.txt
CHANGED
|
@@ -42,10 +42,17 @@ set_target_properties(ggml PROPERTIES
|
|
|
42
42
|
IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml.so
|
|
43
43
|
IMPORTED_NO_SONAME TRUE)
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
# ggml-cpu is never linked directly - with GGML_BACKEND_DL=ON the CPU backend lives in libggml-cpu.so and is loaded at runtime (GGML_USE_CPU is not defined)
|
|
46
|
+
# Only import it if it exists (for backward compatibility)
|
|
47
|
+
if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so)
|
|
48
|
+
add_library(ggml-cpu SHARED IMPORTED)
|
|
49
|
+
set_target_properties(ggml-cpu PROPERTIES
|
|
50
|
+
IMPORTED_LOCATION ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
|
|
51
|
+
IMPORTED_NO_SONAME TRUE)
|
|
52
|
+
message(STATUS "libggml-cpu.so found (optional - CPU is statically linked)")
|
|
53
|
+
else()
|
|
54
|
+
message(STATUS "libggml-cpu.so not found (expected - CPU is statically linked into libggml.so)")
|
|
55
|
+
endif()
|
|
49
56
|
|
|
50
57
|
# Create a minimal common library with only essential files that don't require missing GGML symbols
|
|
51
58
|
add_library(
|
|
@@ -91,36 +98,49 @@ else()
|
|
|
91
98
|
target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
|
|
92
99
|
endif()
|
|
93
100
|
|
|
94
|
-
# Check if
|
|
101
|
+
# Check if GPU backend libraries are present in jniLibs (compile-time check only)
|
|
102
|
+
# NOTE: This is NOT a runtime availability check - it only determines if we should
|
|
103
|
+
# compile OpenCL/Vulkan support into the code. Runtime availability is checked
|
|
104
|
+
# when ggml_backend_load_all() tries to load the backend libraries.
|
|
105
|
+
# Even if libggml-opencl.so exists, it will only work if the device has
|
|
106
|
+
# libOpenCL.so (system library) available at runtime.
|
|
107
|
+
|
|
108
|
+
# Check if Vulkan backend library is present (for compile-time feature enablement)
|
|
95
109
|
set(VULKAN_BACKEND_AVAILABLE FALSE)
|
|
96
110
|
if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-vulkan.so)
|
|
97
111
|
set(VULKAN_BACKEND_AVAILABLE TRUE)
|
|
98
|
-
message(STATUS "Vulkan backend library found for ${ANDROID_ABI}")
|
|
112
|
+
message(STATUS "Vulkan backend library found in jniLibs for ${ANDROID_ABI} (compile-time)")
|
|
113
|
+
message(STATUS " Note: Runtime availability depends on device Vulkan support")
|
|
99
114
|
else()
|
|
100
|
-
message(STATUS "Vulkan backend library not found for ${ANDROID_ABI}")
|
|
115
|
+
message(STATUS "Vulkan backend library not found in jniLibs for ${ANDROID_ABI}")
|
|
101
116
|
endif()
|
|
102
117
|
|
|
103
|
-
# Check if OpenCL backend library is
|
|
118
|
+
# Check if OpenCL backend library is present (for compile-time feature enablement)
|
|
104
119
|
set(OPENCL_BACKEND_AVAILABLE FALSE)
|
|
105
|
-
# Check only for libggml-opencl.so - libOpenCL.so is a system library (not shipped)
|
|
106
120
|
if(EXISTS ${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-opencl.so)
|
|
107
121
|
set(OPENCL_BACKEND_AVAILABLE TRUE)
|
|
108
|
-
message(STATUS "OpenCL backend library found for ${ANDROID_ABI}")
|
|
109
|
-
message(STATUS "Note:
|
|
122
|
+
message(STATUS "OpenCL backend library found in jniLibs for ${ANDROID_ABI} (compile-time)")
|
|
123
|
+
message(STATUS " Note: Runtime availability requires system libOpenCL.so (not checked here)")
|
|
124
|
+
message(STATUS " Note: Backend will gracefully fail to load if device lacks OpenCL support")
|
|
110
125
|
else()
|
|
111
|
-
message(STATUS "OpenCL backend library not found for ${ANDROID_ABI}")
|
|
126
|
+
message(STATUS "OpenCL backend library not found in jniLibs for ${ANDROID_ABI}")
|
|
112
127
|
endif()
|
|
113
128
|
|
|
114
|
-
#
|
|
115
|
-
# CPU backend
|
|
116
|
-
# GPU backends (OpenCL, Vulkan)
|
|
129
|
+
# Dynamic backend approach: ALL backends (CPU + GPU) are dynamically loaded
|
|
130
|
+
# With GGML_BACKEND_DL=ON, CPU backend is built as libggml-cpu.so (separate from libggml.so)
|
|
131
|
+
# GPU backends (OpenCL, Vulkan) are also built as separate .so files
|
|
132
|
+
# All backends are loaded dynamically via ggml_backend_load_all() at runtime
|
|
117
133
|
target_compile_definitions(common PRIVATE
|
|
118
|
-
-DGGML_BACKEND_DL=1 # Enable dynamic loading for
|
|
119
|
-
-DGGML_CPU=1 # CPU backend
|
|
134
|
+
-DGGML_BACKEND_DL=1 # Enable dynamic loading for ALL backends (CPU + GPU)
|
|
135
|
+
-DGGML_CPU=1 # CPU backend enabled (built as libggml-cpu.so when GGML_BACKEND_DL=ON)
|
|
136
|
+
# NOTE: Do NOT define GGML_USE_CPU=1 when GGML_BACKEND_DL=ON
|
|
137
|
+
# CPU backend is loaded dynamically via ggml_backend_load_all()
|
|
120
138
|
)
|
|
121
139
|
target_compile_definitions(RNLlamaCpp PRIVATE
|
|
122
|
-
-DGGML_BACKEND_DL=1 # Enable dynamic loading for
|
|
123
|
-
-DGGML_CPU=1 # CPU backend
|
|
140
|
+
-DGGML_BACKEND_DL=1 # Enable dynamic loading for ALL backends (CPU + GPU)
|
|
141
|
+
-DGGML_CPU=1 # CPU backend enabled (built as libggml-cpu.so when GGML_BACKEND_DL=ON)
|
|
142
|
+
# NOTE: Do NOT define GGML_USE_CPU=1 when GGML_BACKEND_DL=ON
|
|
143
|
+
# CPU backend is loaded dynamically via ggml_backend_load_all()
|
|
124
144
|
-DANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON # Support Android 15+ 16KB page sizes
|
|
125
145
|
)
|
|
126
146
|
|
|
@@ -179,7 +199,7 @@ target_link_libraries(
|
|
|
179
199
|
llama # Link against the imported prebuilt core llama library
|
|
180
200
|
ggml-base # Link against the imported GGML base library
|
|
181
201
|
ggml # Link against the imported GGML library
|
|
182
|
-
ggml-cpu
|
|
202
|
+
# NOTE: ggml-cpu is NOT linked - CPU backend is dynamically loaded from libggml-cpu.so
|
|
183
203
|
jsi
|
|
184
204
|
reactnative
|
|
185
205
|
fbjni
|
|
@@ -212,10 +232,16 @@ add_custom_command(TARGET RNLlamaCpp POST_BUILD
|
|
|
212
232
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
|
213
233
|
${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml.so
|
|
214
234
|
$<TARGET_FILE_DIR:RNLlamaCpp>/libggml.so
|
|
235
|
+
COMMENT "Copying dependency libraries to build output directory"
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
# libggml-cpu.so is REQUIRED when GGML_BACKEND_DL=ON (CPU backend is dynamically loaded)
|
|
239
|
+
# Copy it so it gets packaged into the APK
|
|
240
|
+
add_custom_command(TARGET RNLlamaCpp POST_BUILD
|
|
215
241
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
|
216
242
|
${JNI_LIBS_DIR}/${ANDROID_ABI}/libggml-cpu.so
|
|
217
243
|
$<TARGET_FILE_DIR:RNLlamaCpp>/libggml-cpu.so
|
|
218
|
-
COMMENT "Copying
|
|
244
|
+
COMMENT "Copying libggml-cpu.so (REQUIRED for CPU backend when GGML_BACKEND_DL=ON)"
|
|
219
245
|
)
|
|
220
246
|
|
|
221
247
|
# Also copy any optional GPU libraries if they exist
|
|
@@ -1,3 +1,20 @@
|
|
|
1
1
|
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
|
2
2
|
package="com.novastera.llamarn">
|
|
3
|
+
|
|
4
|
+
<!-- GPU backends we ship -->
|
|
5
|
+
<uses-native-library
|
|
6
|
+
android:name="libggml-opencl.so"
|
|
7
|
+
android:required="false" />
|
|
8
|
+
<uses-native-library
|
|
9
|
+
android:name="libggml-vulkan.so"
|
|
10
|
+
android:required="false" />
|
|
11
|
+
|
|
12
|
+
<!-- System loaders (only mapped if present on device) -->
|
|
13
|
+
<uses-native-library
|
|
14
|
+
android:name="libOpenCL.so"
|
|
15
|
+
android:required="false" />
|
|
16
|
+
<uses-native-library
|
|
17
|
+
android:name="libvulkan.so"
|
|
18
|
+
android:required="false" />
|
|
19
|
+
|
|
3
20
|
</manifest>
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/cpp/PureCppImpl.cpp
CHANGED
|
@@ -8,6 +8,9 @@
|
|
|
8
8
|
#include <unordered_map>
|
|
9
9
|
#include <utility>
|
|
10
10
|
#include <thread>
|
|
11
|
+
#include <cstdio>
|
|
12
|
+
#include <cstring>
|
|
13
|
+
#include <cerrno>
|
|
11
14
|
#include "SystemUtils.h"
|
|
12
15
|
// Include our custom headers - this was missing!
|
|
13
16
|
#include "rn-llama.h"
|
|
@@ -17,6 +20,18 @@
|
|
|
17
20
|
|
|
18
21
|
#if defined(__ANDROID__) || defined(__linux__)
|
|
19
22
|
#include <unistd.h>
|
|
23
|
+
#include <dlfcn.h>
|
|
24
|
+
#include <android/log.h>
|
|
25
|
+
#define LOG_TAG "RNLlamaCpp"
|
|
26
|
+
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
|
|
27
|
+
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
|
|
28
|
+
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
|
|
29
|
+
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
|
|
30
|
+
#else
|
|
31
|
+
#define LOGI(...) fprintf(stderr, __VA_ARGS__)
|
|
32
|
+
#define LOGE(...) fprintf(stderr, __VA_ARGS__)
|
|
33
|
+
#define LOGW(...) fprintf(stderr, __VA_ARGS__)
|
|
34
|
+
#define LOGD(...) fprintf(stderr, __VA_ARGS__)
|
|
20
35
|
#endif
|
|
21
36
|
|
|
22
37
|
// Include the llama.cpp headers directly
|
|
@@ -72,6 +87,42 @@ jsi::Value PureCppImpl::loadLlamaModelInfo(jsi::Runtime &runtime, jsi::String mo
|
|
|
72
87
|
// Launch background thread for model info loading
|
|
73
88
|
std::thread([selfPtr, path, resolve, reject, runtimePtr, invoker]() {
|
|
74
89
|
try {
|
|
90
|
+
// Set up logging callback to capture llama.cpp error messages
|
|
91
|
+
llama_log_set([](enum ggml_log_level level, const char * text, void * /* user_data */) {
|
|
92
|
+
if (level >= GGML_LOG_LEVEL_ERROR) {
|
|
93
|
+
LOGE("llama.cpp: %s", text);
|
|
94
|
+
}
|
|
95
|
+
}, nullptr);
|
|
96
|
+
|
|
97
|
+
// Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
|
|
98
|
+
// With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
|
|
99
|
+
// CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
|
|
100
|
+
// On Android, dlopen() can load libraries by name even from inside APKs
|
|
101
|
+
#ifdef __ANDROID__
|
|
102
|
+
// Load CPU backend directly - Android's linker will find it in the same directory
|
|
103
|
+
void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
|
|
104
|
+
if (cpu_handle) {
|
|
105
|
+
typedef ggml_backend_reg_t (*backend_init_fn_t)();
|
|
106
|
+
backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
|
|
107
|
+
if (backend_init) {
|
|
108
|
+
ggml_backend_reg_t cpu_backend = backend_init();
|
|
109
|
+
if (cpu_backend) {
|
|
110
|
+
ggml_backend_register(cpu_backend);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
|
|
116
|
+
ggml_backend_load_all();
|
|
117
|
+
#else
|
|
118
|
+
ggml_backend_load_all();
|
|
119
|
+
#endif
|
|
120
|
+
|
|
121
|
+
// Verify that at least one backend (normally the CPU backend) was registered
|
|
122
|
+
if (ggml_backend_reg_count() == 0) {
|
|
123
|
+
throw std::runtime_error("No backends registered - CPU backend library not found");
|
|
124
|
+
}
|
|
125
|
+
|
|
75
126
|
// Initialize llama backend
|
|
76
127
|
llama_backend_init();
|
|
77
128
|
|
|
@@ -313,7 +364,35 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
313
364
|
try {
|
|
314
365
|
// Thread-safe access to member variables
|
|
315
366
|
std::lock_guard<std::mutex> lock(selfPtr->mutex_);
|
|
316
|
-
|
|
367
|
+
|
|
368
|
+
// Load all available backends (CPU is dynamically loaded when GGML_BACKEND_DL is enabled)
|
|
369
|
+
// With GGML_BACKEND_DL=ON, ALL backends (CPU + GPU) are dynamically loaded
|
|
370
|
+
// CPU backend is in libggml-cpu.so, GPU backends are in libggml-opencl.so, libggml-vulkan.so
|
|
371
|
+
#ifdef __ANDROID__
|
|
372
|
+
// Load CPU backend directly - Android's linker will find it in the same directory
|
|
373
|
+
void* cpu_handle = dlopen("libggml-cpu.so", RTLD_LAZY | RTLD_LOCAL);
|
|
374
|
+
if (cpu_handle) {
|
|
375
|
+
typedef ggml_backend_reg_t (*backend_init_fn_t)();
|
|
376
|
+
backend_init_fn_t backend_init = (backend_init_fn_t)dlsym(cpu_handle, "ggml_backend_init");
|
|
377
|
+
if (backend_init) {
|
|
378
|
+
ggml_backend_reg_t cpu_backend = backend_init();
|
|
379
|
+
if (cpu_backend) {
|
|
380
|
+
ggml_backend_register(cpu_backend);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// Load GPU backends (OpenCL, Vulkan) if present - they will be found by name
|
|
386
|
+
ggml_backend_load_all();
|
|
387
|
+
#else
|
|
388
|
+
ggml_backend_load_all();
|
|
389
|
+
#endif
|
|
390
|
+
|
|
391
|
+
// Verify that at least one backend (normally the CPU backend) was registered
|
|
392
|
+
if (ggml_backend_reg_count() == 0) {
|
|
393
|
+
throw std::runtime_error("No backends registered - CPU backend library not found");
|
|
394
|
+
}
|
|
395
|
+
|
|
317
396
|
// Initialize llama backend
|
|
318
397
|
llama_backend_init();
|
|
319
398
|
|
|
@@ -374,8 +453,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
374
453
|
} catch (const std::exception& e) {
|
|
375
454
|
// If we were trying to use GPU and got an error, retry with CPU-only
|
|
376
455
|
if (params.n_gpu_layers > 0) {
|
|
377
|
-
fprintf(stderr, "GPU initialization failed (%s), retrying with CPU-only\n", e.what());
|
|
378
|
-
|
|
379
456
|
params.n_gpu_layers = 0;
|
|
380
457
|
|
|
381
458
|
try {
|
|
@@ -384,8 +461,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
384
461
|
if (!result.model || !result.context) {
|
|
385
462
|
throw std::runtime_error("Failed to initialize model and context even with CPU-only mode");
|
|
386
463
|
}
|
|
387
|
-
|
|
388
|
-
fprintf(stderr, "Successfully recovered with CPU-only mode after GPU failure\n");
|
|
389
464
|
} catch (const std::exception& cpu_e) {
|
|
390
465
|
throw std::runtime_error(std::string("Model initialization failed: ") + cpu_e.what());
|
|
391
466
|
}
|
|
@@ -477,7 +552,6 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
477
552
|
} catch (const std::exception& e) {
|
|
478
553
|
// Schedule error callback on JS thread
|
|
479
554
|
std::string errorMsg(e.what());
|
|
480
|
-
fprintf(stderr, "initLlama error: %s\n", errorMsg.c_str());
|
|
481
555
|
invoker->invokeAsync([reject, errorMsg, runtimePtr]() {
|
|
482
556
|
try {
|
|
483
557
|
reject->call(*runtimePtr, jsi::String::createFromUtf8(*runtimePtr, errorMsg));
|
package/cpp/build-info.cpp
CHANGED
|
@@ -4183,6 +4183,21 @@ class Qwen3MoeModel(Qwen2MoeModel):
|
|
|
4183
4183
|
super().set_vocab()
|
|
4184
4184
|
|
|
4185
4185
|
|
|
4186
|
+
@ModelBase.register("RND1")
|
|
4187
|
+
class RND1Model(Qwen2MoeModel):
|
|
4188
|
+
model_arch = gguf.MODEL_ARCH.RND1
|
|
4189
|
+
|
|
4190
|
+
def set_gguf_parameters(self):
|
|
4191
|
+
super().set_gguf_parameters()
|
|
4192
|
+
|
|
4193
|
+
# RND1 specific parameters
|
|
4194
|
+
# RND1 uses bidirectional attention
|
|
4195
|
+
self.gguf_writer.add_causal_attention(False)
|
|
4196
|
+
|
|
4197
|
+
if (mask_token_id := self.hparams.get("mask_token_id")) is not None:
|
|
4198
|
+
self.gguf_writer.add_mask_token_id(mask_token_id)
|
|
4199
|
+
|
|
4200
|
+
|
|
4186
4201
|
@ModelBase.register("Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration")
|
|
4187
4202
|
class Qwen3VLVisionModel(MmprojModel):
|
|
4188
4203
|
def __init__(self, *args, **kwargs):
|
|
@@ -2303,9 +2303,9 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
|
|
|
2303
2303
|
// calculate rope cache for first layer in current device.
|
|
2304
2304
|
cann_ctx->rope_cache.cached = false;
|
|
2305
2305
|
|
|
2306
|
+
bool cann_graph_update_required = false;
|
|
2306
2307
|
#ifdef USE_ACL_GRAPH
|
|
2307
2308
|
bool use_cann_graph = true;
|
|
2308
|
-
bool cann_graph_update_required = false;
|
|
2309
2309
|
|
|
2310
2310
|
static bool prefill_use_graph = parse_bool(get_env("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
|
|
2311
2311
|
if (!prefill_use_graph) {
|
|
@@ -2336,7 +2336,6 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
|
|
|
2336
2336
|
}
|
|
2337
2337
|
#else
|
|
2338
2338
|
bool use_cann_graph = false;
|
|
2339
|
-
bool cann_graph_update_required = false;
|
|
2340
2339
|
#endif // USE_ACL_GRAPH
|
|
2341
2340
|
evaluate_and_capture_cann_graph(cann_ctx, cgraph, use_cann_graph, cann_graph_update_required);
|
|
2342
2341
|
|
|
@@ -43,6 +43,14 @@ set(HTP_CMAKE_ARGS
|
|
|
43
43
|
-DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT}
|
|
44
44
|
-DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG})
|
|
45
45
|
|
|
46
|
+
ExternalProject_Add(htp-v68
|
|
47
|
+
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
48
|
+
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68")
|
|
49
|
+
|
|
50
|
+
ExternalProject_Add(htp-v69
|
|
51
|
+
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
52
|
+
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69")
|
|
53
|
+
|
|
46
54
|
ExternalProject_Add(htp-v73
|
|
47
55
|
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
48
56
|
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73")
|
|
@@ -61,6 +69,8 @@ ExternalProject_Add(htp-v81
|
|
|
61
69
|
|
|
62
70
|
# Install Hexagon skels required at runtime
|
|
63
71
|
install(FILES
|
|
72
|
+
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so
|
|
73
|
+
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so
|
|
64
74
|
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so
|
|
65
75
|
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so
|
|
66
76
|
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so
|