cui-llama.rn 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +23 -19
- package/android/src/main/java/com/rnllama/LlamaContext.java +32 -21
- package/cpp/common.cpp +4 -11
- package/cpp/common.h +1 -1
- package/cpp/ggml-aarch64.c +2193 -2193
- package/cpp/ggml-aarch64.h +39 -39
- package/cpp/ggml-alloc.c +1042 -1041
- package/cpp/ggml-backend-impl.h +153 -153
- package/cpp/ggml-backend.c +2234 -2225
- package/cpp/ggml-backend.h +238 -236
- package/cpp/ggml-common.h +1829 -1829
- package/cpp/ggml-impl.h +655 -655
- package/cpp/ggml-metal.h +65 -65
- package/cpp/ggml-metal.m +3269 -3273
- package/cpp/ggml-quants.c +14860 -15022
- package/cpp/ggml-quants.h +132 -132
- package/cpp/ggml.c +16 -6
- package/cpp/ggml.h +2447 -2444
- package/cpp/llama.cpp +634 -531
- package/cpp/llama.h +30 -14
- package/cpp/log.h +737 -737
- package/cpp/rn-llama.hpp +9 -1
- package/cpp/sampling.cpp +460 -460
- package/cpp/sgemm.cpp +1027 -1027
- package/cpp/sgemm.h +14 -14
- package/package.json +1 -1
package/android/src/main/CMakeLists.txt
CHANGED
@@ -23,13 +23,14 @@ set(
     ${RNLLAMA_LIB_DIR}/unicode.cpp
     ${RNLLAMA_LIB_DIR}/llama.cpp
     ${RNLLAMA_LIB_DIR}/sgemm.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-aarch64.c
     ${RNLLAMA_LIB_DIR}/rn-llama.hpp
     ${CMAKE_SOURCE_DIR}/jni.cpp
 )
 
 find_library(LOG_LIB log)
 
-function(build_library target_name)
+function(build_library target_name cpu_flags)
     add_library(
         ${target_name}
         SHARED
@@ -38,32 +39,35 @@ function(build_library target_name)
 
     target_link_libraries(${target_name} ${LOG_LIB} android)
 
-    target_compile_options(${target_name} PRIVATE -pthread)
-
-    if (${target_name} STREQUAL "rnllama_v8fp16_va")
-        target_compile_options(${target_name} PRIVATE -march=armv8.4-a+fp16+dotprod)
-    endif ()
+    target_compile_options(${target_name} PRIVATE -pthread ${cpu_flags})
 
     if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
         target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
     endif ()
 
-    #
-
+    #if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+    target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG)
+    target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
+    target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
 
-
-
-
-
-    target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
-    target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
-    target_link_options(${target_name} PRIVATE -flto)
-
-    # endif ()
+    target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
+    target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
+    target_link_options(${target_name} PRIVATE -flto)
+    #endif ()
 endfunction()
 
-
+# Default target (no specific CPU features)
+build_library("rnllama" "")
 
 if (${ANDROID_ABI} STREQUAL "arm64-v8a")
-
+    # ARM64 targets
+    build_library("rnllama_v8_4_fp16_dotprod_i8mm" "-march=armv8.4-a+fp16+dotprod+i8mm")
+    build_library("rnllama_v8_4_fp16_dotprod" "-march=armv8.4-a+fp16+dotprod")
+    build_library("rnllama_v8_2_fp16_dotprod" "-march=armv8.2-a+fp16+dotprod")
+    build_library("rnllama_v8_2_fp16" "-march=armv8.2-a+fp16")
+    build_library("rnllama_v8" "-march=armv8-a")
+elseif (${ANDROID_ABI} STREQUAL "x86_64")
+    # x86_64 target
+    build_library("rnllama_x86_64" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+
 endif ()
package/android/src/main/java/com/rnllama/LlamaContext.java
CHANGED
@@ -274,28 +274,36 @@ public class LlamaContext {
   static {
     Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
     if (LlamaContext.isArm64V8a()) {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      String cpuFeatures = LlamaContext.getCpuFeatures();
+      Log.d(NAME, "CPU features: " + cpuFeatures);
+
+      boolean hasFp16 = cpuFeatures.contains("fp16") || cpuFeatures.contains("fphp");
+      boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
+      boolean isAtLeastArmV82 = cpuFeatures.contains("asimd") && cpuFeatures.contains("crc32") && cpuFeatures.contains("aes");
+      boolean isAtLeastArmV84 = cpuFeatures.contains("dcpop") && cpuFeatures.contains("uscat");
+      boolean hasInt8Matmul = cpuFeatures.contains("i8mm");
+
+      if (isAtLeastArmV84 && hasFp16 && hasDotProd && hasInt8Matmul) {
+        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod_i8mm.so");
+        System.loadLibrary("rnllama_v8_4_fp16_dotprod_i8mm");
+      } else if (isAtLeastArmV84 && hasFp16 && hasDotProd) {
+        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod.so");
+        System.loadLibrary("rnllama_v8_4_fp16_dotprod");
+      } else if (isAtLeastArmV82 && hasFp16 && hasDotProd) {
+        Log.d(NAME, "Loading librnllama_v8_2_fp16_dotprod.so");
+        System.loadLibrary("rnllama_v8_2_fp16_dotprod");
+      } else if (isAtLeastArmV82 && hasFp16) {
+        Log.d(NAME, "Loading librnllama_v8_2_fp16.so");
+        System.loadLibrary("rnllama_v8_2_fp16");
       } else {
-        Log.d(NAME, "Loading
-        System.loadLibrary("
+        Log.d(NAME, "Loading librnllama_v8.so");
+        System.loadLibrary("rnllama_v8");
       }
     } else if (LlamaContext.isX86_64()) {
-      Log.d(NAME, "Loading
+      Log.d(NAME, "Loading librnllama_x86_64.so");
+      System.loadLibrary("rnllama_x86_64");
+    } else {
+      Log.d(NAME, "Loading default librnllama.so");
       System.loadLibrary("rnllama");
     }
   }
@@ -308,20 +316,23 @@ public class LlamaContext {
     return Build.SUPPORTED_ABIS[0].equals("x86_64");
   }
 
-  private static String
+  private static String getCpuFeatures() {
    File file = new File("/proc/cpuinfo");
    StringBuilder stringBuilder = new StringBuilder();
    try {
      BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
      String line;
      while ((line = bufferedReader.readLine()) != null) {
+        if (line.startsWith("Features")) {
          stringBuilder.append(line);
+          break;
+        }
      }
      bufferedReader.close();
      return stringBuilder.toString();
    } catch (IOException e) {
      Log.w(NAME, "Couldn't read /proc/cpuinfo", e);
-      return
+      return "";
    }
  }
 
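Taken together with the CMake change above, the loader now builds one shared library per CPU feature set and picks among them at runtime by parsing the `Features` line of `/proc/cpuinfo`. Below is a minimal, illustrative C++ sketch of that same selection logic; the helper `pick_rnllama_variant()` is hypothetical and not part of the package.

```cpp
// Illustrative only: a standalone C++ sketch of the /proc/cpuinfo-based
// selection the Java static initializer above performs.
#include <fstream>
#include <iostream>
#include <string>

static std::string pick_rnllama_variant(const std::string & features) {
    auto has = [&](const char * f) { return features.find(f) != std::string::npos; };

    bool fp16    = has("fp16") || has("fphp");
    bool dotprod = has("dotprod") || has("asimddp");
    bool v8_2    = has("asimd") && has("crc32") && has("aes");
    bool v8_4    = has("dcpop") && has("uscat");
    bool i8mm    = has("i8mm");

    // Same cascade as the Java code: prefer the most capable variant.
    if (v8_4 && fp16 && dotprod && i8mm) return "rnllama_v8_4_fp16_dotprod_i8mm";
    if (v8_4 && fp16 && dotprod)         return "rnllama_v8_4_fp16_dotprod";
    if (v8_2 && fp16 && dotprod)         return "rnllama_v8_2_fp16_dotprod";
    if (v8_2 && fp16)                    return "rnllama_v8_2_fp16";
    return "rnllama_v8";
}

int main() {
    std::ifstream cpuinfo("/proc/cpuinfo");
    std::string line, features;
    while (std::getline(cpuinfo, line)) {
        if (line.rfind("Features", 0) == 0) { features = line; break; }
    }
    std::cout << "would load lib" << pick_rnllama_variant(features) << ".so\n";
    return 0;
}
```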
package/cpp/common.cpp
CHANGED
@@ -691,7 +691,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     if (arg == "--lora") {
         CHECK_ARG
         params.lora_adapter.emplace_back(argv[i], 1.0f);
-        params.use_mmap = false;
         return true;
     }
     if (arg == "--lora-scaled") {
@@ -699,7 +698,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         const char* lora_adapter = argv[i];
         CHECK_ARG
         params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
-        params.use_mmap = false;
         return true;
     }
     if (arg == "--lora-base") {
@@ -2095,19 +2093,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
-
-
-                                             lora_scale,
-                                             ((i > 0) || params.lora_base.empty())
-                                                 ? NULL
-                                                 : params.lora_base.c_str(),
-                                             params.n_threads);
-        if (err != 0) {
+        auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
+        if (adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
+        llama_lora_adapter_set(lctx, adapter, lora_scale);
     }
 
     if (params.ignore_eos) {
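The LoRA handling above replaces the older apply-at-load path (note the removed `params.use_mmap = false;` lines in the `--lora` handling) with llama.cpp's adapter-object API: the adapter is loaded once against the model and then attached to the context with a scale. A minimal sketch of that flow follows; it assumes `model` and `lctx` were already created, and error handling is trimmed.

```cpp
// Sketch of the adapter-based LoRA flow used in the hunk above. Assumes the
// model and context already exist (e.g. via llama_load_model_from_file() and
// llama_new_context_with_model()).
#include "llama.h"
#include <cstdio>

bool apply_lora(llama_model * model, llama_context * lctx,
                const char * lora_path, float lora_scale) {
    // Load the adapter once against the model...
    llama_lora_adapter * adapter = llama_lora_adapter_init(model, lora_path);
    if (adapter == nullptr) {
        fprintf(stderr, "failed to load lora adapter: %s\n", lora_path);
        return false;
    }
    // ...then attach it to the context with a scaling factor. Unlike the old
    // apply-from-file path, this no longer requires mmap to be disabled.
    llama_lora_adapter_set(lctx, adapter, lora_scale);
    return true;
}
```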
@@ -2146,8 +2139,8 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
 struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params) {
     auto mparams = llama_model_default_params();
+    mparams.vocab_only = params.vocab_only;
 
-    mparams.vocab_only = params.vocab_only;
     if (params.n_gpu_layers != -1) {
         mparams.n_gpu_layers = params.n_gpu_layers;
     }
package/cpp/common.h
CHANGED
@@ -72,7 +72,7 @@ enum dimre_method {
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
 
-
+    bool vocab_only = false;
     int32_t n_threads = cpu_get_num_math();
     int32_t n_threads_draft = -1;
     int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
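`gpt_params` gains a `vocab_only` flag, which `llama_model_params_from_gpt_params()` now forwards to `llama_model_params` (see the common.cpp hunk above). As a hedged sketch of what that enables with the stock llama.cpp C API: loading only the vocabulary/tokenizer without allocating the weight tensors, which is useful for cheap token counting. The file name below is a placeholder.

```cpp
// Sketch (not from the package): vocab-only model load via the llama.cpp C API.
#include "llama.h"
#include <cstdio>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    mparams.vocab_only = true;  // skip loading tensor data, keep only the vocab

    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    printf("vocab size: %d\n", llama_n_vocab(model));

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```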
|