cui-llama.rn 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,13 +23,14 @@ set(
     ${RNLLAMA_LIB_DIR}/unicode.cpp
     ${RNLLAMA_LIB_DIR}/llama.cpp
     ${RNLLAMA_LIB_DIR}/sgemm.cpp
+    ${RNLLAMA_LIB_DIR}/ggml-aarch64.c
     ${RNLLAMA_LIB_DIR}/rn-llama.hpp
     ${CMAKE_SOURCE_DIR}/jni.cpp
 )
 
 find_library(LOG_LIB log)
 
-function(build_library target_name)
+function(build_library target_name cpu_flags)
     add_library(
         ${target_name}
         SHARED
@@ -38,32 +39,34 @@ function(build_library target_name)
 
     target_link_libraries(${target_name} ${LOG_LIB} android)
 
-    target_compile_options(${target_name} PRIVATE -pthread)
-
-    if (${target_name} STREQUAL "rnllama_v8fp16_va")
-        target_compile_options(${target_name} PRIVATE -march=armv8.4-a+fp16+dotprod)
-    endif ()
+    target_compile_options(${target_name} PRIVATE -pthread ${cpu_flags})
 
     if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
         target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
     endif ()
 
-    # NOTE: If you want to debug the native code, you can uncomment if and endif
-    # if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+    #if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+    target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG)
+    target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
+    target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
 
-    target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG)
-    target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
-    target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
-
-    target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
-    target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
-    target_link_options(${target_name} PRIVATE -flto)
-
-    # endif ()
+    target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
+    target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
+    target_link_options(${target_name} PRIVATE -flto)
+    #endif ()
 
 endfunction()
 
-build_library("rnllama") # Default target
+# Default target (no specific CPU features)
+build_library("rnllama" "")
 
 if (${ANDROID_ABI} STREQUAL "arm64-v8a")
-    build_library("rnllama_v8fp16_va")
+    # ARM64 targets
+    build_library("rnllama_v8_4_fp16_dotprod" "-march=armv8.4-a+fp16+dotprod")
+    build_library("rnllama_v8_2_fp16_dotprod" "-march=armv8.2-a+fp16+dotprod")
+    build_library("rnllama_v8_2_fp16" "-march=armv8.2-a+fp16")
+    build_library("rnllama_v8" "-march=armv8-a")
+elseif (${ANDROID_ABI} STREQUAL "x86_64")
+    # x86_64 target
+    build_library("rnllama_x86_64" "-march=x86-64" "-mtune=intel" "-msse4.2" "-mpopcnt")
+
 endif ()
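The practical effect of this CMake change is that the arm64-v8a and x86_64 builds now ship several CPU-tiered copies of the native library instead of the single rnllama_v8fp16_va variant. As a quick reference only (this class is not part of the package), the sketch below tabulates the variants each ABI build produces, annotated with the -march flags from the build_library calls above; the generic rnllama target is still built for every ABI as the last-resort fallback.

```java
import java.util.List;
import java.util.Map;

// Illustration only (not part of cui-llama.rn): CPU-specific library variants
// produced per Android ABI by the CMake targets above.
final class RnLlamaVariants {
    static final String FALLBACK = "rnllama"; // default target, no specific CPU flags

    static final Map<String, List<String>> VARIANTS_BY_ABI = Map.of(
            "arm64-v8a", List.of(
                    "rnllama_v8_4_fp16_dotprod", // -march=armv8.4-a+fp16+dotprod
                    "rnllama_v8_2_fp16_dotprod", // -march=armv8.2-a+fp16+dotprod
                    "rnllama_v8_2_fp16",         // -march=armv8.2-a+fp16
                    "rnllama_v8"),               // -march=armv8-a
            "x86_64", List.of(
                    "rnllama_x86_64"));          // -march=x86-64 (plus -msse4.2 -mpopcnt)
}
```

The loader changes in LlamaContext.java below pick one of these variants at runtime.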
@@ -274,28 +274,32 @@ public class LlamaContext {
   static {
     Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
     if (LlamaContext.isArm64V8a()) {
-      boolean loadV8fp16 = false;
-      if (LlamaContext.isArm64V8a()) {
-        // ARMv8.2a needs runtime detection support
-        String cpuInfo = LlamaContext.cpuInfo();
-        if (cpuInfo != null) {
-          Log.d(NAME, "CPU info: " + cpuInfo);
-          if (cpuInfo.contains("fphp")) {
-            Log.d(NAME, "CPU supports fp16 arithmetic");
-            loadV8fp16 = true;
-          }
-        }
-      }
-
-      if (loadV8fp16) {
-        Log.d(NAME, "Loading librnllama_v8fp16_va.so");
-        System.loadLibrary("rnllama_v8fp16_va");
+      String cpuFeatures = LlamaContext.getCpuFeatures();
+      Log.d(NAME, "CPU features: " + cpuFeatures);
+
+      boolean hasFp16 = cpuFeatures.contains("fp16") || cpuFeatures.contains("fphp");
+      boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
+      boolean isAtLeastArmV82 = cpuFeatures.contains("asimd") && cpuFeatures.contains("crc32") && cpuFeatures.contains("aes");
+      boolean isAtLeastArmV84 = cpuFeatures.contains("dcpop") && cpuFeatures.contains("uscat");
+
+      if (isAtLeastArmV84 && hasFp16 && hasDotProd) {
+        Log.d(NAME, "Loading librnllama_v8_4_fp16_dotprod.so");
+        System.loadLibrary("rnllama_v8_4_fp16_dotprod");
+      } else if (isAtLeastArmV82 && hasFp16 && hasDotProd) {
+        Log.d(NAME, "Loading librnllama_v8_2_fp16_dotprod.so");
+        System.loadLibrary("rnllama_v8_2_fp16_dotprod");
+      } else if (isAtLeastArmV82 && hasFp16) {
+        Log.d(NAME, "Loading librnllama_v8_2_fp16.so");
+        System.loadLibrary("rnllama_v8_2_fp16");
       } else {
-        Log.d(NAME, "Loading librnllama.so");
-        System.loadLibrary("rnllama");
+        Log.d(NAME, "Loading librnllama_v8.so");
+        System.loadLibrary("rnllama_v8");
       }
     } else if (LlamaContext.isX86_64()) {
-      Log.d(NAME, "Loading librnllama.so");
+      Log.d(NAME, "Loading librnllama_x86_64.so");
+      System.loadLibrary("rnllama_x86_64");
+    } else {
+      Log.d(NAME, "Loading default librnllama.so");
       System.loadLibrary("rnllama");
     }
   }
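The decision tree above maps /proc/cpuinfo feature flags to one of the four arm64 builds. A minimal sketch of the same logic as a standalone helper follows; VariantPicker and pickArm64Variant are hypothetical names (the package keeps this logic inline in the static initializer), but the checks are identical, which makes the selection easy to exercise against arbitrary feature strings in a unit test.

```java
// Hypothetical helper, not part of LlamaContext: the arm64 variant selection
// from the static initializer above, factored into a pure function.
final class VariantPicker {
    static String pickArm64Variant(String cpuFeatures) {
        boolean hasFp16 = cpuFeatures.contains("fp16") || cpuFeatures.contains("fphp");
        boolean hasDotProd = cpuFeatures.contains("dotprod") || cpuFeatures.contains("asimddp");
        boolean isAtLeastArmV82 = cpuFeatures.contains("asimd") && cpuFeatures.contains("crc32")
                && cpuFeatures.contains("aes");
        boolean isAtLeastArmV84 = cpuFeatures.contains("dcpop") && cpuFeatures.contains("uscat");

        if (isAtLeastArmV84 && hasFp16 && hasDotProd) return "rnllama_v8_4_fp16_dotprod";
        if (isAtLeastArmV82 && hasFp16 && hasDotProd) return "rnllama_v8_2_fp16_dotprod";
        if (isAtLeastArmV82 && hasFp16) return "rnllama_v8_2_fp16";
        return "rnllama_v8"; // baseline armv8-a build
    }
}
```

Ordering matters here: the most specific build (ARMv8.4 + fp16 + dotprod) is tried first, and any CPU that fails the ARMv8.2 heuristics lands on the baseline armv8-a build.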
@@ -308,20 +312,23 @@ public class LlamaContext {
     return Build.SUPPORTED_ABIS[0].equals("x86_64");
   }
 
-  private static String cpuInfo() {
+  private static String getCpuFeatures() {
     File file = new File("/proc/cpuinfo");
     StringBuilder stringBuilder = new StringBuilder();
     try {
       BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
       String line;
       while ((line = bufferedReader.readLine()) != null) {
+        if (line.startsWith("Features")) {
         stringBuilder.append(line);
+          break;
+        }
       }
       bufferedReader.close();
       return stringBuilder.toString();
     } catch (IOException e) {
       Log.w(NAME, "Couldn't read /proc/cpuinfo", e);
-      return null;
+      return "";
     }
   }
 
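For a concrete picture of what this parser hands back, the snippet below runs a representative arm64 Features line (illustrative only, not taken from the package or any specific device) through the hypothetical VariantPicker sketched above. It also shows why the error path now returns an empty string rather than null: every contains() check simply fails, so an unreadable /proc/cpuinfo degrades to the baseline rnllama_v8 build instead of a NullPointerException in the static initializer.

```java
// Illustration only: a representative "Features" line for an ARMv8.2-class core.
// "fphp"/"asimdhp" indicate fp16 arithmetic and "asimddp" indicates dot product;
// "dcpop" is present but "uscat" is not, so the ARMv8.4 branch is not taken.
class VariantPickerDemo {
    public static void main(String[] args) {
        String sample = "Features\t: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics "
                + "fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp";

        System.out.println(VariantPicker.pickArm64Variant(sample)); // rnllama_v8_2_fp16_dotprod
        System.out.println(VariantPicker.pickArm64Variant(""));     // unreadable cpuinfo -> rnllama_v8
    }
}
```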
 
package/cpp/common.cpp CHANGED
@@ -691,7 +691,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     if (arg == "--lora") {
         CHECK_ARG
         params.lora_adapter.emplace_back(argv[i], 1.0f);
-        params.use_mmap = false;
         return true;
     }
     if (arg == "--lora-scaled") {
@@ -699,7 +698,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         const char* lora_adapter = argv[i];
         CHECK_ARG
         params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
-        params.use_mmap = false;
         return true;
     }
     if (arg == "--lora-base") {
@@ -2095,19 +2093,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
-        int err = llama_model_apply_lora_from_file(model,
-                                                   lora_adapter.c_str(),
-                                                   lora_scale,
-                                                   ((i > 0) || params.lora_base.empty())
-                                                       ? NULL
-                                                       : params.lora_base.c_str(),
-                                                   params.n_threads);
-        if (err != 0) {
+        auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
+        if (adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
+        llama_lora_adapter_set(lctx, adapter, lora_scale);
     }
 
     if (params.ignore_eos) {
@@ -2146,8 +2139,8 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
 struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params) {
     auto mparams = llama_model_default_params();
+    mparams.vocab_only = params.vocab_only;
 
-    mparams.vocab_only = params.vocab_only;
     if (params.n_gpu_layers != -1) {
         mparams.n_gpu_layers = params.n_gpu_layers;
     }
package/cpp/common.h CHANGED
@@ -72,7 +72,7 @@ enum dimre_method {
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
 
-    bool vocab_only = false;
+    bool vocab_only = false;
     int32_t n_threads = cpu_get_num_math();
     int32_t n_threads_draft = -1;
     int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)