@novastera-oss/llamarn 0.4.0 → 0.4.1
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +1 -1
- package/android/CMakeLists.txt +11 -3
- package/cpp/LlamaCppModel.cpp +2 -10
- package/cpp/SystemUtils.cpp +3 -7
- package/cpp/rn-completion.cpp +3 -27
- package/package.json +1 -1
package/RNLlamaCpp.podspec
CHANGED
|
@@ -53,7 +53,7 @@ Pod::Spec.new do |s|
|
|
|
53
53
|
# Compiler settings
|
|
54
54
|
s.pod_target_xcconfig = {
|
|
55
55
|
"HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/vendor\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
|
|
56
|
-
"OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
|
|
56
|
+
"OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DFOLLY_CFG_NO_COROUTINES=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
|
|
57
57
|
"CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
|
|
58
58
|
"GCC_OPTIMIZATION_LEVEL" => "3", # Maximum optimization
|
|
59
59
|
"SWIFT_OPTIMIZATION_LEVEL" => "-O",
|
package/android/CMakeLists.txt
CHANGED
|
@@ -78,9 +78,17 @@ add_library(
|
|
|
78
78
|
${CPP_DIR}/rn-completion.cpp
|
|
79
79
|
)
|
|
80
80
|
|
|
81
|
-
# Suppress
|
|
82
|
-
target_compile_options(common PRIVATE
|
|
83
|
-
|
|
81
|
+
# Suppress additional warnings that are treated as errors in Expo SDK 54
|
|
82
|
+
target_compile_options(common PRIVATE )
|
|
83
|
+
|
|
84
|
+
# Use React Native's compile options function for proper C++ flags and RN_SERIALIZABLE_STATE
|
|
85
|
+
if(ReactAndroid_VERSION_MINOR GREATER_EQUAL 80)
|
|
86
|
+
# Add additional warning suppressions for RNLlamaCpp target
|
|
87
|
+
target_compile_reactnative_options(RNLlamaCpp PRIVATE)
|
|
88
|
+
target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
|
|
89
|
+
else()
|
|
90
|
+
target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
|
|
91
|
+
endif()
|
|
84
92
|
|
|
85
93
|
# Check if Vulkan backend library is available
|
|
86
94
|
set(VULKAN_BACKEND_AVAILABLE FALSE)
|
package/cpp/LlamaCppModel.cpp
CHANGED
|
@@ -948,16 +948,8 @@ jsi::Value LlamaCppModel::embeddingJsi(jsi::Runtime& rt, const jsi::Value* args,
|
|
|
948
948
|
throw std::runtime_error("Invalid embedding dimension");
|
|
949
949
|
}
|
|
950
950
|
|
|
951
|
-
//
|
|
952
|
-
|
|
953
|
-
if (options.hasProperty(rt, "pooling") && options.getProperty(rt, "pooling").isString()) {
|
|
954
|
-
std::string pooling = options.getProperty(rt, "pooling").getString(rt).utf8(rt);
|
|
955
|
-
if (pooling == "last") {
|
|
956
|
-
pooling_type = LLAMA_POOLING_TYPE_LAST;
|
|
957
|
-
} else if (pooling == "cls" || pooling == "first") {
|
|
958
|
-
pooling_type = LLAMA_POOLING_TYPE_CLS;
|
|
959
|
-
}
|
|
960
|
-
}
|
|
951
|
+
// Note: Pooling is handled automatically by llama_get_embeddings()
|
|
952
|
+
// The function returns the appropriate embedding based on the model's configuration
|
|
961
953
|
|
|
962
954
|
// Get the embeddings
|
|
963
955
|
std::vector<float> embedding_vec(n_embd);
|
package/cpp/SystemUtils.cpp
CHANGED
|
@@ -20,10 +20,6 @@
|
|
|
20
20
|
|
|
21
21
|
namespace facebook::react {
|
|
22
22
|
|
|
23
|
-
// Memory fallback constants (clearly defined for future maintenance)
|
|
24
|
-
constexpr int64_t FALLBACK_IOS_MEMORY = 2LL * 1024 * 1024 * 1024; // 2GB default
|
|
25
|
-
constexpr int64_t FALLBACK_ANDROID_MEMORY = 3LL * 1024 * 1024 * 1024; // 3GB for Android
|
|
26
|
-
constexpr int64_t DEFAULT_FALLBACK_MEMORY = 2LL * 1024 * 1024 * 1024; // 2GB default
|
|
27
23
|
|
|
28
24
|
int SystemUtils::getOptimalThreadCount() {
|
|
29
25
|
int cpuCores = std::thread::hardware_concurrency();
|
|
@@ -85,11 +81,11 @@ int64_t getTotalPhysicalMemory() {
|
|
|
85
81
|
// Fallback to a conservative estimate if we couldn't get the actual memory
|
|
86
82
|
if (total_memory <= 0) {
|
|
87
83
|
#if defined(__APPLE__) && TARGET_OS_IPHONE
|
|
88
|
-
total_memory =
|
|
84
|
+
total_memory = 2LL * 1024 * 1024 * 1024; // 2GB default for iOS
|
|
89
85
|
#elif defined(__ANDROID__)
|
|
90
|
-
total_memory =
|
|
86
|
+
total_memory = 3LL * 1024 * 1024 * 1024; // 3GB default for Android
|
|
91
87
|
#else
|
|
92
|
-
total_memory =
|
|
88
|
+
total_memory = 2LL * 1024 * 1024 * 1024; // 2GB default for other platforms
|
|
93
89
|
#endif
|
|
94
90
|
}
|
|
95
91
|
|
package/cpp/rn-completion.cpp
CHANGED
|
@@ -255,7 +255,7 @@ CompletionResult run_completion(
|
|
|
255
255
|
}
|
|
256
256
|
|
|
257
257
|
// Start generating tokens
|
|
258
|
-
|
|
258
|
+
// Note: Timing variables removed as they were not being used
|
|
259
259
|
|
|
260
260
|
while (state.has_next_token && state.n_remaining > 0) {
|
|
261
261
|
// Sample the next token
|
|
@@ -323,9 +323,7 @@ CompletionResult run_completion(
|
|
|
323
323
|
}
|
|
324
324
|
}
|
|
325
325
|
|
|
326
|
-
|
|
327
|
-
// Note: keeping generation_time_ms for future timing measurements
|
|
328
|
-
// const double generation_time_ms = (t_end_generation - t_start_generation) / 1000.0;
|
|
326
|
+
// Note: Timing measurements removed as they were not being used
|
|
329
327
|
|
|
330
328
|
// Set the result
|
|
331
329
|
result.content = state.generated_text;
|
|
@@ -428,27 +426,7 @@ CompletionResult run_chat_completion(
|
|
|
428
426
|
// Default to grammar_triggers provided by chat_params
|
|
429
427
|
cmpl_options.grammar_triggers = chat_params.grammar_triggers;
|
|
430
428
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
// Add a debug log to observe final grammar_lazy and grammar_triggers
|
|
434
|
-
/*
|
|
435
|
-
if (callback) {
|
|
436
|
-
std::string tool_choice_str;
|
|
437
|
-
switch (template_inputs.tool_choice) {
|
|
438
|
-
case COMMON_CHAT_TOOL_CHOICE_AUTO: tool_choice_str = "auto"; break;
|
|
439
|
-
case COMMON_CHAT_TOOL_CHOICE_NONE: tool_choice_str = "none"; break;
|
|
440
|
-
case COMMON_CHAT_TOOL_CHOICE_REQUIRED: tool_choice_str = "required"; break;
|
|
441
|
-
default: tool_choice_str = "unknown"; break;
|
|
442
|
-
}
|
|
443
|
-
std::string debug_msg = "[DEBUG CHAT_PARAMS] grammar_lazy: " +
|
|
444
|
-
std::string(cmpl_options.grammar_lazy ? "true" : "false") +
|
|
445
|
-
" | grammar_triggers_count: " + std::to_string(cmpl_options.grammar_triggers.size()) + // Log triggers from cmpl_options
|
|
446
|
-
" | For Tool Choice: " + tool_choice_str +
|
|
447
|
-
" | Parallel Tool Calls: " + std::string(template_inputs.parallel_tool_calls ? "true" : "false") +
|
|
448
|
-
" | Original chat_params.grammar_lazy: " + std::string(original_grammar_lazy ? "true" : "false"); // Log original lazy
|
|
449
|
-
callback(debug_msg, false);
|
|
450
|
-
}
|
|
451
|
-
*/
|
|
429
|
+
// Note: Debug logging removed as it was not being used
|
|
452
430
|
}
|
|
453
431
|
|
|
454
432
|
// Run standard completion with the processed prompt
|
|
@@ -534,5 +512,3 @@ CompletionResult run_chat_completion(
|
|
|
534
512
|
}
|
|
535
513
|
|
|
536
514
|
} // namespace facebook::react
|
|
537
|
-
|
|
538
|
-
|