npm - @novastera-oss/llamarn - Versions diffs - 0.4.0 → 0.4.1 - Mend

@novastera-oss/llamarn 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/RNLlamaCpp.podspec CHANGED Viewed

@@ -53,7 +53,7 @@ Pod::Spec.new do |s|
   # Compiler settings
   s.pod_target_xcconfig = {
     "HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/vendor\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
-    "OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
+    "OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DFOLLY_CFG_NO_COROUTINES=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
     "CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
     "GCC_OPTIMIZATION_LEVEL" => "3", # Maximum optimization
     "SWIFT_OPTIMIZATION_LEVEL" => "-O",

package/android/CMakeLists.txt CHANGED Viewed

@@ -78,9 +78,17 @@ add_library(
     ${CPP_DIR}/rn-completion.cpp
 )
-# Suppress unused function warnings for llama.cpp code
-target_compile_options(common PRIVATE -Wno-unused-function)
-target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
+# Suppress additional warnings that are treated as errors in Expo SDK 54
+target_compile_options(common PRIVATE )
+# Use React Native's compile options function for proper C++ flags and RN_SERIALIZABLE_STATE
+if(ReactAndroid_VERSION_MINOR GREATER_EQUAL 80)
+    # Add additional warning suppressions for RNLlamaCpp target
+    target_compile_reactnative_options(RNLlamaCpp PRIVATE)
+    target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
+else()
+    target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
+endif()
 # Check if Vulkan backend library is available
 set(VULKAN_BACKEND_AVAILABLE FALSE)

package/cpp/LlamaCppModel.cpp CHANGED Viewed

@@ -948,16 +948,8 @@ jsi::Value LlamaCppModel::embeddingJsi(jsi::Runtime& rt, const jsi::Value* args,
       throw std::runtime_error("Invalid embedding dimension");
     }
-    // For OpenAI compatibility, default to mean pooling
-    enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_MEAN;
-    if (options.hasProperty(rt, "pooling") && options.getProperty(rt, "pooling").isString()) {
-      std::string pooling = options.getProperty(rt, "pooling").getString(rt).utf8(rt);
-      if (pooling == "last") {
-        pooling_type = LLAMA_POOLING_TYPE_LAST;
-      } else if (pooling == "cls" || pooling == "first") {
-        pooling_type = LLAMA_POOLING_TYPE_CLS;
-      }
-    }
+    // Note: Pooling is handled automatically by llama_get_embeddings()
+    // The function returns the appropriate embedding based on the model's configuration
     // Get the embeddings
     std::vector<float> embedding_vec(n_embd);

package/cpp/SystemUtils.cpp CHANGED Viewed

@@ -20,10 +20,6 @@
 namespace facebook::react {
-// Memory fallback constants (clearly defined for future maintenance)
-constexpr int64_t FALLBACK_IOS_MEMORY = 2LL * 1024 * 1024 * 1024;     // 2GB default
-constexpr int64_t FALLBACK_ANDROID_MEMORY = 3LL * 1024 * 1024 * 1024; // 3GB for Android
-constexpr int64_t DEFAULT_FALLBACK_MEMORY = 2LL * 1024 * 1024 * 1024; // 2GB default
 int SystemUtils::getOptimalThreadCount() {
     int cpuCores = std::thread::hardware_concurrency();
@@ -85,11 +81,11 @@ int64_t getTotalPhysicalMemory() {
     // Fallback to a conservative estimate if we couldn't get the actual memory
     if (total_memory <= 0) {
 #if defined(__APPLE__) && TARGET_OS_IPHONE
-        total_memory = FALLBACK_IOS_MEMORY;
+        total_memory = 2LL * 1024 * 1024 * 1024;     // 2GB default for iOS
 #elif defined(__ANDROID__)
-        total_memory = FALLBACK_ANDROID_MEMORY;
+        total_memory = 3LL * 1024 * 1024 * 1024;     // 3GB default for Android
 #else
-        total_memory = DEFAULT_FALLBACK_MEMORY;
+        total_memory = 2LL * 1024 * 1024 * 1024;     // 2GB default for other platforms
 #endif
     }

package/cpp/rn-completion.cpp CHANGED Viewed

@@ -255,7 +255,7 @@ CompletionResult run_completion(
         }
         // Start generating tokens
-        const int64_t t_start_generation = ggml_time_us();
+        // Note: Timing variables removed as they were not being used
         while (state.has_next_token && state.n_remaining > 0) {
             // Sample the next token
@@ -323,9 +323,7 @@ CompletionResult run_completion(
             }
         }
-        const int64_t t_end_generation = ggml_time_us();
-        // Note: keeping generation_time_ms for future timing measurements
-        // const double generation_time_ms = (t_end_generation - t_start_generation) / 1000.0;
+        // Note: Timing measurements removed as they were not being used
         // Set the result
         result.content = state.generated_text;
@@ -428,27 +426,7 @@ CompletionResult run_chat_completion(
             // Default to grammar_triggers provided by chat_params
             cmpl_options.grammar_triggers = chat_params.grammar_triggers;
-            bool original_grammar_lazy = chat_params.grammar_lazy; // Store original for logging
-            // Add a debug log to observe final grammar_lazy and grammar_triggers
-            /*
-            if (callback) {
-                std::string tool_choice_str;
-                switch (template_inputs.tool_choice) {
-                    case COMMON_CHAT_TOOL_CHOICE_AUTO: tool_choice_str = "auto"; break;
-                    case COMMON_CHAT_TOOL_CHOICE_NONE: tool_choice_str = "none"; break;
-                    case COMMON_CHAT_TOOL_CHOICE_REQUIRED: tool_choice_str = "required"; break;
-                    default: tool_choice_str = "unknown"; break;
-                }
-                std::string debug_msg = "[DEBUG CHAT_PARAMS] grammar_lazy: " +
-                                      std::string(cmpl_options.grammar_lazy ? "true" : "false") +
-                                      " | grammar_triggers_count: " + std::to_string(cmpl_options.grammar_triggers.size()) + // Log triggers from cmpl_options
-                                      " | For Tool Choice: " + tool_choice_str +
-                                      " | Parallel Tool Calls: " + std::string(template_inputs.parallel_tool_calls ? "true" : "false") +
-                                      " | Original chat_params.grammar_lazy: " + std::string(original_grammar_lazy ? "true" : "false"); // Log original lazy
-                callback(debug_msg, false);
-            }
-            */
+            // Note: Debug logging removed as it was not being used
         }
         // Run standard completion with the processed prompt
@@ -534,5 +512,3 @@ CompletionResult run_chat_completion(
 }
 } // namespace facebook::react

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@novastera-oss/llamarn",
-  "version": "0.4.0",
+  "version": "0.4.1",
   "description": "An attempt at a pure cpp turbo module library",
   "source": "./src/index.tsx",
   "main": "./lib/module/index.js",