@novastera-oss/llamarn 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,7 +53,7 @@ Pod::Spec.new do |s|
53
53
  # Compiler settings
54
54
  s.pod_target_xcconfig = {
55
55
  "HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/vendor\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
56
- "OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
56
+ "OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DFOLLY_CFG_NO_COROUTINES=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
57
57
  "CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
58
58
  "GCC_OPTIMIZATION_LEVEL" => "3", # Maximum optimization
59
59
  "SWIFT_OPTIMIZATION_LEVEL" => "-O",
@@ -78,9 +78,17 @@ add_library(
78
78
  ${CPP_DIR}/rn-completion.cpp
79
79
  )
80
80
 
81
- # Suppress unused function warnings for llama.cpp code
82
- target_compile_options(common PRIVATE -Wno-unused-function)
83
- target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
81
+ # Suppress additional warnings that are treated as errors in Expo SDK 54
82
+ target_compile_options(common PRIVATE )
83
+
84
+ # Use React Native's compile options function for proper C++ flags and RN_SERIALIZABLE_STATE
85
+ if(ReactAndroid_VERSION_MINOR GREATER_EQUAL 80)
86
+ # Add additional warning suppressions for RNLlamaCpp target
87
+ target_compile_reactnative_options(RNLlamaCpp PRIVATE)
88
+ target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
89
+ else()
90
+ target_compile_options(RNLlamaCpp PRIVATE -Wno-unused-function)
91
+ endif()
84
92
 
85
93
  # Check if Vulkan backend library is available
86
94
  set(VULKAN_BACKEND_AVAILABLE FALSE)
@@ -948,16 +948,8 @@ jsi::Value LlamaCppModel::embeddingJsi(jsi::Runtime& rt, const jsi::Value* args,
948
948
  throw std::runtime_error("Invalid embedding dimension");
949
949
  }
950
950
 
951
- // For OpenAI compatibility, default to mean pooling
952
- enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_MEAN;
953
- if (options.hasProperty(rt, "pooling") && options.getProperty(rt, "pooling").isString()) {
954
- std::string pooling = options.getProperty(rt, "pooling").getString(rt).utf8(rt);
955
- if (pooling == "last") {
956
- pooling_type = LLAMA_POOLING_TYPE_LAST;
957
- } else if (pooling == "cls" || pooling == "first") {
958
- pooling_type = LLAMA_POOLING_TYPE_CLS;
959
- }
960
- }
951
+ // Note: Pooling is handled automatically by llama_get_embeddings()
952
+ // The function returns the appropriate embedding based on the model's configuration
961
953
 
962
954
  // Get the embeddings
963
955
  std::vector<float> embedding_vec(n_embd);
@@ -20,10 +20,6 @@
20
20
 
21
21
  namespace facebook::react {
22
22
 
23
- // Memory fallback constants (clearly defined for future maintenance)
24
- constexpr int64_t FALLBACK_IOS_MEMORY = 2LL * 1024 * 1024 * 1024; // 2GB default
25
- constexpr int64_t FALLBACK_ANDROID_MEMORY = 3LL * 1024 * 1024 * 1024; // 3GB for Android
26
- constexpr int64_t DEFAULT_FALLBACK_MEMORY = 2LL * 1024 * 1024 * 1024; // 2GB default
27
23
 
28
24
  int SystemUtils::getOptimalThreadCount() {
29
25
  int cpuCores = std::thread::hardware_concurrency();
@@ -85,11 +81,11 @@ int64_t getTotalPhysicalMemory() {
85
81
  // Fallback to a conservative estimate if we couldn't get the actual memory
86
82
  if (total_memory <= 0) {
87
83
  #if defined(__APPLE__) && TARGET_OS_IPHONE
88
- total_memory = FALLBACK_IOS_MEMORY;
84
+ total_memory = 2LL * 1024 * 1024 * 1024; // 2GB default for iOS
89
85
  #elif defined(__ANDROID__)
90
- total_memory = FALLBACK_ANDROID_MEMORY;
86
+ total_memory = 3LL * 1024 * 1024 * 1024; // 3GB default for Android
91
87
  #else
92
- total_memory = DEFAULT_FALLBACK_MEMORY;
88
+ total_memory = 2LL * 1024 * 1024 * 1024; // 2GB default for other platforms
93
89
  #endif
94
90
  }
95
91
 
@@ -255,7 +255,7 @@ CompletionResult run_completion(
255
255
  }
256
256
 
257
257
  // Start generating tokens
258
- const int64_t t_start_generation = ggml_time_us();
258
+ // Note: Timing variables removed as they were not being used
259
259
 
260
260
  while (state.has_next_token && state.n_remaining > 0) {
261
261
  // Sample the next token
@@ -323,9 +323,7 @@ CompletionResult run_completion(
323
323
  }
324
324
  }
325
325
 
326
- const int64_t t_end_generation = ggml_time_us();
327
- // Note: keeping generation_time_ms for future timing measurements
328
- // const double generation_time_ms = (t_end_generation - t_start_generation) / 1000.0;
326
+ // Note: Timing measurements removed as they were not being used
329
327
 
330
328
  // Set the result
331
329
  result.content = state.generated_text;
@@ -428,27 +426,7 @@ CompletionResult run_chat_completion(
428
426
  // Default to grammar_triggers provided by chat_params
429
427
  cmpl_options.grammar_triggers = chat_params.grammar_triggers;
430
428
 
431
- bool original_grammar_lazy = chat_params.grammar_lazy; // Store original for logging
432
-
433
- // Add a debug log to observe final grammar_lazy and grammar_triggers
434
- /*
435
- if (callback) {
436
- std::string tool_choice_str;
437
- switch (template_inputs.tool_choice) {
438
- case COMMON_CHAT_TOOL_CHOICE_AUTO: tool_choice_str = "auto"; break;
439
- case COMMON_CHAT_TOOL_CHOICE_NONE: tool_choice_str = "none"; break;
440
- case COMMON_CHAT_TOOL_CHOICE_REQUIRED: tool_choice_str = "required"; break;
441
- default: tool_choice_str = "unknown"; break;
442
- }
443
- std::string debug_msg = "[DEBUG CHAT_PARAMS] grammar_lazy: " +
444
- std::string(cmpl_options.grammar_lazy ? "true" : "false") +
445
- " | grammar_triggers_count: " + std::to_string(cmpl_options.grammar_triggers.size()) + // Log triggers from cmpl_options
446
- " | For Tool Choice: " + tool_choice_str +
447
- " | Parallel Tool Calls: " + std::string(template_inputs.parallel_tool_calls ? "true" : "false") +
448
- " | Original chat_params.grammar_lazy: " + std::string(original_grammar_lazy ? "true" : "false"); // Log original lazy
449
- callback(debug_msg, false);
450
- }
451
- */
429
+ // Note: Debug logging removed as it was not being used
452
430
  }
453
431
 
454
432
  // Run standard completion with the processed prompt
@@ -534,5 +512,3 @@ CompletionResult run_chat_completion(
534
512
  }
535
513
 
536
514
  } // namespace facebook::react
537
-
538
-
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@novastera-oss/llamarn",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "An attempt at a pure cpp turbo module library",
5
5
  "source": "./src/index.tsx",
6
6
  "main": "./lib/module/index.js",