cactus-react-native 1.4.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/Cactus.podspec +1 -1
  2. package/README.md +465 -174
  3. package/android/CMakeLists.txt +24 -5
  4. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcurl.a +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libmbedcrypto.a +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libmbedtls.a +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libmbedx509.a +0 -0
  9. package/cpp/HybridCactus.cpp +157 -6
  10. package/cpp/HybridCactus.hpp +20 -3
  11. package/cpp/cactus_ffi.h +65 -30
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +0 -1
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +65 -30
  14. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +357 -122
  15. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +184 -63
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +153 -27
  18. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +90 -178
  19. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +276 -151
  20. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +0 -1
  22. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +65 -30
  23. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +357 -122
  24. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +184 -63
  25. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
  26. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +153 -27
  27. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +90 -178
  28. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +276 -151
  29. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  30. package/lib/module/classes/CactusLM.js +43 -58
  31. package/lib/module/classes/CactusLM.js.map +1 -1
  32. package/lib/module/classes/CactusSTT.js +64 -38
  33. package/lib/module/classes/CactusSTT.js.map +1 -1
  34. package/lib/module/classes/CactusVAD.js +95 -0
  35. package/lib/module/classes/CactusVAD.js.map +1 -0
  36. package/lib/module/hooks/useCactusLM.js +23 -15
  37. package/lib/module/hooks/useCactusLM.js.map +1 -1
  38. package/lib/module/hooks/useCactusSTT.js +85 -28
  39. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  40. package/lib/module/hooks/useCactusVAD.js +171 -0
  41. package/lib/module/hooks/useCactusVAD.js.map +1 -0
  42. package/lib/module/index.js +2 -3
  43. package/lib/module/index.js.map +1 -1
  44. package/lib/module/modelRegistry.js +52 -0
  45. package/lib/module/modelRegistry.js.map +1 -0
  46. package/lib/module/native/Cactus.js +107 -8
  47. package/lib/module/native/Cactus.js.map +1 -1
  48. package/lib/module/native/CactusIndex.js.map +1 -1
  49. package/lib/module/native/index.js +0 -3
  50. package/lib/module/native/index.js.map +1 -1
  51. package/lib/module/types/CactusLM.js +2 -0
  52. package/lib/module/types/CactusSTT.js +2 -0
  53. package/lib/module/types/CactusVAD.js +4 -0
  54. package/lib/module/types/{CactusModel.js.map → CactusVAD.js.map} +1 -1
  55. package/lib/module/types/common.js +2 -0
  56. package/lib/module/types/{CactusSTTModel.js.map → common.js.map} +1 -1
  57. package/lib/typescript/src/classes/CactusLM.d.ts +8 -6
  58. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  59. package/lib/typescript/src/classes/CactusSTT.d.ts +11 -6
  60. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  61. package/lib/typescript/src/classes/CactusVAD.d.ts +20 -0
  62. package/lib/typescript/src/classes/CactusVAD.d.ts.map +1 -0
  63. package/lib/typescript/src/hooks/useCactusLM.d.ts +3 -3
  64. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  65. package/lib/typescript/src/hooks/useCactusSTT.d.ts +11 -5
  66. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  67. package/lib/typescript/src/hooks/useCactusVAD.d.ts +15 -0
  68. package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +1 -0
  69. package/lib/typescript/src/index.d.ts +7 -6
  70. package/lib/typescript/src/index.d.ts.map +1 -1
  71. package/lib/typescript/src/modelRegistry.d.ts +5 -0
  72. package/lib/typescript/src/modelRegistry.d.ts.map +1 -0
  73. package/lib/typescript/src/native/Cactus.d.ts +12 -6
  74. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  75. package/lib/typescript/src/native/CactusIndex.d.ts +2 -2
  76. package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -1
  77. package/lib/typescript/src/native/index.d.ts +0 -3
  78. package/lib/typescript/src/native/index.d.ts.map +1 -1
  79. package/lib/typescript/src/specs/Cactus.nitro.d.ts +6 -1
  80. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  81. package/lib/typescript/src/types/CactusIndex.d.ts +2 -2
  82. package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -1
  83. package/lib/typescript/src/types/CactusLM.d.ts +19 -9
  84. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  85. package/lib/typescript/src/types/CactusSTT.d.ts +45 -4
  86. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  87. package/lib/typescript/src/types/CactusVAD.d.ts +34 -0
  88. package/lib/typescript/src/types/CactusVAD.d.ts.map +1 -0
  89. package/lib/typescript/src/types/common.d.ts +23 -0
  90. package/lib/typescript/src/types/common.d.ts.map +1 -0
  91. package/nitro.json +0 -11
  92. package/nitrogen/generated/android/cactus+autolinking.cmake +0 -5
  93. package/nitrogen/generated/android/cactusOnLoad.cpp +0 -30
  94. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +0 -50
  95. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +9 -147
  96. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +0 -13
  97. package/nitrogen/generated/ios/CactusAutolinking.mm +0 -26
  98. package/nitrogen/generated/ios/CactusAutolinking.swift +0 -30
  99. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
  100. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +6 -1
  101. package/package.json +3 -3
  102. package/src/classes/CactusLM.ts +59 -74
  103. package/src/classes/CactusSTT.ts +92 -49
  104. package/src/classes/CactusVAD.ts +129 -0
  105. package/src/hooks/useCactusLM.ts +26 -9
  106. package/src/hooks/useCactusSTT.ts +105 -44
  107. package/src/hooks/useCactusVAD.ts +215 -0
  108. package/src/index.tsx +20 -10
  109. package/src/modelRegistry.ts +65 -0
  110. package/src/native/Cactus.ts +130 -14
  111. package/src/native/CactusIndex.ts +2 -2
  112. package/src/native/index.ts +0 -3
  113. package/src/specs/Cactus.nitro.ts +11 -2
  114. package/src/types/CactusIndex.ts +2 -2
  115. package/src/types/CactusLM.ts +20 -9
  116. package/src/types/CactusSTT.ts +50 -4
  117. package/src/types/CactusVAD.ts +39 -0
  118. package/src/types/common.ts +23 -0
  119. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +0 -46
  120. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +0 -27
  121. package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
  122. package/cpp/HybridCactusUtil.cpp +0 -47
  123. package/cpp/HybridCactusUtil.hpp +0 -27
  124. package/cpp/cactus_util.h +0 -25
  125. package/ios/HybridCactusCrypto.swift +0 -37
  126. package/ios/HybridCactusDeviceInfo.swift +0 -32
  127. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +0 -656
  128. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +0 -656
  129. package/ios/cactus_util.xcframework/Info.plist +0 -39
  130. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +0 -25
  131. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +0 -27
  132. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +0 -10
  133. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +0 -25
  134. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
  135. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
  136. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +0 -25
  137. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +0 -27
  138. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +0 -10
  139. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +0 -25
  140. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
  141. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +0 -135
  142. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
  143. package/lib/module/api/Database.js +0 -137
  144. package/lib/module/api/Database.js.map +0 -1
  145. package/lib/module/api/RemoteLM.js +0 -201
  146. package/lib/module/api/RemoteLM.js.map +0 -1
  147. package/lib/module/config/CactusConfig.js +0 -12
  148. package/lib/module/config/CactusConfig.js.map +0 -1
  149. package/lib/module/native/CactusCrypto.js +0 -10
  150. package/lib/module/native/CactusCrypto.js.map +0 -1
  151. package/lib/module/native/CactusDeviceInfo.js +0 -13
  152. package/lib/module/native/CactusDeviceInfo.js.map +0 -1
  153. package/lib/module/native/CactusUtil.js +0 -36
  154. package/lib/module/native/CactusUtil.js.map +0 -1
  155. package/lib/module/specs/CactusCrypto.nitro.js +0 -4
  156. package/lib/module/specs/CactusCrypto.nitro.js.map +0 -1
  157. package/lib/module/specs/CactusDeviceInfo.nitro.js +0 -4
  158. package/lib/module/specs/CactusDeviceInfo.nitro.js.map +0 -1
  159. package/lib/module/specs/CactusUtil.nitro.js +0 -4
  160. package/lib/module/specs/CactusUtil.nitro.js.map +0 -1
  161. package/lib/module/telemetry/Telemetry.js +0 -154
  162. package/lib/module/telemetry/Telemetry.js.map +0 -1
  163. package/lib/module/types/CactusModel.js +0 -2
  164. package/lib/module/types/CactusSTTModel.js +0 -2
  165. package/lib/typescript/src/api/Database.d.ts +0 -18
  166. package/lib/typescript/src/api/Database.d.ts.map +0 -1
  167. package/lib/typescript/src/api/RemoteLM.d.ts +0 -14
  168. package/lib/typescript/src/api/RemoteLM.d.ts.map +0 -1
  169. package/lib/typescript/src/config/CactusConfig.d.ts +0 -7
  170. package/lib/typescript/src/config/CactusConfig.d.ts.map +0 -1
  171. package/lib/typescript/src/native/CactusCrypto.d.ts +0 -5
  172. package/lib/typescript/src/native/CactusCrypto.d.ts.map +0 -1
  173. package/lib/typescript/src/native/CactusDeviceInfo.d.ts +0 -7
  174. package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +0 -1
  175. package/lib/typescript/src/native/CactusUtil.d.ts +0 -6
  176. package/lib/typescript/src/native/CactusUtil.d.ts.map +0 -1
  177. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +0 -8
  178. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +0 -1
  179. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +0 -16
  180. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +0 -1
  181. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +0 -10
  182. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +0 -1
  183. package/lib/typescript/src/telemetry/Telemetry.d.ts +0 -34
  184. package/lib/typescript/src/telemetry/Telemetry.d.ts.map +0 -1
  185. package/lib/typescript/src/types/CactusModel.d.ts +0 -13
  186. package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
  187. package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
  188. package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
  189. package/nitrogen/generated/android/c++/JDeviceInfo.hpp +0 -74
  190. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +0 -65
  191. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +0 -65
  192. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +0 -85
  193. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +0 -66
  194. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +0 -50
  195. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +0 -58
  196. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +0 -62
  197. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +0 -11
  198. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +0 -77
  199. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +0 -11
  200. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +0 -88
  201. package/nitrogen/generated/ios/swift/DeviceInfo.swift +0 -98
  202. package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +0 -47
  203. package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +0 -54
  204. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +0 -57
  205. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +0 -139
  206. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +0 -58
  207. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +0 -164
  208. package/nitrogen/generated/shared/c++/DeviceInfo.hpp +0 -92
  209. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +0 -21
  210. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +0 -63
  211. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +0 -22
  212. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +0 -67
  213. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +0 -23
  214. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +0 -66
  215. package/src/api/Database.ts +0 -188
  216. package/src/api/RemoteLM.ts +0 -273
  217. package/src/config/CactusConfig.ts +0 -11
  218. package/src/native/CactusCrypto.ts +0 -11
  219. package/src/native/CactusDeviceInfo.ts +0 -18
  220. package/src/native/CactusUtil.ts +0 -43
  221. package/src/specs/CactusCrypto.nitro.ts +0 -6
  222. package/src/specs/CactusDeviceInfo.nitro.ts +0 -15
  223. package/src/specs/CactusUtil.nitro.ts +0 -8
  224. package/src/telemetry/Telemetry.ts +0 -236
  225. package/src/types/CactusModel.ts +0 -15
  226. package/src/types/CactusSTTModel.ts +0 -10
@@ -19,14 +19,53 @@
19
19
 
20
20
  #ifdef __APPLE__
21
21
  #include <uuid/uuid.h>
22
+ #include <mach/mach.h>
23
+ #elif defined(_WIN32)
24
+ #include <windows.h>
25
+ #include <psapi.h>
26
+ #elif defined(__linux__) || defined(__ANDROID__)
27
+ #include <unistd.h>
22
28
  #endif
23
29
 
30
+ inline size_t get_memory_footprint_bytes() {
31
+ #ifdef __APPLE__
32
+ task_vm_info_data_t vm_info;
33
+ mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
34
+ if (task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count) == KERN_SUCCESS)
35
+ return vm_info.phys_footprint;
36
+
37
+ #elif defined(_WIN32)
38
+ PROCESS_MEMORY_COUNTERS_EX pmc;
39
+ if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc)))
40
+ return pmc.PrivateUsage;
41
+
42
+ #elif defined(__linux__) || defined(__ANDROID__)
43
+ std::ifstream statm("/proc/self/statm");
44
+ if (statm.is_open()) {
45
+ size_t size, resident;
46
+ statm >> size >> resident;
47
+ return resident * sysconf(_SC_PAGESIZE);
48
+ }
49
+ #endif
50
+ return 0;
51
+ }
52
+
53
+ inline double get_ram_usage_mb() {
54
+ return get_memory_footprint_bytes() / (1024.0 * 1024.0);
55
+ }
56
+
24
57
  struct CactusModelHandle {
25
58
  std::unique_ptr<cactus::engine::Model> model;
59
+ std::unique_ptr<cactus::engine::Model> vad_model;
26
60
  std::atomic<bool> should_stop;
27
61
  std::vector<uint32_t> processed_tokens;
28
62
  std::mutex model_mutex;
29
63
  std::string model_name;
64
+ std::unique_ptr<cactus::engine::index::Index> corpus_index;
65
+ std::string corpus_dir;
66
+ size_t corpus_embedding_dim = 0;
67
+ std::vector<std::vector<float>> tool_embeddings;
68
+ std::vector<std::string> tool_texts;
30
69
 
31
70
  CactusModelHandle() : should_stop(false) {}
32
71
  };
@@ -36,16 +75,37 @@ extern std::string last_error_message;
36
75
  bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
37
76
  const std::vector<std::vector<uint32_t>>& stop_sequences);
38
77
 
78
+ std::string retrieve_rag_context(CactusModelHandle* handle, const std::string& query);
79
+
39
80
  namespace cactus {
40
- namespace ffi {
81
+ namespace audio {
82
+
83
+ static constexpr size_t WHISPER_TARGET_FRAMES = 3000;
84
+ static constexpr int WHISPER_SAMPLE_RATE = 16000;
85
+
86
+ inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram_config() {
87
+ cactus::engine::AudioProcessor::SpectrogramConfig cfg{};
88
+ cfg.n_fft = 400;
89
+ cfg.frame_length = 400;
90
+ cfg.hop_length = 160;
91
+ cfg.power = 2.0f;
92
+ cfg.center = true;
93
+ cfg.pad_mode = "reflect";
94
+ cfg.onesided = true;
95
+ cfg.dither = 0.0f;
96
+ cfg.mel_floor = 1e-10f;
97
+ cfg.log_mel = "log10";
98
+ cfg.reference = 1.0f;
99
+ cfg.min_value = 1e-10f;
100
+ cfg.remove_dc_offset = true;
101
+ return cfg;
102
+ }
41
103
 
42
- #ifndef CACTUS_VERSION
43
- #define CACTUS_VERSION "unknown"
44
- #endif
104
+ } // namespace audio
105
+ } // namespace cactus
45
106
 
46
- inline const char* getVersion() {
47
- return CACTUS_VERSION;
48
- }
107
+ namespace cactus {
108
+ namespace ffi {
49
109
 
50
110
  inline std::string generateUUID() {
51
111
  #ifdef __APPLE__
@@ -63,13 +123,52 @@ struct ToolFunction {
63
123
  std::unordered_map<std::string, std::string> parameters;
64
124
  };
65
125
 
66
- inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
67
- std::string sanitized_msg = error_message;
68
- for (auto& c : sanitized_msg) {
69
- if (c == '"') c = '\'';
70
- if (c == '\n') c = ' ';
126
+ } // namespace ffi
127
+ } // namespace cactus
128
+
129
+ std::vector<cactus::ffi::ToolFunction> select_relevant_tools(
130
+ CactusModelHandle* handle,
131
+ const std::string& query,
132
+ const std::vector<cactus::ffi::ToolFunction>& all_tools,
133
+ size_t top_k);
134
+
135
+ #include "gemma_tools.h"
136
+
137
+ namespace cactus {
138
+ namespace ffi {
139
+
140
+ inline std::string escape_json_string(const std::string& s) {
141
+ std::ostringstream o;
142
+ for (char c : s) {
143
+ if (c == '"') o << "\\\"";
144
+ else if (c == '\n') o << "\\n";
145
+ else if (c == '\r') o << "\\r";
146
+ else if (c == '\t') o << "\\t";
147
+ else if (c == '\\') o << "\\\\";
148
+ else o << c;
71
149
  }
72
- std::string error_json = "{\"success\":false,\"error\":\"" + sanitized_msg + "\"}";
150
+ return o.str();
151
+ }
152
+
153
+ inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
154
+ std::ostringstream json;
155
+ json << "{";
156
+ json << "\"success\":false,";
157
+ json << "\"error\":\"" << escape_json_string(error_message) << "\",";
158
+ json << "\"cloud_handoff\":false,";
159
+ json << "\"response\":null,";
160
+ json << "\"function_calls\":[],";
161
+ json << "\"confidence\":0.0,";
162
+ json << "\"time_to_first_token_ms\":0.0,";
163
+ json << "\"total_time_ms\":0.0,";
164
+ json << "\"prefill_tps\":0.0,";
165
+ json << "\"decode_tps\":0.0,";
166
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
167
+ json << "\"prefill_tokens\":0,";
168
+ json << "\"decode_tokens\":0,";
169
+ json << "\"total_tokens\":0";
170
+ json << "}";
171
+ std::string error_json = json.str();
73
172
  if (response_buffer && error_json.length() < buffer_size) {
74
173
  std::strcpy(response_buffer, error_json.c_str());
75
174
  }
@@ -220,12 +319,22 @@ inline void parse_options_json(const std::string& json,
220
319
  float& temperature, float& top_p,
221
320
  size_t& top_k, size_t& max_tokens,
222
321
  std::vector<std::string>& stop_sequences,
223
- bool& force_tools) {
322
+ bool& force_tools,
323
+ size_t& tool_rag_top_k,
324
+ float& confidence_threshold,
325
+ bool& include_stop_sequences,
326
+ bool& use_vad,
327
+ bool& telemetry_enabled) {
224
328
  temperature = 0.0f;
225
329
  top_p = 0.0f;
226
330
  top_k = 0;
227
331
  max_tokens = 100;
228
332
  force_tools = false;
333
+ tool_rag_top_k = 2;
334
+ confidence_threshold = 0.7f;
335
+ include_stop_sequences = false;
336
+ use_vad = true;
337
+ telemetry_enabled = true;
229
338
  stop_sequences.clear();
230
339
 
231
340
  if (json.empty()) return;
@@ -261,6 +370,39 @@ inline void parse_options_json(const std::string& json,
261
370
  force_tools = (json.substr(pos, 4) == "true");
262
371
  }
263
372
 
373
+ pos = json.find("\"tool_rag_top_k\"");
374
+ if (pos != std::string::npos) {
375
+ pos = json.find(':', pos) + 1;
376
+ tool_rag_top_k = std::stoul(json.substr(pos));
377
+ }
378
+
379
+ pos = json.find("\"confidence_threshold\"");
380
+ if (pos != std::string::npos) {
381
+ pos = json.find(':', pos) + 1;
382
+ confidence_threshold = std::stof(json.substr(pos));
383
+ }
384
+
385
+ pos = json.find("\"include_stop_sequences\"");
386
+ if (pos != std::string::npos) {
387
+ pos = json.find(':', pos) + 1;
388
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
389
+ include_stop_sequences = (json.substr(pos, 4) == "true");
390
+ }
391
+
392
+ pos = json.find("\"use_vad\"");
393
+ if (pos != std::string::npos) {
394
+ pos = json.find(':', pos) + 1;
395
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
396
+ use_vad = (json.substr(pos, 4) == "true");
397
+ }
398
+
399
+ pos = json.find("\"telemetry_enabled\"");
400
+ if (pos != std::string::npos) {
401
+ pos = json.find(':', pos) + 1;
402
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
403
+ telemetry_enabled = (json.substr(pos, 4) == "true");
404
+ }
405
+
264
406
  pos = json.find("\"stop_sequences\"");
265
407
  if (pos != std::string::npos) {
266
408
  pos = json.find('[', pos);
@@ -297,80 +439,167 @@ inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tool
297
439
  return formatted_tools_json;
298
440
  }
299
441
 
442
+ static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
443
+ while (begin < end && std::isspace(static_cast<unsigned char>(value[begin]))) {
444
+ begin++;
445
+ }
446
+ while (end > begin && std::isspace(static_cast<unsigned char>(value[end - 1]))) {
447
+ end--;
448
+ }
449
+ return value.substr(begin, end - begin);
450
+ }
451
+
452
+ static inline void append_lfm2_call(const std::string& entry,
453
+ std::vector<std::string>& function_calls) {
454
+ if (entry.empty()) return;
455
+
456
+ std::string trimmed_entry = trim_lfm2_slice(entry, 0, entry.size());
457
+ if (trimmed_entry.empty()) return;
458
+
459
+ size_t paren_pos = trimmed_entry.find('(');
460
+ if (paren_pos == std::string::npos) return;
461
+
462
+ std::string func_name = trim_lfm2_slice(trimmed_entry, 0, paren_pos);
463
+ std::string args_str = trim_lfm2_slice(trimmed_entry, paren_pos + 1, trimmed_entry.size());
464
+
465
+ if (!args_str.empty() && args_str.back() == ')') {
466
+ args_str.pop_back();
467
+ args_str = trim_lfm2_slice(args_str, 0, args_str.size());
468
+ }
469
+
470
+ std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
471
+
472
+ size_t arg_pos = 0;
473
+ bool first_arg = true;
474
+ while (arg_pos < args_str.length()) {
475
+ while (arg_pos < args_str.length() && std::isspace(static_cast<unsigned char>(args_str[arg_pos]))) {
476
+ arg_pos++;
477
+ }
478
+
479
+ size_t eq_pos = args_str.find('=', arg_pos);
480
+ if (eq_pos == std::string::npos) break;
481
+
482
+ std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
483
+
484
+ size_t val_start = eq_pos + 1;
485
+ size_t val_end = val_start;
486
+
487
+ if (val_start < args_str.length() && args_str[val_start] == '"') {
488
+ val_start++;
489
+ val_end = args_str.find('"', val_start);
490
+ if (val_end == std::string::npos) break;
491
+ } else {
492
+ val_end = args_str.find(',', val_start);
493
+ if (val_end == std::string::npos) val_end = args_str.length();
494
+ }
495
+
496
+ std::string arg_value = args_str.substr(val_start, val_end - val_start);
497
+
498
+ if (!first_arg) json_call += ",";
499
+ json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
500
+ first_arg = false;
501
+
502
+ arg_pos = args_str.find(',', val_end);
503
+ if (arg_pos != std::string::npos) {
504
+ arg_pos++;
505
+ } else {
506
+ break;
507
+ }
508
+ }
509
+
510
+ json_call += "}}";
511
+ function_calls.push_back(json_call);
512
+ }
513
+
300
514
  inline void parse_function_calls_from_response(const std::string& response_text,
301
515
  std::string& regular_response,
302
516
  std::vector<std::string>& function_calls) {
303
517
  regular_response = response_text;
304
518
  function_calls.clear();
305
519
 
306
- const std::string TOOL_CALL_START = "<|tool_call_start|>";
307
- const std::string TOOL_CALL_END = "<|tool_call_end|>";
308
- size_t tool_start_pos = 0;
520
+ gemma::parse_function_calls(regular_response, function_calls);
309
521
 
310
- while ((tool_start_pos = response_text.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
311
- size_t content_start = tool_start_pos + TOOL_CALL_START.length();
312
- size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
522
+ // Parse Qwen-style function calls: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
523
+ const std::string QWEN_TOOL_START = "<tool_call>";
524
+ const std::string QWEN_TOOL_END = "</tool_call>";
525
+ size_t qwen_start_pos = 0;
313
526
 
314
- if (tool_end_pos != std::string::npos) {
315
- std::string tool_content = response_text.substr(content_start, tool_end_pos - content_start);
527
+ while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
528
+ size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
529
+ size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
316
530
 
317
- if (tool_content.size() > 2 && tool_content[0] == '[' && tool_content[tool_content.size()-1] == ']') {
318
- tool_content = tool_content.substr(1, tool_content.size() - 2);
531
+ if (qwen_end_pos != std::string::npos) {
532
+ std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
319
533
 
320
- size_t paren_pos = tool_content.find('(');
321
- if (paren_pos != std::string::npos) {
322
- std::string func_name = tool_content.substr(0, paren_pos);
323
- std::string args_str = tool_content.substr(paren_pos + 1);
324
-
325
- if (!args_str.empty() && args_str.back() == ')') {
326
- args_str.pop_back();
327
- }
328
-
329
- std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
330
-
331
- size_t arg_pos = 0;
332
- bool first_arg = true;
333
- while (arg_pos < args_str.length()) {
334
- while (arg_pos < args_str.length() && std::isspace(args_str[arg_pos])) arg_pos++;
335
-
336
- size_t eq_pos = args_str.find('=', arg_pos);
337
- if (eq_pos == std::string::npos) break;
534
+ size_t first = json_content.find_first_not_of(" \t\n\r");
535
+ size_t last = json_content.find_last_not_of(" \t\n\r");
536
+ if (first != std::string::npos && last != std::string::npos) {
537
+ json_content = json_content.substr(first, last - first + 1);
538
+ }
338
539
 
339
- std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
540
+ if (json_content.size() > 2 && json_content[0] == '{' &&
541
+ json_content.find("\"name\"") != std::string::npos) {
542
+ function_calls.push_back(json_content);
543
+ }
340
544
 
341
- size_t val_start = eq_pos + 1;
342
- size_t val_end = val_start;
545
+ regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
546
+ } else {
547
+ break;
548
+ }
549
+ }
550
+
551
+ // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
552
+ const std::string TOOL_CALL_START = "<|tool_call_start|>";
553
+ const std::string TOOL_CALL_END = "<|tool_call_end|>";
554
+ size_t tool_start_pos = 0;
343
555
 
344
- if (val_start < args_str.length() && args_str[val_start] == '"') {
345
- val_start++;
346
- val_end = args_str.find('"', val_start);
347
- if (val_end == std::string::npos) break;
348
- } else {
349
- val_end = args_str.find(',', val_start);
350
- if (val_end == std::string::npos) val_end = args_str.length();
351
- }
556
+ while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
557
+ size_t content_start = tool_start_pos + TOOL_CALL_START.length();
558
+ size_t tool_end_pos = regular_response.find(TOOL_CALL_END, content_start);
352
559
 
353
- std::string arg_value = args_str.substr(val_start, val_end - val_start);
560
+ if (tool_end_pos != std::string::npos) {
561
+ std::string tool_content = regular_response.substr(content_start, tool_end_pos - content_start);
562
+ std::string content = tool_content;
563
+ size_t trim_start = 0;
564
+ while (trim_start < content.size() && std::isspace(static_cast<unsigned char>(content[trim_start]))) {
565
+ trim_start++;
566
+ }
354
567
 
355
- if (!first_arg) json_call += ",";
356
- json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
357
- first_arg = false;
568
+ if (trim_start < content.size()) {
569
+ size_t trim_end = content.size() - 1;
570
+ while (trim_end > trim_start && std::isspace(static_cast<unsigned char>(content[trim_end]))) {
571
+ trim_end--;
572
+ }
573
+ content = content.substr(trim_start, trim_end - trim_start + 1);
574
+ } else {
575
+ content.clear();
576
+ }
358
577
 
359
- arg_pos = args_str.find(',', val_end);
360
- if (arg_pos != std::string::npos) {
361
- arg_pos++;
362
- } else {
363
- break;
364
- }
578
+ if (!content.empty() && content.front() == '[' && content.back() == ']') {
579
+ std::string inner = content.substr(1, content.size() - 2);
580
+ size_t start = 0;
581
+ int paren_depth = 0;
582
+
583
+ for (size_t i = 0; i < inner.size(); ++i) {
584
+ char c = inner[i];
585
+ if (c == '(') {
586
+ paren_depth++;
587
+ } else if (c == ')' && paren_depth > 0) {
588
+ paren_depth--;
589
+ } else if (c == ',' && paren_depth == 0) {
590
+ append_lfm2_call(inner.substr(start, i - start), function_calls);
591
+ start = i + 1;
365
592
  }
593
+ }
366
594
 
367
- json_call += "}}";
368
- function_calls.push_back(json_call);
595
+ if (start < inner.size()) {
596
+ append_lfm2_call(inner.substr(start), function_calls);
369
597
  }
598
+ } else if (!content.empty()) {
599
+ append_lfm2_call(content, function_calls);
370
600
  }
371
601
 
372
602
  regular_response.erase(tool_start_pos, tool_end_pos + TOOL_CALL_END.length() - tool_start_pos);
373
- tool_start_pos = tool_end_pos + TOOL_CALL_END.length();
374
603
  } else {
375
604
  break;
376
605
  }
@@ -411,38 +640,71 @@ inline std::string construct_response_json(const std::string& regular_response,
411
640
  const std::vector<std::string>& function_calls,
412
641
  double time_to_first_token,
413
642
  double total_time_ms,
414
- double tokens_per_second,
643
+ double prefill_tps,
644
+ double decode_tps,
415
645
  size_t prompt_tokens,
416
- size_t completion_tokens) {
417
- std::ostringstream json_response;
418
- json_response << "{";
419
- json_response << "\"success\":true,";
420
- json_response << "\"response\":\"";
421
- for (char c : regular_response) {
422
- if (c == '"') json_response << "\\\"";
423
- else if (c == '\n') json_response << "\\n";
424
- else if (c == '\r') json_response << "\\r";
425
- else if (c == '\t') json_response << "\\t";
426
- else if (c == '\\') json_response << "\\\\";
427
- else json_response << c;
646
+ size_t completion_tokens,
647
+ float confidence = 0.0f,
648
+ bool cloud_handoff = false) {
649
+ std::ostringstream json;
650
+ json << "{";
651
+ json << "\"success\":" << (cloud_handoff ? "false" : "true") << ",";
652
+ json << "\"error\":null,";
653
+ json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
654
+ json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
655
+ json << "\"function_calls\":[";
656
+ for (size_t i = 0; i < function_calls.size(); ++i) {
657
+ if (i > 0) json << ",";
658
+ json << function_calls[i];
428
659
  }
429
- json_response << "\",";
430
- if (!function_calls.empty()) {
431
- json_response << "\"function_calls\":[";
432
- for (size_t i = 0; i < function_calls.size(); ++i) {
433
- if (i > 0) json_response << ",";
434
- json_response << function_calls[i];
435
- }
436
- json_response << "],";
660
+ json << "],";
661
+ json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
662
+ json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
663
+ json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
664
+ json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
665
+ json << "\"decode_tps\":" << std::fixed << std::setprecision(2) << decode_tps << ",";
666
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
667
+ json << "\"prefill_tokens\":" << prompt_tokens << ",";
668
+ json << "\"decode_tokens\":" << completion_tokens << ",";
669
+ json << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
670
+ json << "}";
671
+ return json.str();
672
+ }
673
+
674
+ inline std::string construct_cloud_handoff_json(float confidence,
675
+ double time_to_first_token,
676
+ double prefill_tps,
677
+ size_t prompt_tokens) {
678
+ std::ostringstream json;
679
+ json << "{";
680
+ json << "\"success\":false,";
681
+ json << "\"error\":null,";
682
+ json << "\"cloud_handoff\":true,";
683
+ json << "\"response\":null,";
684
+ json << "\"function_calls\":[],";
685
+ json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
686
+ json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
687
+ json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
688
+ json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
689
+ json << "\"decode_tps\":0.0,";
690
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
691
+ json << "\"prefill_tokens\":" << prompt_tokens << ",";
692
+ json << "\"decode_tokens\":0,";
693
+ json << "\"total_tokens\":" << prompt_tokens;
694
+ json << "}";
695
+ return json.str();
696
+ }
697
+
698
+ inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
699
+ if (calls.empty()) return "[]";
700
+ std::ostringstream oss;
701
+ oss << "[";
702
+ for (size_t i = 0; i < calls.size(); ++i) {
703
+ if (i > 0) oss << ",";
704
+ oss << calls[i];
437
705
  }
438
- json_response << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
439
- json_response << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
440
- json_response << "\"tokens_per_second\":" << std::fixed << std::setprecision(2) << tokens_per_second << ",";
441
- json_response << "\"prefill_tokens\":" << prompt_tokens << ",";
442
- json_response << "\"decode_tokens\":" << completion_tokens << ",";
443
- json_response << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
444
- json_response << "}";
445
- return json_response.str();
706
+ oss << "]";
707
+ return oss.str();
446
708
  }
447
709
 
448
710
  } // namespace ffi
@@ -454,35 +716,8 @@ extern "C" {
454
716
 
455
717
  const char* cactus_get_last_error();
456
718
 
457
- __attribute__((weak))
458
- const char* register_app(const char* encrypted_data);
459
-
460
- __attribute__((weak))
461
- const char* get_device_id(const char* current_token);
462
-
463
719
  #ifdef __cplusplus
464
720
  }
465
721
  #endif
466
722
 
467
- #ifdef __cplusplus
468
- extern "C" {
469
-
470
- __attribute__((weak))
471
- inline const char* register_app(const char* encrypted_data) {
472
- (void)encrypted_data;
473
- static thread_local std::string uuid_storage;
474
- uuid_storage = cactus::ffi::generateUUID();
475
- return uuid_storage.c_str();
476
- }
477
-
478
- __attribute__((weak))
479
- inline const char* get_device_id(const char* current_token) {
480
- (void)current_token;
481
- static thread_local std::string uuid_storage;
482
- uuid_storage = cactus::ffi::generateUUID();
483
- return uuid_storage.c_str();
484
- }
485
- }
486
- #endif
487
-
488
723
  #endif // CACTUS_UTILS_H