cactus-react-native 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/Cactus.podspec +1 -1
  2. package/README.md +347 -241
  3. package/android/CMakeLists.txt +24 -5
  4. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcurl.a +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libmbedcrypto.a +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libmbedtls.a +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libmbedx509.a +0 -0
  9. package/cpp/HybridCactus.cpp +149 -117
  10. package/cpp/HybridCactus.hpp +14 -10
  11. package/cpp/cactus_ffi.h +54 -43
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +0 -1
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +54 -43
  14. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +318 -123
  15. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +118 -15
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +77 -32
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +68 -6
  18. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +21 -155
  19. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  20. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +0 -1
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +54 -43
  22. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +318 -123
  23. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +118 -15
  24. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +77 -32
  25. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +68 -6
  26. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +21 -155
  27. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  28. package/lib/module/classes/CactusLM.js +16 -49
  29. package/lib/module/classes/CactusLM.js.map +1 -1
  30. package/lib/module/classes/CactusSTT.js +30 -79
  31. package/lib/module/classes/CactusSTT.js.map +1 -1
  32. package/lib/module/classes/CactusVAD.js +95 -0
  33. package/lib/module/classes/CactusVAD.js.map +1 -0
  34. package/lib/module/hooks/useCactusLM.js +10 -11
  35. package/lib/module/hooks/useCactusLM.js.map +1 -1
  36. package/lib/module/hooks/useCactusSTT.js +23 -62
  37. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  38. package/lib/module/hooks/useCactusVAD.js +171 -0
  39. package/lib/module/hooks/useCactusVAD.js.map +1 -0
  40. package/lib/module/index.js +2 -3
  41. package/lib/module/index.js.map +1 -1
  42. package/lib/module/modelRegistry.js +52 -0
  43. package/lib/module/modelRegistry.js.map +1 -0
  44. package/lib/module/native/Cactus.js +85 -23
  45. package/lib/module/native/Cactus.js.map +1 -1
  46. package/lib/module/native/CactusIndex.js.map +1 -1
  47. package/lib/module/native/index.js +0 -3
  48. package/lib/module/native/index.js.map +1 -1
  49. package/lib/module/types/CactusVAD.js +4 -0
  50. package/lib/module/{specs/CactusUtil.nitro.js.map → types/CactusVAD.js.map} +1 -1
  51. package/lib/typescript/src/classes/CactusLM.d.ts +5 -7
  52. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  53. package/lib/typescript/src/classes/CactusSTT.d.ts +8 -12
  54. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  55. package/lib/typescript/src/classes/CactusVAD.d.ts +20 -0
  56. package/lib/typescript/src/classes/CactusVAD.d.ts.map +1 -0
  57. package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
  58. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  59. package/lib/typescript/src/hooks/useCactusSTT.d.ts +6 -8
  60. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  61. package/lib/typescript/src/hooks/useCactusVAD.d.ts +15 -0
  62. package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +1 -0
  63. package/lib/typescript/src/index.d.ts +7 -5
  64. package/lib/typescript/src/index.d.ts.map +1 -1
  65. package/lib/typescript/src/modelRegistry.d.ts +5 -0
  66. package/lib/typescript/src/modelRegistry.d.ts.map +1 -0
  67. package/lib/typescript/src/native/Cactus.d.ts +12 -11
  68. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  69. package/lib/typescript/src/native/CactusIndex.d.ts +2 -2
  70. package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -1
  71. package/lib/typescript/src/native/index.d.ts +0 -3
  72. package/lib/typescript/src/native/index.d.ts.map +1 -1
  73. package/lib/typescript/src/specs/Cactus.nitro.d.ts +6 -6
  74. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  75. package/lib/typescript/src/types/CactusIndex.d.ts +2 -2
  76. package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -1
  77. package/lib/typescript/src/types/CactusLM.d.ts +19 -11
  78. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  79. package/lib/typescript/src/types/CactusSTT.d.ts +33 -12
  80. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  81. package/lib/typescript/src/types/CactusVAD.d.ts +34 -0
  82. package/lib/typescript/src/types/CactusVAD.d.ts.map +1 -0
  83. package/lib/typescript/src/types/common.d.ts +1 -6
  84. package/lib/typescript/src/types/common.d.ts.map +1 -1
  85. package/nitro.json +0 -11
  86. package/nitrogen/generated/android/cactus+autolinking.cmake +0 -5
  87. package/nitrogen/generated/android/cactusOnLoad.cpp +0 -30
  88. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +0 -50
  89. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +9 -147
  90. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +0 -13
  91. package/nitrogen/generated/ios/CactusAutolinking.mm +0 -26
  92. package/nitrogen/generated/ios/CactusAutolinking.swift +0 -30
  93. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +4 -4
  94. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +6 -6
  95. package/package.json +3 -3
  96. package/src/classes/CactusLM.ts +18 -65
  97. package/src/classes/CactusSTT.ts +39 -97
  98. package/src/classes/CactusVAD.ts +129 -0
  99. package/src/hooks/useCactusLM.ts +14 -17
  100. package/src/hooks/useCactusSTT.ts +47 -98
  101. package/src/hooks/useCactusVAD.ts +215 -0
  102. package/src/index.tsx +18 -12
  103. package/src/modelRegistry.ts +65 -0
  104. package/src/native/Cactus.ts +102 -41
  105. package/src/native/CactusIndex.ts +2 -2
  106. package/src/native/index.ts +0 -3
  107. package/src/specs/Cactus.nitro.ts +11 -7
  108. package/src/types/CactusIndex.ts +2 -2
  109. package/src/types/CactusLM.ts +19 -11
  110. package/src/types/CactusSTT.ts +33 -13
  111. package/src/types/CactusVAD.ts +39 -0
  112. package/src/types/common.ts +1 -6
  113. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +0 -46
  114. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +0 -27
  115. package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
  116. package/cpp/HybridCactusUtil.cpp +0 -47
  117. package/cpp/HybridCactusUtil.hpp +0 -27
  118. package/cpp/cactus_util.h +0 -25
  119. package/ios/HybridCactusCrypto.swift +0 -37
  120. package/ios/HybridCactusDeviceInfo.swift +0 -32
  121. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +0 -656
  122. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +0 -656
  123. package/ios/cactus_util.xcframework/Info.plist +0 -39
  124. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +0 -25
  125. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +0 -27
  126. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +0 -10
  127. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +0 -25
  128. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
  129. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
  130. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +0 -25
  131. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +0 -27
  132. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +0 -10
  133. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +0 -25
  134. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
  135. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +0 -135
  136. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
  137. package/lib/module/api/Database.js +0 -45
  138. package/lib/module/api/Database.js.map +0 -1
  139. package/lib/module/api/RemoteLM.js +0 -201
  140. package/lib/module/api/RemoteLM.js.map +0 -1
  141. package/lib/module/config/CactusConfig.js +0 -12
  142. package/lib/module/config/CactusConfig.js.map +0 -1
  143. package/lib/module/models.js +0 -336
  144. package/lib/module/models.js.map +0 -1
  145. package/lib/module/native/CactusCrypto.js +0 -10
  146. package/lib/module/native/CactusCrypto.js.map +0 -1
  147. package/lib/module/native/CactusDeviceInfo.js +0 -13
  148. package/lib/module/native/CactusDeviceInfo.js.map +0 -1
  149. package/lib/module/native/CactusUtil.js +0 -36
  150. package/lib/module/native/CactusUtil.js.map +0 -1
  151. package/lib/module/specs/CactusCrypto.nitro.js +0 -4
  152. package/lib/module/specs/CactusCrypto.nitro.js.map +0 -1
  153. package/lib/module/specs/CactusDeviceInfo.nitro.js +0 -4
  154. package/lib/module/specs/CactusDeviceInfo.nitro.js.map +0 -1
  155. package/lib/module/specs/CactusUtil.nitro.js +0 -4
  156. package/lib/module/telemetry/Telemetry.js +0 -154
  157. package/lib/module/telemetry/Telemetry.js.map +0 -1
  158. package/lib/typescript/src/api/Database.d.ts +0 -12
  159. package/lib/typescript/src/api/Database.d.ts.map +0 -1
  160. package/lib/typescript/src/api/RemoteLM.d.ts +0 -14
  161. package/lib/typescript/src/api/RemoteLM.d.ts.map +0 -1
  162. package/lib/typescript/src/config/CactusConfig.d.ts +0 -7
  163. package/lib/typescript/src/config/CactusConfig.d.ts.map +0 -1
  164. package/lib/typescript/src/models.d.ts +0 -6
  165. package/lib/typescript/src/models.d.ts.map +0 -1
  166. package/lib/typescript/src/native/CactusCrypto.d.ts +0 -5
  167. package/lib/typescript/src/native/CactusCrypto.d.ts.map +0 -1
  168. package/lib/typescript/src/native/CactusDeviceInfo.d.ts +0 -7
  169. package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +0 -1
  170. package/lib/typescript/src/native/CactusUtil.d.ts +0 -6
  171. package/lib/typescript/src/native/CactusUtil.d.ts.map +0 -1
  172. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +0 -8
  173. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +0 -1
  174. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +0 -16
  175. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +0 -1
  176. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +0 -10
  177. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +0 -1
  178. package/lib/typescript/src/telemetry/Telemetry.d.ts +0 -34
  179. package/lib/typescript/src/telemetry/Telemetry.d.ts.map +0 -1
  180. package/nitrogen/generated/android/c++/JDeviceInfo.hpp +0 -74
  181. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +0 -65
  182. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +0 -65
  183. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +0 -85
  184. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +0 -66
  185. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +0 -50
  186. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +0 -58
  187. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +0 -62
  188. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +0 -11
  189. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +0 -77
  190. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +0 -11
  191. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +0 -88
  192. package/nitrogen/generated/ios/swift/DeviceInfo.swift +0 -98
  193. package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +0 -47
  194. package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +0 -54
  195. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +0 -57
  196. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +0 -139
  197. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +0 -58
  198. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +0 -164
  199. package/nitrogen/generated/shared/c++/DeviceInfo.hpp +0 -92
  200. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +0 -21
  201. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +0 -63
  202. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +0 -22
  203. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +0 -67
  204. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +0 -23
  205. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +0 -66
  206. package/src/api/Database.ts +0 -55
  207. package/src/api/RemoteLM.ts +0 -273
  208. package/src/config/CactusConfig.ts +0 -11
  209. package/src/models.ts +0 -344
  210. package/src/native/CactusCrypto.ts +0 -11
  211. package/src/native/CactusDeviceInfo.ts +0 -18
  212. package/src/native/CactusUtil.ts +0 -43
  213. package/src/specs/CactusCrypto.nitro.ts +0 -6
  214. package/src/specs/CactusDeviceInfo.nitro.ts +0 -15
  215. package/src/specs/CactusUtil.nitro.ts +0 -8
  216. package/src/telemetry/Telemetry.ts +0 -236
@@ -19,14 +19,53 @@
19
19
 
20
20
  #ifdef __APPLE__
21
21
  #include <uuid/uuid.h>
22
+ #include <mach/mach.h>
23
+ #elif defined(_WIN32)
24
+ #include <windows.h>
25
+ #include <psapi.h>
26
+ #elif defined(__linux__) || defined(__ANDROID__)
27
+ #include <unistd.h>
22
28
  #endif
23
29
 
30
+ inline size_t get_memory_footprint_bytes() {
31
+ #ifdef __APPLE__
32
+ task_vm_info_data_t vm_info;
33
+ mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
34
+ if (task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count) == KERN_SUCCESS)
35
+ return vm_info.phys_footprint;
36
+
37
+ #elif defined(_WIN32)
38
+ PROCESS_MEMORY_COUNTERS_EX pmc;
39
+ if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc)))
40
+ return pmc.PrivateUsage;
41
+
42
+ #elif defined(__linux__) || defined(__ANDROID__)
43
+ std::ifstream statm("/proc/self/statm");
44
+ if (statm.is_open()) {
45
+ size_t size, resident;
46
+ statm >> size >> resident;
47
+ return resident * sysconf(_SC_PAGESIZE);
48
+ }
49
+ #endif
50
+ return 0;
51
+ }
52
+
53
+ inline double get_ram_usage_mb() {
54
+ return get_memory_footprint_bytes() / (1024.0 * 1024.0);
55
+ }
56
+
24
57
  struct CactusModelHandle {
25
58
  std::unique_ptr<cactus::engine::Model> model;
59
+ std::unique_ptr<cactus::engine::Model> vad_model;
26
60
  std::atomic<bool> should_stop;
27
61
  std::vector<uint32_t> processed_tokens;
28
62
  std::mutex model_mutex;
29
63
  std::string model_name;
64
+ std::unique_ptr<cactus::engine::index::Index> corpus_index;
65
+ std::string corpus_dir;
66
+ size_t corpus_embedding_dim = 0;
67
+ std::vector<std::vector<float>> tool_embeddings;
68
+ std::vector<std::string> tool_texts;
30
69
 
31
70
  CactusModelHandle() : should_stop(false) {}
32
71
  };
@@ -36,16 +75,37 @@ extern std::string last_error_message;
36
75
  bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
37
76
  const std::vector<std::vector<uint32_t>>& stop_sequences);
38
77
 
78
+ std::string retrieve_rag_context(CactusModelHandle* handle, const std::string& query);
79
+
39
80
  namespace cactus {
40
- namespace ffi {
81
+ namespace audio {
82
+
83
+ static constexpr size_t WHISPER_TARGET_FRAMES = 3000;
84
+ static constexpr int WHISPER_SAMPLE_RATE = 16000;
85
+
86
+ inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram_config() {
87
+ cactus::engine::AudioProcessor::SpectrogramConfig cfg{};
88
+ cfg.n_fft = 400;
89
+ cfg.frame_length = 400;
90
+ cfg.hop_length = 160;
91
+ cfg.power = 2.0f;
92
+ cfg.center = true;
93
+ cfg.pad_mode = "reflect";
94
+ cfg.onesided = true;
95
+ cfg.dither = 0.0f;
96
+ cfg.mel_floor = 1e-10f;
97
+ cfg.log_mel = "log10";
98
+ cfg.reference = 1.0f;
99
+ cfg.min_value = 1e-10f;
100
+ cfg.remove_dc_offset = true;
101
+ return cfg;
102
+ }
41
103
 
42
- #ifndef CACTUS_VERSION
43
- #define CACTUS_VERSION "unknown"
44
- #endif
104
+ } // namespace audio
105
+ } // namespace cactus
45
106
 
46
- inline const char* getVersion() {
47
- return CACTUS_VERSION;
48
- }
107
+ namespace cactus {
108
+ namespace ffi {
49
109
 
50
110
  inline std::string generateUUID() {
51
111
  #ifdef __APPLE__
@@ -66,18 +126,49 @@ struct ToolFunction {
66
126
  } // namespace ffi
67
127
  } // namespace cactus
68
128
 
129
+ std::vector<cactus::ffi::ToolFunction> select_relevant_tools(
130
+ CactusModelHandle* handle,
131
+ const std::string& query,
132
+ const std::vector<cactus::ffi::ToolFunction>& all_tools,
133
+ size_t top_k);
134
+
69
135
  #include "gemma_tools.h"
70
136
 
71
137
  namespace cactus {
72
138
  namespace ffi {
73
139
 
74
- inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
75
- std::string sanitized_msg = error_message;
76
- for (auto& c : sanitized_msg) {
77
- if (c == '"') c = '\'';
78
- if (c == '\n') c = ' ';
140
+ inline std::string escape_json_string(const std::string& s) {
141
+ std::ostringstream o;
142
+ for (char c : s) {
143
+ if (c == '"') o << "\\\"";
144
+ else if (c == '\n') o << "\\n";
145
+ else if (c == '\r') o << "\\r";
146
+ else if (c == '\t') o << "\\t";
147
+ else if (c == '\\') o << "\\\\";
148
+ else o << c;
79
149
  }
80
- std::string error_json = "{\"success\":false,\"error\":\"" + sanitized_msg + "\"}";
150
+ return o.str();
151
+ }
152
+
153
+ inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
154
+ std::ostringstream json;
155
+ json << "{";
156
+ json << "\"success\":false,";
157
+ json << "\"error\":\"" << escape_json_string(error_message) << "\",";
158
+ json << "\"cloud_handoff\":false,";
159
+ json << "\"response\":null,";
160
+ json << "\"function_calls\":[],";
161
+ json << "\"confidence\":0.0,";
162
+ json << "\"time_to_first_token_ms\":0.0,";
163
+ json << "\"total_time_ms\":0.0,";
164
+ json << "\"prefill_tps\":0.0,";
165
+ json << "\"decode_tps\":0.0,";
166
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
167
+ json << "\"prefill_tokens\":0,";
168
+ json << "\"decode_tokens\":0,";
169
+ json << "\"total_tokens\":0";
170
+ json << "}";
171
+ std::string error_json = json.str();
81
172
  if (response_buffer && error_json.length() < buffer_size) {
82
173
  std::strcpy(response_buffer, error_json.c_str());
83
174
  }
@@ -228,12 +319,22 @@ inline void parse_options_json(const std::string& json,
228
319
  float& temperature, float& top_p,
229
320
  size_t& top_k, size_t& max_tokens,
230
321
  std::vector<std::string>& stop_sequences,
231
- bool& force_tools) {
322
+ bool& force_tools,
323
+ size_t& tool_rag_top_k,
324
+ float& confidence_threshold,
325
+ bool& include_stop_sequences,
326
+ bool& use_vad,
327
+ bool& telemetry_enabled) {
232
328
  temperature = 0.0f;
233
329
  top_p = 0.0f;
234
330
  top_k = 0;
235
331
  max_tokens = 100;
236
332
  force_tools = false;
333
+ tool_rag_top_k = 2;
334
+ confidence_threshold = 0.7f;
335
+ include_stop_sequences = false;
336
+ use_vad = true;
337
+ telemetry_enabled = true;
237
338
  stop_sequences.clear();
238
339
 
239
340
  if (json.empty()) return;
@@ -269,6 +370,39 @@ inline void parse_options_json(const std::string& json,
269
370
  force_tools = (json.substr(pos, 4) == "true");
270
371
  }
271
372
 
373
+ pos = json.find("\"tool_rag_top_k\"");
374
+ if (pos != std::string::npos) {
375
+ pos = json.find(':', pos) + 1;
376
+ tool_rag_top_k = std::stoul(json.substr(pos));
377
+ }
378
+
379
+ pos = json.find("\"confidence_threshold\"");
380
+ if (pos != std::string::npos) {
381
+ pos = json.find(':', pos) + 1;
382
+ confidence_threshold = std::stof(json.substr(pos));
383
+ }
384
+
385
+ pos = json.find("\"include_stop_sequences\"");
386
+ if (pos != std::string::npos) {
387
+ pos = json.find(':', pos) + 1;
388
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
389
+ include_stop_sequences = (json.substr(pos, 4) == "true");
390
+ }
391
+
392
+ pos = json.find("\"use_vad\"");
393
+ if (pos != std::string::npos) {
394
+ pos = json.find(':', pos) + 1;
395
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
396
+ use_vad = (json.substr(pos, 4) == "true");
397
+ }
398
+
399
+ pos = json.find("\"telemetry_enabled\"");
400
+ if (pos != std::string::npos) {
401
+ pos = json.find(':', pos) + 1;
402
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
403
+ telemetry_enabled = (json.substr(pos, 4) == "true");
404
+ }
405
+
272
406
  pos = json.find("\"stop_sequences\"");
273
407
  if (pos != std::string::npos) {
274
408
  pos = json.find('[', pos);
@@ -305,6 +439,78 @@ inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tool
305
439
  return formatted_tools_json;
306
440
  }
307
441
 
442
+ static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
443
+ while (begin < end && std::isspace(static_cast<unsigned char>(value[begin]))) {
444
+ begin++;
445
+ }
446
+ while (end > begin && std::isspace(static_cast<unsigned char>(value[end - 1]))) {
447
+ end--;
448
+ }
449
+ return value.substr(begin, end - begin);
450
+ }
451
+
452
+ static inline void append_lfm2_call(const std::string& entry,
453
+ std::vector<std::string>& function_calls) {
454
+ if (entry.empty()) return;
455
+
456
+ std::string trimmed_entry = trim_lfm2_slice(entry, 0, entry.size());
457
+ if (trimmed_entry.empty()) return;
458
+
459
+ size_t paren_pos = trimmed_entry.find('(');
460
+ if (paren_pos == std::string::npos) return;
461
+
462
+ std::string func_name = trim_lfm2_slice(trimmed_entry, 0, paren_pos);
463
+ std::string args_str = trim_lfm2_slice(trimmed_entry, paren_pos + 1, trimmed_entry.size());
464
+
465
+ if (!args_str.empty() && args_str.back() == ')') {
466
+ args_str.pop_back();
467
+ args_str = trim_lfm2_slice(args_str, 0, args_str.size());
468
+ }
469
+
470
+ std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
471
+
472
+ size_t arg_pos = 0;
473
+ bool first_arg = true;
474
+ while (arg_pos < args_str.length()) {
475
+ while (arg_pos < args_str.length() && std::isspace(static_cast<unsigned char>(args_str[arg_pos]))) {
476
+ arg_pos++;
477
+ }
478
+
479
+ size_t eq_pos = args_str.find('=', arg_pos);
480
+ if (eq_pos == std::string::npos) break;
481
+
482
+ std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
483
+
484
+ size_t val_start = eq_pos + 1;
485
+ size_t val_end = val_start;
486
+
487
+ if (val_start < args_str.length() && args_str[val_start] == '"') {
488
+ val_start++;
489
+ val_end = args_str.find('"', val_start);
490
+ if (val_end == std::string::npos) break;
491
+ } else {
492
+ val_end = args_str.find(',', val_start);
493
+ if (val_end == std::string::npos) val_end = args_str.length();
494
+ }
495
+
496
+ std::string arg_value = args_str.substr(val_start, val_end - val_start);
497
+
498
+ if (!first_arg) json_call += ",";
499
+ json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
500
+ first_arg = false;
501
+
502
+ arg_pos = args_str.find(',', val_end);
503
+ if (arg_pos != std::string::npos) {
504
+ arg_pos++;
505
+ } else {
506
+ break;
507
+ }
508
+ }
509
+
510
+ json_call += "}}";
511
+ function_calls.push_back(json_call);
512
+ }
513
+
308
514
  inline void parse_function_calls_from_response(const std::string& response_text,
309
515
  std::string& regular_response,
310
516
  std::vector<std::string>& function_calls) {
@@ -341,7 +547,7 @@ inline void parse_function_calls_from_response(const std::string& response_text,
341
547
  break;
342
548
  }
343
549
  }
344
-
550
+
345
551
  // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
346
552
  const std::string TOOL_CALL_START = "<|tool_call_start|>";
347
553
  const std::string TOOL_CALL_END = "<|tool_call_end|>";
@@ -349,68 +555,51 @@ inline void parse_function_calls_from_response(const std::string& response_text,
349
555
 
350
556
  while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
351
557
  size_t content_start = tool_start_pos + TOOL_CALL_START.length();
352
- size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
558
+ size_t tool_end_pos = regular_response.find(TOOL_CALL_END, content_start);
353
559
 
354
560
  if (tool_end_pos != std::string::npos) {
355
- std::string tool_content = response_text.substr(content_start, tool_end_pos - content_start);
356
-
357
- if (tool_content.size() > 2 && tool_content[0] == '[' && tool_content[tool_content.size()-1] == ']') {
358
- tool_content = tool_content.substr(1, tool_content.size() - 2);
359
-
360
- size_t paren_pos = tool_content.find('(');
361
- if (paren_pos != std::string::npos) {
362
- std::string func_name = tool_content.substr(0, paren_pos);
363
- std::string args_str = tool_content.substr(paren_pos + 1);
364
-
365
- if (!args_str.empty() && args_str.back() == ')') {
366
- args_str.pop_back();
367
- }
368
-
369
- std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
370
-
371
- size_t arg_pos = 0;
372
- bool first_arg = true;
373
- while (arg_pos < args_str.length()) {
374
- while (arg_pos < args_str.length() && std::isspace(args_str[arg_pos])) arg_pos++;
375
-
376
- size_t eq_pos = args_str.find('=', arg_pos);
377
- if (eq_pos == std::string::npos) break;
378
-
379
- std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
380
-
381
- size_t val_start = eq_pos + 1;
382
- size_t val_end = val_start;
383
-
384
- if (val_start < args_str.length() && args_str[val_start] == '"') {
385
- val_start++;
386
- val_end = args_str.find('"', val_start);
387
- if (val_end == std::string::npos) break;
388
- } else {
389
- val_end = args_str.find(',', val_start);
390
- if (val_end == std::string::npos) val_end = args_str.length();
391
- }
392
-
393
- std::string arg_value = args_str.substr(val_start, val_end - val_start);
561
+ std::string tool_content = regular_response.substr(content_start, tool_end_pos - content_start);
562
+ std::string content = tool_content;
563
+ size_t trim_start = 0;
564
+ while (trim_start < content.size() && std::isspace(static_cast<unsigned char>(content[trim_start]))) {
565
+ trim_start++;
566
+ }
394
567
 
395
- if (!first_arg) json_call += ",";
396
- json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
397
- first_arg = false;
568
+ if (trim_start < content.size()) {
569
+ size_t trim_end = content.size() - 1;
570
+ while (trim_end > trim_start && std::isspace(static_cast<unsigned char>(content[trim_end]))) {
571
+ trim_end--;
572
+ }
573
+ content = content.substr(trim_start, trim_end - trim_start + 1);
574
+ } else {
575
+ content.clear();
576
+ }
398
577
 
399
- arg_pos = args_str.find(',', val_end);
400
- if (arg_pos != std::string::npos) {
401
- arg_pos++;
402
- } else {
403
- break;
404
- }
578
+ if (!content.empty() && content.front() == '[' && content.back() == ']') {
579
+ std::string inner = content.substr(1, content.size() - 2);
580
+ size_t start = 0;
581
+ int paren_depth = 0;
582
+
583
+ for (size_t i = 0; i < inner.size(); ++i) {
584
+ char c = inner[i];
585
+ if (c == '(') {
586
+ paren_depth++;
587
+ } else if (c == ')' && paren_depth > 0) {
588
+ paren_depth--;
589
+ } else if (c == ',' && paren_depth == 0) {
590
+ append_lfm2_call(inner.substr(start, i - start), function_calls);
591
+ start = i + 1;
405
592
  }
593
+ }
406
594
 
407
- json_call += "}}";
408
- function_calls.push_back(json_call);
595
+ if (start < inner.size()) {
596
+ append_lfm2_call(inner.substr(start), function_calls);
409
597
  }
598
+ } else if (!content.empty()) {
599
+ append_lfm2_call(content, function_calls);
410
600
  }
411
601
 
412
602
  regular_response.erase(tool_start_pos, tool_end_pos + TOOL_CALL_END.length() - tool_start_pos);
413
- tool_start_pos = tool_end_pos + TOOL_CALL_END.length();
414
603
  } else {
415
604
  break;
416
605
  }
@@ -451,38 +640,71 @@ inline std::string construct_response_json(const std::string& regular_response,
451
640
  const std::vector<std::string>& function_calls,
452
641
  double time_to_first_token,
453
642
  double total_time_ms,
454
- double tokens_per_second,
643
+ double prefill_tps,
644
+ double decode_tps,
455
645
  size_t prompt_tokens,
456
- size_t completion_tokens) {
457
- std::ostringstream json_response;
458
- json_response << "{";
459
- json_response << "\"success\":true,";
460
- json_response << "\"response\":\"";
461
- for (char c : regular_response) {
462
- if (c == '"') json_response << "\\\"";
463
- else if (c == '\n') json_response << "\\n";
464
- else if (c == '\r') json_response << "\\r";
465
- else if (c == '\t') json_response << "\\t";
466
- else if (c == '\\') json_response << "\\\\";
467
- else json_response << c;
646
+ size_t completion_tokens,
647
+ float confidence = 0.0f,
648
+ bool cloud_handoff = false) {
649
+ std::ostringstream json;
650
+ json << "{";
651
+ json << "\"success\":" << (cloud_handoff ? "false" : "true") << ",";
652
+ json << "\"error\":null,";
653
+ json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
654
+ json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
655
+ json << "\"function_calls\":[";
656
+ for (size_t i = 0; i < function_calls.size(); ++i) {
657
+ if (i > 0) json << ",";
658
+ json << function_calls[i];
468
659
  }
469
- json_response << "\",";
470
- if (!function_calls.empty()) {
471
- json_response << "\"function_calls\":[";
472
- for (size_t i = 0; i < function_calls.size(); ++i) {
473
- if (i > 0) json_response << ",";
474
- json_response << function_calls[i];
475
- }
476
- json_response << "],";
660
+ json << "],";
661
+ json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
662
+ json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
663
+ json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
664
+ json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
665
+ json << "\"decode_tps\":" << std::fixed << std::setprecision(2) << decode_tps << ",";
666
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
667
+ json << "\"prefill_tokens\":" << prompt_tokens << ",";
668
+ json << "\"decode_tokens\":" << completion_tokens << ",";
669
+ json << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
670
+ json << "}";
671
+ return json.str();
672
+ }
673
+
674
+ inline std::string construct_cloud_handoff_json(float confidence,
675
+ double time_to_first_token,
676
+ double prefill_tps,
677
+ size_t prompt_tokens) {
678
+ std::ostringstream json;
679
+ json << "{";
680
+ json << "\"success\":false,";
681
+ json << "\"error\":null,";
682
+ json << "\"cloud_handoff\":true,";
683
+ json << "\"response\":null,";
684
+ json << "\"function_calls\":[],";
685
+ json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
686
+ json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
687
+ json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
688
+ json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
689
+ json << "\"decode_tps\":0.0,";
690
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
691
+ json << "\"prefill_tokens\":" << prompt_tokens << ",";
692
+ json << "\"decode_tokens\":0,";
693
+ json << "\"total_tokens\":" << prompt_tokens;
694
+ json << "}";
695
+ return json.str();
696
+ }
697
+
698
+ inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
699
+ if (calls.empty()) return "[]";
700
+ std::ostringstream oss;
701
+ oss << "[";
702
+ for (size_t i = 0; i < calls.size(); ++i) {
703
+ if (i > 0) oss << ",";
704
+ oss << calls[i];
477
705
  }
478
- json_response << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
479
- json_response << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
480
- json_response << "\"tokens_per_second\":" << std::fixed << std::setprecision(2) << tokens_per_second << ",";
481
- json_response << "\"prefill_tokens\":" << prompt_tokens << ",";
482
- json_response << "\"decode_tokens\":" << completion_tokens << ",";
483
- json_response << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
484
- json_response << "}";
485
- return json_response.str();
706
+ oss << "]";
707
+ return oss.str();
486
708
  }
487
709
 
488
710
  } // namespace ffi
@@ -494,35 +716,8 @@ extern "C" {
494
716
 
495
717
  const char* cactus_get_last_error();
496
718
 
497
- __attribute__((weak))
498
- const char* register_app(const char* encrypted_data);
499
-
500
- __attribute__((weak))
501
- const char* get_device_id(const char* current_token);
502
-
503
719
  #ifdef __cplusplus
504
720
  }
505
721
  #endif
506
722
 
507
- #ifdef __cplusplus
508
- extern "C" {
509
-
510
- __attribute__((weak))
511
- inline const char* register_app(const char* encrypted_data) {
512
- (void)encrypted_data;
513
- static thread_local std::string uuid_storage;
514
- uuid_storage = cactus::ffi::generateUUID();
515
- return uuid_storage.c_str();
516
- }
517
-
518
- __attribute__((weak))
519
- inline const char* get_device_id(const char* current_token) {
520
- (void)current_token;
521
- static thread_local std::string uuid_storage;
522
- uuid_storage = cactus::ffi::generateUUID();
523
- return uuid_storage.c_str();
524
- }
525
- }
526
- #endif
527
-
528
723
  #endif // CACTUS_UTILS_H