cactus-react-native 1.5.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. package/Cactus.podspec +1 -1
  2. package/README.md +347 -241
  3. package/android/CMakeLists.txt +24 -5
  4. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcurl.a +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libmbedcrypto.a +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libmbedtls.a +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libmbedx509.a +0 -0
  9. package/cpp/HybridCactus.cpp +197 -117
  10. package/cpp/HybridCactus.hpp +18 -9
  11. package/cpp/cactus_ffi.h +66 -42
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +0 -1
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_cloud.h +48 -0
  14. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +66 -42
  15. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +568 -135
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +148 -17
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +145 -36
  18. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +187 -6
  19. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +49 -149
  20. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
  21. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  22. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +0 -1
  23. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_cloud.h +48 -0
  24. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +66 -42
  25. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +568 -135
  26. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +148 -17
  27. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +145 -36
  28. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +187 -6
  29. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +49 -149
  30. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
  31. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
  32. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  33. package/lib/module/classes/CactusLM.js +16 -49
  34. package/lib/module/classes/CactusLM.js.map +1 -1
  35. package/lib/module/classes/CactusSTT.js +41 -75
  36. package/lib/module/classes/CactusSTT.js.map +1 -1
  37. package/lib/module/classes/CactusVAD.js +95 -0
  38. package/lib/module/classes/CactusVAD.js.map +1 -0
  39. package/lib/module/hooks/useCactusLM.js +10 -11
  40. package/lib/module/hooks/useCactusLM.js.map +1 -1
  41. package/lib/module/hooks/useCactusSTT.js +23 -62
  42. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  43. package/lib/module/hooks/useCactusVAD.js +171 -0
  44. package/lib/module/hooks/useCactusVAD.js.map +1 -0
  45. package/lib/module/index.js +2 -3
  46. package/lib/module/index.js.map +1 -1
  47. package/lib/module/modelRegistry.js +52 -0
  48. package/lib/module/modelRegistry.js.map +1 -0
  49. package/lib/module/native/Cactus.js +103 -23
  50. package/lib/module/native/Cactus.js.map +1 -1
  51. package/lib/module/native/CactusIndex.js.map +1 -1
  52. package/lib/module/native/index.js +0 -3
  53. package/lib/module/native/index.js.map +1 -1
  54. package/lib/module/types/CactusVAD.js +4 -0
  55. package/lib/module/{specs/CactusUtil.nitro.js.map → types/CactusVAD.js.map} +1 -1
  56. package/lib/typescript/src/classes/CactusLM.d.ts +5 -7
  57. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  58. package/lib/typescript/src/classes/CactusSTT.d.ts +9 -12
  59. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  60. package/lib/typescript/src/classes/CactusVAD.d.ts +20 -0
  61. package/lib/typescript/src/classes/CactusVAD.d.ts.map +1 -0
  62. package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
  63. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  64. package/lib/typescript/src/hooks/useCactusSTT.d.ts +6 -8
  65. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  66. package/lib/typescript/src/hooks/useCactusVAD.d.ts +15 -0
  67. package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +1 -0
  68. package/lib/typescript/src/index.d.ts +7 -5
  69. package/lib/typescript/src/index.d.ts.map +1 -1
  70. package/lib/typescript/src/modelRegistry.d.ts +5 -0
  71. package/lib/typescript/src/modelRegistry.d.ts.map +1 -0
  72. package/lib/typescript/src/native/Cactus.d.ts +13 -11
  73. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  74. package/lib/typescript/src/native/CactusIndex.d.ts +2 -2
  75. package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -1
  76. package/lib/typescript/src/native/index.d.ts +0 -3
  77. package/lib/typescript/src/native/index.d.ts.map +1 -1
  78. package/lib/typescript/src/specs/Cactus.nitro.d.ts +7 -6
  79. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  80. package/lib/typescript/src/types/CactusIndex.d.ts +2 -2
  81. package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -1
  82. package/lib/typescript/src/types/CactusLM.d.ts +19 -11
  83. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  84. package/lib/typescript/src/types/CactusSTT.d.ts +44 -12
  85. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  86. package/lib/typescript/src/types/CactusVAD.d.ts +34 -0
  87. package/lib/typescript/src/types/CactusVAD.d.ts.map +1 -0
  88. package/lib/typescript/src/types/common.d.ts +1 -6
  89. package/lib/typescript/src/types/common.d.ts.map +1 -1
  90. package/nitro.json +0 -11
  91. package/nitrogen/generated/android/cactus+autolinking.cmake +0 -5
  92. package/nitrogen/generated/android/cactusOnLoad.cpp +0 -30
  93. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +0 -50
  94. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +9 -147
  95. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +0 -13
  96. package/nitrogen/generated/ios/CactusAutolinking.mm +0 -26
  97. package/nitrogen/generated/ios/CactusAutolinking.swift +0 -30
  98. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -4
  99. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +7 -6
  100. package/package.json +3 -3
  101. package/src/classes/CactusLM.ts +18 -65
  102. package/src/classes/CactusSTT.ts +52 -90
  103. package/src/classes/CactusVAD.ts +129 -0
  104. package/src/hooks/useCactusLM.ts +14 -17
  105. package/src/hooks/useCactusSTT.ts +47 -98
  106. package/src/hooks/useCactusVAD.ts +215 -0
  107. package/src/index.tsx +21 -12
  108. package/src/modelRegistry.ts +65 -0
  109. package/src/native/Cactus.ts +131 -38
  110. package/src/native/CactusIndex.ts +2 -2
  111. package/src/native/index.ts +0 -3
  112. package/src/specs/Cactus.nitro.ts +16 -7
  113. package/src/types/CactusIndex.ts +2 -2
  114. package/src/types/CactusLM.ts +19 -11
  115. package/src/types/CactusSTT.ts +47 -13
  116. package/src/types/CactusVAD.ts +39 -0
  117. package/src/types/common.ts +1 -6
  118. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +0 -46
  119. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +0 -27
  120. package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
  121. package/cpp/HybridCactusUtil.cpp +0 -47
  122. package/cpp/HybridCactusUtil.hpp +0 -27
  123. package/cpp/cactus_util.h +0 -25
  124. package/ios/HybridCactusCrypto.swift +0 -37
  125. package/ios/HybridCactusDeviceInfo.swift +0 -32
  126. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +0 -656
  127. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +0 -656
  128. package/ios/cactus_util.xcframework/Info.plist +0 -39
  129. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +0 -25
  130. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +0 -27
  131. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +0 -10
  132. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +0 -25
  133. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
  134. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
  135. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +0 -25
  136. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +0 -27
  137. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +0 -10
  138. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +0 -25
  139. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
  140. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +0 -135
  141. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
  142. package/lib/module/api/Database.js +0 -45
  143. package/lib/module/api/Database.js.map +0 -1
  144. package/lib/module/api/RemoteLM.js +0 -201
  145. package/lib/module/api/RemoteLM.js.map +0 -1
  146. package/lib/module/config/CactusConfig.js +0 -12
  147. package/lib/module/config/CactusConfig.js.map +0 -1
  148. package/lib/module/models.js +0 -336
  149. package/lib/module/models.js.map +0 -1
  150. package/lib/module/native/CactusCrypto.js +0 -10
  151. package/lib/module/native/CactusCrypto.js.map +0 -1
  152. package/lib/module/native/CactusDeviceInfo.js +0 -13
  153. package/lib/module/native/CactusDeviceInfo.js.map +0 -1
  154. package/lib/module/native/CactusUtil.js +0 -36
  155. package/lib/module/native/CactusUtil.js.map +0 -1
  156. package/lib/module/specs/CactusCrypto.nitro.js +0 -4
  157. package/lib/module/specs/CactusCrypto.nitro.js.map +0 -1
  158. package/lib/module/specs/CactusDeviceInfo.nitro.js +0 -4
  159. package/lib/module/specs/CactusDeviceInfo.nitro.js.map +0 -1
  160. package/lib/module/specs/CactusUtil.nitro.js +0 -4
  161. package/lib/module/telemetry/Telemetry.js +0 -154
  162. package/lib/module/telemetry/Telemetry.js.map +0 -1
  163. package/lib/typescript/src/api/Database.d.ts +0 -12
  164. package/lib/typescript/src/api/Database.d.ts.map +0 -1
  165. package/lib/typescript/src/api/RemoteLM.d.ts +0 -14
  166. package/lib/typescript/src/api/RemoteLM.d.ts.map +0 -1
  167. package/lib/typescript/src/config/CactusConfig.d.ts +0 -7
  168. package/lib/typescript/src/config/CactusConfig.d.ts.map +0 -1
  169. package/lib/typescript/src/models.d.ts +0 -6
  170. package/lib/typescript/src/models.d.ts.map +0 -1
  171. package/lib/typescript/src/native/CactusCrypto.d.ts +0 -5
  172. package/lib/typescript/src/native/CactusCrypto.d.ts.map +0 -1
  173. package/lib/typescript/src/native/CactusDeviceInfo.d.ts +0 -7
  174. package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +0 -1
  175. package/lib/typescript/src/native/CactusUtil.d.ts +0 -6
  176. package/lib/typescript/src/native/CactusUtil.d.ts.map +0 -1
  177. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +0 -8
  178. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +0 -1
  179. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +0 -16
  180. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +0 -1
  181. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +0 -10
  182. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +0 -1
  183. package/lib/typescript/src/telemetry/Telemetry.d.ts +0 -34
  184. package/lib/typescript/src/telemetry/Telemetry.d.ts.map +0 -1
  185. package/nitrogen/generated/android/c++/JDeviceInfo.hpp +0 -74
  186. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +0 -65
  187. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +0 -65
  188. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +0 -85
  189. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +0 -66
  190. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +0 -50
  191. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +0 -58
  192. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +0 -62
  193. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +0 -11
  194. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +0 -77
  195. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +0 -11
  196. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +0 -88
  197. package/nitrogen/generated/ios/swift/DeviceInfo.swift +0 -98
  198. package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +0 -47
  199. package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +0 -54
  200. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +0 -57
  201. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +0 -139
  202. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +0 -58
  203. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +0 -164
  204. package/nitrogen/generated/shared/c++/DeviceInfo.hpp +0 -92
  205. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +0 -21
  206. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +0 -63
  207. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +0 -22
  208. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +0 -67
  209. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +0 -23
  210. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +0 -66
  211. package/src/api/Database.ts +0 -55
  212. package/src/api/RemoteLM.ts +0 -273
  213. package/src/config/CactusConfig.ts +0 -11
  214. package/src/models.ts +0 -344
  215. package/src/native/CactusCrypto.ts +0 -11
  216. package/src/native/CactusDeviceInfo.ts +0 -18
  217. package/src/native/CactusUtil.ts +0 -43
  218. package/src/specs/CactusCrypto.nitro.ts +0 -6
  219. package/src/specs/CactusDeviceInfo.nitro.ts +0 -15
  220. package/src/specs/CactusUtil.nitro.ts +0 -8
  221. package/src/telemetry/Telemetry.ts +0 -236
@@ -2,6 +2,7 @@
2
2
  #define CACTUS_UTILS_H
3
3
 
4
4
  #include "../engine/engine.h"
5
+ #include "../models/model.h"
5
6
  #include <string>
6
7
  #include <vector>
7
8
  #include <unordered_map>
@@ -12,6 +13,9 @@
12
13
  #include <iostream>
13
14
  #include <filesystem>
14
15
  #include <cctype>
16
+ #include <algorithm>
17
+ #include <cmath>
18
+ #include <limits>
15
19
  #include <memory>
16
20
  #include <atomic>
17
21
  #include <mutex>
@@ -19,14 +23,53 @@
19
23
 
20
24
  #ifdef __APPLE__
21
25
  #include <uuid/uuid.h>
26
+ #include <mach/mach.h>
27
+ #elif defined(_WIN32)
28
+ #include <windows.h>
29
+ #include <psapi.h>
30
+ #elif defined(__linux__) || defined(__ANDROID__)
31
+ #include <unistd.h>
22
32
  #endif
23
33
 
34
+ inline size_t get_memory_footprint_bytes() {
35
+ #ifdef __APPLE__
36
+ task_vm_info_data_t vm_info;
37
+ mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
38
+ if (task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count) == KERN_SUCCESS)
39
+ return vm_info.phys_footprint;
40
+
41
+ #elif defined(_WIN32)
42
+ PROCESS_MEMORY_COUNTERS_EX pmc;
43
+ if (GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc)))
44
+ return pmc.PrivateUsage;
45
+
46
+ #elif defined(__linux__) || defined(__ANDROID__)
47
+ std::ifstream statm("/proc/self/statm");
48
+ if (statm.is_open()) {
49
+ size_t size, resident;
50
+ statm >> size >> resident;
51
+ return resident * sysconf(_SC_PAGESIZE);
52
+ }
53
+ #endif
54
+ return 0;
55
+ }
56
+
57
+ inline double get_ram_usage_mb() {
58
+ return get_memory_footprint_bytes() / (1024.0 * 1024.0);
59
+ }
60
+
24
61
  struct CactusModelHandle {
25
62
  std::unique_ptr<cactus::engine::Model> model;
63
+ std::unique_ptr<cactus::engine::Model> vad_model;
26
64
  std::atomic<bool> should_stop;
27
65
  std::vector<uint32_t> processed_tokens;
28
66
  std::mutex model_mutex;
29
67
  std::string model_name;
68
+ std::unique_ptr<cactus::engine::index::Index> corpus_index;
69
+ std::string corpus_dir;
70
+ size_t corpus_embedding_dim = 0;
71
+ std::vector<std::vector<float>> tool_embeddings;
72
+ std::vector<std::string> tool_texts;
30
73
 
31
74
  CactusModelHandle() : should_stop(false) {}
32
75
  };
@@ -36,15 +79,116 @@ extern std::string last_error_message;
36
79
  bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
37
80
  const std::vector<std::vector<uint32_t>>& stop_sequences);
38
81
 
82
+ std::string retrieve_rag_context(CactusModelHandle* handle, const std::string& query);
83
+
39
84
  namespace cactus {
40
- namespace ffi {
85
+ namespace audio {
86
+
87
+ static constexpr size_t WHISPER_TARGET_FRAMES = 3000;
88
+ static constexpr int WHISPER_SAMPLE_RATE = 16000;
89
+
90
+ inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram_config() {
91
+ cactus::engine::AudioProcessor::SpectrogramConfig cfg{};
92
+ cfg.n_fft = 400;
93
+ cfg.frame_length = 400;
94
+ cfg.hop_length = 160;
95
+ cfg.power = 2.0f;
96
+ cfg.center = true;
97
+ cfg.pad_mode = "reflect";
98
+ cfg.onesided = true;
99
+ cfg.dither = 0.0f;
100
+ cfg.mel_floor = 1e-10f;
101
+ cfg.log_mel = "log10";
102
+ cfg.reference = 1.0f;
103
+ cfg.min_value = 1e-10f;
104
+ cfg.remove_dc_offset = true;
105
+ return cfg;
106
+ }
41
107
 
42
- #ifndef CACTUS_VERSION
43
- #define CACTUS_VERSION "unknown"
44
- #endif
108
+ inline cactus::engine::AudioProcessor::SpectrogramConfig get_parakeet_spectrogram_config() {
109
+ cactus::engine::AudioProcessor::SpectrogramConfig cfg{};
110
+ cfg.n_fft = 512;
111
+ cfg.frame_length = 400;
112
+ cfg.hop_length = 160;
113
+ cfg.power = 2.0f;
114
+ cfg.center = true;
115
+ cfg.pad_mode = "constant";
116
+ cfg.onesided = true;
117
+ cfg.dither = 0.0f;
118
+ cfg.mel_floor = 5.960464477539063e-08f; // 2^-24 guard value used by HF Parakeet.
119
+ cfg.log_mel = "log";
120
+ cfg.reference = 1.0f;
121
+ cfg.min_value = 1e-10f;
122
+ cfg.remove_dc_offset = false;
123
+ cfg.hann_periodic = false;
124
+ return cfg;
125
+ }
126
+
127
+ inline void apply_preemphasis(std::vector<float>& waveform, float coefficient = 0.97f) {
128
+ if (waveform.size() < 2 || coefficient == 0.0f) {
129
+ return;
130
+ }
131
+ for (size_t i = waveform.size() - 1; i > 0; --i) {
132
+ waveform[i] -= coefficient * waveform[i - 1];
133
+ }
134
+ }
135
+
136
+ inline void normalize_parakeet_log_mel(std::vector<float>& mel, size_t num_mels, float epsilon = 1e-5f) {
137
+ if (mel.empty() || num_mels == 0 || (mel.size() % num_mels) != 0) {
138
+ return;
139
+ }
140
+ const size_t num_frames = mel.size() / num_mels;
141
+ if (num_frames == 0) {
142
+ return;
143
+ }
45
144
 
46
- inline const char* getVersion() {
47
- return CACTUS_VERSION;
145
+ for (size_t m = 0; m < num_mels; ++m) {
146
+ const size_t base = m * num_frames;
147
+ float mean = 0.0f;
148
+ for (size_t t = 0; t < num_frames; ++t) {
149
+ mean += mel[base + t];
150
+ }
151
+ mean /= static_cast<float>(num_frames);
152
+
153
+ float variance = 0.0f;
154
+ for (size_t t = 0; t < num_frames; ++t) {
155
+ const float d = mel[base + t] - mean;
156
+ variance += d * d;
157
+ }
158
+ const float denom = static_cast<float>(std::max<size_t>(1, num_frames - 1));
159
+ const float inv_std = 1.0f / std::sqrt((variance / denom) + epsilon);
160
+ for (size_t t = 0; t < num_frames; ++t) {
161
+ mel[base + t] = (mel[base + t] - mean) * inv_std;
162
+ }
163
+ }
164
+ }
165
+
166
+ inline void trim_mel_frames(std::vector<float>& mel, size_t num_mels, size_t valid_frames) {
167
+ if (mel.empty() || num_mels == 0 || (mel.size() % num_mels) != 0) {
168
+ return;
169
+ }
170
+ size_t total_frames = mel.size() / num_mels;
171
+ if (valid_frames == 0 || valid_frames >= total_frames) {
172
+ return;
173
+ }
174
+ std::vector<float> trimmed(num_mels * valid_frames);
175
+ for (size_t m = 0; m < num_mels; ++m) {
176
+ const float* src = &mel[m * total_frames];
177
+ float* dst = &trimmed[m * valid_frames];
178
+ std::copy(src, src + valid_frames, dst);
179
+ }
180
+ mel.swap(trimmed);
181
+ }
182
+
183
+ } // namespace audio
184
+ } // namespace cactus
185
+
186
+ namespace cactus {
187
+ namespace ffi {
188
+
189
+ inline bool env_flag_enabled(const char* key) {
190
+ const char* value = std::getenv(key);
191
+ return value && value[0] != '\0' && !(value[0] == '0' && value[1] == '\0');
48
192
  }
49
193
 
50
194
  inline std::string generateUUID() {
@@ -54,6 +198,25 @@ inline std::string generateUUID() {
54
198
  char uuid_str[37];
55
199
  uuid_unparse_lower(uuid, uuid_str);
56
200
  return std::string(uuid_str);
201
+ #else
202
+ static std::random_device rd;
203
+ static std::mt19937 gen(rd());
204
+ static std::uniform_int_distribution<> dis(0, 15);
205
+ static std::uniform_int_distribution<> dis2(8, 11);
206
+
207
+ std::stringstream ss;
208
+ ss << std::hex;
209
+ for (int i = 0; i < 8; i++) ss << dis(gen);
210
+ ss << "-";
211
+ for (int i = 0; i < 4; i++) ss << dis(gen);
212
+ ss << "-4";
213
+ for (int i = 0; i < 3; i++) ss << dis(gen);
214
+ ss << "-";
215
+ ss << dis2(gen);
216
+ for (int i = 0; i < 3; i++) ss << dis(gen);
217
+ ss << "-";
218
+ for (int i = 0; i < 12; i++) ss << dis(gen);
219
+ return ss.str();
57
220
  #endif
58
221
  }
59
222
 
@@ -66,18 +229,173 @@ struct ToolFunction {
66
229
  } // namespace ffi
67
230
  } // namespace cactus
68
231
 
232
+ std::vector<cactus::ffi::ToolFunction> select_relevant_tools(
233
+ CactusModelHandle* handle,
234
+ const std::string& query,
235
+ const std::vector<cactus::ffi::ToolFunction>& all_tools,
236
+ size_t top_k);
237
+
69
238
  #include "gemma_tools.h"
70
239
 
71
240
  namespace cactus {
72
241
  namespace ffi {
73
242
 
74
- inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
75
- std::string sanitized_msg = error_message;
76
- for (auto& c : sanitized_msg) {
77
- if (c == '"') c = '\'';
78
- if (c == '\n') c = ' ';
243
+ inline std::string escape_json_string(const std::string& s) {
244
+ std::ostringstream o;
245
+ for (char c : s) {
246
+ if (c == '"') o << "\\\"";
247
+ else if (c == '\n') o << "\\n";
248
+ else if (c == '\r') o << "\\r";
249
+ else if (c == '\t') o << "\\t";
250
+ else if (c == '\\') o << "\\\\";
251
+ else o << c;
252
+ }
253
+ return o.str();
254
+ }
255
+
256
+
257
+ inline std::string trim_string(const std::string& s) {
258
+ size_t start = 0;
259
+ while (start < s.size() && std::isspace(static_cast<unsigned char>(s[start]))) ++start;
260
+ size_t end = s.size();
261
+ while (end > start && std::isspace(static_cast<unsigned char>(s[end - 1]))) --end;
262
+ return s.substr(start, end - start);
263
+ }
264
+
265
+ inline std::string env_or_default(const char* key, const char* fallback) {
266
+ const char* v = std::getenv(key);
267
+ if (v && v[0] != '\0') return std::string(v);
268
+ return std::string(fallback);
269
+ }
270
+
271
+ inline std::string json_string_field(const std::string& json, const std::string& key) {
272
+ std::string pattern = "\"" + key + "\":";
273
+ size_t pos = json.find(pattern);
274
+ if (pos == std::string::npos) return {};
275
+
276
+ size_t i = pos + pattern.size();
277
+ while (i < json.size() && std::isspace(static_cast<unsigned char>(json[i]))) i++;
278
+ if (i >= json.size() || json[i] != '"') return {};
279
+ ++i;
280
+
281
+ std::string out;
282
+ out.reserve(128);
283
+ while (i < json.size()) {
284
+ char c = json[i++];
285
+ if (c == '"') return out;
286
+ if (c == '\\' && i < json.size()) {
287
+ char e = json[i++];
288
+ switch (e) {
289
+ case '"': out.push_back('"'); break;
290
+ case '\\': out.push_back('\\'); break;
291
+ case '/': out.push_back('/'); break;
292
+ case 'b': out.push_back('\b'); break;
293
+ case 'f': out.push_back('\f'); break;
294
+ case 'n': out.push_back('\n'); break;
295
+ case 'r': out.push_back('\r'); break;
296
+ case 't': out.push_back('\t'); break;
297
+ default: out.push_back(e); break;
298
+ }
299
+ continue;
300
+ }
301
+ out.push_back(c);
302
+ }
303
+ return {};
304
+ }
305
+
306
+ inline std::string json_array_field(const std::string& json, const std::string& key) {
307
+ std::string pattern = "\"" + key + "\":";
308
+ size_t pos = json.find(pattern);
309
+ if (pos == std::string::npos) return "[]";
310
+ size_t start = pos + pattern.size();
311
+ while (start < json.size() && std::isspace(static_cast<unsigned char>(json[start]))) ++start;
312
+ if (start >= json.size() || json[start] != '[') return "[]";
313
+
314
+ int depth = 1;
315
+ size_t end = start + 1;
316
+ while (end < json.size() && depth > 0) {
317
+ if (json[end] == '[') depth++;
318
+ else if (json[end] == ']') depth--;
319
+ end++;
320
+ }
321
+ return json.substr(start, end - start);
322
+ }
323
+
324
+ inline std::vector<std::string> split_json_array(const std::string& array_json) {
325
+ std::vector<std::string> out;
326
+ if (array_json.size() < 2 || array_json.front() != '[' || array_json.back() != ']') return out;
327
+
328
+ size_t i = 1;
329
+ while (i + 1 < array_json.size()) {
330
+ while (i + 1 < array_json.size() &&
331
+ (std::isspace(static_cast<unsigned char>(array_json[i])) || array_json[i] == ',')) i++;
332
+ if (i + 1 >= array_json.size() || array_json[i] != '{') break;
333
+
334
+ size_t start = i;
335
+ int depth = 0;
336
+ bool in_str = false;
337
+ bool esc = false;
338
+ for (; i < array_json.size(); ++i) {
339
+ char c = array_json[i];
340
+ if (in_str) {
341
+ if (esc) esc = false;
342
+ else if (c == '\\') esc = true;
343
+ else if (c == '"') in_str = false;
344
+ continue;
345
+ }
346
+ if (c == '"') { in_str = true; continue; }
347
+ if (c == '{') depth++;
348
+ if (c == '}') {
349
+ depth--;
350
+ if (depth == 0) {
351
+ out.push_back(array_json.substr(start, i - start + 1));
352
+ i++;
353
+ break;
354
+ }
355
+ }
356
+ }
357
+ }
358
+ return out;
359
+ }
360
+
361
+ inline std::string serialize_tools_json(const std::vector<ToolFunction>& tools) {
362
+ if (tools.empty()) return "";
363
+ std::ostringstream oss;
364
+ oss << "[";
365
+ for (size_t i = 0; i < tools.size(); ++i) {
366
+ if (i > 0) oss << ",";
367
+ oss << "{\"type\":\"function\",\"function\":{";
368
+ oss << "\"name\":\"" << escape_json_string(tools[i].name) << "\",";
369
+ oss << "\"description\":\"" << escape_json_string(tools[i].description) << "\"";
370
+ auto it = tools[i].parameters.find("schema");
371
+ if (it != tools[i].parameters.end()) {
372
+ oss << ",\"parameters\":" << it->second;
373
+ }
374
+ oss << "}}";
79
375
  }
80
- std::string error_json = "{\"success\":false,\"error\":\"" + sanitized_msg + "\"}";
376
+ oss << "]";
377
+ return oss.str();
378
+ }
379
+
380
+ inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
381
+ std::ostringstream json;
382
+ json << "{";
383
+ json << "\"success\":false,";
384
+ json << "\"error\":\"" << escape_json_string(error_message) << "\",";
385
+ json << "\"cloud_handoff\":false,";
386
+ json << "\"response\":null,";
387
+ json << "\"function_calls\":[],";
388
+ json << "\"confidence\":0.0,";
389
+ json << "\"time_to_first_token_ms\":0.0,";
390
+ json << "\"total_time_ms\":0.0,";
391
+ json << "\"prefill_tps\":0.0,";
392
+ json << "\"decode_tps\":0.0,";
393
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
394
+ json << "\"prefill_tokens\":0,";
395
+ json << "\"decode_tokens\":0,";
396
+ json << "\"total_tokens\":0";
397
+ json << "}";
398
+ std::string error_json = json.str();
81
399
  if (response_buffer && error_json.length() < buffer_size) {
82
400
  std::strcpy(response_buffer, error_json.c_str());
83
401
  }
@@ -228,12 +546,28 @@ inline void parse_options_json(const std::string& json,
228
546
  float& temperature, float& top_p,
229
547
  size_t& top_k, size_t& max_tokens,
230
548
  std::vector<std::string>& stop_sequences,
231
- bool& force_tools) {
549
+ bool& force_tools,
550
+ size_t& tool_rag_top_k,
551
+ float& confidence_threshold,
552
+ bool& include_stop_sequences,
553
+ bool& use_vad,
554
+ bool& telemetry_enabled,
555
+ bool* auto_handoff = nullptr,
556
+ size_t* cloud_timeout_ms = nullptr,
557
+ bool* handoff_with_images = nullptr) {
232
558
  temperature = 0.0f;
233
559
  top_p = 0.0f;
234
560
  top_k = 0;
235
561
  max_tokens = 100;
236
562
  force_tools = false;
563
+ tool_rag_top_k = 2;
564
+ confidence_threshold = 0.7f;
565
+ include_stop_sequences = false;
566
+ use_vad = true;
567
+ telemetry_enabled = true;
568
+ if (auto_handoff) *auto_handoff = true;
569
+ if (cloud_timeout_ms) *cloud_timeout_ms = 15000;
570
+ if (handoff_with_images) *handoff_with_images = true;
237
571
  stop_sequences.clear();
238
572
 
239
573
  if (json.empty()) return;
@@ -269,6 +603,65 @@ inline void parse_options_json(const std::string& json,
269
603
  force_tools = (json.substr(pos, 4) == "true");
270
604
  }
271
605
 
606
+ pos = json.find("\"tool_rag_top_k\"");
607
+ if (pos != std::string::npos) {
608
+ pos = json.find(':', pos) + 1;
609
+ tool_rag_top_k = std::stoul(json.substr(pos));
610
+ }
611
+
612
+ pos = json.find("\"confidence_threshold\"");
613
+ if (pos != std::string::npos) {
614
+ pos = json.find(':', pos) + 1;
615
+ confidence_threshold = std::stof(json.substr(pos));
616
+ }
617
+
618
+ pos = json.find("\"include_stop_sequences\"");
619
+ if (pos != std::string::npos) {
620
+ pos = json.find(':', pos) + 1;
621
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
622
+ include_stop_sequences = (json.substr(pos, 4) == "true");
623
+ }
624
+
625
+ pos = json.find("\"use_vad\"");
626
+ if (pos != std::string::npos) {
627
+ pos = json.find(':', pos) + 1;
628
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
629
+ use_vad = (json.substr(pos, 4) == "true");
630
+ }
631
+
632
+ pos = json.find("\"telemetry_enabled\"");
633
+ if (pos != std::string::npos) {
634
+ pos = json.find(':', pos) + 1;
635
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
636
+ telemetry_enabled = (json.substr(pos, 4) == "true");
637
+ }
638
+
639
+ if (auto_handoff) {
640
+ pos = json.find("\"auto_handoff\"");
641
+ if (pos != std::string::npos) {
642
+ pos = json.find(':', pos) + 1;
643
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
644
+ *auto_handoff = (json.substr(pos, 4) == "true");
645
+ }
646
+ }
647
+
648
+ if (cloud_timeout_ms) {
649
+ pos = json.find("\"cloud_timeout_ms\"");
650
+ if (pos != std::string::npos) {
651
+ pos = json.find(':', pos) + 1;
652
+ *cloud_timeout_ms = std::stoul(json.substr(pos));
653
+ }
654
+ }
655
+
656
+ if (handoff_with_images) {
657
+ pos = json.find("\"handoff_with_images\"");
658
+ if (pos != std::string::npos) {
659
+ pos = json.find(':', pos) + 1;
660
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
661
+ *handoff_with_images = (json.substr(pos, 4) == "true");
662
+ }
663
+ }
664
+
272
665
  pos = json.find("\"stop_sequences\"");
273
666
  if (pos != std::string::npos) {
274
667
  pos = json.find('[', pos);
@@ -288,21 +681,70 @@ inline void parse_options_json(const std::string& json,
288
681
  }
289
682
  }
290
683
 
291
- inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tools) {
292
- if (tools.empty()) return "";
293
- std::string formatted_tools_json;
294
- for (size_t i = 0; i < tools.size(); i++) {
295
- if (i > 0) formatted_tools_json += "\n";
296
- formatted_tools_json += "{\"type\":\"function\",\"function\":{\"name\":\""
297
- + tools[i].name
298
- + "\",\"description\":\""
299
- + tools[i].description + "\"";
300
- if (tools[i].parameters.find("schema") != tools[i].parameters.end()) {
301
- formatted_tools_json += ",\"parameters\":" + tools[i].parameters.at("schema");
684
+ static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
685
+ return trim_string(value.substr(begin, end - begin));
686
+ }
687
+
688
+ static inline void append_lfm2_call(const std::string& entry,
689
+ std::vector<std::string>& function_calls) {
690
+ if (entry.empty()) return;
691
+
692
+ std::string trimmed_entry = trim_lfm2_slice(entry, 0, entry.size());
693
+ if (trimmed_entry.empty()) return;
694
+
695
+ size_t paren_pos = trimmed_entry.find('(');
696
+ if (paren_pos == std::string::npos) return;
697
+
698
+ std::string func_name = trim_lfm2_slice(trimmed_entry, 0, paren_pos);
699
+ std::string args_str = trim_lfm2_slice(trimmed_entry, paren_pos + 1, trimmed_entry.size());
700
+
701
+ if (!args_str.empty() && args_str.back() == ')') {
702
+ args_str.pop_back();
703
+ args_str = trim_lfm2_slice(args_str, 0, args_str.size());
704
+ }
705
+
706
+ std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
707
+
708
+ size_t arg_pos = 0;
709
+ bool first_arg = true;
710
+ while (arg_pos < args_str.length()) {
711
+ while (arg_pos < args_str.length() && std::isspace(static_cast<unsigned char>(args_str[arg_pos]))) {
712
+ arg_pos++;
713
+ }
714
+
715
+ size_t eq_pos = args_str.find('=', arg_pos);
716
+ if (eq_pos == std::string::npos) break;
717
+
718
+ std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
719
+
720
+ size_t val_start = eq_pos + 1;
721
+ size_t val_end = val_start;
722
+
723
+ if (val_start < args_str.length() && args_str[val_start] == '"') {
724
+ val_start++;
725
+ val_end = args_str.find('"', val_start);
726
+ if (val_end == std::string::npos) break;
727
+ } else {
728
+ val_end = args_str.find(',', val_start);
729
+ if (val_end == std::string::npos) val_end = args_str.length();
730
+ }
731
+
732
+ std::string arg_value = args_str.substr(val_start, val_end - val_start);
733
+
734
+ if (!first_arg) json_call += ",";
735
+ json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
736
+ first_arg = false;
737
+
738
+ arg_pos = args_str.find(',', val_end);
739
+ if (arg_pos != std::string::npos) {
740
+ arg_pos++;
741
+ } else {
742
+ break;
302
743
  }
303
- formatted_tools_json += "}}";
304
744
  }
305
- return formatted_tools_json;
745
+
746
+ json_call += "}}";
747
+ function_calls.push_back(json_call);
306
748
  }
307
749
 
308
750
  inline void parse_function_calls_from_response(const std::string& response_text,
@@ -341,7 +783,7 @@ inline void parse_function_calls_from_response(const std::string& response_text,
341
783
  break;
342
784
  }
343
785
  }
344
-
786
+
345
787
  // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
346
788
  const std::string TOOL_CALL_START = "<|tool_call_start|>";
347
789
  const std::string TOOL_CALL_END = "<|tool_call_end|>";
@@ -349,68 +791,77 @@ inline void parse_function_calls_from_response(const std::string& response_text,
349
791
 
350
792
  while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
351
793
  size_t content_start = tool_start_pos + TOOL_CALL_START.length();
352
- size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
794
+ size_t tool_end_pos = regular_response.find(TOOL_CALL_END, content_start);
353
795
 
354
796
  if (tool_end_pos != std::string::npos) {
355
- std::string tool_content = response_text.substr(content_start, tool_end_pos - content_start);
356
-
357
- if (tool_content.size() > 2 && tool_content[0] == '[' && tool_content[tool_content.size()-1] == ']') {
358
- tool_content = tool_content.substr(1, tool_content.size() - 2);
359
-
360
- size_t paren_pos = tool_content.find('(');
361
- if (paren_pos != std::string::npos) {
362
- std::string func_name = tool_content.substr(0, paren_pos);
363
- std::string args_str = tool_content.substr(paren_pos + 1);
364
-
365
- if (!args_str.empty() && args_str.back() == ')') {
366
- args_str.pop_back();
367
- }
368
-
369
- std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
370
-
371
- size_t arg_pos = 0;
372
- bool first_arg = true;
373
- while (arg_pos < args_str.length()) {
374
- while (arg_pos < args_str.length() && std::isspace(args_str[arg_pos])) arg_pos++;
375
-
376
- size_t eq_pos = args_str.find('=', arg_pos);
377
- if (eq_pos == std::string::npos) break;
378
-
379
- std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
797
+ std::string tool_content = regular_response.substr(content_start, tool_end_pos - content_start);
798
+ std::string content = tool_content;
799
+ size_t trim_start = 0;
800
+ while (trim_start < content.size() && std::isspace(static_cast<unsigned char>(content[trim_start]))) {
801
+ trim_start++;
802
+ }
380
803
 
381
- size_t val_start = eq_pos + 1;
382
- size_t val_end = val_start;
804
+ if (trim_start < content.size()) {
805
+ size_t trim_end = content.size() - 1;
806
+ while (trim_end > trim_start && std::isspace(static_cast<unsigned char>(content[trim_end]))) {
807
+ trim_end--;
808
+ }
809
+ content = content.substr(trim_start, trim_end - trim_start + 1);
810
+ } else {
811
+ content.clear();
812
+ }
383
813
 
384
- if (val_start < args_str.length() && args_str[val_start] == '"') {
385
- val_start++;
386
- val_end = args_str.find('"', val_start);
387
- if (val_end == std::string::npos) break;
814
+ if (!content.empty() && content.front() == '[' && content.back() == ']') {
815
+ std::string inner = content.substr(1, content.size() - 2);
816
+
817
+ size_t inner_first = inner.find_first_not_of(" \t\n\r");
818
+ if (inner_first != std::string::npos && inner[inner_first] == '{') {
819
+ size_t pos = inner_first;
820
+ while (pos < inner.size()) {
821
+ if (inner[pos] == '{') {
822
+ int brace_depth = 1;
823
+ size_t obj_start = pos;
824
+ pos++;
825
+ while (pos < inner.size() && brace_depth > 0) {
826
+ if (inner[pos] == '{') brace_depth++;
827
+ else if (inner[pos] == '}') brace_depth--;
828
+ pos++;
829
+ }
830
+ if (brace_depth == 0) {
831
+ std::string json_obj = inner.substr(obj_start, pos - obj_start);
832
+ if (json_obj.find("\"name\"") != std::string::npos) {
833
+ function_calls.push_back(json_obj);
834
+ }
835
+ }
388
836
  } else {
389
- val_end = args_str.find(',', val_start);
390
- if (val_end == std::string::npos) val_end = args_str.length();
837
+ pos++;
391
838
  }
392
-
393
- std::string arg_value = args_str.substr(val_start, val_end - val_start);
394
-
395
- if (!first_arg) json_call += ",";
396
- json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
397
- first_arg = false;
398
-
399
- arg_pos = args_str.find(',', val_end);
400
- if (arg_pos != std::string::npos) {
401
- arg_pos++;
402
- } else {
403
- break;
839
+ }
840
+ } else {
841
+ size_t start = 0;
842
+ int paren_depth = 0;
843
+
844
+ for (size_t i = 0; i < inner.size(); ++i) {
845
+ char c = inner[i];
846
+ if (c == '(') {
847
+ paren_depth++;
848
+ } else if (c == ')' && paren_depth > 0) {
849
+ paren_depth--;
850
+ } else if (c == ',' && paren_depth == 0) {
851
+ append_lfm2_call(inner.substr(start, i - start), function_calls);
852
+ start = i + 1;
404
853
  }
405
854
  }
406
855
 
407
- json_call += "}}";
408
- function_calls.push_back(json_call);
856
+ if (start < inner.size()) {
857
+ append_lfm2_call(inner.substr(start), function_calls);
858
+ }
409
859
  }
860
+ } else if (!content.empty()) {
861
+ append_lfm2_call(content, function_calls);
410
862
  }
411
863
 
412
864
  regular_response.erase(tool_start_pos, tool_end_pos + TOOL_CALL_END.length() - tool_start_pos);
413
- tool_start_pos = tool_end_pos + TOOL_CALL_END.length();
414
865
  } else {
415
866
  break;
416
867
  }
@@ -451,38 +902,47 @@ inline std::string construct_response_json(const std::string& regular_response,
451
902
  const std::vector<std::string>& function_calls,
452
903
  double time_to_first_token,
453
904
  double total_time_ms,
454
- double tokens_per_second,
905
+ double prefill_tps,
906
+ double decode_tps,
455
907
  size_t prompt_tokens,
456
- size_t completion_tokens) {
457
- std::ostringstream json_response;
458
- json_response << "{";
459
- json_response << "\"success\":true,";
460
- json_response << "\"response\":\"";
461
- for (char c : regular_response) {
462
- if (c == '"') json_response << "\\\"";
463
- else if (c == '\n') json_response << "\\n";
464
- else if (c == '\r') json_response << "\\r";
465
- else if (c == '\t') json_response << "\\t";
466
- else if (c == '\\') json_response << "\\\\";
467
- else json_response << c;
468
- }
469
- json_response << "\",";
470
- if (!function_calls.empty()) {
471
- json_response << "\"function_calls\":[";
472
- for (size_t i = 0; i < function_calls.size(); ++i) {
473
- if (i > 0) json_response << ",";
474
- json_response << function_calls[i];
475
- }
476
- json_response << "],";
477
- }
478
- json_response << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
479
- json_response << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
480
- json_response << "\"tokens_per_second\":" << std::fixed << std::setprecision(2) << tokens_per_second << ",";
481
- json_response << "\"prefill_tokens\":" << prompt_tokens << ",";
482
- json_response << "\"decode_tokens\":" << completion_tokens << ",";
483
- json_response << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
484
- json_response << "}";
485
- return json_response.str();
908
+ size_t completion_tokens,
909
+ float confidence = 0.0f,
910
+ bool cloud_handoff = false) {
911
+ std::ostringstream json;
912
+ json << "{";
913
+ json << "\"success\":true,";
914
+ json << "\"error\":null,";
915
+ json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
916
+ json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
917
+ json << "\"function_calls\":[";
918
+ for (size_t i = 0; i < function_calls.size(); ++i) {
919
+ if (i > 0) json << ",";
920
+ json << function_calls[i];
921
+ }
922
+ json << "],";
923
+ json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
924
+ json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
925
+ json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
926
+ json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
927
+ json << "\"decode_tps\":" << std::fixed << std::setprecision(2) << decode_tps << ",";
928
+ json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
929
+ json << "\"prefill_tokens\":" << prompt_tokens << ",";
930
+ json << "\"decode_tokens\":" << completion_tokens << ",";
931
+ json << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
932
+ json << "}";
933
+ return json.str();
934
+ }
935
+
936
+ inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
937
+ if (calls.empty()) return "[]";
938
+ std::ostringstream oss;
939
+ oss << "[";
940
+ for (size_t i = 0; i < calls.size(); ++i) {
941
+ if (i > 0) oss << ",";
942
+ oss << calls[i];
943
+ }
944
+ oss << "]";
945
+ return oss.str();
486
946
  }
487
947
 
488
948
  } // namespace ffi
@@ -494,35 +954,8 @@ extern "C" {
494
954
 
495
955
  const char* cactus_get_last_error();
496
956
 
497
- __attribute__((weak))
498
- const char* register_app(const char* encrypted_data);
499
-
500
- __attribute__((weak))
501
- const char* get_device_id(const char* current_token);
502
-
503
- #ifdef __cplusplus
504
- }
505
- #endif
506
-
507
957
  #ifdef __cplusplus
508
- extern "C" {
509
-
510
- __attribute__((weak))
511
- inline const char* register_app(const char* encrypted_data) {
512
- (void)encrypted_data;
513
- static thread_local std::string uuid_storage;
514
- uuid_storage = cactus::ffi::generateUUID();
515
- return uuid_storage.c_str();
516
- }
517
-
518
- __attribute__((weak))
519
- inline const char* get_device_id(const char* current_token) {
520
- (void)current_token;
521
- static thread_local std::string uuid_storage;
522
- uuid_storage = cactus::ffi::generateUUID();
523
- return uuid_storage.c_str();
524
- }
525
958
  }
526
959
  #endif
527
960
 
528
- #endif // CACTUS_UTILS_H
961
+ #endif // CACTUS_UTILS_H