cactus-react-native 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cactus.podspec +1 -1
- package/README.md +347 -241
- package/android/CMakeLists.txt +24 -5
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcurl.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libmbedcrypto.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libmbedtls.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libmbedx509.a +0 -0
- package/cpp/HybridCactus.cpp +149 -117
- package/cpp/HybridCactus.hpp +14 -10
- package/cpp/cactus_ffi.h +54 -43
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +0 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +54 -43
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +318 -123
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +118 -15
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +77 -32
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +68 -6
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +21 -155
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +0 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +54 -43
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +318 -123
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +118 -15
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +77 -32
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +68 -6
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +21 -155
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/classes/CactusLM.js +16 -49
- package/lib/module/classes/CactusLM.js.map +1 -1
- package/lib/module/classes/CactusSTT.js +30 -79
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/classes/CactusVAD.js +95 -0
- package/lib/module/classes/CactusVAD.js.map +1 -0
- package/lib/module/hooks/useCactusLM.js +10 -11
- package/lib/module/hooks/useCactusLM.js.map +1 -1
- package/lib/module/hooks/useCactusSTT.js +23 -62
- package/lib/module/hooks/useCactusSTT.js.map +1 -1
- package/lib/module/hooks/useCactusVAD.js +171 -0
- package/lib/module/hooks/useCactusVAD.js.map +1 -0
- package/lib/module/index.js +2 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/modelRegistry.js +52 -0
- package/lib/module/modelRegistry.js.map +1 -0
- package/lib/module/native/Cactus.js +85 -23
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/module/native/CactusIndex.js.map +1 -1
- package/lib/module/native/index.js +0 -3
- package/lib/module/native/index.js.map +1 -1
- package/lib/module/types/CactusVAD.js +4 -0
- package/lib/module/{specs/CactusUtil.nitro.js.map → types/CactusVAD.js.map} +1 -1
- package/lib/typescript/src/classes/CactusLM.d.ts +5 -7
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +8 -12
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusVAD.d.ts +20 -0
- package/lib/typescript/src/classes/CactusVAD.d.ts.map +1 -0
- package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
- package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusSTT.d.ts +6 -8
- package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusVAD.d.ts +15 -0
- package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +7 -5
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/modelRegistry.d.ts +5 -0
- package/lib/typescript/src/modelRegistry.d.ts.map +1 -0
- package/lib/typescript/src/native/Cactus.d.ts +12 -11
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/native/CactusIndex.d.ts +2 -2
- package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -1
- package/lib/typescript/src/native/index.d.ts +0 -3
- package/lib/typescript/src/native/index.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +6 -6
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusIndex.d.ts +2 -2
- package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusLM.d.ts +19 -11
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +33 -12
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusVAD.d.ts +34 -0
- package/lib/typescript/src/types/CactusVAD.d.ts.map +1 -0
- package/lib/typescript/src/types/common.d.ts +1 -6
- package/lib/typescript/src/types/common.d.ts.map +1 -1
- package/nitro.json +0 -11
- package/nitrogen/generated/android/cactus+autolinking.cmake +0 -5
- package/nitrogen/generated/android/cactusOnLoad.cpp +0 -30
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +0 -50
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +9 -147
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +0 -13
- package/nitrogen/generated/ios/CactusAutolinking.mm +0 -26
- package/nitrogen/generated/ios/CactusAutolinking.swift +0 -30
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +4 -4
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +6 -6
- package/package.json +3 -3
- package/src/classes/CactusLM.ts +18 -65
- package/src/classes/CactusSTT.ts +39 -97
- package/src/classes/CactusVAD.ts +129 -0
- package/src/hooks/useCactusLM.ts +14 -17
- package/src/hooks/useCactusSTT.ts +47 -98
- package/src/hooks/useCactusVAD.ts +215 -0
- package/src/index.tsx +18 -12
- package/src/modelRegistry.ts +65 -0
- package/src/native/Cactus.ts +102 -41
- package/src/native/CactusIndex.ts +2 -2
- package/src/native/index.ts +0 -3
- package/src/specs/Cactus.nitro.ts +11 -7
- package/src/types/CactusIndex.ts +2 -2
- package/src/types/CactusLM.ts +19 -11
- package/src/types/CactusSTT.ts +33 -13
- package/src/types/CactusVAD.ts +39 -0
- package/src/types/common.ts +1 -6
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +0 -46
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +0 -27
- package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
- package/cpp/HybridCactusUtil.cpp +0 -47
- package/cpp/HybridCactusUtil.hpp +0 -27
- package/cpp/cactus_util.h +0 -25
- package/ios/HybridCactusCrypto.swift +0 -37
- package/ios/HybridCactusDeviceInfo.swift +0 -32
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +0 -656
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +0 -656
- package/ios/cactus_util.xcframework/Info.plist +0 -39
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +0 -27
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +0 -10
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +0 -27
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +0 -10
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +0 -135
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
- package/lib/module/api/Database.js +0 -45
- package/lib/module/api/Database.js.map +0 -1
- package/lib/module/api/RemoteLM.js +0 -201
- package/lib/module/api/RemoteLM.js.map +0 -1
- package/lib/module/config/CactusConfig.js +0 -12
- package/lib/module/config/CactusConfig.js.map +0 -1
- package/lib/module/models.js +0 -336
- package/lib/module/models.js.map +0 -1
- package/lib/module/native/CactusCrypto.js +0 -10
- package/lib/module/native/CactusCrypto.js.map +0 -1
- package/lib/module/native/CactusDeviceInfo.js +0 -13
- package/lib/module/native/CactusDeviceInfo.js.map +0 -1
- package/lib/module/native/CactusUtil.js +0 -36
- package/lib/module/native/CactusUtil.js.map +0 -1
- package/lib/module/specs/CactusCrypto.nitro.js +0 -4
- package/lib/module/specs/CactusCrypto.nitro.js.map +0 -1
- package/lib/module/specs/CactusDeviceInfo.nitro.js +0 -4
- package/lib/module/specs/CactusDeviceInfo.nitro.js.map +0 -1
- package/lib/module/specs/CactusUtil.nitro.js +0 -4
- package/lib/module/telemetry/Telemetry.js +0 -154
- package/lib/module/telemetry/Telemetry.js.map +0 -1
- package/lib/typescript/src/api/Database.d.ts +0 -12
- package/lib/typescript/src/api/Database.d.ts.map +0 -1
- package/lib/typescript/src/api/RemoteLM.d.ts +0 -14
- package/lib/typescript/src/api/RemoteLM.d.ts.map +0 -1
- package/lib/typescript/src/config/CactusConfig.d.ts +0 -7
- package/lib/typescript/src/config/CactusConfig.d.ts.map +0 -1
- package/lib/typescript/src/models.d.ts +0 -6
- package/lib/typescript/src/models.d.ts.map +0 -1
- package/lib/typescript/src/native/CactusCrypto.d.ts +0 -5
- package/lib/typescript/src/native/CactusCrypto.d.ts.map +0 -1
- package/lib/typescript/src/native/CactusDeviceInfo.d.ts +0 -7
- package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +0 -1
- package/lib/typescript/src/native/CactusUtil.d.ts +0 -6
- package/lib/typescript/src/native/CactusUtil.d.ts.map +0 -1
- package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +0 -8
- package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +0 -1
- package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +0 -16
- package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +0 -1
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +0 -10
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +0 -1
- package/lib/typescript/src/telemetry/Telemetry.d.ts +0 -34
- package/lib/typescript/src/telemetry/Telemetry.d.ts.map +0 -1
- package/nitrogen/generated/android/c++/JDeviceInfo.hpp +0 -74
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +0 -65
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +0 -65
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +0 -85
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +0 -66
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +0 -50
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +0 -58
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +0 -62
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +0 -11
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +0 -77
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +0 -11
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +0 -88
- package/nitrogen/generated/ios/swift/DeviceInfo.swift +0 -98
- package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +0 -47
- package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +0 -54
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +0 -57
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +0 -139
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +0 -58
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +0 -164
- package/nitrogen/generated/shared/c++/DeviceInfo.hpp +0 -92
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +0 -21
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +0 -63
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +0 -22
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +0 -67
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +0 -23
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +0 -66
- package/src/api/Database.ts +0 -55
- package/src/api/RemoteLM.ts +0 -273
- package/src/config/CactusConfig.ts +0 -11
- package/src/models.ts +0 -344
- package/src/native/CactusCrypto.ts +0 -11
- package/src/native/CactusDeviceInfo.ts +0 -18
- package/src/native/CactusUtil.ts +0 -43
- package/src/specs/CactusCrypto.nitro.ts +0 -6
- package/src/specs/CactusDeviceInfo.nitro.ts +0 -15
- package/src/specs/CactusUtil.nitro.ts +0 -8
- package/src/telemetry/Telemetry.ts +0 -236
|
@@ -19,14 +19,53 @@
|
|
|
19
19
|
|
|
20
20
|
#ifdef __APPLE__
|
|
21
21
|
#include <uuid/uuid.h>
|
|
22
|
+
#include <mach/mach.h>
|
|
23
|
+
#elif defined(_WIN32)
|
|
24
|
+
#include <windows.h>
|
|
25
|
+
#include <psapi.h>
|
|
26
|
+
#elif defined(__linux__) || defined(__ANDROID__)
|
|
27
|
+
#include <unistd.h>
|
|
22
28
|
#endif
|
|
23
29
|
|
|
30
|
+
/// Returns the memory footprint of the current process in bytes.
///
/// Platform sources:
///  - Apple:         `phys_footprint` from `task_info(TASK_VM_INFO)`.
///  - Windows:       `PrivateUsage` from `GetProcessMemoryInfo`.
///  - Linux/Android: second field of `/proc/self/statm` (resident pages)
///                   multiplied by the system page size.
///
/// @return The footprint in bytes, or 0 when the platform is unsupported or
///         the underlying OS query fails.
inline size_t get_memory_footprint_bytes() {
#ifdef __APPLE__
    task_vm_info_data_t vm_info;
    mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
    if (task_info(mach_task_self(), TASK_VM_INFO,
                  reinterpret_cast<task_info_t>(&vm_info), &count) == KERN_SUCCESS)
        return static_cast<size_t>(vm_info.phys_footprint);

#elif defined(_WIN32)
    PROCESS_MEMORY_COUNTERS_EX pmc;
    if (GetProcessMemoryInfo(GetCurrentProcess(),
                             reinterpret_cast<PROCESS_MEMORY_COUNTERS*>(&pmc), sizeof(pmc)))
        return static_cast<size_t>(pmc.PrivateUsage);

#elif defined(__linux__) || defined(__ANDROID__)
    std::ifstream statm("/proc/self/statm");
    size_t size = 0;
    size_t resident = 0;
    // Only trust `resident` when both fields were actually parsed; a failed
    // extraction would otherwise leave it unset.
    if (statm >> size >> resident) {
        // sysconf reports errors as -1, which must not be turned into a size.
        const long page_size = sysconf(_SC_PAGESIZE);
        if (page_size > 0) {
            return resident * static_cast<size_t>(page_size);
        }
    }
#endif
    return 0;
}
|
|
52
|
+
|
|
53
|
+
inline double get_ram_usage_mb() {
|
|
54
|
+
return get_memory_footprint_bytes() / (1024.0 * 1024.0);
|
|
55
|
+
}
|
|
56
|
+
|
|
24
57
|
struct CactusModelHandle {
|
|
25
58
|
std::unique_ptr<cactus::engine::Model> model;
|
|
59
|
+
std::unique_ptr<cactus::engine::Model> vad_model;
|
|
26
60
|
std::atomic<bool> should_stop;
|
|
27
61
|
std::vector<uint32_t> processed_tokens;
|
|
28
62
|
std::mutex model_mutex;
|
|
29
63
|
std::string model_name;
|
|
64
|
+
std::unique_ptr<cactus::engine::index::Index> corpus_index;
|
|
65
|
+
std::string corpus_dir;
|
|
66
|
+
size_t corpus_embedding_dim = 0;
|
|
67
|
+
std::vector<std::vector<float>> tool_embeddings;
|
|
68
|
+
std::vector<std::string> tool_texts;
|
|
30
69
|
|
|
31
70
|
CactusModelHandle() : should_stop(false) {}
|
|
32
71
|
};
|
|
@@ -36,16 +75,37 @@ extern std::string last_error_message;
|
|
|
36
75
|
bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
|
|
37
76
|
const std::vector<std::vector<uint32_t>>& stop_sequences);
|
|
38
77
|
|
|
78
|
+
std::string retrieve_rag_context(CactusModelHandle* handle, const std::string& query);
|
|
79
|
+
|
|
39
80
|
namespace cactus {
|
|
40
|
-
namespace
|
|
81
|
+
namespace audio {
|
|
82
|
+
|
|
83
|
+
static constexpr size_t WHISPER_TARGET_FRAMES = 3000;
|
|
84
|
+
static constexpr int WHISPER_SAMPLE_RATE = 16000;
|
|
85
|
+
|
|
86
|
+
inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram_config() {
|
|
87
|
+
cactus::engine::AudioProcessor::SpectrogramConfig cfg{};
|
|
88
|
+
cfg.n_fft = 400;
|
|
89
|
+
cfg.frame_length = 400;
|
|
90
|
+
cfg.hop_length = 160;
|
|
91
|
+
cfg.power = 2.0f;
|
|
92
|
+
cfg.center = true;
|
|
93
|
+
cfg.pad_mode = "reflect";
|
|
94
|
+
cfg.onesided = true;
|
|
95
|
+
cfg.dither = 0.0f;
|
|
96
|
+
cfg.mel_floor = 1e-10f;
|
|
97
|
+
cfg.log_mel = "log10";
|
|
98
|
+
cfg.reference = 1.0f;
|
|
99
|
+
cfg.min_value = 1e-10f;
|
|
100
|
+
cfg.remove_dc_offset = true;
|
|
101
|
+
return cfg;
|
|
102
|
+
}
|
|
41
103
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
#endif
|
|
104
|
+
} // namespace audio
|
|
105
|
+
} // namespace cactus
|
|
45
106
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
}
|
|
107
|
+
namespace cactus {
|
|
108
|
+
namespace ffi {
|
|
49
109
|
|
|
50
110
|
inline std::string generateUUID() {
|
|
51
111
|
#ifdef __APPLE__
|
|
@@ -66,18 +126,49 @@ struct ToolFunction {
|
|
|
66
126
|
} // namespace ffi
|
|
67
127
|
} // namespace cactus
|
|
68
128
|
|
|
129
|
+
std::vector<cactus::ffi::ToolFunction> select_relevant_tools(
|
|
130
|
+
CactusModelHandle* handle,
|
|
131
|
+
const std::string& query,
|
|
132
|
+
const std::vector<cactus::ffi::ToolFunction>& all_tools,
|
|
133
|
+
size_t top_k);
|
|
134
|
+
|
|
69
135
|
#include "gemma_tools.h"
|
|
70
136
|
|
|
71
137
|
namespace cactus {
|
|
72
138
|
namespace ffi {
|
|
73
139
|
|
|
74
|
-
inline
|
|
75
|
-
std::
|
|
76
|
-
for (
|
|
77
|
-
if (c == '"')
|
|
78
|
-
if (c == '\n')
|
|
140
|
+
/// Escapes `s` so it can be embedded between double quotes in a JSON document.
///
/// Handles the quote, the backslash, the short escapes (\n, \r, \t, \b, \f)
/// and emits every other control character below 0x20 as \u00XX, as JSON
/// forbids raw control characters inside strings. All other bytes (including
/// multi-byte UTF-8 sequences) are copied through unchanged.
///
/// @param s Raw text.
/// @return The escaped text, without surrounding quotes.
inline std::string escape_json_string(const std::string& s) {
    constexpr char kHexDigits[] = "0123456789abcdef";

    std::string escaped;
    escaped.reserve(s.size());
    for (const char c : s) {
        switch (c) {
            case '"':  escaped += "\\\""; break;
            case '\\': escaped += "\\\\"; break;
            case '\n': escaped += "\\n";  break;
            case '\r': escaped += "\\r";  break;
            case '\t': escaped += "\\t";  break;
            case '\b': escaped += "\\b";  break;
            case '\f': escaped += "\\f";  break;
            default: {
                // Go through unsigned char so bytes >= 0x80 are never
                // mistaken for (negative) control characters.
                const unsigned char byte = static_cast<unsigned char>(c);
                if (byte < 0x20) {
                    escaped += "\\u00";
                    escaped += kHexDigits[byte >> 4];
                    escaped += kHexDigits[byte & 0x0F];
                } else {
                    escaped += c;
                }
                break;
            }
        }
    }
    return escaped;
}
|
|
152
|
+
|
|
153
|
+
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
|
|
154
|
+
std::ostringstream json;
|
|
155
|
+
json << "{";
|
|
156
|
+
json << "\"success\":false,";
|
|
157
|
+
json << "\"error\":\"" << escape_json_string(error_message) << "\",";
|
|
158
|
+
json << "\"cloud_handoff\":false,";
|
|
159
|
+
json << "\"response\":null,";
|
|
160
|
+
json << "\"function_calls\":[],";
|
|
161
|
+
json << "\"confidence\":0.0,";
|
|
162
|
+
json << "\"time_to_first_token_ms\":0.0,";
|
|
163
|
+
json << "\"total_time_ms\":0.0,";
|
|
164
|
+
json << "\"prefill_tps\":0.0,";
|
|
165
|
+
json << "\"decode_tps\":0.0,";
|
|
166
|
+
json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
|
|
167
|
+
json << "\"prefill_tokens\":0,";
|
|
168
|
+
json << "\"decode_tokens\":0,";
|
|
169
|
+
json << "\"total_tokens\":0";
|
|
170
|
+
json << "}";
|
|
171
|
+
std::string error_json = json.str();
|
|
81
172
|
if (response_buffer && error_json.length() < buffer_size) {
|
|
82
173
|
std::strcpy(response_buffer, error_json.c_str());
|
|
83
174
|
}
|
|
@@ -228,12 +319,22 @@ inline void parse_options_json(const std::string& json,
|
|
|
228
319
|
float& temperature, float& top_p,
|
|
229
320
|
size_t& top_k, size_t& max_tokens,
|
|
230
321
|
std::vector<std::string>& stop_sequences,
|
|
231
|
-
bool& force_tools
|
|
322
|
+
bool& force_tools,
|
|
323
|
+
size_t& tool_rag_top_k,
|
|
324
|
+
float& confidence_threshold,
|
|
325
|
+
bool& include_stop_sequences,
|
|
326
|
+
bool& use_vad,
|
|
327
|
+
bool& telemetry_enabled) {
|
|
232
328
|
temperature = 0.0f;
|
|
233
329
|
top_p = 0.0f;
|
|
234
330
|
top_k = 0;
|
|
235
331
|
max_tokens = 100;
|
|
236
332
|
force_tools = false;
|
|
333
|
+
tool_rag_top_k = 2;
|
|
334
|
+
confidence_threshold = 0.7f;
|
|
335
|
+
include_stop_sequences = false;
|
|
336
|
+
use_vad = true;
|
|
337
|
+
telemetry_enabled = true;
|
|
237
338
|
stop_sequences.clear();
|
|
238
339
|
|
|
239
340
|
if (json.empty()) return;
|
|
@@ -269,6 +370,39 @@ inline void parse_options_json(const std::string& json,
|
|
|
269
370
|
force_tools = (json.substr(pos, 4) == "true");
|
|
270
371
|
}
|
|
271
372
|
|
|
373
|
+
pos = json.find("\"tool_rag_top_k\"");
|
|
374
|
+
if (pos != std::string::npos) {
|
|
375
|
+
pos = json.find(':', pos) + 1;
|
|
376
|
+
tool_rag_top_k = std::stoul(json.substr(pos));
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
pos = json.find("\"confidence_threshold\"");
|
|
380
|
+
if (pos != std::string::npos) {
|
|
381
|
+
pos = json.find(':', pos) + 1;
|
|
382
|
+
confidence_threshold = std::stof(json.substr(pos));
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
pos = json.find("\"include_stop_sequences\"");
|
|
386
|
+
if (pos != std::string::npos) {
|
|
387
|
+
pos = json.find(':', pos) + 1;
|
|
388
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
389
|
+
include_stop_sequences = (json.substr(pos, 4) == "true");
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
pos = json.find("\"use_vad\"");
|
|
393
|
+
if (pos != std::string::npos) {
|
|
394
|
+
pos = json.find(':', pos) + 1;
|
|
395
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
396
|
+
use_vad = (json.substr(pos, 4) == "true");
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
pos = json.find("\"telemetry_enabled\"");
|
|
400
|
+
if (pos != std::string::npos) {
|
|
401
|
+
pos = json.find(':', pos) + 1;
|
|
402
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
403
|
+
telemetry_enabled = (json.substr(pos, 4) == "true");
|
|
404
|
+
}
|
|
405
|
+
|
|
272
406
|
pos = json.find("\"stop_sequences\"");
|
|
273
407
|
if (pos != std::string::npos) {
|
|
274
408
|
pos = json.find('[', pos);
|
|
@@ -305,6 +439,78 @@ inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tool
|
|
|
305
439
|
return formatted_tools_json;
|
|
306
440
|
}
|
|
307
441
|
|
|
442
|
+
/// Returns the half-open slice [begin, end) of `value` with leading and
/// trailing whitespace (as classified by std::isspace) removed.
/// The caller supplies indices that lie within `value`.
static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
    const auto is_blank = [&value](size_t idx) {
        // Cast first: std::isspace requires a value representable as unsigned char.
        return std::isspace(static_cast<unsigned char>(value[idx])) != 0;
    };

    size_t first = begin;
    size_t last = end;
    for (; first < last && is_blank(first); ++first) {
    }
    for (; last > first && is_blank(last - 1); --last) {
    }
    return value.substr(first, last - first);
}
|
|
451
|
+
|
|
452
|
+
static inline void append_lfm2_call(const std::string& entry,
|
|
453
|
+
std::vector<std::string>& function_calls) {
|
|
454
|
+
if (entry.empty()) return;
|
|
455
|
+
|
|
456
|
+
std::string trimmed_entry = trim_lfm2_slice(entry, 0, entry.size());
|
|
457
|
+
if (trimmed_entry.empty()) return;
|
|
458
|
+
|
|
459
|
+
size_t paren_pos = trimmed_entry.find('(');
|
|
460
|
+
if (paren_pos == std::string::npos) return;
|
|
461
|
+
|
|
462
|
+
std::string func_name = trim_lfm2_slice(trimmed_entry, 0, paren_pos);
|
|
463
|
+
std::string args_str = trim_lfm2_slice(trimmed_entry, paren_pos + 1, trimmed_entry.size());
|
|
464
|
+
|
|
465
|
+
if (!args_str.empty() && args_str.back() == ')') {
|
|
466
|
+
args_str.pop_back();
|
|
467
|
+
args_str = trim_lfm2_slice(args_str, 0, args_str.size());
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
|
|
471
|
+
|
|
472
|
+
size_t arg_pos = 0;
|
|
473
|
+
bool first_arg = true;
|
|
474
|
+
while (arg_pos < args_str.length()) {
|
|
475
|
+
while (arg_pos < args_str.length() && std::isspace(static_cast<unsigned char>(args_str[arg_pos]))) {
|
|
476
|
+
arg_pos++;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
size_t eq_pos = args_str.find('=', arg_pos);
|
|
480
|
+
if (eq_pos == std::string::npos) break;
|
|
481
|
+
|
|
482
|
+
std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
|
|
483
|
+
|
|
484
|
+
size_t val_start = eq_pos + 1;
|
|
485
|
+
size_t val_end = val_start;
|
|
486
|
+
|
|
487
|
+
if (val_start < args_str.length() && args_str[val_start] == '"') {
|
|
488
|
+
val_start++;
|
|
489
|
+
val_end = args_str.find('"', val_start);
|
|
490
|
+
if (val_end == std::string::npos) break;
|
|
491
|
+
} else {
|
|
492
|
+
val_end = args_str.find(',', val_start);
|
|
493
|
+
if (val_end == std::string::npos) val_end = args_str.length();
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
std::string arg_value = args_str.substr(val_start, val_end - val_start);
|
|
497
|
+
|
|
498
|
+
if (!first_arg) json_call += ",";
|
|
499
|
+
json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
|
|
500
|
+
first_arg = false;
|
|
501
|
+
|
|
502
|
+
arg_pos = args_str.find(',', val_end);
|
|
503
|
+
if (arg_pos != std::string::npos) {
|
|
504
|
+
arg_pos++;
|
|
505
|
+
} else {
|
|
506
|
+
break;
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
json_call += "}}";
|
|
511
|
+
function_calls.push_back(json_call);
|
|
512
|
+
}
|
|
513
|
+
|
|
308
514
|
inline void parse_function_calls_from_response(const std::string& response_text,
|
|
309
515
|
std::string& regular_response,
|
|
310
516
|
std::vector<std::string>& function_calls) {
|
|
@@ -341,7 +547,7 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
341
547
|
break;
|
|
342
548
|
}
|
|
343
549
|
}
|
|
344
|
-
|
|
550
|
+
|
|
345
551
|
// Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
|
|
346
552
|
const std::string TOOL_CALL_START = "<|tool_call_start|>";
|
|
347
553
|
const std::string TOOL_CALL_END = "<|tool_call_end|>";
|
|
@@ -349,68 +555,51 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
349
555
|
|
|
350
556
|
while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
|
|
351
557
|
size_t content_start = tool_start_pos + TOOL_CALL_START.length();
|
|
352
|
-
size_t tool_end_pos =
|
|
558
|
+
size_t tool_end_pos = regular_response.find(TOOL_CALL_END, content_start);
|
|
353
559
|
|
|
354
560
|
if (tool_end_pos != std::string::npos) {
|
|
355
|
-
std::string tool_content =
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
if (paren_pos != std::string::npos) {
|
|
362
|
-
std::string func_name = tool_content.substr(0, paren_pos);
|
|
363
|
-
std::string args_str = tool_content.substr(paren_pos + 1);
|
|
364
|
-
|
|
365
|
-
if (!args_str.empty() && args_str.back() == ')') {
|
|
366
|
-
args_str.pop_back();
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
|
|
370
|
-
|
|
371
|
-
size_t arg_pos = 0;
|
|
372
|
-
bool first_arg = true;
|
|
373
|
-
while (arg_pos < args_str.length()) {
|
|
374
|
-
while (arg_pos < args_str.length() && std::isspace(args_str[arg_pos])) arg_pos++;
|
|
375
|
-
|
|
376
|
-
size_t eq_pos = args_str.find('=', arg_pos);
|
|
377
|
-
if (eq_pos == std::string::npos) break;
|
|
378
|
-
|
|
379
|
-
std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
|
|
380
|
-
|
|
381
|
-
size_t val_start = eq_pos + 1;
|
|
382
|
-
size_t val_end = val_start;
|
|
383
|
-
|
|
384
|
-
if (val_start < args_str.length() && args_str[val_start] == '"') {
|
|
385
|
-
val_start++;
|
|
386
|
-
val_end = args_str.find('"', val_start);
|
|
387
|
-
if (val_end == std::string::npos) break;
|
|
388
|
-
} else {
|
|
389
|
-
val_end = args_str.find(',', val_start);
|
|
390
|
-
if (val_end == std::string::npos) val_end = args_str.length();
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
std::string arg_value = args_str.substr(val_start, val_end - val_start);
|
|
561
|
+
std::string tool_content = regular_response.substr(content_start, tool_end_pos - content_start);
|
|
562
|
+
std::string content = tool_content;
|
|
563
|
+
size_t trim_start = 0;
|
|
564
|
+
while (trim_start < content.size() && std::isspace(static_cast<unsigned char>(content[trim_start]))) {
|
|
565
|
+
trim_start++;
|
|
566
|
+
}
|
|
394
567
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
568
|
+
if (trim_start < content.size()) {
|
|
569
|
+
size_t trim_end = content.size() - 1;
|
|
570
|
+
while (trim_end > trim_start && std::isspace(static_cast<unsigned char>(content[trim_end]))) {
|
|
571
|
+
trim_end--;
|
|
572
|
+
}
|
|
573
|
+
content = content.substr(trim_start, trim_end - trim_start + 1);
|
|
574
|
+
} else {
|
|
575
|
+
content.clear();
|
|
576
|
+
}
|
|
398
577
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
578
|
+
if (!content.empty() && content.front() == '[' && content.back() == ']') {
|
|
579
|
+
std::string inner = content.substr(1, content.size() - 2);
|
|
580
|
+
size_t start = 0;
|
|
581
|
+
int paren_depth = 0;
|
|
582
|
+
|
|
583
|
+
for (size_t i = 0; i < inner.size(); ++i) {
|
|
584
|
+
char c = inner[i];
|
|
585
|
+
if (c == '(') {
|
|
586
|
+
paren_depth++;
|
|
587
|
+
} else if (c == ')' && paren_depth > 0) {
|
|
588
|
+
paren_depth--;
|
|
589
|
+
} else if (c == ',' && paren_depth == 0) {
|
|
590
|
+
append_lfm2_call(inner.substr(start, i - start), function_calls);
|
|
591
|
+
start = i + 1;
|
|
405
592
|
}
|
|
593
|
+
}
|
|
406
594
|
|
|
407
|
-
|
|
408
|
-
|
|
595
|
+
if (start < inner.size()) {
|
|
596
|
+
append_lfm2_call(inner.substr(start), function_calls);
|
|
409
597
|
}
|
|
598
|
+
} else if (!content.empty()) {
|
|
599
|
+
append_lfm2_call(content, function_calls);
|
|
410
600
|
}
|
|
411
601
|
|
|
412
602
|
regular_response.erase(tool_start_pos, tool_end_pos + TOOL_CALL_END.length() - tool_start_pos);
|
|
413
|
-
tool_start_pos = tool_end_pos + TOOL_CALL_END.length();
|
|
414
603
|
} else {
|
|
415
604
|
break;
|
|
416
605
|
}
|
|
@@ -451,38 +640,71 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
451
640
|
const std::vector<std::string>& function_calls,
|
|
452
641
|
double time_to_first_token,
|
|
453
642
|
double total_time_ms,
|
|
454
|
-
double
|
|
643
|
+
double prefill_tps,
|
|
644
|
+
double decode_tps,
|
|
455
645
|
size_t prompt_tokens,
|
|
456
|
-
size_t completion_tokens
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
646
|
+
size_t completion_tokens,
|
|
647
|
+
float confidence = 0.0f,
|
|
648
|
+
bool cloud_handoff = false) {
|
|
649
|
+
std::ostringstream json;
|
|
650
|
+
json << "{";
|
|
651
|
+
json << "\"success\":" << (cloud_handoff ? "false" : "true") << ",";
|
|
652
|
+
json << "\"error\":null,";
|
|
653
|
+
json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
|
|
654
|
+
json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
|
|
655
|
+
json << "\"function_calls\":[";
|
|
656
|
+
for (size_t i = 0; i < function_calls.size(); ++i) {
|
|
657
|
+
if (i > 0) json << ",";
|
|
658
|
+
json << function_calls[i];
|
|
468
659
|
}
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
660
|
+
json << "],";
|
|
661
|
+
json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
|
|
662
|
+
json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
|
|
663
|
+
json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
|
|
664
|
+
json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
|
|
665
|
+
json << "\"decode_tps\":" << std::fixed << std::setprecision(2) << decode_tps << ",";
|
|
666
|
+
json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
|
|
667
|
+
json << "\"prefill_tokens\":" << prompt_tokens << ",";
|
|
668
|
+
json << "\"decode_tokens\":" << completion_tokens << ",";
|
|
669
|
+
json << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
|
|
670
|
+
json << "}";
|
|
671
|
+
return json.str();
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
/// Builds the JSON payload reported when a request is handed off to the cloud.
///
/// The payload always carries success=false, cloud_handoff=true, a null
/// response, no function calls and zero decode statistics. `total_time_ms`
/// is filled with `time_to_first_token`, and `total_tokens` with
/// `prompt_tokens`.
///
/// @param confidence          Written with 4 decimal places.
/// @param time_to_first_token Milliseconds; written with 2 decimal places.
/// @param prefill_tps         Written with 2 decimal places.
/// @param prompt_tokens       Reported as both prefill_tokens and total_tokens.
/// @return The serialized JSON object.
inline std::string construct_cloud_handoff_json(float confidence,
                                                double time_to_first_token,
                                                double prefill_tps,
                                                size_t prompt_tokens) {
    std::ostringstream out;
    // Fixed notation and precision persist on the stream until changed;
    // neither affects how the integer counters are printed.
    out << std::fixed;

    out << "{"
        << "\"success\":false,"
        << "\"error\":null,"
        << "\"cloud_handoff\":true,"
        << "\"response\":null,"
        << "\"function_calls\":[],";

    out << std::setprecision(4)
        << "\"confidence\":" << confidence << ",";

    out << std::setprecision(2)
        << "\"time_to_first_token_ms\":" << time_to_first_token << ","
        << "\"total_time_ms\":" << time_to_first_token << ","
        << "\"prefill_tps\":" << prefill_tps << ","
        << "\"decode_tps\":0.0,"
        << "\"ram_usage_mb\":" << get_ram_usage_mb() << ",";

    out << "\"prefill_tokens\":" << prompt_tokens << ","
        << "\"decode_tokens\":0,"
        << "\"total_tokens\":" << prompt_tokens
        << "}";

    return out.str();
}
|
|
697
|
+
|
|
698
|
+
/// Joins already-serialized JSON values into a JSON array literal.
/// Elements are copied verbatim (no escaping or validation) and separated by
/// commas; an empty input yields "[]".
inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
    std::string joined = "[";
    const char* separator = "";
    for (const std::string& call : calls) {
        joined += separator;
        joined += call;
        separator = ",";  // every element after the first is comma-prefixed
    }
    joined += "]";
    return joined;
}
|
|
487
709
|
|
|
488
710
|
} // namespace ffi
|
|
@@ -494,35 +716,8 @@ extern "C" {
|
|
|
494
716
|
|
|
495
717
|
const char* cactus_get_last_error();
|
|
496
718
|
|
|
497
|
-
__attribute__((weak))
|
|
498
|
-
const char* register_app(const char* encrypted_data);
|
|
499
|
-
|
|
500
|
-
__attribute__((weak))
|
|
501
|
-
const char* get_device_id(const char* current_token);
|
|
502
|
-
|
|
503
719
|
#ifdef __cplusplus
|
|
504
720
|
}
|
|
505
721
|
#endif
|
|
506
722
|
|
|
507
|
-
#ifdef __cplusplus
|
|
508
|
-
extern "C" {
|
|
509
|
-
|
|
510
|
-
__attribute__((weak))
|
|
511
|
-
inline const char* register_app(const char* encrypted_data) {
|
|
512
|
-
(void)encrypted_data;
|
|
513
|
-
static thread_local std::string uuid_storage;
|
|
514
|
-
uuid_storage = cactus::ffi::generateUUID();
|
|
515
|
-
return uuid_storage.c_str();
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
__attribute__((weak))
|
|
519
|
-
inline const char* get_device_id(const char* current_token) {
|
|
520
|
-
(void)current_token;
|
|
521
|
-
static thread_local std::string uuid_storage;
|
|
522
|
-
uuid_storage = cactus::ffi::generateUUID();
|
|
523
|
-
return uuid_storage.c_str();
|
|
524
|
-
}
|
|
525
|
-
}
|
|
526
|
-
#endif
|
|
527
|
-
|
|
528
723
|
#endif // CACTUS_UTILS_H
|