cactus-react-native 1.5.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cactus.podspec +1 -1
- package/README.md +347 -241
- package/android/CMakeLists.txt +24 -5
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcurl.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libmbedcrypto.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libmbedtls.a +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libmbedx509.a +0 -0
- package/cpp/HybridCactus.cpp +197 -117
- package/cpp/HybridCactus.hpp +18 -9
- package/cpp/cactus_ffi.h +66 -42
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +0 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_cloud.h +48 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +66 -42
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +568 -135
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +148 -17
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +145 -36
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +187 -6
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +49 -149
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +0 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_cloud.h +48 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +66 -42
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +568 -135
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +148 -17
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +145 -36
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +187 -6
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +49 -149
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/classes/CactusLM.js +16 -49
- package/lib/module/classes/CactusLM.js.map +1 -1
- package/lib/module/classes/CactusSTT.js +41 -75
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/classes/CactusVAD.js +95 -0
- package/lib/module/classes/CactusVAD.js.map +1 -0
- package/lib/module/hooks/useCactusLM.js +10 -11
- package/lib/module/hooks/useCactusLM.js.map +1 -1
- package/lib/module/hooks/useCactusSTT.js +23 -62
- package/lib/module/hooks/useCactusSTT.js.map +1 -1
- package/lib/module/hooks/useCactusVAD.js +171 -0
- package/lib/module/hooks/useCactusVAD.js.map +1 -0
- package/lib/module/index.js +2 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/modelRegistry.js +52 -0
- package/lib/module/modelRegistry.js.map +1 -0
- package/lib/module/native/Cactus.js +103 -23
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/module/native/CactusIndex.js.map +1 -1
- package/lib/module/native/index.js +0 -3
- package/lib/module/native/index.js.map +1 -1
- package/lib/module/types/CactusVAD.js +4 -0
- package/lib/module/{specs/CactusUtil.nitro.js.map → types/CactusVAD.js.map} +1 -1
- package/lib/typescript/src/classes/CactusLM.d.ts +5 -7
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +9 -12
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusVAD.d.ts +20 -0
- package/lib/typescript/src/classes/CactusVAD.d.ts.map +1 -0
- package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
- package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusSTT.d.ts +6 -8
- package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusVAD.d.ts +15 -0
- package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +7 -5
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/modelRegistry.d.ts +5 -0
- package/lib/typescript/src/modelRegistry.d.ts.map +1 -0
- package/lib/typescript/src/native/Cactus.d.ts +13 -11
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/native/CactusIndex.d.ts +2 -2
- package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -1
- package/lib/typescript/src/native/index.d.ts +0 -3
- package/lib/typescript/src/native/index.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +7 -6
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusIndex.d.ts +2 -2
- package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusLM.d.ts +19 -11
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +44 -12
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusVAD.d.ts +34 -0
- package/lib/typescript/src/types/CactusVAD.d.ts.map +1 -0
- package/lib/typescript/src/types/common.d.ts +1 -6
- package/lib/typescript/src/types/common.d.ts.map +1 -1
- package/nitro.json +0 -11
- package/nitrogen/generated/android/cactus+autolinking.cmake +0 -5
- package/nitrogen/generated/android/cactusOnLoad.cpp +0 -30
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +0 -50
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +9 -147
- package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +0 -13
- package/nitrogen/generated/ios/CactusAutolinking.mm +0 -26
- package/nitrogen/generated/ios/CactusAutolinking.swift +0 -30
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -4
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +7 -6
- package/package.json +3 -3
- package/src/classes/CactusLM.ts +18 -65
- package/src/classes/CactusSTT.ts +52 -90
- package/src/classes/CactusVAD.ts +129 -0
- package/src/hooks/useCactusLM.ts +14 -17
- package/src/hooks/useCactusSTT.ts +47 -98
- package/src/hooks/useCactusVAD.ts +215 -0
- package/src/index.tsx +21 -12
- package/src/modelRegistry.ts +65 -0
- package/src/native/Cactus.ts +131 -38
- package/src/native/CactusIndex.ts +2 -2
- package/src/native/index.ts +0 -3
- package/src/specs/Cactus.nitro.ts +16 -7
- package/src/types/CactusIndex.ts +2 -2
- package/src/types/CactusLM.ts +19 -11
- package/src/types/CactusSTT.ts +47 -13
- package/src/types/CactusVAD.ts +39 -0
- package/src/types/common.ts +1 -6
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +0 -46
- package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +0 -27
- package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
- package/cpp/HybridCactusUtil.cpp +0 -47
- package/cpp/HybridCactusUtil.hpp +0 -27
- package/cpp/cactus_util.h +0 -25
- package/ios/HybridCactusCrypto.swift +0 -37
- package/ios/HybridCactusDeviceInfo.swift +0 -32
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +0 -656
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +0 -656
- package/ios/cactus_util.xcframework/Info.plist +0 -39
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +0 -27
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +0 -10
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +0 -27
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +0 -10
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +0 -25
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +0 -135
- package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
- package/lib/module/api/Database.js +0 -45
- package/lib/module/api/Database.js.map +0 -1
- package/lib/module/api/RemoteLM.js +0 -201
- package/lib/module/api/RemoteLM.js.map +0 -1
- package/lib/module/config/CactusConfig.js +0 -12
- package/lib/module/config/CactusConfig.js.map +0 -1
- package/lib/module/models.js +0 -336
- package/lib/module/models.js.map +0 -1
- package/lib/module/native/CactusCrypto.js +0 -10
- package/lib/module/native/CactusCrypto.js.map +0 -1
- package/lib/module/native/CactusDeviceInfo.js +0 -13
- package/lib/module/native/CactusDeviceInfo.js.map +0 -1
- package/lib/module/native/CactusUtil.js +0 -36
- package/lib/module/native/CactusUtil.js.map +0 -1
- package/lib/module/specs/CactusCrypto.nitro.js +0 -4
- package/lib/module/specs/CactusCrypto.nitro.js.map +0 -1
- package/lib/module/specs/CactusDeviceInfo.nitro.js +0 -4
- package/lib/module/specs/CactusDeviceInfo.nitro.js.map +0 -1
- package/lib/module/specs/CactusUtil.nitro.js +0 -4
- package/lib/module/telemetry/Telemetry.js +0 -154
- package/lib/module/telemetry/Telemetry.js.map +0 -1
- package/lib/typescript/src/api/Database.d.ts +0 -12
- package/lib/typescript/src/api/Database.d.ts.map +0 -1
- package/lib/typescript/src/api/RemoteLM.d.ts +0 -14
- package/lib/typescript/src/api/RemoteLM.d.ts.map +0 -1
- package/lib/typescript/src/config/CactusConfig.d.ts +0 -7
- package/lib/typescript/src/config/CactusConfig.d.ts.map +0 -1
- package/lib/typescript/src/models.d.ts +0 -6
- package/lib/typescript/src/models.d.ts.map +0 -1
- package/lib/typescript/src/native/CactusCrypto.d.ts +0 -5
- package/lib/typescript/src/native/CactusCrypto.d.ts.map +0 -1
- package/lib/typescript/src/native/CactusDeviceInfo.d.ts +0 -7
- package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +0 -1
- package/lib/typescript/src/native/CactusUtil.d.ts +0 -6
- package/lib/typescript/src/native/CactusUtil.d.ts.map +0 -1
- package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +0 -8
- package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +0 -1
- package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +0 -16
- package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +0 -1
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +0 -10
- package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +0 -1
- package/lib/typescript/src/telemetry/Telemetry.d.ts +0 -34
- package/lib/typescript/src/telemetry/Telemetry.d.ts.map +0 -1
- package/nitrogen/generated/android/c++/JDeviceInfo.hpp +0 -74
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +0 -65
- package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +0 -65
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +0 -85
- package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +0 -66
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +0 -50
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +0 -58
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +0 -62
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +0 -11
- package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +0 -77
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +0 -11
- package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +0 -88
- package/nitrogen/generated/ios/swift/DeviceInfo.swift +0 -98
- package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +0 -47
- package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +0 -54
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +0 -57
- package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +0 -139
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +0 -58
- package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +0 -164
- package/nitrogen/generated/shared/c++/DeviceInfo.hpp +0 -92
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +0 -21
- package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +0 -63
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +0 -22
- package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +0 -67
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +0 -23
- package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +0 -66
- package/src/api/Database.ts +0 -55
- package/src/api/RemoteLM.ts +0 -273
- package/src/config/CactusConfig.ts +0 -11
- package/src/models.ts +0 -344
- package/src/native/CactusCrypto.ts +0 -11
- package/src/native/CactusDeviceInfo.ts +0 -18
- package/src/native/CactusUtil.ts +0 -43
- package/src/specs/CactusCrypto.nitro.ts +0 -6
- package/src/specs/CactusDeviceInfo.nitro.ts +0 -15
- package/src/specs/CactusUtil.nitro.ts +0 -8
- package/src/telemetry/Telemetry.ts +0 -236
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#define CACTUS_UTILS_H
|
|
3
3
|
|
|
4
4
|
#include "../engine/engine.h"
|
|
5
|
+
#include "../models/model.h"
|
|
5
6
|
#include <string>
|
|
6
7
|
#include <vector>
|
|
7
8
|
#include <unordered_map>
|
|
@@ -12,6 +13,9 @@
|
|
|
12
13
|
#include <iostream>
|
|
13
14
|
#include <filesystem>
|
|
14
15
|
#include <cctype>
|
|
16
|
+
#include <algorithm>
|
|
17
|
+
#include <cmath>
|
|
18
|
+
#include <limits>
|
|
15
19
|
#include <memory>
|
|
16
20
|
#include <atomic>
|
|
17
21
|
#include <mutex>
|
|
@@ -19,14 +23,53 @@
|
|
|
19
23
|
|
|
20
24
|
#ifdef __APPLE__
|
|
21
25
|
#include <uuid/uuid.h>
|
|
26
|
+
#include <mach/mach.h>
|
|
27
|
+
#elif defined(_WIN32)
|
|
28
|
+
#include <windows.h>
|
|
29
|
+
#include <psapi.h>
|
|
30
|
+
#elif defined(__linux__) || defined(__ANDROID__)
|
|
31
|
+
#include <unistd.h>
|
|
22
32
|
#endif
|
|
23
33
|
|
|
34
|
+
/**
 * Returns the current process memory footprint in bytes.
 *
 * Platform sources:
 *  - Apple:         `phys_footprint` reported by task_info(TASK_VM_INFO).
 *  - Windows:       `PrivateUsage` reported by GetProcessMemoryInfo.
 *  - Linux/Android: second field of /proc/self/statm (resident pages)
 *                   multiplied by the system page size.
 *
 * @return Footprint in bytes, or 0 when the query fails or the platform is
 *         not one of the above.
 */
inline size_t get_memory_footprint_bytes() {
#ifdef __APPLE__
    task_vm_info_data_t vm_info;
    mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
    if (task_info(mach_task_self(), TASK_VM_INFO, reinterpret_cast<task_info_t>(&vm_info), &count) == KERN_SUCCESS)
        return static_cast<size_t>(vm_info.phys_footprint);

#elif defined(_WIN32)
    PROCESS_MEMORY_COUNTERS_EX pmc;
    if (GetProcessMemoryInfo(GetCurrentProcess(), reinterpret_cast<PROCESS_MEMORY_COUNTERS*>(&pmc), sizeof(pmc)))
        return static_cast<size_t>(pmc.PrivateUsage);

#elif defined(__linux__) || defined(__ANDROID__)
    std::ifstream statm("/proc/self/statm");
    size_t size = 0;
    size_t resident = 0;
    // Trust the counters only when both fields were actually parsed; a stream
    // that failed to open or to extract leaves them at 0 and we fall through.
    if (statm >> size >> resident) {
        // sysconf reports failure as -1; never feed that into unsigned math.
        const long page_size = sysconf(_SC_PAGESIZE);
        if (page_size > 0) {
            return resident * static_cast<size_t>(page_size);
        }
    }
#endif
    return 0;
}
|
|
56
|
+
|
|
57
|
+
inline double get_ram_usage_mb() {
|
|
58
|
+
return get_memory_footprint_bytes() / (1024.0 * 1024.0);
|
|
59
|
+
}
|
|
60
|
+
|
|
24
61
|
struct CactusModelHandle {
|
|
25
62
|
std::unique_ptr<cactus::engine::Model> model;
|
|
63
|
+
std::unique_ptr<cactus::engine::Model> vad_model;
|
|
26
64
|
std::atomic<bool> should_stop;
|
|
27
65
|
std::vector<uint32_t> processed_tokens;
|
|
28
66
|
std::mutex model_mutex;
|
|
29
67
|
std::string model_name;
|
|
68
|
+
std::unique_ptr<cactus::engine::index::Index> corpus_index;
|
|
69
|
+
std::string corpus_dir;
|
|
70
|
+
size_t corpus_embedding_dim = 0;
|
|
71
|
+
std::vector<std::vector<float>> tool_embeddings;
|
|
72
|
+
std::vector<std::string> tool_texts;
|
|
30
73
|
|
|
31
74
|
CactusModelHandle() : should_stop(false) {}
|
|
32
75
|
};
|
|
@@ -36,15 +79,116 @@ extern std::string last_error_message;
|
|
|
36
79
|
bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
|
|
37
80
|
const std::vector<std::vector<uint32_t>>& stop_sequences);
|
|
38
81
|
|
|
82
|
+
std::string retrieve_rag_context(CactusModelHandle* handle, const std::string& query);
|
|
83
|
+
|
|
39
84
|
namespace cactus {
|
|
40
|
-
namespace
|
|
85
|
+
namespace audio {
|
|
86
|
+
|
|
87
|
+
static constexpr size_t WHISPER_TARGET_FRAMES = 3000;
|
|
88
|
+
static constexpr int WHISPER_SAMPLE_RATE = 16000;
|
|
89
|
+
|
|
90
|
+
/**
 * Builds the spectrogram configuration used for Whisper audio features.
 *
 * Starts from a value-initialised SpectrogramConfig and overrides the fields
 * listed below; every other field keeps its value-initialised state.
 */
inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram_config() {
    auto config = cactus::engine::AudioProcessor::SpectrogramConfig{};

    // Framing: 400-point FFT over 400-sample frames, advancing 160 samples per frame.
    config.n_fft = 400;
    config.frame_length = 400;
    config.hop_length = 160;

    // Spectrum shape: power spectrogram, centred frames with reflect padding, one-sided output.
    config.power = 2.0f;
    config.center = true;
    config.pad_mode = "reflect";
    config.onesided = true;

    // Signal conditioning.
    config.dither = 0.0f;
    config.remove_dc_offset = true;

    // Log-mel scaling: base-10 log with a 1e-10 floor.
    config.mel_floor = 1e-10f;
    config.log_mel = "log10";
    config.reference = 1.0f;
    config.min_value = 1e-10f;

    return config;
}
|
|
41
107
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
108
|
+
/**
 * Builds the spectrogram configuration used for Parakeet audio features.
 *
 * Starts from a value-initialised SpectrogramConfig and overrides the fields
 * listed below; every other field keeps its value-initialised state.
 */
inline cactus::engine::AudioProcessor::SpectrogramConfig get_parakeet_spectrogram_config() {
    auto config = cactus::engine::AudioProcessor::SpectrogramConfig{};

    // Framing: 512-point FFT over 400-sample frames, advancing 160 samples per frame.
    config.n_fft = 512;
    config.frame_length = 400;
    config.hop_length = 160;

    // Spectrum shape: power spectrogram, centred frames with constant padding, one-sided output.
    config.power = 2.0f;
    config.center = true;
    config.pad_mode = "constant";
    config.onesided = true;

    // Signal conditioning: no dither, DC offset left in place, non-periodic Hann window.
    config.dither = 0.0f;
    config.remove_dc_offset = false;
    config.hann_periodic = false;

    // Log-mel scaling: natural log.
    config.mel_floor = 5.960464477539063e-08f;  // 2^-24 guard value used by HF Parakeet.
    config.log_mel = "log";
    config.reference = 1.0f;
    config.min_value = 1e-10f;

    return config;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Applies a first-order pre-emphasis filter in place:
 *   y[i] = x[i] - coefficient * x[i - 1]   for i >= 1,   y[0] = x[0].
 *
 * @param waveform    Samples to filter; untouched when it has fewer than two
 *                    samples.
 * @param coefficient Filter coefficient; a value of exactly 0 is a no-op.
 */
inline void apply_preemphasis(std::vector<float>& waveform, float coefficient = 0.97f) {
    const bool nothing_to_do = waveform.size() < 2 || coefficient == 0.0f;
    if (nothing_to_do) {
        return;
    }

    // Walk forward while remembering the *unfiltered* predecessor, so every
    // output is computed from original samples only.
    float previous_raw = waveform[0];
    for (size_t idx = 1; idx < waveform.size(); ++idx) {
        const float current_raw = waveform[idx];
        waveform[idx] -= coefficient * previous_raw;
        previous_raw = current_raw;
    }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Normalises each mel bin to zero mean and unit variance, in place.
 *
 * `mel` is indexed as num_mels consecutive rows of (mel.size() / num_mels)
 * frames each. For every row the mean is subtracted and the result is
 * multiplied by 1 / sqrt(variance + epsilon), where the variance uses the
 * (frames - 1) divisor, clamped to at least 1.
 *
 * @param mel      Row-per-bin feature buffer; left untouched when it is empty,
 *                 num_mels is 0, or its size is not a multiple of num_mels.
 * @param num_mels Number of rows (mel bins).
 * @param epsilon  Added to the variance before the square root.
 */
inline void normalize_parakeet_log_mel(std::vector<float>& mel, size_t num_mels, float epsilon = 1e-5f) {
    const bool well_formed = !mel.empty() && num_mels != 0 && (mel.size() % num_mels) == 0;
    if (!well_formed) {
        return;
    }
    const size_t frames_per_bin = mel.size() / num_mels;
    if (frames_per_bin == 0) {
        return;
    }

    const float frame_count = static_cast<float>(frames_per_bin);
    // Divisor is (frames - 1), clamped to 1 for single-frame rows.
    const float variance_divisor = static_cast<float>(frames_per_bin > 1 ? frames_per_bin - 1 : 1);

    for (size_t bin = 0; bin < num_mels; ++bin) {
        float* const row_begin = mel.data() + bin * frames_per_bin;
        float* const row_end = row_begin + frames_per_bin;

        float mean = 0.0f;
        for (const float* p = row_begin; p != row_end; ++p) {
            mean += *p;
        }
        mean /= frame_count;

        float squared_deviation_sum = 0.0f;
        for (const float* p = row_begin; p != row_end; ++p) {
            const float deviation = *p - mean;
            squared_deviation_sum += deviation * deviation;
        }

        const float inverse_std = 1.0f / std::sqrt((squared_deviation_sum / variance_divisor) + epsilon);
        for (float* p = row_begin; p != row_end; ++p) {
            *p = (*p - mean) * inverse_std;
        }
    }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Keeps only the first `valid_frames` frames of every mel bin.
 *
 * `mel` is indexed as num_mels consecutive rows of (mel.size() / num_mels)
 * frames. On return it holds num_mels rows of valid_frames frames each.
 *
 * The buffer is left untouched when it is empty, num_mels is 0, its size is
 * not a multiple of num_mels, valid_frames is 0, or valid_frames is not
 * smaller than the current per-row frame count.
 */
inline void trim_mel_frames(std::vector<float>& mel, size_t num_mels, size_t valid_frames) {
    if (mel.empty() || num_mels == 0) {
        return;
    }
    if ((mel.size() % num_mels) != 0) {
        return;
    }

    const size_t frames_per_bin = mel.size() / num_mels;
    const bool nothing_to_trim = (valid_frames == 0) || (valid_frames >= frames_per_bin);
    if (nothing_to_trim) {
        return;
    }

    std::vector<float> compacted(num_mels * valid_frames);
    float* write_cursor = compacted.data();
    for (size_t bin = 0; bin < num_mels; ++bin) {
        const float* row = mel.data() + bin * frames_per_bin;
        // Copy the leading frames of this row; the tail is dropped.
        write_cursor = std::copy(row, row + valid_frames, write_cursor);
    }
    mel.swap(compacted);
}
|
|
182
|
+
|
|
183
|
+
} // namespace audio
|
|
184
|
+
} // namespace cactus
|
|
185
|
+
|
|
186
|
+
namespace cactus {
|
|
187
|
+
namespace ffi {
|
|
188
|
+
|
|
189
|
+
/**
 * Reports whether an environment variable is set to an "on" value.
 *
 * @param key Name of the environment variable.
 * @return false when the variable is unset, empty, or exactly "0";
 *         true for any other value.
 */
inline bool env_flag_enabled(const char* key) {
    const char* raw = std::getenv(key);
    if (raw == nullptr || raw[0] == '\0') {
        return false;  // unset or empty
    }
    const bool is_literal_zero = (raw[0] == '0') && (raw[1] == '\0');
    return !is_literal_zero;
}
|
|
49
193
|
|
|
50
194
|
inline std::string generateUUID() {
|
|
@@ -54,6 +198,25 @@ inline std::string generateUUID() {
|
|
|
54
198
|
char uuid_str[37];
|
|
55
199
|
uuid_unparse_lower(uuid, uuid_str);
|
|
56
200
|
return std::string(uuid_str);
|
|
201
|
+
#else
|
|
202
|
+
static std::random_device rd;
|
|
203
|
+
static std::mt19937 gen(rd());
|
|
204
|
+
static std::uniform_int_distribution<> dis(0, 15);
|
|
205
|
+
static std::uniform_int_distribution<> dis2(8, 11);
|
|
206
|
+
|
|
207
|
+
std::stringstream ss;
|
|
208
|
+
ss << std::hex;
|
|
209
|
+
for (int i = 0; i < 8; i++) ss << dis(gen);
|
|
210
|
+
ss << "-";
|
|
211
|
+
for (int i = 0; i < 4; i++) ss << dis(gen);
|
|
212
|
+
ss << "-4";
|
|
213
|
+
for (int i = 0; i < 3; i++) ss << dis(gen);
|
|
214
|
+
ss << "-";
|
|
215
|
+
ss << dis2(gen);
|
|
216
|
+
for (int i = 0; i < 3; i++) ss << dis(gen);
|
|
217
|
+
ss << "-";
|
|
218
|
+
for (int i = 0; i < 12; i++) ss << dis(gen);
|
|
219
|
+
return ss.str();
|
|
57
220
|
#endif
|
|
58
221
|
}
|
|
59
222
|
|
|
@@ -66,18 +229,173 @@ struct ToolFunction {
|
|
|
66
229
|
} // namespace ffi
|
|
67
230
|
} // namespace cactus
|
|
68
231
|
|
|
232
|
+
std::vector<cactus::ffi::ToolFunction> select_relevant_tools(
|
|
233
|
+
CactusModelHandle* handle,
|
|
234
|
+
const std::string& query,
|
|
235
|
+
const std::vector<cactus::ffi::ToolFunction>& all_tools,
|
|
236
|
+
size_t top_k);
|
|
237
|
+
|
|
69
238
|
#include "gemma_tools.h"
|
|
70
239
|
|
|
71
240
|
namespace cactus {
|
|
72
241
|
namespace ffi {
|
|
73
242
|
|
|
74
|
-
inline
|
|
75
|
-
std::
|
|
76
|
-
for (
|
|
77
|
-
if (c == '"')
|
|
78
|
-
if (c == '\n')
|
|
243
|
+
/**
 * Escapes a string so it can be placed between double quotes in a JSON
 * document.
 *
 * Quote, backslash, and the common whitespace controls use their short
 * escapes (\" \\ \n \r \t \b \f). Every other character below 0x20 is written
 * as \u00XX, because JSON forbids raw control characters inside strings.
 * Bytes at or above 0x20 (including UTF-8 multi-byte sequences) are copied
 * through unchanged.
 *
 * @param s Raw text.
 * @return  The escaped text, without surrounding quotes.
 */
inline std::string escape_json_string(const std::string& s) {
    static const char kHexDigits[] = "0123456789abcdef";

    std::string escaped;
    escaped.reserve(s.size() + 8);
    for (const char c : s) {
        switch (c) {
            case '"':  escaped += "\\\""; break;
            case '\\': escaped += "\\\\"; break;
            case '\n': escaped += "\\n";  break;
            case '\r': escaped += "\\r";  break;
            case '\t': escaped += "\\t";  break;
            case '\b': escaped += "\\b";  break;
            case '\f': escaped += "\\f";  break;
            default: {
                const unsigned char byte = static_cast<unsigned char>(c);
                if (byte < 0x20) {
                    // Remaining control characters have no short form.
                    escaped += "\\u00";
                    escaped += kHexDigits[byte >> 4];
                    escaped += kHexDigits[byte & 0x0F];
                } else {
                    escaped += c;
                }
                break;
            }
        }
    }
    return escaped;
}
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
/**
 * Returns a copy of `s` with leading and trailing whitespace removed.
 * Whitespace is whatever std::isspace accepts for the byte value.
 * An all-whitespace or empty input yields an empty string.
 */
inline std::string trim_string(const std::string& s) {
    const auto is_space = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
    };

    // First non-space from the front, then first non-space from the back
    // (never scanning past the front boundary).
    const auto first = std::find_if_not(s.begin(), s.end(), is_space);
    const auto last = std::find_if_not(s.rbegin(),
                                       std::string::const_reverse_iterator(first),
                                       is_space).base();
    return std::string(first, last);
}
|
|
264
|
+
|
|
265
|
+
/**
 * Reads an environment variable with a fallback.
 *
 * @param key      Name of the environment variable.
 * @param fallback Value used when the variable is unset or empty.
 * @return The variable's value when it is set and non-empty, otherwise
 *         `fallback`.
 */
inline std::string env_or_default(const char* key, const char* fallback) {
    const char* from_env = std::getenv(key);
    const bool usable = (from_env != nullptr) && (*from_env != '\0');
    return std::string(usable ? from_env : fallback);
}
|
|
270
|
+
|
|
271
|
+
// Reads four hex digits starting at json[pos] into `value`.
// Returns false (leaving `value` untouched) when fewer than four characters
// remain or any of them is not a hex digit.
inline bool json_parse_hex4(const std::string& json, size_t pos, uint32_t& value) {
    if (pos + 4 > json.size()) return false;
    uint32_t parsed = 0;
    for (size_t k = 0; k < 4; ++k) {
        const char h = json[pos + k];
        parsed <<= 4;
        if (h >= '0' && h <= '9') parsed |= static_cast<uint32_t>(h - '0');
        else if (h >= 'a' && h <= 'f') parsed |= static_cast<uint32_t>(h - 'a' + 10);
        else if (h >= 'A' && h <= 'F') parsed |= static_cast<uint32_t>(h - 'A' + 10);
        else return false;
    }
    value = parsed;
    return true;
}

// Appends the UTF-8 encoding of code point `cp` (<= 0x10FFFF) to `out`.
inline void json_append_utf8(std::string& out, uint32_t cp) {
    if (cp < 0x80) {
        out.push_back(static_cast<char>(cp));
    } else if (cp < 0x800) {
        out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000) {
        out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
        out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

/**
 * Extracts the string value of the first occurrence of `"key":` in `json`.
 *
 * This is a lightweight textual lookup, not a JSON parser: the first match of
 * the literal pattern `"key":` is used, optional whitespace after the colon
 * is skipped, and the following double-quoted string is decoded.
 *
 * Escapes \" \\ \/ \b \f \n \r \t are decoded. \uXXXX escapes are decoded to
 * UTF-8, combining a high/low surrogate pair into one code point; a lone
 * surrogate becomes U+FFFD. A \u that is not followed by four hex digits is
 * kept as a literal 'u'. Any other escaped character is copied as-is.
 *
 * @param json Text to search.
 * @param key  Field name, without quotes.
 * @return The decoded value, or an empty string when the key is missing, the
 *         value is not a string, or the string is unterminated.
 */
inline std::string json_string_field(const std::string& json, const std::string& key) {
    const std::string pattern = "\"" + key + "\":";
    const size_t pos = json.find(pattern);
    if (pos == std::string::npos) return {};

    size_t i = pos + pattern.size();
    while (i < json.size() && std::isspace(static_cast<unsigned char>(json[i]))) i++;
    if (i >= json.size() || json[i] != '"') return {};
    ++i;

    std::string out;
    out.reserve(128);
    while (i < json.size()) {
        const char c = json[i++];
        if (c == '"') return out;  // closing quote: value complete
        if (c == '\\' && i < json.size()) {
            const char e = json[i++];
            switch (e) {
                case '"':  out.push_back('"');  break;
                case '\\': out.push_back('\\'); break;
                case '/':  out.push_back('/');  break;
                case 'b':  out.push_back('\b'); break;
                case 'f':  out.push_back('\f'); break;
                case 'n':  out.push_back('\n'); break;
                case 'r':  out.push_back('\r'); break;
                case 't':  out.push_back('\t'); break;
                case 'u': {
                    uint32_t cp = 0;
                    if (!json_parse_hex4(json, i, cp)) {
                        // Malformed escape: keep the character literally.
                        out.push_back('u');
                        break;
                    }
                    i += 4;
                    // A high surrogate must be paired with a following \uDC00-\uDFFF.
                    if (cp >= 0xD800 && cp <= 0xDBFF &&
                        i + 1 < json.size() && json[i] == '\\' && json[i + 1] == 'u') {
                        uint32_t low = 0;
                        if (json_parse_hex4(json, i + 2, low) && low >= 0xDC00 && low <= 0xDFFF) {
                            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                            i += 6;
                        }
                    }
                    if (cp >= 0xD800 && cp <= 0xDFFF) cp = 0xFFFD;  // unpaired surrogate
                    json_append_utf8(out, cp);
                    break;
                }
                default: out.push_back(e); break;
            }
            continue;
        }
        out.push_back(c);
    }
    // Ran off the end without a closing quote.
    return {};
}
|
|
305
|
+
|
|
306
|
+
/**
 * Extracts the raw text of the array value of the first occurrence of
 * `"key":` in `json`.
 *
 * This is a lightweight textual lookup, not a JSON parser. After the pattern
 * and optional whitespace, the value must start with '['. The matching ']' is
 * found by bracket counting that skips over string literals (honouring
 * backslash escapes), so brackets inside string values do not affect the
 * extent.
 *
 * @param json Text to search.
 * @param key  Field name, without quotes.
 * @return The array text including its outer brackets; "[]" when the key is
 *         missing or its value is not an array. If the array is never closed,
 *         everything from '[' to the end of `json` is returned.
 */
inline std::string json_array_field(const std::string& json, const std::string& key) {
    const std::string pattern = "\"" + key + "\":";
    const size_t pos = json.find(pattern);
    if (pos == std::string::npos) return "[]";

    size_t start = pos + pattern.size();
    while (start < json.size() && std::isspace(static_cast<unsigned char>(json[start]))) ++start;
    if (start >= json.size() || json[start] != '[') return "[]";

    int depth = 1;
    bool in_string = false;
    bool escaped = false;
    size_t end = start + 1;
    while (end < json.size() && depth > 0) {
        const char c = json[end];
        if (in_string) {
            // Inside a string only the closing quote matters; an escaped
            // character (including \" and \\) is skipped.
            if (escaped) escaped = false;
            else if (c == '\\') escaped = true;
            else if (c == '"') in_string = false;
        } else if (c == '"') {
            in_string = true;
        } else if (c == '[') {
            ++depth;
        } else if (c == ']') {
            --depth;
        }
        ++end;
    }
    return json.substr(start, end - start);
}
|
|
323
|
+
|
|
324
|
+
/**
 * Splits the text of a JSON array of objects into the text of each object.
 *
 * `array_json` must begin with '[' and end with ']'; otherwise the result is
 * empty. Whitespace and commas between elements are skipped. Scanning stops
 * at the first element that does not start with '{', or when an object is
 * never closed; objects collected up to that point are returned. Braces
 * inside string literals (with backslash escapes honoured) are ignored when
 * matching.
 *
 * @param array_json Array text including the outer brackets.
 * @return One string per top-level object, each including its braces.
 */
inline std::vector<std::string> split_json_array(const std::string& array_json) {
    std::vector<std::string> objects;
    const size_t length = array_json.size();
    const bool bracketed = length >= 2 && array_json.front() == '[' && array_json.back() == ']';
    if (!bracketed) return objects;

    // Index of the '}' that closes the object opening at `open`, or npos when
    // the object is never closed.
    const auto find_closing_brace = [&array_json, length](size_t open) -> size_t {
        int depth = 0;
        bool in_string = false;
        bool escaped = false;
        for (size_t k = open; k < length; ++k) {
            const char ch = array_json[k];
            if (in_string) {
                if (escaped) escaped = false;
                else if (ch == '\\') escaped = true;
                else if (ch == '"') in_string = false;
                continue;
            }
            switch (ch) {
                case '"': in_string = true; break;
                case '{': ++depth; break;
                case '}':
                    if (--depth == 0) return k;
                    break;
                default: break;
            }
        }
        return std::string::npos;
    };

    const auto is_separator = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) || ch == ',';
    };

    size_t cursor = 1;  // just past the opening '['
    while (cursor + 1 < length) {
        while (cursor + 1 < length && is_separator(array_json[cursor])) ++cursor;
        if (cursor + 1 >= length || array_json[cursor] != '{') break;

        const size_t close = find_closing_brace(cursor);
        if (close == std::string::npos) break;

        objects.push_back(array_json.substr(cursor, close - cursor + 1));
        cursor = close + 1;
    }
    return objects;
}
|
|
360
|
+
|
|
361
|
+
inline std::string serialize_tools_json(const std::vector<ToolFunction>& tools) {
|
|
362
|
+
if (tools.empty()) return "";
|
|
363
|
+
std::ostringstream oss;
|
|
364
|
+
oss << "[";
|
|
365
|
+
for (size_t i = 0; i < tools.size(); ++i) {
|
|
366
|
+
if (i > 0) oss << ",";
|
|
367
|
+
oss << "{\"type\":\"function\",\"function\":{";
|
|
368
|
+
oss << "\"name\":\"" << escape_json_string(tools[i].name) << "\",";
|
|
369
|
+
oss << "\"description\":\"" << escape_json_string(tools[i].description) << "\"";
|
|
370
|
+
auto it = tools[i].parameters.find("schema");
|
|
371
|
+
if (it != tools[i].parameters.end()) {
|
|
372
|
+
oss << ",\"parameters\":" << it->second;
|
|
373
|
+
}
|
|
374
|
+
oss << "}}";
|
|
79
375
|
}
|
|
80
|
-
|
|
376
|
+
oss << "]";
|
|
377
|
+
return oss.str();
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
|
|
381
|
+
std::ostringstream json;
|
|
382
|
+
json << "{";
|
|
383
|
+
json << "\"success\":false,";
|
|
384
|
+
json << "\"error\":\"" << escape_json_string(error_message) << "\",";
|
|
385
|
+
json << "\"cloud_handoff\":false,";
|
|
386
|
+
json << "\"response\":null,";
|
|
387
|
+
json << "\"function_calls\":[],";
|
|
388
|
+
json << "\"confidence\":0.0,";
|
|
389
|
+
json << "\"time_to_first_token_ms\":0.0,";
|
|
390
|
+
json << "\"total_time_ms\":0.0,";
|
|
391
|
+
json << "\"prefill_tps\":0.0,";
|
|
392
|
+
json << "\"decode_tps\":0.0,";
|
|
393
|
+
json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
|
|
394
|
+
json << "\"prefill_tokens\":0,";
|
|
395
|
+
json << "\"decode_tokens\":0,";
|
|
396
|
+
json << "\"total_tokens\":0";
|
|
397
|
+
json << "}";
|
|
398
|
+
std::string error_json = json.str();
|
|
81
399
|
if (response_buffer && error_json.length() < buffer_size) {
|
|
82
400
|
std::strcpy(response_buffer, error_json.c_str());
|
|
83
401
|
}
|
|
@@ -228,12 +546,28 @@ inline void parse_options_json(const std::string& json,
|
|
|
228
546
|
float& temperature, float& top_p,
|
|
229
547
|
size_t& top_k, size_t& max_tokens,
|
|
230
548
|
std::vector<std::string>& stop_sequences,
|
|
231
|
-
bool& force_tools
|
|
549
|
+
bool& force_tools,
|
|
550
|
+
size_t& tool_rag_top_k,
|
|
551
|
+
float& confidence_threshold,
|
|
552
|
+
bool& include_stop_sequences,
|
|
553
|
+
bool& use_vad,
|
|
554
|
+
bool& telemetry_enabled,
|
|
555
|
+
bool* auto_handoff = nullptr,
|
|
556
|
+
size_t* cloud_timeout_ms = nullptr,
|
|
557
|
+
bool* handoff_with_images = nullptr) {
|
|
232
558
|
temperature = 0.0f;
|
|
233
559
|
top_p = 0.0f;
|
|
234
560
|
top_k = 0;
|
|
235
561
|
max_tokens = 100;
|
|
236
562
|
force_tools = false;
|
|
563
|
+
tool_rag_top_k = 2;
|
|
564
|
+
confidence_threshold = 0.7f;
|
|
565
|
+
include_stop_sequences = false;
|
|
566
|
+
use_vad = true;
|
|
567
|
+
telemetry_enabled = true;
|
|
568
|
+
if (auto_handoff) *auto_handoff = true;
|
|
569
|
+
if (cloud_timeout_ms) *cloud_timeout_ms = 15000;
|
|
570
|
+
if (handoff_with_images) *handoff_with_images = true;
|
|
237
571
|
stop_sequences.clear();
|
|
238
572
|
|
|
239
573
|
if (json.empty()) return;
|
|
@@ -269,6 +603,65 @@ inline void parse_options_json(const std::string& json,
|
|
|
269
603
|
force_tools = (json.substr(pos, 4) == "true");
|
|
270
604
|
}
|
|
271
605
|
|
|
606
|
+
pos = json.find("\"tool_rag_top_k\"");
|
|
607
|
+
if (pos != std::string::npos) {
|
|
608
|
+
pos = json.find(':', pos) + 1;
|
|
609
|
+
tool_rag_top_k = std::stoul(json.substr(pos));
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
pos = json.find("\"confidence_threshold\"");
|
|
613
|
+
if (pos != std::string::npos) {
|
|
614
|
+
pos = json.find(':', pos) + 1;
|
|
615
|
+
confidence_threshold = std::stof(json.substr(pos));
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
pos = json.find("\"include_stop_sequences\"");
|
|
619
|
+
if (pos != std::string::npos) {
|
|
620
|
+
pos = json.find(':', pos) + 1;
|
|
621
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
622
|
+
include_stop_sequences = (json.substr(pos, 4) == "true");
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
pos = json.find("\"use_vad\"");
|
|
626
|
+
if (pos != std::string::npos) {
|
|
627
|
+
pos = json.find(':', pos) + 1;
|
|
628
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
629
|
+
use_vad = (json.substr(pos, 4) == "true");
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
pos = json.find("\"telemetry_enabled\"");
|
|
633
|
+
if (pos != std::string::npos) {
|
|
634
|
+
pos = json.find(':', pos) + 1;
|
|
635
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
636
|
+
telemetry_enabled = (json.substr(pos, 4) == "true");
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
if (auto_handoff) {
|
|
640
|
+
pos = json.find("\"auto_handoff\"");
|
|
641
|
+
if (pos != std::string::npos) {
|
|
642
|
+
pos = json.find(':', pos) + 1;
|
|
643
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
644
|
+
*auto_handoff = (json.substr(pos, 4) == "true");
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
if (cloud_timeout_ms) {
|
|
649
|
+
pos = json.find("\"cloud_timeout_ms\"");
|
|
650
|
+
if (pos != std::string::npos) {
|
|
651
|
+
pos = json.find(':', pos) + 1;
|
|
652
|
+
*cloud_timeout_ms = std::stoul(json.substr(pos));
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
if (handoff_with_images) {
|
|
657
|
+
pos = json.find("\"handoff_with_images\"");
|
|
658
|
+
if (pos != std::string::npos) {
|
|
659
|
+
pos = json.find(':', pos) + 1;
|
|
660
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
661
|
+
*handoff_with_images = (json.substr(pos, 4) == "true");
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
|
|
272
665
|
pos = json.find("\"stop_sequences\"");
|
|
273
666
|
if (pos != std::string::npos) {
|
|
274
667
|
pos = json.find('[', pos);
|
|
@@ -288,21 +681,70 @@ inline void parse_options_json(const std::string& json,
|
|
|
288
681
|
}
|
|
289
682
|
}
|
|
290
683
|
|
|
291
|
-
inline std::string
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
684
|
+
static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
|
|
685
|
+
return trim_string(value.substr(begin, end - begin));
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
static inline void append_lfm2_call(const std::string& entry,
|
|
689
|
+
std::vector<std::string>& function_calls) {
|
|
690
|
+
if (entry.empty()) return;
|
|
691
|
+
|
|
692
|
+
std::string trimmed_entry = trim_lfm2_slice(entry, 0, entry.size());
|
|
693
|
+
if (trimmed_entry.empty()) return;
|
|
694
|
+
|
|
695
|
+
size_t paren_pos = trimmed_entry.find('(');
|
|
696
|
+
if (paren_pos == std::string::npos) return;
|
|
697
|
+
|
|
698
|
+
std::string func_name = trim_lfm2_slice(trimmed_entry, 0, paren_pos);
|
|
699
|
+
std::string args_str = trim_lfm2_slice(trimmed_entry, paren_pos + 1, trimmed_entry.size());
|
|
700
|
+
|
|
701
|
+
if (!args_str.empty() && args_str.back() == ')') {
|
|
702
|
+
args_str.pop_back();
|
|
703
|
+
args_str = trim_lfm2_slice(args_str, 0, args_str.size());
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
|
|
707
|
+
|
|
708
|
+
size_t arg_pos = 0;
|
|
709
|
+
bool first_arg = true;
|
|
710
|
+
while (arg_pos < args_str.length()) {
|
|
711
|
+
while (arg_pos < args_str.length() && std::isspace(static_cast<unsigned char>(args_str[arg_pos]))) {
|
|
712
|
+
arg_pos++;
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
size_t eq_pos = args_str.find('=', arg_pos);
|
|
716
|
+
if (eq_pos == std::string::npos) break;
|
|
717
|
+
|
|
718
|
+
std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
|
|
719
|
+
|
|
720
|
+
size_t val_start = eq_pos + 1;
|
|
721
|
+
size_t val_end = val_start;
|
|
722
|
+
|
|
723
|
+
if (val_start < args_str.length() && args_str[val_start] == '"') {
|
|
724
|
+
val_start++;
|
|
725
|
+
val_end = args_str.find('"', val_start);
|
|
726
|
+
if (val_end == std::string::npos) break;
|
|
727
|
+
} else {
|
|
728
|
+
val_end = args_str.find(',', val_start);
|
|
729
|
+
if (val_end == std::string::npos) val_end = args_str.length();
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
std::string arg_value = args_str.substr(val_start, val_end - val_start);
|
|
733
|
+
|
|
734
|
+
if (!first_arg) json_call += ",";
|
|
735
|
+
json_call += "\"" + arg_name + "\":\"" + arg_value + "\"";
|
|
736
|
+
first_arg = false;
|
|
737
|
+
|
|
738
|
+
arg_pos = args_str.find(',', val_end);
|
|
739
|
+
if (arg_pos != std::string::npos) {
|
|
740
|
+
arg_pos++;
|
|
741
|
+
} else {
|
|
742
|
+
break;
|
|
302
743
|
}
|
|
303
|
-
formatted_tools_json += "}}";
|
|
304
744
|
}
|
|
305
|
-
|
|
745
|
+
|
|
746
|
+
json_call += "}}";
|
|
747
|
+
function_calls.push_back(json_call);
|
|
306
748
|
}
|
|
307
749
|
|
|
308
750
|
inline void parse_function_calls_from_response(const std::string& response_text,
|
|
@@ -341,7 +783,7 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
341
783
|
break;
|
|
342
784
|
}
|
|
343
785
|
}
|
|
344
|
-
|
|
786
|
+
|
|
345
787
|
// Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
|
|
346
788
|
const std::string TOOL_CALL_START = "<|tool_call_start|>";
|
|
347
789
|
const std::string TOOL_CALL_END = "<|tool_call_end|>";
|
|
@@ -349,68 +791,77 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
349
791
|
|
|
350
792
|
while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
|
|
351
793
|
size_t content_start = tool_start_pos + TOOL_CALL_START.length();
|
|
352
|
-
size_t tool_end_pos =
|
|
794
|
+
size_t tool_end_pos = regular_response.find(TOOL_CALL_END, content_start);
|
|
353
795
|
|
|
354
796
|
if (tool_end_pos != std::string::npos) {
|
|
355
|
-
std::string tool_content =
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
if (paren_pos != std::string::npos) {
|
|
362
|
-
std::string func_name = tool_content.substr(0, paren_pos);
|
|
363
|
-
std::string args_str = tool_content.substr(paren_pos + 1);
|
|
364
|
-
|
|
365
|
-
if (!args_str.empty() && args_str.back() == ')') {
|
|
366
|
-
args_str.pop_back();
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
std::string json_call = "{\"name\":\"" + func_name + "\",\"arguments\":{";
|
|
370
|
-
|
|
371
|
-
size_t arg_pos = 0;
|
|
372
|
-
bool first_arg = true;
|
|
373
|
-
while (arg_pos < args_str.length()) {
|
|
374
|
-
while (arg_pos < args_str.length() && std::isspace(args_str[arg_pos])) arg_pos++;
|
|
375
|
-
|
|
376
|
-
size_t eq_pos = args_str.find('=', arg_pos);
|
|
377
|
-
if (eq_pos == std::string::npos) break;
|
|
378
|
-
|
|
379
|
-
std::string arg_name = args_str.substr(arg_pos, eq_pos - arg_pos);
|
|
797
|
+
std::string tool_content = regular_response.substr(content_start, tool_end_pos - content_start);
|
|
798
|
+
std::string content = tool_content;
|
|
799
|
+
size_t trim_start = 0;
|
|
800
|
+
while (trim_start < content.size() && std::isspace(static_cast<unsigned char>(content[trim_start]))) {
|
|
801
|
+
trim_start++;
|
|
802
|
+
}
|
|
380
803
|
|
|
381
|
-
|
|
382
|
-
|
|
804
|
+
if (trim_start < content.size()) {
|
|
805
|
+
size_t trim_end = content.size() - 1;
|
|
806
|
+
while (trim_end > trim_start && std::isspace(static_cast<unsigned char>(content[trim_end]))) {
|
|
807
|
+
trim_end--;
|
|
808
|
+
}
|
|
809
|
+
content = content.substr(trim_start, trim_end - trim_start + 1);
|
|
810
|
+
} else {
|
|
811
|
+
content.clear();
|
|
812
|
+
}
|
|
383
813
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
814
|
+
if (!content.empty() && content.front() == '[' && content.back() == ']') {
|
|
815
|
+
std::string inner = content.substr(1, content.size() - 2);
|
|
816
|
+
|
|
817
|
+
size_t inner_first = inner.find_first_not_of(" \t\n\r");
|
|
818
|
+
if (inner_first != std::string::npos && inner[inner_first] == '{') {
|
|
819
|
+
size_t pos = inner_first;
|
|
820
|
+
while (pos < inner.size()) {
|
|
821
|
+
if (inner[pos] == '{') {
|
|
822
|
+
int brace_depth = 1;
|
|
823
|
+
size_t obj_start = pos;
|
|
824
|
+
pos++;
|
|
825
|
+
while (pos < inner.size() && brace_depth > 0) {
|
|
826
|
+
if (inner[pos] == '{') brace_depth++;
|
|
827
|
+
else if (inner[pos] == '}') brace_depth--;
|
|
828
|
+
pos++;
|
|
829
|
+
}
|
|
830
|
+
if (brace_depth == 0) {
|
|
831
|
+
std::string json_obj = inner.substr(obj_start, pos - obj_start);
|
|
832
|
+
if (json_obj.find("\"name\"") != std::string::npos) {
|
|
833
|
+
function_calls.push_back(json_obj);
|
|
834
|
+
}
|
|
835
|
+
}
|
|
388
836
|
} else {
|
|
389
|
-
|
|
390
|
-
if (val_end == std::string::npos) val_end = args_str.length();
|
|
837
|
+
pos++;
|
|
391
838
|
}
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
839
|
+
}
|
|
840
|
+
} else {
|
|
841
|
+
size_t start = 0;
|
|
842
|
+
int paren_depth = 0;
|
|
843
|
+
|
|
844
|
+
for (size_t i = 0; i < inner.size(); ++i) {
|
|
845
|
+
char c = inner[i];
|
|
846
|
+
if (c == '(') {
|
|
847
|
+
paren_depth++;
|
|
848
|
+
} else if (c == ')' && paren_depth > 0) {
|
|
849
|
+
paren_depth--;
|
|
850
|
+
} else if (c == ',' && paren_depth == 0) {
|
|
851
|
+
append_lfm2_call(inner.substr(start, i - start), function_calls);
|
|
852
|
+
start = i + 1;
|
|
404
853
|
}
|
|
405
854
|
}
|
|
406
855
|
|
|
407
|
-
|
|
408
|
-
|
|
856
|
+
if (start < inner.size()) {
|
|
857
|
+
append_lfm2_call(inner.substr(start), function_calls);
|
|
858
|
+
}
|
|
409
859
|
}
|
|
860
|
+
} else if (!content.empty()) {
|
|
861
|
+
append_lfm2_call(content, function_calls);
|
|
410
862
|
}
|
|
411
863
|
|
|
412
864
|
regular_response.erase(tool_start_pos, tool_end_pos + TOOL_CALL_END.length() - tool_start_pos);
|
|
413
|
-
tool_start_pos = tool_end_pos + TOOL_CALL_END.length();
|
|
414
865
|
} else {
|
|
415
866
|
break;
|
|
416
867
|
}
|
|
@@ -451,38 +902,47 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
451
902
|
const std::vector<std::string>& function_calls,
|
|
452
903
|
double time_to_first_token,
|
|
453
904
|
double total_time_ms,
|
|
454
|
-
double
|
|
905
|
+
double prefill_tps,
|
|
906
|
+
double decode_tps,
|
|
455
907
|
size_t prompt_tokens,
|
|
456
|
-
size_t completion_tokens
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
908
|
+
size_t completion_tokens,
|
|
909
|
+
float confidence = 0.0f,
|
|
910
|
+
bool cloud_handoff = false) {
|
|
911
|
+
std::ostringstream json;
|
|
912
|
+
json << "{";
|
|
913
|
+
json << "\"success\":true,";
|
|
914
|
+
json << "\"error\":null,";
|
|
915
|
+
json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
|
|
916
|
+
json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
|
|
917
|
+
json << "\"function_calls\":[";
|
|
918
|
+
for (size_t i = 0; i < function_calls.size(); ++i) {
|
|
919
|
+
if (i > 0) json << ",";
|
|
920
|
+
json << function_calls[i];
|
|
921
|
+
}
|
|
922
|
+
json << "],";
|
|
923
|
+
json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
|
|
924
|
+
json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
|
|
925
|
+
json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << total_time_ms << ",";
|
|
926
|
+
json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
|
|
927
|
+
json << "\"decode_tps\":" << std::fixed << std::setprecision(2) << decode_tps << ",";
|
|
928
|
+
json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
|
|
929
|
+
json << "\"prefill_tokens\":" << prompt_tokens << ",";
|
|
930
|
+
json << "\"decode_tokens\":" << completion_tokens << ",";
|
|
931
|
+
json << "\"total_tokens\":" << (prompt_tokens + completion_tokens);
|
|
932
|
+
json << "}";
|
|
933
|
+
return json.str();
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
// Joins already-serialized function-call JSON objects into a JSON array literal.
// Elements are copied verbatim and separated by a single comma; an empty input
// produces "[]".
inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
    std::string joined = "[";
    const char* separator = "";
    for (const std::string& call : calls) {
        joined += separator;
        joined += call;
        separator = ",";
    }
    joined += "]";
    return joined;
}
|
|
487
947
|
|
|
488
948
|
} // namespace ffi
|
|
@@ -494,35 +954,8 @@ extern "C" {
|
|
|
494
954
|
|
|
495
955
|
const char* cactus_get_last_error();
|
|
496
956
|
|
|
497
|
-
__attribute__((weak))
|
|
498
|
-
const char* register_app(const char* encrypted_data);
|
|
499
|
-
|
|
500
|
-
__attribute__((weak))
|
|
501
|
-
const char* get_device_id(const char* current_token);
|
|
502
|
-
|
|
503
|
-
#ifdef __cplusplus
|
|
504
|
-
}
|
|
505
|
-
#endif
|
|
506
|
-
|
|
507
957
|
#ifdef __cplusplus
|
|
508
|
-
extern "C" {
|
|
509
|
-
|
|
510
|
-
__attribute__((weak))
|
|
511
|
-
inline const char* register_app(const char* encrypted_data) {
|
|
512
|
-
(void)encrypted_data;
|
|
513
|
-
static thread_local std::string uuid_storage;
|
|
514
|
-
uuid_storage = cactus::ffi::generateUUID();
|
|
515
|
-
return uuid_storage.c_str();
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
__attribute__((weak))
|
|
519
|
-
inline const char* get_device_id(const char* current_token) {
|
|
520
|
-
(void)current_token;
|
|
521
|
-
static thread_local std::string uuid_storage;
|
|
522
|
-
uuid_storage = cactus::ffi::generateUUID();
|
|
523
|
-
return uuid_storage.c_str();
|
|
524
|
-
}
|
|
525
958
|
}
|
|
526
959
|
#endif
|
|
527
960
|
|
|
528
|
-
#endif // CACTUS_UTILS_H
|
|
961
|
+
#endif // CACTUS_UTILS_H
|