cactus-react-native 1.7.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/cpp/HybridCactus.cpp +49 -1
- package/cpp/HybridCactus.hpp +5 -0
- package/cpp/cactus_ffi.h +14 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_cloud.h +48 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +14 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +304 -66
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +32 -4
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +75 -11
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +123 -4
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +37 -3
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_cloud.h +48 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +14 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +304 -66
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +32 -4
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +75 -11
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +123 -4
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +37 -3
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/classes/CactusSTT.js +15 -0
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/native/Cactus.js +18 -0
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +2 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +1 -1
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/native/Cactus.d.ts +2 -1
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +1 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +11 -0
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +1 -0
- package/package.json +1 -1
- package/src/classes/CactusSTT.ts +20 -0
- package/src/index.tsx +3 -0
- package/src/native/Cactus.ts +32 -0
- package/src/specs/Cactus.nitro.ts +5 -0
- package/src/types/CactusSTT.ts +14 -0
|
Binary file
|
package/cpp/HybridCactus.cpp
CHANGED
|
@@ -212,6 +212,54 @@ std::shared_ptr<Promise<std::string>> HybridCactus::transcribe(
|
|
|
212
212
|
});
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
+
/// Detects the language of an audio clip using the currently loaded model.
///
/// @param audio Either a path to an audio file (string alternative) or PCM
///        data delivered as one double per byte (vector alternative). Each
///        double is clamped to [0, 255] and narrowed to uint8_t; NaN maps to 0.
/// @param responseBufferSize Capacity of the buffer handed to the native call.
///        It arrives as a floating-point number, so it must be finite and at
///        least 1.
/// @param optionsJson Optional JSON options forwarded unchanged to the native
///        call (nullptr is passed when absent).
/// @return A promise resolving to whatever the native call wrote into the
///         response buffer, cut at the first NUL byte.
/// @throws std::runtime_error (rejecting the promise) when no model is
///         initialized, the buffer size is invalid, or the native call returns
///         a negative status.
std::shared_ptr<Promise<std::string>> HybridCactus::detectLanguage(
    const std::variant<std::vector<double>, std::string> &audio,
    double responseBufferSize,
    const std::optional<std::string> &optionsJson) {
  return Promise<std::string>::async(
      [this, audio, optionsJson, responseBufferSize]() -> std::string {
        std::lock_guard<std::mutex> lock(this->_modelMutex);

        if (!this->_model) {
          throw std::runtime_error("Cactus model is not initialized");
        }

        // Converting NaN, a negative value or infinity to size_t is undefined
        // behaviour, and a zero-sized buffer cannot hold any response. The
        // negated comparison also rejects NaN.
        std::string responseBuffer;
        if (!(responseBufferSize >= 1.0) ||
            responseBufferSize > static_cast<double>(responseBuffer.max_size())) {
          throw std::runtime_error(
              "Cactus detect language failed: invalid response buffer size");
        }
        const auto bufferSize = static_cast<size_t>(responseBufferSize);
        responseBuffer.resize(bufferSize);

        // Exactly one audio source is handed to the native call: the file path
        // when the variant holds a string, otherwise the converted PCM bytes.
        const char *audioPath = nullptr;
        std::vector<uint8_t> audioBytes;
        if (const auto *path = std::get_if<std::string>(&audio)) {
          audioPath = path->c_str();
        } else {
          const auto &samples = std::get<std::vector<double>>(audio);
          audioBytes.reserve(samples.size());
          for (const double sample : samples) {
            // `sample >= 0.0` is false for NaN, so NaN becomes 0 instead of
            // reaching the narrowing cast.
            const double bounded =
                sample >= 0.0 ? (sample > 255.0 ? 255.0 : sample) : 0.0;
            audioBytes.emplace_back(static_cast<uint8_t>(bounded));
          }
        }

        const bool fromFile = audioPath != nullptr;
        const int result = cactus_detect_language(
            this->_model, audioPath, responseBuffer.data(), bufferSize,
            optionsJson ? optionsJson->c_str() : nullptr,
            fromFile ? nullptr : audioBytes.data(),
            fromFile ? 0 : audioBytes.size());

        if (result < 0) {
          throw std::runtime_error("Cactus detect language failed: " +
                                   std::string(cactus_get_last_error()));
        }

        // Drop the unused tail of the buffer after the first NUL, if any.
        const auto terminator = responseBuffer.find('\0');
        if (terminator != std::string::npos) {
          responseBuffer.resize(terminator);
        }
        return responseBuffer;
      });
}
|
|
262
|
+
|
|
215
263
|
std::shared_ptr<Promise<void>> HybridCactus::streamTranscribeStart(
|
|
216
264
|
const std::optional<std::string> &optionsJson) {
|
|
217
265
|
return Promise<void>::async([this, optionsJson]() -> void {
|
|
@@ -477,7 +525,7 @@ std::shared_ptr<Promise<void>> HybridCactus::destroy() {
|
|
|
477
525
|
std::shared_ptr<Promise<void>>
|
|
478
526
|
HybridCactus::setTelemetryEnvironment(const std::string &cacheDir) {
|
|
479
527
|
return Promise<void>::async([cacheDir]() -> void {
|
|
480
|
-
cactus_set_telemetry_environment("react-native
|
|
528
|
+
cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.10.0");
|
|
481
529
|
});
|
|
482
530
|
}
|
|
483
531
|
|
package/cpp/HybridCactus.hpp
CHANGED
|
@@ -39,6 +39,11 @@ public:
|
|
|
39
39
|
double /* tokenId */)>> &callback)
|
|
40
40
|
override;
|
|
41
41
|
|
|
42
|
+
/// Asynchronously detects the language of `audio`.
///
/// `audio` carries either PCM data as doubles or a string; `responseBufferSize`
/// is the capacity requested for the response text; `optionsJson` is optional.
/// The promise resolves to the response text.
std::shared_ptr<Promise<std::string>> detectLanguage(
    const std::variant<std::vector<double>, std::string> &audio,
    double responseBufferSize,
    const std::optional<std::string> &optionsJson) override;
|
|
46
|
+
|
|
42
47
|
std::shared_ptr<Promise<void>>
|
|
43
48
|
streamTranscribeStart(const std::optional<std::string> &optionsJson) override;
|
|
44
49
|
|
package/cpp/cactus_ffi.h
CHANGED
|
@@ -76,6 +76,16 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
|
|
|
76
76
|
size_t pcm_buffer_size
|
|
77
77
|
);
|
|
78
78
|
|
|
79
|
+
/**
 * Detects the language of an audio input with the given model.
 *
 * Per the parameter notes, the audio comes from either a file path or an
 * in-memory PCM buffer; the unused source is passed as NULL.
 *
 * @param model            Model handle.
 * @param audio_file_path  Audio file path; NULL if using pcm_buffer.
 * @param response_buffer  Caller-provided buffer for the response.
 * @param buffer_size      Size of response_buffer.
 * @param options_json     Options JSON; optional.
 * @param pcm_buffer       PCM data; NULL if using audio_file_path.
 * @param pcm_buffer_size  Size of pcm_buffer.
 * @return Status code. NOTE(review): the React Native wrapper treats a
 *         negative value as failure and then reads cactus_get_last_error() --
 *         confirm against the implementation.
 */
CACTUS_FFI_EXPORT int cactus_detect_language(
    cactus_model_t model,
    const char* audio_file_path,
    char* response_buffer,
    size_t buffer_size,
    const char* options_json,
    const uint8_t* pcm_buffer,
    size_t pcm_buffer_size
);
|
|
88
|
+
|
|
79
89
|
CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_start(
|
|
80
90
|
cactus_model_t model,
|
|
81
91
|
const char* options_json // optional
|
|
@@ -189,7 +199,10 @@ CACTUS_FFI_EXPORT void cactus_index_destroy(cactus_index_t index);
|
|
|
189
199
|
|
|
190
200
|
CACTUS_FFI_EXPORT const char* cactus_get_last_error(void);
|
|
191
201
|
|
|
192
|
-
CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location);
|
|
202
|
+
CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location, const char* version);
|
|
203
|
+
CACTUS_FFI_EXPORT void cactus_set_app_id(const char* app_id);
|
|
204
|
+
CACTUS_FFI_EXPORT void cactus_telemetry_flush(void);
|
|
205
|
+
CACTUS_FFI_EXPORT void cactus_telemetry_shutdown(void);
|
|
193
206
|
|
|
194
207
|
#ifdef __cplusplus
|
|
195
208
|
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#ifndef CACTUS_CLOUD_H
|
|
2
|
+
#define CACTUS_CLOUD_H
|
|
3
|
+
|
|
4
|
+
#include "cactus_utils.h"
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
namespace cactus {
|
|
9
|
+
namespace ffi {
|
|
10
|
+
|
|
11
|
+
struct CloudResponse {
|
|
12
|
+
std::string transcript;
|
|
13
|
+
std::string api_key_hash;
|
|
14
|
+
bool used_cloud = false;
|
|
15
|
+
std::string error;
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
struct CloudCompletionRequest {
|
|
19
|
+
std::vector<cactus::engine::ChatMessage> messages;
|
|
20
|
+
std::vector<ToolFunction> tools;
|
|
21
|
+
std::string local_output;
|
|
22
|
+
std::vector<std::string> local_function_calls;
|
|
23
|
+
bool has_images = false;
|
|
24
|
+
std::string cloud_key;
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
struct CloudCompletionResult {
|
|
28
|
+
bool ok = false;
|
|
29
|
+
bool used_cloud = false;
|
|
30
|
+
std::string response;
|
|
31
|
+
std::vector<std::string> function_calls;
|
|
32
|
+
std::string error;
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
std::string cloud_base64_encode(const uint8_t* data, size_t len);
|
|
36
|
+
std::vector<uint8_t> cloud_build_wav(const uint8_t* pcm, size_t pcm_bytes);
|
|
37
|
+
std::string resolve_cloud_api_key(const char* cloud_key_param);
|
|
38
|
+
CloudResponse cloud_transcribe_request(const std::string& audio_b64,
|
|
39
|
+
const std::string& fallback_text,
|
|
40
|
+
long timeout_seconds = 15L,
|
|
41
|
+
const char* cloud_key = nullptr);
|
|
42
|
+
CloudCompletionResult cloud_complete_request(const CloudCompletionRequest& request,
|
|
43
|
+
long timeout_ms);
|
|
44
|
+
|
|
45
|
+
} // namespace ffi
|
|
46
|
+
} // namespace cactus
|
|
47
|
+
|
|
48
|
+
#endif // CACTUS_CLOUD_H
|
|
@@ -76,6 +76,16 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
|
|
|
76
76
|
size_t pcm_buffer_size
|
|
77
77
|
);
|
|
78
78
|
|
|
79
|
+
/**
 * Detects the language of an audio input with the given model.
 *
 * Per the parameter notes, the audio comes from either a file path or an
 * in-memory PCM buffer; the unused source is passed as NULL.
 *
 * @param model            Model handle.
 * @param audio_file_path  Audio file path; NULL if using pcm_buffer.
 * @param response_buffer  Caller-provided buffer for the response.
 * @param buffer_size      Size of response_buffer.
 * @param options_json     Options JSON; optional.
 * @param pcm_buffer       PCM data; NULL if using audio_file_path.
 * @param pcm_buffer_size  Size of pcm_buffer.
 * @return Status code. NOTE(review): the React Native wrapper treats a
 *         negative value as failure and then reads cactus_get_last_error() --
 *         confirm against the implementation.
 */
CACTUS_FFI_EXPORT int cactus_detect_language(
    cactus_model_t model,
    const char* audio_file_path,
    char* response_buffer,
    size_t buffer_size,
    const char* options_json,
    const uint8_t* pcm_buffer,
    size_t pcm_buffer_size
);
|
|
88
|
+
|
|
79
89
|
CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_start(
|
|
80
90
|
cactus_model_t model,
|
|
81
91
|
const char* options_json // optional
|
|
@@ -189,7 +199,10 @@ CACTUS_FFI_EXPORT void cactus_index_destroy(cactus_index_t index);
|
|
|
189
199
|
|
|
190
200
|
CACTUS_FFI_EXPORT const char* cactus_get_last_error(void);
|
|
191
201
|
|
|
192
|
-
CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location);
|
|
202
|
+
CACTUS_FFI_EXPORT void cactus_set_telemetry_environment(const char* framework, const char* cache_location, const char* version);
|
|
203
|
+
CACTUS_FFI_EXPORT void cactus_set_app_id(const char* app_id);
|
|
204
|
+
CACTUS_FFI_EXPORT void cactus_telemetry_flush(void);
|
|
205
|
+
CACTUS_FFI_EXPORT void cactus_telemetry_shutdown(void);
|
|
193
206
|
|
|
194
207
|
#ifdef __cplusplus
|
|
195
208
|
}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#define CACTUS_UTILS_H
|
|
3
3
|
|
|
4
4
|
#include "../engine/engine.h"
|
|
5
|
+
#include "../models/model.h"
|
|
5
6
|
#include <string>
|
|
6
7
|
#include <vector>
|
|
7
8
|
#include <unordered_map>
|
|
@@ -12,6 +13,9 @@
|
|
|
12
13
|
#include <iostream>
|
|
13
14
|
#include <filesystem>
|
|
14
15
|
#include <cctype>
|
|
16
|
+
#include <algorithm>
|
|
17
|
+
#include <cmath>
|
|
18
|
+
#include <limits>
|
|
15
19
|
#include <memory>
|
|
16
20
|
#include <atomic>
|
|
17
21
|
#include <mutex>
|
|
@@ -101,12 +105,92 @@ inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram
|
|
|
101
105
|
return cfg;
|
|
102
106
|
}
|
|
103
107
|
|
|
108
|
+
/// Builds the spectrogram configuration used for Parakeet models.
///
/// Starts from a value-initialized SpectrogramConfig and overrides the fields
/// listed below; any field not listed keeps its value-initialized state.
inline cactus::engine::AudioProcessor::SpectrogramConfig get_parakeet_spectrogram_config() {
    using Config = cactus::engine::AudioProcessor::SpectrogramConfig;
    Config parakeet{};

    // Framing / FFT geometry.
    parakeet.n_fft = 512;
    parakeet.frame_length = 400;
    parakeet.hop_length = 160;
    parakeet.center = true;
    parakeet.pad_mode = "constant";
    parakeet.onesided = true;
    parakeet.hann_periodic = false;

    // Signal conditioning.
    parakeet.dither = 0.0f;
    parakeet.remove_dc_offset = false;

    // Magnitude and log scaling.
    parakeet.power = 2.0f;
    parakeet.mel_floor = 5.960464477539063e-08f; // 2^-24 guard value used by HF Parakeet.
    parakeet.log_mel = "log";
    parakeet.reference = 1.0f;
    parakeet.min_value = 1e-10f;

    return parakeet;
}
|
|
126
|
+
|
|
127
|
+
/// Applies a first-order pre-emphasis filter in place:
/// y[i] = x[i] - coefficient * x[i - 1] for i >= 1; y[0] is left unchanged.
///
/// Does nothing when the waveform has fewer than two samples or when
/// `coefficient` is exactly zero.
inline void apply_preemphasis(std::vector<float>& waveform, float coefficient = 0.97f) {
    const size_t sample_count = waveform.size();
    if (sample_count < 2) {
        return;
    }
    if (coefficient == 0.0f) {
        return;
    }
    // Walk from the back so every step still reads the unfiltered predecessor.
    size_t idx = sample_count;
    while (--idx != 0) {
        waveform[idx] -= coefficient * waveform[idx - 1];
    }
}
|
|
135
|
+
|
|
136
|
+
/// Normalizes each mel band to zero mean and unit variance, in place.
///
/// `mel` is read as `num_mels` consecutive rows of equal length (band m
/// occupies indices [m * num_frames, (m + 1) * num_frames)). For each row the
/// mean is subtracted and the result is divided by
/// sqrt(variance + epsilon), where the variance uses an (n - 1) divisor
/// (or 1 when the row has a single frame).
///
/// The buffer is left untouched when it is empty, `num_mels` is zero, or the
/// size is not a multiple of `num_mels`.
inline void normalize_parakeet_log_mel(std::vector<float>& mel, size_t num_mels, float epsilon = 1e-5f) {
    const bool well_formed = !mel.empty() && num_mels != 0 && (mel.size() % num_mels) == 0;
    if (!well_formed) {
        return;
    }
    const size_t num_frames = mel.size() / num_mels;
    if (num_frames == 0) {
        return;
    }

    const float frame_count = static_cast<float>(num_frames);
    const float variance_divisor = static_cast<float>(std::max<size_t>(1, num_frames - 1));

    for (size_t band = 0; band != num_mels; ++band) {
        float* const row_begin = mel.data() + band * num_frames;
        float* const row_end = row_begin + num_frames;

        float sum = 0.0f;
        for (const float* p = row_begin; p != row_end; ++p) {
            sum += *p;
        }
        const float mean = sum / frame_count;

        float squared_deviation = 0.0f;
        for (const float* p = row_begin; p != row_end; ++p) {
            const float delta = *p - mean;
            squared_deviation += delta * delta;
        }

        const float scale = 1.0f / std::sqrt((squared_deviation / variance_divisor) + epsilon);
        for (float* p = row_begin; p != row_end; ++p) {
            *p = (*p - mean) * scale;
        }
    }
}
|
|
165
|
+
|
|
166
|
+
/// Keeps only the first `valid_frames` frames of every mel band.
///
/// `mel` is read as `num_mels` consecutive rows of equal length. The call is a
/// no-op when the buffer is empty, `num_mels` is zero, the size is not a
/// multiple of `num_mels`, `valid_frames` is zero, or `valid_frames` is not
/// smaller than the current row length.
inline void trim_mel_frames(std::vector<float>& mel, size_t num_mels, size_t valid_frames) {
    if (mel.empty() || num_mels == 0) {
        return;
    }
    if ((mel.size() % num_mels) != 0) {
        return;
    }
    const size_t total_frames = mel.size() / num_mels;
    const bool nothing_to_trim = (valid_frames == 0) || (valid_frames >= total_frames);
    if (nothing_to_trim) {
        return;
    }

    // Rebuild the buffer row by row, appending the leading part of each band.
    std::vector<float> kept;
    kept.reserve(num_mels * valid_frames);
    for (size_t band = 0; band < num_mels; ++band) {
        const float* row = mel.data() + band * total_frames;
        kept.insert(kept.end(), row, row + valid_frames);
    }
    mel.swap(kept);
}
|
|
182
|
+
|
|
104
183
|
} // namespace audio
|
|
105
184
|
} // namespace cactus
|
|
106
185
|
|
|
107
186
|
namespace cactus {
|
|
108
187
|
namespace ffi {
|
|
109
188
|
|
|
189
|
+
/// Reports whether the environment variable `key` is switched on.
///
/// A variable counts as enabled when it is set, non-empty, and not exactly
/// the single character "0".
inline bool env_flag_enabled(const char* key) {
    const char* raw = std::getenv(key);
    if (raw == nullptr) {
        return false;
    }
    if (raw[0] == '\0') {
        return false;
    }
    const bool is_literal_zero = (raw[0] == '0') && (raw[1] == '\0');
    return !is_literal_zero;
}
|
|
193
|
+
|
|
110
194
|
inline std::string generateUUID() {
|
|
111
195
|
#ifdef __APPLE__
|
|
112
196
|
uuid_t uuid;
|
|
@@ -114,6 +198,25 @@ inline std::string generateUUID() {
|
|
|
114
198
|
char uuid_str[37];
|
|
115
199
|
uuid_unparse_lower(uuid, uuid_str);
|
|
116
200
|
return std::string(uuid_str);
|
|
201
|
+
#else
|
|
202
|
+
static std::random_device rd;
|
|
203
|
+
static std::mt19937 gen(rd());
|
|
204
|
+
static std::uniform_int_distribution<> dis(0, 15);
|
|
205
|
+
static std::uniform_int_distribution<> dis2(8, 11);
|
|
206
|
+
|
|
207
|
+
std::stringstream ss;
|
|
208
|
+
ss << std::hex;
|
|
209
|
+
for (int i = 0; i < 8; i++) ss << dis(gen);
|
|
210
|
+
ss << "-";
|
|
211
|
+
for (int i = 0; i < 4; i++) ss << dis(gen);
|
|
212
|
+
ss << "-4";
|
|
213
|
+
for (int i = 0; i < 3; i++) ss << dis(gen);
|
|
214
|
+
ss << "-";
|
|
215
|
+
ss << dis2(gen);
|
|
216
|
+
for (int i = 0; i < 3; i++) ss << dis(gen);
|
|
217
|
+
ss << "-";
|
|
218
|
+
for (int i = 0; i < 12; i++) ss << dis(gen);
|
|
219
|
+
return ss.str();
|
|
117
220
|
#endif
|
|
118
221
|
}
|
|
119
222
|
|
|
@@ -150,6 +253,130 @@ inline std::string escape_json_string(const std::string& s) {
|
|
|
150
253
|
return o.str();
|
|
151
254
|
}
|
|
152
255
|
|
|
256
|
+
|
|
257
|
+
/// Returns a copy of `s` with leading and trailing whitespace removed.
/// Whitespace is whatever std::isspace accepts in the current C locale.
inline std::string trim_string(const std::string& s) {
    // Cast through unsigned char: std::isspace is undefined for negative values.
    const auto is_blank = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
    };

    size_t first = 0;
    size_t last = s.size();
    while (first < last && is_blank(s[first])) {
        ++first;
    }
    while (last > first && is_blank(s[last - 1])) {
        --last;
    }
    return s.substr(first, last - first);
}
|
|
264
|
+
|
|
265
|
+
/// Returns the value of environment variable `key`, or `fallback` when the
/// variable is unset or empty.
///
/// Null pointers are tolerated: a null `key` is treated as "unset" and a null
/// `fallback` yields an empty string. Passing either to std::getenv or the
/// std::string constructor directly would be undefined behaviour.
inline std::string env_or_default(const char* key, const char* fallback) {
    if (key != nullptr) {
        const char* value = std::getenv(key);
        if (value != nullptr && value[0] != '\0') {
            return std::string(value);
        }
    }
    return fallback != nullptr ? std::string(fallback) : std::string();
}
|
|
270
|
+
|
|
271
|
+
/// Extracts the string value of the first `"key":` occurrence in `json`.
///
/// This is a lightweight scan, not a full JSON parser: it finds the first
/// textual match of `"key":`, skips whitespace, and decodes the quoted string
/// that follows. Returns an empty string when the key is missing, the value is
/// not a string, or the string is unterminated.
///
/// Escape handling: the standard single-character escapes are decoded, and
/// `\uXXXX` is decoded to UTF-8 (surrogate pairs are combined; a lone
/// surrogate becomes U+FFFD). A `\u` that is not followed by four hex digits
/// is kept leniently as a literal 'u', and any other unknown escape yields the
/// escaped character itself.
inline std::string json_string_field(const std::string& json, const std::string& key) {
    const std::string pattern = "\"" + key + "\":";
    const size_t pos = json.find(pattern);
    if (pos == std::string::npos) return {};

    size_t i = pos + pattern.size();
    while (i < json.size() && std::isspace(static_cast<unsigned char>(json[i]))) ++i;
    if (i >= json.size() || json[i] != '"') return {};
    ++i;

    // Parses four hex digits starting at `at`; false if truncated or non-hex.
    const auto read_hex4 = [&json](size_t at, unsigned& code) -> bool {
        if (at + 4 > json.size()) return false;
        unsigned value = 0;
        for (size_t k = 0; k < 4; ++k) {
            const char h = json[at + k];
            unsigned digit = 0;
            if (h >= '0' && h <= '9') digit = static_cast<unsigned>(h - '0');
            else if (h >= 'a' && h <= 'f') digit = 10u + static_cast<unsigned>(h - 'a');
            else if (h >= 'A' && h <= 'F') digit = 10u + static_cast<unsigned>(h - 'A');
            else return false;
            value = (value << 4) | digit;
        }
        code = value;
        return true;
    };

    // Appends the UTF-8 encoding of code point `cp` (cp <= 0x10FFFF).
    const auto append_utf8 = [](std::string& dst, unsigned cp) {
        if (cp < 0x80u) {
            dst.push_back(static_cast<char>(cp));
        } else if (cp < 0x800u) {
            dst.push_back(static_cast<char>(0xC0u | (cp >> 6)));
            dst.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
        } else if (cp < 0x10000u) {
            dst.push_back(static_cast<char>(0xE0u | (cp >> 12)));
            dst.push_back(static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu)));
            dst.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
        } else {
            dst.push_back(static_cast<char>(0xF0u | (cp >> 18)));
            dst.push_back(static_cast<char>(0x80u | ((cp >> 12) & 0x3Fu)));
            dst.push_back(static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu)));
            dst.push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
        }
    };

    std::string out;
    out.reserve(128);
    while (i < json.size()) {
        const char c = json[i++];
        if (c == '"') return out;
        if (c != '\\' || i >= json.size()) {
            out.push_back(c);
            continue;
        }

        const char e = json[i++];
        switch (e) {
            case 'b': out.push_back('\b'); break;
            case 'f': out.push_back('\f'); break;
            case 'n': out.push_back('\n'); break;
            case 'r': out.push_back('\r'); break;
            case 't': out.push_back('\t'); break;
            case 'u': {
                unsigned cp = 0;
                if (!read_hex4(i, cp)) {
                    out.push_back(e);  // malformed escape: keep the lenient fallback
                    break;
                }
                i += 4;
                if (cp >= 0xD800u && cp <= 0xDBFFu) {
                    // High surrogate: valid only when a low surrogate escape follows.
                    unsigned low = 0;
                    const bool paired = i + 1 < json.size() && json[i] == '\\' && json[i + 1] == 'u' &&
                                        read_hex4(i + 2, low) && low >= 0xDC00u && low <= 0xDFFFu;
                    if (paired) {
                        cp = 0x10000u + ((cp - 0xD800u) << 10) + (low - 0xDC00u);
                        i += 6;
                    } else {
                        cp = 0xFFFDu;
                    }
                } else if (cp >= 0xDC00u && cp <= 0xDFFFu) {
                    cp = 0xFFFDu;  // unpaired low surrogate
                }
                append_utf8(out, cp);
                break;
            }
            default:
                // Covers '"', '\\', '/' and any unknown escape: emit the character itself.
                out.push_back(e);
                break;
        }
    }
    return {};
}
|
|
305
|
+
|
|
306
|
+
/// Returns the raw text of the JSON array stored under the first `"key":`
/// occurrence in `json`, brackets included.
///
/// This is a lightweight scan, not a full JSON parser. Returns the literal
/// "[]" when the key is missing, the value is not an array, or the array is
/// never closed.
///
/// Brackets inside string literals are ignored when matching the closing ']',
/// so a value such as ["a]b"] is returned intact.
inline std::string json_array_field(const std::string& json, const std::string& key) {
    const std::string pattern = "\"" + key + "\":";
    const size_t pos = json.find(pattern);
    if (pos == std::string::npos) return "[]";

    size_t start = pos + pattern.size();
    while (start < json.size() && std::isspace(static_cast<unsigned char>(json[start]))) ++start;
    if (start >= json.size() || json[start] != '[') return "[]";

    int depth = 0;
    bool in_string = false;
    bool escaped = false;
    for (size_t i = start; i < json.size(); ++i) {
        const char c = json[i];
        if (in_string) {
            // Inside a string only the closing quote matters; a backslash
            // protects the following character.
            if (escaped) escaped = false;
            else if (c == '\\') escaped = true;
            else if (c == '"') in_string = false;
            continue;
        }
        if (c == '"') {
            in_string = true;
        } else if (c == '[') {
            ++depth;
        } else if (c == ']' && --depth == 0) {
            return json.substr(start, i - start + 1);
        }
    }
    // Ran off the end without balancing the brackets: report "no array"
    // rather than handing back a truncated fragment.
    return "[]";
}
|
|
323
|
+
|
|
324
|
+
/// Splits the text of a JSON array of objects into one string per object.
///
/// `array_json` must start with '[' and end with ']'; otherwise the result is
/// empty. Elements are expected to be `{...}` objects separated by commas
/// and/or whitespace. Scanning stops at the first element that does not start
/// with '{', and an object that is never closed is dropped. Braces inside
/// string literals are ignored.
inline std::vector<std::string> split_json_array(const std::string& array_json) {
    std::vector<std::string> out;
    const size_t len = array_json.size();
    if (len < 2 || array_json.front() != '[' || array_json.back() != ']') {
        return out;
    }

    size_t cursor = 1;
    while (cursor + 1 < len) {
        // Skip separators between elements.
        for (; cursor + 1 < len; ++cursor) {
            const char ch = array_json[cursor];
            if (ch != ',' && !std::isspace(static_cast<unsigned char>(ch))) {
                break;
            }
        }
        if (cursor + 1 >= len || array_json[cursor] != '{') {
            break;
        }

        // Consume one object, tracking brace nesting outside of strings.
        const size_t object_begin = cursor;
        int nesting = 0;
        bool quoted = false;
        bool escaped = false;
        bool closed = false;
        while (cursor < len && !closed) {
            const char ch = array_json[cursor++];
            if (quoted) {
                if (escaped) {
                    escaped = false;
                } else if (ch == '\\') {
                    escaped = true;
                } else if (ch == '"') {
                    quoted = false;
                }
            } else if (ch == '"') {
                quoted = true;
            } else if (ch == '{') {
                ++nesting;
            } else if (ch == '}' && --nesting == 0) {
                // `cursor` already points one past the closing brace.
                out.push_back(array_json.substr(object_begin, cursor - object_begin));
                closed = true;
            }
        }
    }
    return out;
}
|
|
360
|
+
|
|
361
|
+
inline std::string serialize_tools_json(const std::vector<ToolFunction>& tools) {
|
|
362
|
+
if (tools.empty()) return "";
|
|
363
|
+
std::ostringstream oss;
|
|
364
|
+
oss << "[";
|
|
365
|
+
for (size_t i = 0; i < tools.size(); ++i) {
|
|
366
|
+
if (i > 0) oss << ",";
|
|
367
|
+
oss << "{\"type\":\"function\",\"function\":{";
|
|
368
|
+
oss << "\"name\":\"" << escape_json_string(tools[i].name) << "\",";
|
|
369
|
+
oss << "\"description\":\"" << escape_json_string(tools[i].description) << "\"";
|
|
370
|
+
auto it = tools[i].parameters.find("schema");
|
|
371
|
+
if (it != tools[i].parameters.end()) {
|
|
372
|
+
oss << ",\"parameters\":" << it->second;
|
|
373
|
+
}
|
|
374
|
+
oss << "}}";
|
|
375
|
+
}
|
|
376
|
+
oss << "]";
|
|
377
|
+
return oss.str();
|
|
378
|
+
}
|
|
379
|
+
|
|
153
380
|
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
|
|
154
381
|
std::ostringstream json;
|
|
155
382
|
json << "{";
|
|
@@ -324,7 +551,10 @@ inline void parse_options_json(const std::string& json,
|
|
|
324
551
|
float& confidence_threshold,
|
|
325
552
|
bool& include_stop_sequences,
|
|
326
553
|
bool& use_vad,
|
|
327
|
-
bool& telemetry_enabled
|
|
554
|
+
bool& telemetry_enabled,
|
|
555
|
+
bool* auto_handoff = nullptr,
|
|
556
|
+
size_t* cloud_timeout_ms = nullptr,
|
|
557
|
+
bool* handoff_with_images = nullptr) {
|
|
328
558
|
temperature = 0.0f;
|
|
329
559
|
top_p = 0.0f;
|
|
330
560
|
top_k = 0;
|
|
@@ -335,6 +565,9 @@ inline void parse_options_json(const std::string& json,
|
|
|
335
565
|
include_stop_sequences = false;
|
|
336
566
|
use_vad = true;
|
|
337
567
|
telemetry_enabled = true;
|
|
568
|
+
if (auto_handoff) *auto_handoff = true;
|
|
569
|
+
if (cloud_timeout_ms) *cloud_timeout_ms = 15000;
|
|
570
|
+
if (handoff_with_images) *handoff_with_images = true;
|
|
338
571
|
stop_sequences.clear();
|
|
339
572
|
|
|
340
573
|
if (json.empty()) return;
|
|
@@ -403,6 +636,32 @@ inline void parse_options_json(const std::string& json,
|
|
|
403
636
|
telemetry_enabled = (json.substr(pos, 4) == "true");
|
|
404
637
|
}
|
|
405
638
|
|
|
639
|
+
if (auto_handoff) {
|
|
640
|
+
pos = json.find("\"auto_handoff\"");
|
|
641
|
+
if (pos != std::string::npos) {
|
|
642
|
+
pos = json.find(':', pos) + 1;
|
|
643
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
644
|
+
*auto_handoff = (json.substr(pos, 4) == "true");
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
if (cloud_timeout_ms) {
|
|
649
|
+
pos = json.find("\"cloud_timeout_ms\"");
|
|
650
|
+
if (pos != std::string::npos) {
|
|
651
|
+
pos = json.find(':', pos) + 1;
|
|
652
|
+
*cloud_timeout_ms = std::stoul(json.substr(pos));
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
if (handoff_with_images) {
|
|
657
|
+
pos = json.find("\"handoff_with_images\"");
|
|
658
|
+
if (pos != std::string::npos) {
|
|
659
|
+
pos = json.find(':', pos) + 1;
|
|
660
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
661
|
+
*handoff_with_images = (json.substr(pos, 4) == "true");
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
|
|
406
665
|
pos = json.find("\"stop_sequences\"");
|
|
407
666
|
if (pos != std::string::npos) {
|
|
408
667
|
pos = json.find('[', pos);
|
|
@@ -422,31 +681,8 @@ inline void parse_options_json(const std::string& json,
|
|
|
422
681
|
}
|
|
423
682
|
}
|
|
424
683
|
|
|
425
|
-
inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tools) {
|
|
426
|
-
if (tools.empty()) return "";
|
|
427
|
-
std::string formatted_tools_json;
|
|
428
|
-
for (size_t i = 0; i < tools.size(); i++) {
|
|
429
|
-
if (i > 0) formatted_tools_json += "\n";
|
|
430
|
-
formatted_tools_json += "{\"type\":\"function\",\"function\":{\"name\":\""
|
|
431
|
-
+ tools[i].name
|
|
432
|
-
+ "\",\"description\":\""
|
|
433
|
-
+ tools[i].description + "\"";
|
|
434
|
-
if (tools[i].parameters.find("schema") != tools[i].parameters.end()) {
|
|
435
|
-
formatted_tools_json += ",\"parameters\":" + tools[i].parameters.at("schema");
|
|
436
|
-
}
|
|
437
|
-
formatted_tools_json += "}}";
|
|
438
|
-
}
|
|
439
|
-
return formatted_tools_json;
|
|
440
|
-
}
|
|
441
|
-
|
|
442
684
|
static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
|
|
443
|
-
|
|
444
|
-
begin++;
|
|
445
|
-
}
|
|
446
|
-
while (end > begin && std::isspace(static_cast<unsigned char>(value[end - 1]))) {
|
|
447
|
-
end--;
|
|
448
|
-
}
|
|
449
|
-
return value.substr(begin, end - begin);
|
|
685
|
+
return trim_string(value.substr(begin, end - begin));
|
|
450
686
|
}
|
|
451
687
|
|
|
452
688
|
static inline void append_lfm2_call(const std::string& entry,
|
|
@@ -577,23 +813,49 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
577
813
|
|
|
578
814
|
if (!content.empty() && content.front() == '[' && content.back() == ']') {
|
|
579
815
|
std::string inner = content.substr(1, content.size() - 2);
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
816
|
+
|
|
817
|
+
size_t inner_first = inner.find_first_not_of(" \t\n\r");
|
|
818
|
+
if (inner_first != std::string::npos && inner[inner_first] == '{') {
|
|
819
|
+
size_t pos = inner_first;
|
|
820
|
+
while (pos < inner.size()) {
|
|
821
|
+
if (inner[pos] == '{') {
|
|
822
|
+
int brace_depth = 1;
|
|
823
|
+
size_t obj_start = pos;
|
|
824
|
+
pos++;
|
|
825
|
+
while (pos < inner.size() && brace_depth > 0) {
|
|
826
|
+
if (inner[pos] == '{') brace_depth++;
|
|
827
|
+
else if (inner[pos] == '}') brace_depth--;
|
|
828
|
+
pos++;
|
|
829
|
+
}
|
|
830
|
+
if (brace_depth == 0) {
|
|
831
|
+
std::string json_obj = inner.substr(obj_start, pos - obj_start);
|
|
832
|
+
if (json_obj.find("\"name\"") != std::string::npos) {
|
|
833
|
+
function_calls.push_back(json_obj);
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
} else {
|
|
837
|
+
pos++;
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
} else {
|
|
841
|
+
size_t start = 0;
|
|
842
|
+
int paren_depth = 0;
|
|
843
|
+
|
|
844
|
+
for (size_t i = 0; i < inner.size(); ++i) {
|
|
845
|
+
char c = inner[i];
|
|
846
|
+
if (c == '(') {
|
|
847
|
+
paren_depth++;
|
|
848
|
+
} else if (c == ')' && paren_depth > 0) {
|
|
849
|
+
paren_depth--;
|
|
850
|
+
} else if (c == ',' && paren_depth == 0) {
|
|
851
|
+
append_lfm2_call(inner.substr(start, i - start), function_calls);
|
|
852
|
+
start = i + 1;
|
|
853
|
+
}
|
|
592
854
|
}
|
|
593
|
-
}
|
|
594
855
|
|
|
595
|
-
|
|
596
|
-
|
|
856
|
+
if (start < inner.size()) {
|
|
857
|
+
append_lfm2_call(inner.substr(start), function_calls);
|
|
858
|
+
}
|
|
597
859
|
}
|
|
598
860
|
} else if (!content.empty()) {
|
|
599
861
|
append_lfm2_call(content, function_calls);
|
|
@@ -648,7 +910,7 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
648
910
|
bool cloud_handoff = false) {
|
|
649
911
|
std::ostringstream json;
|
|
650
912
|
json << "{";
|
|
651
|
-
json << "\"success\":
|
|
913
|
+
json << "\"success\":true,";
|
|
652
914
|
json << "\"error\":null,";
|
|
653
915
|
json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
|
|
654
916
|
json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
|
|
@@ -671,30 +933,6 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
671
933
|
return json.str();
|
|
672
934
|
}
|
|
673
935
|
|
|
674
|
-
inline std::string construct_cloud_handoff_json(float confidence,
|
|
675
|
-
double time_to_first_token,
|
|
676
|
-
double prefill_tps,
|
|
677
|
-
size_t prompt_tokens) {
|
|
678
|
-
std::ostringstream json;
|
|
679
|
-
json << "{";
|
|
680
|
-
json << "\"success\":false,";
|
|
681
|
-
json << "\"error\":null,";
|
|
682
|
-
json << "\"cloud_handoff\":true,";
|
|
683
|
-
json << "\"response\":null,";
|
|
684
|
-
json << "\"function_calls\":[],";
|
|
685
|
-
json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
|
|
686
|
-
json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
|
|
687
|
-
json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
|
|
688
|
-
json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
|
|
689
|
-
json << "\"decode_tps\":0.0,";
|
|
690
|
-
json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
|
|
691
|
-
json << "\"prefill_tokens\":" << prompt_tokens << ",";
|
|
692
|
-
json << "\"decode_tokens\":0,";
|
|
693
|
-
json << "\"total_tokens\":" << prompt_tokens;
|
|
694
|
-
json << "}";
|
|
695
|
-
return json.str();
|
|
696
|
-
}
|
|
697
|
-
|
|
698
936
|
inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
|
|
699
937
|
if (calls.empty()) return "[]";
|
|
700
938
|
std::ostringstream oss;
|
|
@@ -720,4 +958,4 @@ const char* cactus_get_last_error();
|
|
|
720
958
|
}
|
|
721
959
|
#endif
|
|
722
960
|
|
|
723
|
-
#endif // CACTUS_UTILS_H
|
|
961
|
+
#endif // CACTUS_UTILS_H
|