cactus-react-native 1.7.0 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/android/gradle.properties +1 -1
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/cpp/HybridCactus.cpp +49 -1
- package/cpp/HybridCactus.hpp +5 -0
- package/cpp/cactus_ffi.h +14 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_cloud.h +48 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +14 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +304 -66
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +32 -4
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +75 -11
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +123 -4
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +37 -3
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_cloud.h +48 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +14 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +304 -66
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +32 -4
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +75 -11
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +123 -4
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +37 -3
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/classes/CactusSTT.js +15 -0
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/index.js +3 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/modelRegistry.js +27 -2
- package/lib/module/modelRegistry.js.map +1 -1
- package/lib/module/native/Cactus.js +18 -0
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +2 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +2 -1
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/native/Cactus.d.ts +2 -1
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +1 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +11 -0
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +1 -0
- package/package.json +1 -1
- package/src/classes/CactusSTT.ts +20 -0
- package/src/index.tsx +6 -0
- package/src/modelRegistry.ts +42 -2
- package/src/native/Cactus.ts +32 -0
- package/src/specs/Cactus.nitro.ts +5 -0
- package/src/types/CactusSTT.ts +14 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#define CACTUS_UTILS_H
|
|
3
3
|
|
|
4
4
|
#include "../engine/engine.h"
|
|
5
|
+
#include "../models/model.h"
|
|
5
6
|
#include <string>
|
|
6
7
|
#include <vector>
|
|
7
8
|
#include <unordered_map>
|
|
@@ -12,6 +13,9 @@
|
|
|
12
13
|
#include <iostream>
|
|
13
14
|
#include <filesystem>
|
|
14
15
|
#include <cctype>
|
|
16
|
+
#include <algorithm>
|
|
17
|
+
#include <cmath>
|
|
18
|
+
#include <limits>
|
|
15
19
|
#include <memory>
|
|
16
20
|
#include <atomic>
|
|
17
21
|
#include <mutex>
|
|
@@ -101,12 +105,92 @@ inline cactus::engine::AudioProcessor::SpectrogramConfig get_whisper_spectrogram
|
|
|
101
105
|
return cfg;
|
|
102
106
|
}
|
|
103
107
|
|
|
108
|
+
/// Returns the spectrogram configuration used for Parakeet audio features.
///
/// Every field is set explicitly on a value-initialised config, grouped by
/// role (STFT framing, windowing, mel/log scaling) for readability.
inline cactus::engine::AudioProcessor::SpectrogramConfig get_parakeet_spectrogram_config() {
    using SpectrogramConfig = cactus::engine::AudioProcessor::SpectrogramConfig;
    SpectrogramConfig parakeet{};

    // STFT framing.
    parakeet.n_fft = 512;
    parakeet.frame_length = 400;
    parakeet.hop_length = 160;
    parakeet.center = true;
    parakeet.pad_mode = "constant";
    parakeet.onesided = true;

    // Windowing / input conditioning.
    parakeet.hann_periodic = false;
    parakeet.remove_dc_offset = false;
    parakeet.dither = 0.0f;

    // Power spectrum and log-mel scaling.
    parakeet.power = 2.0f;
    parakeet.mel_floor = 5.960464477539063e-08f;  // 2^-24 guard value used by HF Parakeet.
    parakeet.log_mel = "log";
    parakeet.reference = 1.0f;
    parakeet.min_value = 1e-10f;

    return parakeet;
}
|
|
126
|
+
|
|
127
|
+
/// Applies a first-order pre-emphasis filter to `waveform` in place.
///
/// For every index i >= 1 the sample becomes x[i] - coefficient * x[i - 1],
/// where x is the *unfiltered* signal; the first sample is left untouched.
/// Inputs shorter than two samples, or a coefficient of exactly 0, are no-ops.
///
/// @param waveform     Samples to filter (modified in place).
/// @param coefficient  Pre-emphasis factor; defaults to 0.97.
inline void apply_preemphasis(std::vector<float>& waveform, float coefficient = 0.97f) {
    const bool too_short = waveform.size() < 2;
    if (too_short || coefficient == 0.0f) {
        return;
    }

    // Walk forward while remembering the raw predecessor, so each output only
    // ever depends on unfiltered input samples.
    float previous_raw = waveform.front();
    for (auto sample = waveform.begin() + 1; sample != waveform.end(); ++sample) {
        const float current_raw = *sample;
        *sample = current_raw - coefficient * previous_raw;
        previous_raw = current_raw;
    }
}
|
|
135
|
+
|
|
136
|
+
/// Normalises each mel bin of a log-mel buffer to zero mean and unit variance, in place.
///
/// `mel` is indexed as mel[bin * num_frames + frame], i.e. each mel bin owns a
/// contiguous run of frames. For every bin the mean is removed and the values
/// are scaled by 1 / sqrt(sample_variance + epsilon), where the sample variance
/// divides by (num_frames - 1), or by 1 when there is a single frame.
///
/// The buffer is left untouched when it is empty, `num_mels` is 0, or the size
/// is not a multiple of `num_mels`.
///
/// @param mel       Log-mel values (modified in place).
/// @param num_mels  Number of mel bins.
/// @param epsilon   Added to the variance before the square root.
inline void normalize_parakeet_log_mel(std::vector<float>& mel, size_t num_mels, float epsilon = 1e-5f) {
    const bool shape_ok = !mel.empty() && num_mels != 0 && (mel.size() % num_mels) == 0;
    if (!shape_ok) {
        return;
    }
    const size_t frames_per_bin = mel.size() / num_mels;
    if (frames_per_bin == 0) {
        return;
    }

    const float frame_count = static_cast<float>(frames_per_bin);
    // Bessel-corrected divisor, clamped so a single frame does not divide by zero.
    const float variance_divisor =
        static_cast<float>(frames_per_bin > 1 ? frames_per_bin - 1 : static_cast<size_t>(1));

    float* row = mel.data();
    for (size_t bin = 0; bin < num_mels; ++bin, row += frames_per_bin) {
        float* const row_end = row + frames_per_bin;

        float sum = 0.0f;
        for (const float* p = row; p != row_end; ++p) {
            sum += *p;
        }
        const float mean = sum / frame_count;

        float squared_deviation = 0.0f;
        for (const float* p = row; p != row_end; ++p) {
            const float centered = *p - mean;
            squared_deviation += centered * centered;
        }

        const float scale = 1.0f / std::sqrt((squared_deviation / variance_divisor) + epsilon);
        for (float* p = row; p != row_end; ++p) {
            *p = (*p - mean) * scale;
        }
    }
}
|
|
165
|
+
|
|
166
|
+
/// Keeps only the first `valid_frames` frames of every mel bin.
///
/// `mel` is indexed as mel[bin * total_frames + frame]. After the call it is
/// indexed as mel[bin * valid_frames + frame] and holds
/// num_mels * valid_frames values.
///
/// Nothing happens when the buffer is empty, `num_mels` is 0, the size is not a
/// multiple of `num_mels`, `valid_frames` is 0, or `valid_frames` already
/// covers every frame.
///
/// @param mel           Mel buffer to shrink (replaced on success).
/// @param num_mels      Number of mel bins.
/// @param valid_frames  Frames to keep per bin.
inline void trim_mel_frames(std::vector<float>& mel, size_t num_mels, size_t valid_frames) {
    if (mel.empty() || num_mels == 0) {
        return;
    }
    if ((mel.size() % num_mels) != 0) {
        return;
    }

    const size_t frames_per_bin = mel.size() / num_mels;
    const bool nothing_to_trim = (valid_frames == 0) || (valid_frames >= frames_per_bin);
    if (nothing_to_trim) {
        return;
    }

    std::vector<float> kept;
    kept.reserve(num_mels * valid_frames);
    // The size is an exact multiple of frames_per_bin, so stepping row by row
    // lands precisely on cend().
    for (auto row = mel.cbegin(); row != mel.cend(); row += frames_per_bin) {
        kept.insert(kept.end(), row, row + valid_frames);
    }
    mel.swap(kept);
}
|
|
182
|
+
|
|
104
183
|
} // namespace audio
|
|
105
184
|
} // namespace cactus
|
|
106
185
|
|
|
107
186
|
namespace cactus {
|
|
108
187
|
namespace ffi {
|
|
109
188
|
|
|
189
|
+
/// Reports whether the environment variable `key` is set to an "enabled" value.
///
/// A variable counts as enabled when it is set, non-empty, and not exactly the
/// single character "0". Any other text (including e.g. "false" or "00") is
/// treated as enabled.
///
/// @param key  Name of the environment variable to query.
/// @return true when the variable is set to an enabled value, false otherwise.
inline bool env_flag_enabled(const char* key) {
    const char* raw = std::getenv(key);
    if (raw == nullptr || *raw == '\0') {
        return false;  // unset or empty
    }
    const bool is_literal_zero = (raw[0] == '0') && (raw[1] == '\0');
    return !is_literal_zero;
}
|
|
193
|
+
|
|
110
194
|
inline std::string generateUUID() {
|
|
111
195
|
#ifdef __APPLE__
|
|
112
196
|
uuid_t uuid;
|
|
@@ -114,6 +198,25 @@ inline std::string generateUUID() {
|
|
|
114
198
|
char uuid_str[37];
|
|
115
199
|
uuid_unparse_lower(uuid, uuid_str);
|
|
116
200
|
return std::string(uuid_str);
|
|
201
|
+
#else
|
|
202
|
+
static std::random_device rd;
|
|
203
|
+
static std::mt19937 gen(rd());
|
|
204
|
+
static std::uniform_int_distribution<> dis(0, 15);
|
|
205
|
+
static std::uniform_int_distribution<> dis2(8, 11);
|
|
206
|
+
|
|
207
|
+
std::stringstream ss;
|
|
208
|
+
ss << std::hex;
|
|
209
|
+
for (int i = 0; i < 8; i++) ss << dis(gen);
|
|
210
|
+
ss << "-";
|
|
211
|
+
for (int i = 0; i < 4; i++) ss << dis(gen);
|
|
212
|
+
ss << "-4";
|
|
213
|
+
for (int i = 0; i < 3; i++) ss << dis(gen);
|
|
214
|
+
ss << "-";
|
|
215
|
+
ss << dis2(gen);
|
|
216
|
+
for (int i = 0; i < 3; i++) ss << dis(gen);
|
|
217
|
+
ss << "-";
|
|
218
|
+
for (int i = 0; i < 12; i++) ss << dis(gen);
|
|
219
|
+
return ss.str();
|
|
117
220
|
#endif
|
|
118
221
|
}
|
|
119
222
|
|
|
@@ -150,6 +253,130 @@ inline std::string escape_json_string(const std::string& s) {
|
|
|
150
253
|
return o.str();
|
|
151
254
|
}
|
|
152
255
|
|
|
256
|
+
|
|
257
|
+
/// Returns a copy of `s` with leading and trailing whitespace removed.
///
/// Whitespace is whatever std::isspace reports for the current C locale.
/// Interior whitespace is preserved; an all-whitespace input yields "".
inline std::string trim_string(const std::string& s) {
    // Cast through unsigned char: std::isspace is undefined for negative values.
    const auto is_ws = [](char ch) {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
    };

    auto first = s.begin();
    auto last = s.end();
    while (first != last && is_ws(*first)) {
        ++first;
    }
    while (last != first && is_ws(*(last - 1))) {
        --last;
    }
    return std::string(first, last);
}
|
|
264
|
+
|
|
265
|
+
/// Returns the value of environment variable `key`, or `fallback` when the
/// variable is unset or empty.
///
/// Null pointers are tolerated: a null `key` is treated as "unset", and a null
/// `fallback` yields an empty string. (Constructing std::string from a null
/// pointer, or passing null to std::getenv, is undefined behaviour.)
///
/// @param key       Name of the environment variable (may be null).
/// @param fallback  Value to use when the variable is unset/empty (may be null).
/// @return The variable's value, the fallback, or "" if neither is available.
inline std::string env_or_default(const char* key, const char* fallback) {
    const char* v = (key != nullptr) ? std::getenv(key) : nullptr;
    if (v && v[0] != '\0') return std::string(v);
    return (fallback != nullptr) ? std::string(fallback) : std::string();
}
|
|
270
|
+
|
|
271
|
+
/// Extracts the string value of the first `"key": "..."` member found in `json`.
///
/// This is a lightweight textual scan, not a full JSON parser: the first
/// occurrence of `"key"` followed by optional whitespace and a colon wins,
/// wherever it appears in the text.
///
/// Escape sequences in the value are decoded. The simple escapes
/// (\" \\ \/ \b \f \n \r \t) map to their characters, and \uXXXX is decoded to
/// UTF-8, with surrogate pairs combined into one code point and unpaired
/// surrogates replaced by U+FFFD. An unknown escape, or a malformed \u, keeps
/// the character following the backslash.
///
/// @param json  Text to search.
/// @param key   Member name, without quotes.
/// @return The decoded value, or an empty string when the key is missing, the
///         value is not a string, or the string is unterminated.
inline std::string json_string_field(const std::string& json, const std::string& key) {
    const std::string quoted_key = "\"" + key + "\"";

    // Find `"key"` followed by optional whitespace and ':'. JSON allows
    // whitespace on either side of the colon.
    size_t i = std::string::npos;
    for (size_t pos = json.find(quoted_key); pos != std::string::npos;
         pos = json.find(quoted_key, pos + 1)) {
        size_t after = pos + quoted_key.size();
        while (after < json.size() && std::isspace(static_cast<unsigned char>(json[after]))) ++after;
        if (after < json.size() && json[after] == ':') {
            i = after + 1;
            break;
        }
    }
    if (i == std::string::npos) return {};

    while (i < json.size() && std::isspace(static_cast<unsigned char>(json[i]))) i++;
    if (i >= json.size() || json[i] != '"') return {};
    ++i;

    std::string out;
    out.reserve(128);

    // Reads four hex digits starting at `at`; returns false if they are not all present.
    const auto read_hex4 = [&json](size_t at, unsigned& value) -> bool {
        if (at + 4 > json.size()) return false;
        unsigned parsed = 0;
        for (size_t k = 0; k < 4; ++k) {
            const unsigned char h = static_cast<unsigned char>(json[at + k]);
            if (!std::isxdigit(h)) return false;
            const int digit = std::isdigit(h) ? (h - '0') : (std::tolower(h) - 'a' + 10);
            parsed = parsed * 16 + static_cast<unsigned>(digit);
        }
        value = parsed;
        return true;
    };

    // Appends the UTF-8 encoding of a Unicode code point.
    const auto append_utf8 = [&out](unsigned cp) {
        if (cp < 0x80) {
            out.push_back(static_cast<char>(cp));
        } else if (cp < 0x800) {
            out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        } else if (cp < 0x10000) {
            out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        } else {
            out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
    };

    while (i < json.size()) {
        const char c = json[i++];
        if (c == '"') return out;
        if (c == '\\' && i < json.size()) {
            const char e = json[i++];
            switch (e) {
                case '"': out.push_back('"'); break;
                case '\\': out.push_back('\\'); break;
                case '/': out.push_back('/'); break;
                case 'b': out.push_back('\b'); break;
                case 'f': out.push_back('\f'); break;
                case 'n': out.push_back('\n'); break;
                case 'r': out.push_back('\r'); break;
                case 't': out.push_back('\t'); break;
                case 'u': {
                    unsigned cp = 0;
                    if (!read_hex4(i, cp)) {
                        // Malformed \u: keep the lenient pass-through behaviour.
                        out.push_back(e);
                        break;
                    }
                    i += 4;
                    if (cp >= 0xD800 && cp <= 0xDBFF) {
                        // High surrogate: needs a following \uDC00-\uDFFF to form one code point.
                        unsigned low = 0;
                        if (i + 1 < json.size() && json[i] == '\\' && json[i + 1] == 'u' &&
                            read_hex4(i + 2, low) && low >= 0xDC00 && low <= 0xDFFF) {
                            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                            i += 6;
                        } else {
                            cp = 0xFFFD;
                        }
                    } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
                        cp = 0xFFFD;  // stray low surrogate
                    }
                    append_utf8(cp);
                    break;
                }
                default: out.push_back(e); break;
            }
            continue;
        }
        out.push_back(c);
    }
    // Ran off the end without a closing quote.
    return {};
}
|
|
305
|
+
|
|
306
|
+
/// Extracts the raw text of the array value of the first `"key": [...]` member
/// found in `json`, including the enclosing brackets.
///
/// This is a lightweight textual scan, not a full JSON parser: the first
/// occurrence of `"key"` followed by optional whitespace and a colon wins.
/// Nested arrays are balanced, and brackets that appear inside string literals
/// (including strings with escaped quotes) are ignored.
///
/// @param json  Text to search.
/// @param key   Member name, without quotes.
/// @return The array text, or "[]" when the key is missing, the value is not an
///         array, or the array is never closed.
inline std::string json_array_field(const std::string& json, const std::string& key) {
    const std::string quoted_key = "\"" + key + "\"";

    // Find `"key"` followed by optional whitespace and ':'.
    size_t start = std::string::npos;
    for (size_t pos = json.find(quoted_key); pos != std::string::npos;
         pos = json.find(quoted_key, pos + 1)) {
        size_t after = pos + quoted_key.size();
        while (after < json.size() && std::isspace(static_cast<unsigned char>(json[after]))) ++after;
        if (after < json.size() && json[after] == ':') {
            start = after + 1;
            break;
        }
    }
    if (start == std::string::npos) return "[]";

    while (start < json.size() && std::isspace(static_cast<unsigned char>(json[start]))) ++start;
    if (start >= json.size() || json[start] != '[') return "[]";

    int depth = 0;
    bool in_str = false;
    bool esc = false;
    for (size_t end = start; end < json.size(); ++end) {
        const char c = json[end];
        if (in_str) {
            // Inside a string literal only an unescaped quote is significant.
            if (esc) esc = false;
            else if (c == '\\') esc = true;
            else if (c == '"') in_str = false;
            continue;
        }
        if (c == '"') {
            in_str = true;
        } else if (c == '[') {
            ++depth;
        } else if (c == ']' && --depth == 0) {
            return json.substr(start, end - start + 1);
        }
    }
    // Unbalanced input: report "no array" rather than a truncated fragment.
    return "[]";
}
|
|
323
|
+
|
|
324
|
+
/// Splits the text of a JSON array of objects into one string per object.
///
/// The input must start with '[' and end with ']' (no surrounding whitespace);
/// otherwise an empty vector is returned. Elements are separated by commas
/// and/or whitespace. Braces inside string literals are ignored when matching
/// an object's closing brace.
///
/// Scanning stops at the first element that does not begin with '{', and an
/// object that is never closed is dropped; objects collected before that
/// point are still returned.
///
/// @param array_json  Array text such as `[{"a":1},{"b":2}]`.
/// @return The raw text of each top-level object, in order.
inline std::vector<std::string> split_json_array(const std::string& array_json) {
    std::vector<std::string> objects;

    const size_t len = array_json.size();
    const bool bracketed = len >= 2 && array_json.front() == '[' && array_json.back() == ']';
    if (!bracketed) {
        return objects;
    }

    const size_t closing = len - 1;  // index of the final ']'
    size_t cursor = 1;
    while (cursor < closing) {
        // Skip separators between elements.
        while (cursor < closing) {
            const char sep = array_json[cursor];
            if (sep != ',' && !std::isspace(static_cast<unsigned char>(sep))) {
                break;
            }
            ++cursor;
        }
        if (cursor >= closing || array_json[cursor] != '{') {
            break;
        }

        const size_t object_begin = cursor;
        int nesting = 0;
        bool inside_string = false;
        bool escaped = false;
        bool closed = false;
        while (cursor < len && !closed) {
            const char ch = array_json[cursor++];
            if (inside_string) {
                if (escaped) {
                    escaped = false;
                } else if (ch == '\\') {
                    escaped = true;
                } else if (ch == '"') {
                    inside_string = false;
                }
            } else if (ch == '"') {
                inside_string = true;
            } else if (ch == '{') {
                ++nesting;
            } else if (ch == '}' && --nesting == 0) {
                // cursor already points one past the closing brace.
                objects.push_back(array_json.substr(object_begin, cursor - object_begin));
                closed = true;
            }
        }
    }
    return objects;
}
|
|
360
|
+
|
|
361
|
+
inline std::string serialize_tools_json(const std::vector<ToolFunction>& tools) {
|
|
362
|
+
if (tools.empty()) return "";
|
|
363
|
+
std::ostringstream oss;
|
|
364
|
+
oss << "[";
|
|
365
|
+
for (size_t i = 0; i < tools.size(); ++i) {
|
|
366
|
+
if (i > 0) oss << ",";
|
|
367
|
+
oss << "{\"type\":\"function\",\"function\":{";
|
|
368
|
+
oss << "\"name\":\"" << escape_json_string(tools[i].name) << "\",";
|
|
369
|
+
oss << "\"description\":\"" << escape_json_string(tools[i].description) << "\"";
|
|
370
|
+
auto it = tools[i].parameters.find("schema");
|
|
371
|
+
if (it != tools[i].parameters.end()) {
|
|
372
|
+
oss << ",\"parameters\":" << it->second;
|
|
373
|
+
}
|
|
374
|
+
oss << "}}";
|
|
375
|
+
}
|
|
376
|
+
oss << "]";
|
|
377
|
+
return oss.str();
|
|
378
|
+
}
|
|
379
|
+
|
|
153
380
|
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
|
|
154
381
|
std::ostringstream json;
|
|
155
382
|
json << "{";
|
|
@@ -324,7 +551,10 @@ inline void parse_options_json(const std::string& json,
|
|
|
324
551
|
float& confidence_threshold,
|
|
325
552
|
bool& include_stop_sequences,
|
|
326
553
|
bool& use_vad,
|
|
327
|
-
bool& telemetry_enabled
|
|
554
|
+
bool& telemetry_enabled,
|
|
555
|
+
bool* auto_handoff = nullptr,
|
|
556
|
+
size_t* cloud_timeout_ms = nullptr,
|
|
557
|
+
bool* handoff_with_images = nullptr) {
|
|
328
558
|
temperature = 0.0f;
|
|
329
559
|
top_p = 0.0f;
|
|
330
560
|
top_k = 0;
|
|
@@ -335,6 +565,9 @@ inline void parse_options_json(const std::string& json,
|
|
|
335
565
|
include_stop_sequences = false;
|
|
336
566
|
use_vad = true;
|
|
337
567
|
telemetry_enabled = true;
|
|
568
|
+
if (auto_handoff) *auto_handoff = true;
|
|
569
|
+
if (cloud_timeout_ms) *cloud_timeout_ms = 15000;
|
|
570
|
+
if (handoff_with_images) *handoff_with_images = true;
|
|
338
571
|
stop_sequences.clear();
|
|
339
572
|
|
|
340
573
|
if (json.empty()) return;
|
|
@@ -403,6 +636,32 @@ inline void parse_options_json(const std::string& json,
|
|
|
403
636
|
telemetry_enabled = (json.substr(pos, 4) == "true");
|
|
404
637
|
}
|
|
405
638
|
|
|
639
|
+
if (auto_handoff) {
|
|
640
|
+
pos = json.find("\"auto_handoff\"");
|
|
641
|
+
if (pos != std::string::npos) {
|
|
642
|
+
pos = json.find(':', pos) + 1;
|
|
643
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
644
|
+
*auto_handoff = (json.substr(pos, 4) == "true");
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
if (cloud_timeout_ms) {
|
|
649
|
+
pos = json.find("\"cloud_timeout_ms\"");
|
|
650
|
+
if (pos != std::string::npos) {
|
|
651
|
+
pos = json.find(':', pos) + 1;
|
|
652
|
+
*cloud_timeout_ms = std::stoul(json.substr(pos));
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
if (handoff_with_images) {
|
|
657
|
+
pos = json.find("\"handoff_with_images\"");
|
|
658
|
+
if (pos != std::string::npos) {
|
|
659
|
+
pos = json.find(':', pos) + 1;
|
|
660
|
+
while (pos < json.length() && std::isspace(json[pos])) pos++;
|
|
661
|
+
*handoff_with_images = (json.substr(pos, 4) == "true");
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
|
|
406
665
|
pos = json.find("\"stop_sequences\"");
|
|
407
666
|
if (pos != std::string::npos) {
|
|
408
667
|
pos = json.find('[', pos);
|
|
@@ -422,31 +681,8 @@ inline void parse_options_json(const std::string& json,
|
|
|
422
681
|
}
|
|
423
682
|
}
|
|
424
683
|
|
|
425
|
-
inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tools) {
|
|
426
|
-
if (tools.empty()) return "";
|
|
427
|
-
std::string formatted_tools_json;
|
|
428
|
-
for (size_t i = 0; i < tools.size(); i++) {
|
|
429
|
-
if (i > 0) formatted_tools_json += "\n";
|
|
430
|
-
formatted_tools_json += "{\"type\":\"function\",\"function\":{\"name\":\""
|
|
431
|
-
+ tools[i].name
|
|
432
|
-
+ "\",\"description\":\""
|
|
433
|
-
+ tools[i].description + "\"";
|
|
434
|
-
if (tools[i].parameters.find("schema") != tools[i].parameters.end()) {
|
|
435
|
-
formatted_tools_json += ",\"parameters\":" + tools[i].parameters.at("schema");
|
|
436
|
-
}
|
|
437
|
-
formatted_tools_json += "}}";
|
|
438
|
-
}
|
|
439
|
-
return formatted_tools_json;
|
|
440
|
-
}
|
|
441
|
-
|
|
442
684
|
static inline std::string trim_lfm2_slice(const std::string& value, size_t begin, size_t end) {
|
|
443
|
-
|
|
444
|
-
begin++;
|
|
445
|
-
}
|
|
446
|
-
while (end > begin && std::isspace(static_cast<unsigned char>(value[end - 1]))) {
|
|
447
|
-
end--;
|
|
448
|
-
}
|
|
449
|
-
return value.substr(begin, end - begin);
|
|
685
|
+
return trim_string(value.substr(begin, end - begin));
|
|
450
686
|
}
|
|
451
687
|
|
|
452
688
|
static inline void append_lfm2_call(const std::string& entry,
|
|
@@ -577,23 +813,49 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
577
813
|
|
|
578
814
|
if (!content.empty() && content.front() == '[' && content.back() == ']') {
|
|
579
815
|
std::string inner = content.substr(1, content.size() - 2);
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
816
|
+
|
|
817
|
+
size_t inner_first = inner.find_first_not_of(" \t\n\r");
|
|
818
|
+
if (inner_first != std::string::npos && inner[inner_first] == '{') {
|
|
819
|
+
size_t pos = inner_first;
|
|
820
|
+
while (pos < inner.size()) {
|
|
821
|
+
if (inner[pos] == '{') {
|
|
822
|
+
int brace_depth = 1;
|
|
823
|
+
size_t obj_start = pos;
|
|
824
|
+
pos++;
|
|
825
|
+
while (pos < inner.size() && brace_depth > 0) {
|
|
826
|
+
if (inner[pos] == '{') brace_depth++;
|
|
827
|
+
else if (inner[pos] == '}') brace_depth--;
|
|
828
|
+
pos++;
|
|
829
|
+
}
|
|
830
|
+
if (brace_depth == 0) {
|
|
831
|
+
std::string json_obj = inner.substr(obj_start, pos - obj_start);
|
|
832
|
+
if (json_obj.find("\"name\"") != std::string::npos) {
|
|
833
|
+
function_calls.push_back(json_obj);
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
} else {
|
|
837
|
+
pos++;
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
} else {
|
|
841
|
+
size_t start = 0;
|
|
842
|
+
int paren_depth = 0;
|
|
843
|
+
|
|
844
|
+
for (size_t i = 0; i < inner.size(); ++i) {
|
|
845
|
+
char c = inner[i];
|
|
846
|
+
if (c == '(') {
|
|
847
|
+
paren_depth++;
|
|
848
|
+
} else if (c == ')' && paren_depth > 0) {
|
|
849
|
+
paren_depth--;
|
|
850
|
+
} else if (c == ',' && paren_depth == 0) {
|
|
851
|
+
append_lfm2_call(inner.substr(start, i - start), function_calls);
|
|
852
|
+
start = i + 1;
|
|
853
|
+
}
|
|
592
854
|
}
|
|
593
|
-
}
|
|
594
855
|
|
|
595
|
-
|
|
596
|
-
|
|
856
|
+
if (start < inner.size()) {
|
|
857
|
+
append_lfm2_call(inner.substr(start), function_calls);
|
|
858
|
+
}
|
|
597
859
|
}
|
|
598
860
|
} else if (!content.empty()) {
|
|
599
861
|
append_lfm2_call(content, function_calls);
|
|
@@ -648,7 +910,7 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
648
910
|
bool cloud_handoff = false) {
|
|
649
911
|
std::ostringstream json;
|
|
650
912
|
json << "{";
|
|
651
|
-
json << "\"success\":
|
|
913
|
+
json << "\"success\":true,";
|
|
652
914
|
json << "\"error\":null,";
|
|
653
915
|
json << "\"cloud_handoff\":" << (cloud_handoff ? "true" : "false") << ",";
|
|
654
916
|
json << "\"response\":\"" << escape_json_string(regular_response) << "\",";
|
|
@@ -671,30 +933,6 @@ inline std::string construct_response_json(const std::string& regular_response,
|
|
|
671
933
|
return json.str();
|
|
672
934
|
}
|
|
673
935
|
|
|
674
|
-
inline std::string construct_cloud_handoff_json(float confidence,
|
|
675
|
-
double time_to_first_token,
|
|
676
|
-
double prefill_tps,
|
|
677
|
-
size_t prompt_tokens) {
|
|
678
|
-
std::ostringstream json;
|
|
679
|
-
json << "{";
|
|
680
|
-
json << "\"success\":false,";
|
|
681
|
-
json << "\"error\":null,";
|
|
682
|
-
json << "\"cloud_handoff\":true,";
|
|
683
|
-
json << "\"response\":null,";
|
|
684
|
-
json << "\"function_calls\":[],";
|
|
685
|
-
json << "\"confidence\":" << std::fixed << std::setprecision(4) << confidence << ",";
|
|
686
|
-
json << "\"time_to_first_token_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
|
|
687
|
-
json << "\"total_time_ms\":" << std::fixed << std::setprecision(2) << time_to_first_token << ",";
|
|
688
|
-
json << "\"prefill_tps\":" << std::fixed << std::setprecision(2) << prefill_tps << ",";
|
|
689
|
-
json << "\"decode_tps\":0.0,";
|
|
690
|
-
json << "\"ram_usage_mb\":" << std::fixed << std::setprecision(2) << get_ram_usage_mb() << ",";
|
|
691
|
-
json << "\"prefill_tokens\":" << prompt_tokens << ",";
|
|
692
|
-
json << "\"decode_tokens\":0,";
|
|
693
|
-
json << "\"total_tokens\":" << prompt_tokens;
|
|
694
|
-
json << "}";
|
|
695
|
-
return json.str();
|
|
696
|
-
}
|
|
697
|
-
|
|
698
936
|
inline std::string serialize_function_calls(const std::vector<std::string>& calls) {
|
|
699
937
|
if (calls.empty()) return "[]";
|
|
700
938
|
std::ostringstream oss;
|
|
@@ -720,4 +958,4 @@ const char* cactus_get_last_error();
|
|
|
720
958
|
}
|
|
721
959
|
#endif
|
|
722
960
|
|
|
723
|
-
#endif // CACTUS_UTILS_H
|
|
961
|
+
#endif // CACTUS_UTILS_H
|
|
@@ -56,6 +56,12 @@ struct Config {
|
|
|
56
56
|
uint32_t num_shared_experts = 0;
|
|
57
57
|
uint32_t num_top_experts = 0;
|
|
58
58
|
uint32_t moe_every_n_layers = 0;
|
|
59
|
+
uint32_t moe_intermediate_dim = 0;
|
|
60
|
+
uint32_t num_dense_layers = 0;
|
|
61
|
+
uint32_t num_experts_per_tok = 0;
|
|
62
|
+
bool norm_topk_prob = false;
|
|
63
|
+
bool use_expert_bias = false;
|
|
64
|
+
float routed_scaling_factor = 1.0f;
|
|
59
65
|
bool tie_word_embeddings = true;
|
|
60
66
|
|
|
61
67
|
uint32_t vision_hidden_dim = 0;
|
|
@@ -93,8 +99,22 @@ struct Config {
|
|
|
93
99
|
uint32_t num_encoder_layers = 0;
|
|
94
100
|
uint32_t num_decoder_layers = 0;
|
|
95
101
|
float partial_rotary_factor = 0.0f;
|
|
96
|
-
|
|
97
|
-
|
|
102
|
+
uint32_t pad_token_id = 0;
|
|
103
|
+
uint32_t conv_kernel_size = 0;
|
|
104
|
+
uint32_t subsampling_conv_kernel_size = 0;
|
|
105
|
+
uint32_t subsampling_conv_stride = 0;
|
|
106
|
+
uint32_t subsampling_conv_channels = 0;
|
|
107
|
+
uint32_t subsampling_factor = 0;
|
|
108
|
+
uint32_t num_mel_bins = 80;
|
|
109
|
+
std::string encoder_hidden_act = "silu";
|
|
110
|
+
uint32_t predictor_hidden_dim = 0;
|
|
111
|
+
uint32_t predictor_num_layers = 0;
|
|
112
|
+
uint32_t tdt_joint_dim = 0;
|
|
113
|
+
uint32_t tdt_num_durations = 0;
|
|
114
|
+
uint32_t tdt_blank_id = 0;
|
|
115
|
+
std::vector<uint32_t> tdt_durations;
|
|
116
|
+
|
|
117
|
+
enum class ModelType {QWEN = 0, GEMMA = 1, NOMIC = 3, LFM2 = 5, SIGLIP2 = 6, WHISPER = 7, MOONSHINE = 8, SILERO_VAD = 9, PARAKEET = 10, PARAKEET_TDT = 11};
|
|
98
118
|
ModelType model_type = ModelType::QWEN;
|
|
99
119
|
|
|
100
120
|
enum class ModelVariant {DEFAULT = 0, VLM = 1, EXTRACT = 2, RAG = 3};
|
|
@@ -168,7 +188,7 @@ public:
|
|
|
168
188
|
uint32_t get_global_img_token_id() const { return global_img_token_id_; }
|
|
169
189
|
|
|
170
190
|
protected:
|
|
171
|
-
enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2, BERT, WHISPER};
|
|
191
|
+
enum class ModelType { UNKNOWN, QWEN, GEMMA, LFM2, BERT, WHISPER, PARAKEET};
|
|
172
192
|
ModelType model_type_ = ModelType::UNKNOWN;
|
|
173
193
|
enum class ModelVariant { DEFAULT, VLM, EXTRACT, RAG};
|
|
174
194
|
ModelVariant model_variant_ = ModelVariant::DEFAULT;
|
|
@@ -366,7 +386,6 @@ struct KVCache {
|
|
|
366
386
|
size_t num_tokens, size_t kv_heads, size_t head_dim);
|
|
367
387
|
|
|
368
388
|
bool is_empty() const { return current_seq_len == 0; }
|
|
369
|
-
bool is_int8() const { return precision == Precision::INT8; }
|
|
370
389
|
void* get_key_ptr(size_t layer);
|
|
371
390
|
void* get_value_ptr(size_t layer);
|
|
372
391
|
|
|
@@ -684,6 +703,8 @@ public:
|
|
|
684
703
|
float reference = 1.0f;
|
|
685
704
|
float min_value = 1e-10f;
|
|
686
705
|
bool remove_dc_offset = false;
|
|
706
|
+
float preemphasis = 0.0f;
|
|
707
|
+
bool hann_periodic = true;
|
|
687
708
|
};
|
|
688
709
|
|
|
689
710
|
AudioProcessor();
|
|
@@ -696,6 +717,11 @@ public:
|
|
|
696
717
|
const std::vector<float>& waveform,
|
|
697
718
|
const SpectrogramConfig& config);
|
|
698
719
|
|
|
720
|
+
static std::vector<float> compute_irfft(
|
|
721
|
+
const std::vector<float>& complex_input,
|
|
722
|
+
size_t n,
|
|
723
|
+
const char* norm = "backward");
|
|
724
|
+
|
|
699
725
|
const std::vector<float>& get_mel_filters() const { return mel_filters_; }
|
|
700
726
|
|
|
701
727
|
size_t get_num_mel_filters() const { return num_mel_filters_; }
|
|
@@ -721,6 +747,8 @@ namespace index {
|
|
|
721
747
|
/// A single query hit: the matched document's id paired with its score.
struct QueryResult {
    int doc_id;   ///< Identifier of the matched document.
    float score;  ///< Score attached to the match (NOTE(review): scale/ordering not visible here).

    /// Constructs a result from its two fields. Because a constructor is
    /// declared, the type is no longer an aggregate and must be built through it.
    QueryResult(int doc_id_in, float score_in) : doc_id{doc_id_in}, score{score_in} {}
};
|
|
725
753
|
|
|
726
754
|
struct QueryOptions {
|