cactus-react-native 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -27
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/cpp/HybridCactus.cpp +119 -0
- package/cpp/HybridCactus.hpp +13 -0
- package/cpp/cactus_ffi.h +24 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +24 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +41 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +66 -48
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +102 -21
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +45 -195
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +24 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +41 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +66 -48
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +102 -21
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +45 -195
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/api/Database.js +0 -92
- package/lib/module/api/Database.js.map +1 -1
- package/lib/module/classes/CactusLM.js +33 -15
- package/lib/module/classes/CactusLM.js.map +1 -1
- package/lib/module/classes/CactusSTT.js +90 -15
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/hooks/useCactusLM.js +14 -5
- package/lib/module/hooks/useCactusLM.js.map +1 -1
- package/lib/module/hooks/useCactusSTT.js +100 -4
- package/lib/module/hooks/useCactusSTT.js.map +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/models.js +336 -0
- package/lib/module/models.js.map +1 -0
- package/lib/module/native/Cactus.js +37 -0
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/module/types/CactusLM.js +2 -0
- package/lib/module/types/CactusSTT.js +2 -0
- package/lib/module/types/common.js +2 -0
- package/lib/module/types/{CactusModel.js.map → common.js.map} +1 -1
- package/lib/typescript/src/api/Database.d.ts +0 -6
- package/lib/typescript/src/api/Database.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusLM.d.ts +7 -3
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +13 -4
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
- package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusSTT.d.ts +12 -4
- package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +2 -3
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/models.d.ts +6 -0
- package/lib/typescript/src/models.d.ts.map +1 -0
- package/lib/typescript/src/native/Cactus.d.ts +6 -1
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +5 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusLM.d.ts +2 -0
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +20 -0
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/types/common.d.ts +28 -0
- package/lib/typescript/src/types/common.d.ts.map +1 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +5 -0
- package/package.json +1 -1
- package/src/api/Database.ts +0 -133
- package/src/classes/CactusLM.ts +49 -17
- package/src/classes/CactusSTT.ts +118 -17
- package/src/hooks/useCactusLM.ts +25 -5
- package/src/hooks/useCactusSTT.ts +117 -5
- package/src/index.tsx +6 -2
- package/src/models.ts +344 -0
- package/src/native/Cactus.ts +55 -0
- package/src/specs/Cactus.nitro.ts +5 -0
- package/src/types/CactusLM.ts +3 -0
- package/src/types/CactusSTT.ts +26 -0
- package/src/types/common.ts +28 -0
- package/lib/module/types/CactusModel.js +0 -2
- package/lib/module/types/CactusSTTModel.js +0 -2
- package/lib/module/types/CactusSTTModel.js.map +0 -1
- package/lib/typescript/src/types/CactusModel.d.ts +0 -13
- package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
- package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
- package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
- package/src/types/CactusModel.ts +0 -15
- package/src/types/CactusSTTModel.ts +0 -10
|
@@ -67,6 +67,30 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
|
|
|
67
67
|
size_t pcm_buffer_size
|
|
68
68
|
);
|
|
69
69
|
|
|
70
|
+
typedef void* cactus_stream_transcribe_t;
|
|
71
|
+
|
|
72
|
+
CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_init(cactus_model_t model);
|
|
73
|
+
|
|
74
|
+
CACTUS_FFI_EXPORT int cactus_stream_transcribe_insert(
|
|
75
|
+
cactus_stream_transcribe_t stream,
|
|
76
|
+
const uint8_t* pcm_buffer,
|
|
77
|
+
size_t pcm_buffer_size
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
CACTUS_FFI_EXPORT int cactus_stream_transcribe_process(
|
|
81
|
+
cactus_stream_transcribe_t stream,
|
|
82
|
+
char* response_buffer,
|
|
83
|
+
size_t buffer_size,
|
|
84
|
+
const char* options_json
|
|
85
|
+
);
|
|
86
|
+
|
|
87
|
+
CACTUS_FFI_EXPORT int cactus_stream_transcribe_finalize(
|
|
88
|
+
cactus_stream_transcribe_t stream,
|
|
89
|
+
char* response_buffer,
|
|
90
|
+
size_t buffer_size
|
|
91
|
+
);
|
|
92
|
+
|
|
93
|
+
CACTUS_FFI_EXPORT void cactus_stream_transcribe_destroy(cactus_stream_transcribe_t stream);
|
|
70
94
|
|
|
71
95
|
CACTUS_FFI_EXPORT int cactus_embed(
|
|
72
96
|
cactus_model_t model,
|
|
@@ -63,6 +63,14 @@ struct ToolFunction {
|
|
|
63
63
|
std::unordered_map<std::string, std::string> parameters;
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
} // namespace ffi
|
|
67
|
+
} // namespace cactus
|
|
68
|
+
|
|
69
|
+
#include "gemma_tools.h"
|
|
70
|
+
|
|
71
|
+
namespace cactus {
|
|
72
|
+
namespace ffi {
|
|
73
|
+
|
|
66
74
|
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
|
|
67
75
|
std::string sanitized_msg = error_message;
|
|
68
76
|
for (auto& c : sanitized_msg) {
|
|
@@ -303,11 +311,43 @@ inline void parse_function_calls_from_response(const std::string& response_text,
|
|
|
303
311
|
regular_response = response_text;
|
|
304
312
|
function_calls.clear();
|
|
305
313
|
|
|
314
|
+
gemma::parse_function_calls(regular_response, function_calls);
|
|
315
|
+
|
|
316
|
+
// Parse Qwen-style function calls: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
|
|
317
|
+
const std::string QWEN_TOOL_START = "<tool_call>";
|
|
318
|
+
const std::string QWEN_TOOL_END = "</tool_call>";
|
|
319
|
+
size_t qwen_start_pos = 0;
|
|
320
|
+
|
|
321
|
+
while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
|
|
322
|
+
size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
|
|
323
|
+
size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
|
|
324
|
+
|
|
325
|
+
if (qwen_end_pos != std::string::npos) {
|
|
326
|
+
std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
|
|
327
|
+
|
|
328
|
+
size_t first = json_content.find_first_not_of(" \t\n\r");
|
|
329
|
+
size_t last = json_content.find_last_not_of(" \t\n\r");
|
|
330
|
+
if (first != std::string::npos && last != std::string::npos) {
|
|
331
|
+
json_content = json_content.substr(first, last - first + 1);
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
if (json_content.size() > 2 && json_content[0] == '{' &&
|
|
335
|
+
json_content.find("\"name\"") != std::string::npos) {
|
|
336
|
+
function_calls.push_back(json_content);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
|
|
340
|
+
} else {
|
|
341
|
+
break;
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
|
|
306
346
|
const std::string TOOL_CALL_START = "<|tool_call_start|>";
|
|
307
347
|
const std::string TOOL_CALL_END = "<|tool_call_end|>";
|
|
308
348
|
size_t tool_start_pos = 0;
|
|
309
349
|
|
|
310
|
-
while ((tool_start_pos =
|
|
350
|
+
while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
|
|
311
351
|
size_t content_start = tool_start_pos + TOOL_CALL_START.length();
|
|
312
352
|
size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
|
|
313
353
|
|
|
@@ -131,9 +131,12 @@ struct MergeRule {
|
|
|
131
131
|
struct ChatMessage {
|
|
132
132
|
std::string role;
|
|
133
133
|
std::string content;
|
|
134
|
+
std::string name;
|
|
134
135
|
std::vector<std::string> images;
|
|
135
136
|
};
|
|
136
137
|
|
|
138
|
+
|
|
139
|
+
|
|
137
140
|
class Tokenizer {
|
|
138
141
|
public:
|
|
139
142
|
virtual ~Tokenizer() = default;
|
|
@@ -329,6 +332,8 @@ struct KVCache {
|
|
|
329
332
|
struct LayerCache {
|
|
330
333
|
std::vector<uint8_t> keys;
|
|
331
334
|
std::vector<uint8_t> values;
|
|
335
|
+
std::vector<float> key_scales;
|
|
336
|
+
std::vector<float> value_scales;
|
|
332
337
|
};
|
|
333
338
|
|
|
334
339
|
std::vector<LayerCache> layer_caches;
|
|
@@ -354,13 +359,11 @@ struct KVCache {
|
|
|
354
359
|
const std::vector<size_t>& v_nodes, size_t seq_len,
|
|
355
360
|
size_t num_layers, size_t kv_heads, size_t head_dim);
|
|
356
361
|
|
|
357
|
-
// Update KV cache from NPU prefill outputs
|
|
358
|
-
// NPU outputs are in shape [num_tokens, num_kv_heads, head_dim]
|
|
359
|
-
// This handles transposition to cache format and sliding window
|
|
360
362
|
void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
|
|
361
363
|
size_t num_tokens, size_t kv_heads, size_t head_dim);
|
|
362
364
|
|
|
363
365
|
bool is_empty() const { return current_seq_len == 0; }
|
|
366
|
+
bool is_int8() const { return precision == Precision::INT8; }
|
|
364
367
|
void* get_key_ptr(size_t layer);
|
|
365
368
|
void* get_value_ptr(size_t layer);
|
|
366
369
|
|
|
@@ -374,33 +377,44 @@ struct KVCache {
|
|
|
374
377
|
|
|
375
378
|
CircularView get_key_view(size_t layer);
|
|
376
379
|
CircularView get_value_view(size_t layer);
|
|
380
|
+
|
|
381
|
+
const int8_t* get_keys_int8(size_t layer) const;
|
|
382
|
+
const int8_t* get_values_int8(size_t layer) const;
|
|
383
|
+
const float* get_key_scales(size_t layer) const;
|
|
384
|
+
const float* get_value_scales(size_t layer) const;
|
|
377
385
|
};
|
|
378
386
|
|
|
379
387
|
class ToolCallConstrainer {
|
|
380
388
|
public:
|
|
381
389
|
enum class State {
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
390
|
+
DONE,
|
|
391
|
+
|
|
392
|
+
QWEN_START,
|
|
393
|
+
QWEN_EXPECT_OPEN_BRACE,
|
|
394
|
+
QWEN_EXPECT_NAME_KEY,
|
|
395
|
+
QWEN_EXPECT_NAME_COLON,
|
|
396
|
+
QWEN_EXPECT_NAME_VALUE,
|
|
397
|
+
QWEN_EXPECT_COMMA,
|
|
398
|
+
QWEN_EXPECT_ARGS_KEY,
|
|
399
|
+
QWEN_EXPECT_ARGS_COLON,
|
|
400
|
+
QWEN_IN_ARGUMENTS,
|
|
401
|
+
QWEN_EXPECT_CLOSE_BRACE,
|
|
402
|
+
QWEN_EXPECT_END,
|
|
403
|
+
|
|
404
|
+
LFM_START,
|
|
405
|
+
LFM_EXPECT_BRACKET,
|
|
406
|
+
LFM_IN_FUNC_NAME,
|
|
407
|
+
LFM_EXPECT_PAREN,
|
|
408
|
+
LFM_IN_ARGUMENTS,
|
|
409
|
+
LFM_EXPECT_BRACKET_CLOSE,
|
|
410
|
+
LFM_EXPECT_END,
|
|
411
|
+
|
|
412
|
+
GEMMA_START,
|
|
413
|
+
GEMMA_EXPECT_CALL,
|
|
414
|
+
GEMMA_IN_FUNC_NAME,
|
|
415
|
+
GEMMA_EXPECT_BRACE,
|
|
416
|
+
GEMMA_IN_ARGUMENTS,
|
|
417
|
+
GEMMA_EXPECT_END
|
|
404
418
|
};
|
|
405
419
|
|
|
406
420
|
void init(Config::ModelType model_type,
|
|
@@ -417,36 +431,40 @@ public:
|
|
|
417
431
|
|
|
418
432
|
private:
|
|
419
433
|
bool active_ = false;
|
|
420
|
-
State state_ = State::
|
|
434
|
+
State state_ = State::QWEN_START;
|
|
421
435
|
Config::ModelType model_type_ = Config::ModelType::QWEN;
|
|
422
436
|
Tokenizer* tokenizer_ = nullptr;
|
|
423
437
|
|
|
424
438
|
std::vector<std::string> function_names_;
|
|
425
439
|
std::string generated_text_;
|
|
426
|
-
int brace_depth_ = 0;
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
std::unordered_set<uint32_t>
|
|
430
|
-
std::unordered_set<uint32_t>
|
|
431
|
-
std::unordered_set<uint32_t>
|
|
432
|
-
std::unordered_set<uint32_t>
|
|
433
|
-
std::unordered_set<uint32_t>
|
|
434
|
-
std::unordered_set<uint32_t> name_key_tokens_;
|
|
435
|
-
std::unordered_set<uint32_t> args_key_tokens_;
|
|
436
|
-
std::unordered_set<uint32_t> quote_tokens_;
|
|
437
|
-
std::unordered_set<uint32_t> backtick_tokens_;
|
|
438
|
-
std::unordered_set<uint32_t>
|
|
439
|
-
std::
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
// LFM2-specific tokens
|
|
440
|
+
int brace_depth_ = 0;
|
|
441
|
+
|
|
442
|
+
std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
|
|
443
|
+
std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
|
|
444
|
+
std::unordered_set<uint32_t> open_brace_tokens_;
|
|
445
|
+
std::unordered_set<uint32_t> close_brace_tokens_;
|
|
446
|
+
std::unordered_set<uint32_t> colon_tokens_;
|
|
447
|
+
std::unordered_set<uint32_t> comma_tokens_;
|
|
448
|
+
std::unordered_set<uint32_t> name_key_tokens_;
|
|
449
|
+
std::unordered_set<uint32_t> args_key_tokens_;
|
|
450
|
+
std::unordered_set<uint32_t> quote_tokens_;
|
|
451
|
+
std::unordered_set<uint32_t> backtick_tokens_;
|
|
452
|
+
std::unordered_set<uint32_t> all_func_name_tokens_;
|
|
453
|
+
std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
|
|
454
|
+
|
|
443
455
|
std::unordered_set<uint32_t> tool_start_tokens_;
|
|
444
456
|
std::unordered_set<uint32_t> tool_end_tokens_;
|
|
445
|
-
std::unordered_set<uint32_t> bracket_open_tokens_;
|
|
446
|
-
std::unordered_set<uint32_t> bracket_close_tokens_;
|
|
447
|
-
std::unordered_set<uint32_t> paren_open_tokens_;
|
|
448
|
-
std::unordered_set<uint32_t> paren_close_tokens_;
|
|
449
|
-
std::unordered_set<uint32_t> equals_tokens_;
|
|
457
|
+
std::unordered_set<uint32_t> bracket_open_tokens_;
|
|
458
|
+
std::unordered_set<uint32_t> bracket_close_tokens_;
|
|
459
|
+
std::unordered_set<uint32_t> paren_open_tokens_;
|
|
460
|
+
std::unordered_set<uint32_t> paren_close_tokens_;
|
|
461
|
+
std::unordered_set<uint32_t> equals_tokens_;
|
|
462
|
+
|
|
463
|
+
std::unordered_set<uint32_t> gemma_call_start_tokens_;
|
|
464
|
+
std::unordered_set<uint32_t> gemma_call_end_tokens_;
|
|
465
|
+
std::unordered_set<uint32_t> gemma_response_start_tokens_;
|
|
466
|
+
std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
|
|
467
|
+
std::unordered_set<uint32_t> escape_tokens_;
|
|
450
468
|
|
|
451
469
|
std::unordered_map<uint32_t, float> current_bias_;
|
|
452
470
|
|