cactus-react-native 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -27
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/cpp/HybridCactus.cpp +119 -0
- package/cpp/HybridCactus.hpp +13 -0
- package/cpp/cactus_ffi.h +24 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +24 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +41 -1
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +66 -48
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +102 -21
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +45 -195
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +24 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +41 -1
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +66 -48
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +102 -21
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +45 -195
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/api/Database.js +0 -92
- package/lib/module/api/Database.js.map +1 -1
- package/lib/module/classes/CactusLM.js +33 -15
- package/lib/module/classes/CactusLM.js.map +1 -1
- package/lib/module/classes/CactusSTT.js +90 -15
- package/lib/module/classes/CactusSTT.js.map +1 -1
- package/lib/module/hooks/useCactusLM.js +14 -5
- package/lib/module/hooks/useCactusLM.js.map +1 -1
- package/lib/module/hooks/useCactusSTT.js +100 -4
- package/lib/module/hooks/useCactusSTT.js.map +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/models.js +336 -0
- package/lib/module/models.js.map +1 -0
- package/lib/module/native/Cactus.js +37 -0
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/module/types/CactusLM.js +2 -0
- package/lib/module/types/CactusSTT.js +2 -0
- package/lib/module/types/common.js +2 -0
- package/lib/module/types/{CactusModel.js.map → common.js.map} +1 -1
- package/lib/typescript/src/api/Database.d.ts +0 -6
- package/lib/typescript/src/api/Database.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusLM.d.ts +7 -3
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/classes/CactusSTT.d.ts +13 -4
- package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
- package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusSTT.d.ts +12 -4
- package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +2 -3
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/models.d.ts +6 -0
- package/lib/typescript/src/models.d.ts.map +1 -0
- package/lib/typescript/src/native/Cactus.d.ts +6 -1
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +5 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusLM.d.ts +2 -0
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +20 -0
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/lib/typescript/src/types/common.d.ts +28 -0
- package/lib/typescript/src/types/common.d.ts.map +1 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +5 -0
- package/package.json +1 -1
- package/src/api/Database.ts +0 -133
- package/src/classes/CactusLM.ts +49 -17
- package/src/classes/CactusSTT.ts +118 -17
- package/src/hooks/useCactusLM.ts +25 -5
- package/src/hooks/useCactusSTT.ts +117 -5
- package/src/index.tsx +6 -2
- package/src/models.ts +344 -0
- package/src/native/Cactus.ts +55 -0
- package/src/specs/Cactus.nitro.ts +5 -0
- package/src/types/CactusLM.ts +3 -0
- package/src/types/CactusSTT.ts +26 -0
- package/src/types/common.ts +28 -0
- package/lib/module/types/CactusModel.js +0 -2
- package/lib/module/types/CactusSTTModel.js +0 -2
- package/lib/module/types/CactusSTTModel.js.map +0 -1
- package/lib/typescript/src/types/CactusModel.d.ts +0 -13
- package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
- package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
- package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
- package/src/types/CactusModel.ts +0 -15
- package/src/types/CactusSTTModel.ts +0 -10
|
@@ -131,9 +131,12 @@ struct MergeRule {
|
|
|
131
131
|
struct ChatMessage {
|
|
132
132
|
std::string role;
|
|
133
133
|
std::string content;
|
|
134
|
+
std::string name;
|
|
134
135
|
std::vector<std::string> images;
|
|
135
136
|
};
|
|
136
137
|
|
|
138
|
+
|
|
139
|
+
|
|
137
140
|
class Tokenizer {
|
|
138
141
|
public:
|
|
139
142
|
virtual ~Tokenizer() = default;
|
|
@@ -329,6 +332,8 @@ struct KVCache {
|
|
|
329
332
|
struct LayerCache {
|
|
330
333
|
std::vector<uint8_t> keys;
|
|
331
334
|
std::vector<uint8_t> values;
|
|
335
|
+
std::vector<float> key_scales;
|
|
336
|
+
std::vector<float> value_scales;
|
|
332
337
|
};
|
|
333
338
|
|
|
334
339
|
std::vector<LayerCache> layer_caches;
|
|
@@ -354,13 +359,11 @@ struct KVCache {
|
|
|
354
359
|
const std::vector<size_t>& v_nodes, size_t seq_len,
|
|
355
360
|
size_t num_layers, size_t kv_heads, size_t head_dim);
|
|
356
361
|
|
|
357
|
-
// Update KV cache from NPU prefill outputs
|
|
358
|
-
// NPU outputs are in shape [num_tokens, num_kv_heads, head_dim]
|
|
359
|
-
// This handles transposition to cache format and sliding window
|
|
360
362
|
void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
|
|
361
363
|
size_t num_tokens, size_t kv_heads, size_t head_dim);
|
|
362
364
|
|
|
363
365
|
bool is_empty() const { return current_seq_len == 0; }
|
|
366
|
+
bool is_int8() const { return precision == Precision::INT8; }
|
|
364
367
|
void* get_key_ptr(size_t layer);
|
|
365
368
|
void* get_value_ptr(size_t layer);
|
|
366
369
|
|
|
@@ -374,33 +377,44 @@ struct KVCache {
|
|
|
374
377
|
|
|
375
378
|
CircularView get_key_view(size_t layer);
|
|
376
379
|
CircularView get_value_view(size_t layer);
|
|
380
|
+
|
|
381
|
+
const int8_t* get_keys_int8(size_t layer) const;
|
|
382
|
+
const int8_t* get_values_int8(size_t layer) const;
|
|
383
|
+
const float* get_key_scales(size_t layer) const;
|
|
384
|
+
const float* get_value_scales(size_t layer) const;
|
|
377
385
|
};
|
|
378
386
|
|
|
379
387
|
class ToolCallConstrainer {
|
|
380
388
|
public:
|
|
381
389
|
enum class State {
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
390
|
+
DONE,
|
|
391
|
+
|
|
392
|
+
QWEN_START,
|
|
393
|
+
QWEN_EXPECT_OPEN_BRACE,
|
|
394
|
+
QWEN_EXPECT_NAME_KEY,
|
|
395
|
+
QWEN_EXPECT_NAME_COLON,
|
|
396
|
+
QWEN_EXPECT_NAME_VALUE,
|
|
397
|
+
QWEN_EXPECT_COMMA,
|
|
398
|
+
QWEN_EXPECT_ARGS_KEY,
|
|
399
|
+
QWEN_EXPECT_ARGS_COLON,
|
|
400
|
+
QWEN_IN_ARGUMENTS,
|
|
401
|
+
QWEN_EXPECT_CLOSE_BRACE,
|
|
402
|
+
QWEN_EXPECT_END,
|
|
403
|
+
|
|
404
|
+
LFM_START,
|
|
405
|
+
LFM_EXPECT_BRACKET,
|
|
406
|
+
LFM_IN_FUNC_NAME,
|
|
407
|
+
LFM_EXPECT_PAREN,
|
|
408
|
+
LFM_IN_ARGUMENTS,
|
|
409
|
+
LFM_EXPECT_BRACKET_CLOSE,
|
|
410
|
+
LFM_EXPECT_END,
|
|
411
|
+
|
|
412
|
+
GEMMA_START,
|
|
413
|
+
GEMMA_EXPECT_CALL,
|
|
414
|
+
GEMMA_IN_FUNC_NAME,
|
|
415
|
+
GEMMA_EXPECT_BRACE,
|
|
416
|
+
GEMMA_IN_ARGUMENTS,
|
|
417
|
+
GEMMA_EXPECT_END
|
|
404
418
|
};
|
|
405
419
|
|
|
406
420
|
void init(Config::ModelType model_type,
|
|
@@ -417,36 +431,40 @@ public:
|
|
|
417
431
|
|
|
418
432
|
private:
|
|
419
433
|
bool active_ = false;
|
|
420
|
-
State state_ = State::
|
|
434
|
+
State state_ = State::QWEN_START;
|
|
421
435
|
Config::ModelType model_type_ = Config::ModelType::QWEN;
|
|
422
436
|
Tokenizer* tokenizer_ = nullptr;
|
|
423
437
|
|
|
424
438
|
std::vector<std::string> function_names_;
|
|
425
439
|
std::string generated_text_;
|
|
426
|
-
int brace_depth_ = 0;
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
std::unordered_set<uint32_t>
|
|
430
|
-
std::unordered_set<uint32_t>
|
|
431
|
-
std::unordered_set<uint32_t>
|
|
432
|
-
std::unordered_set<uint32_t>
|
|
433
|
-
std::unordered_set<uint32_t>
|
|
434
|
-
std::unordered_set<uint32_t> name_key_tokens_;
|
|
435
|
-
std::unordered_set<uint32_t> args_key_tokens_;
|
|
436
|
-
std::unordered_set<uint32_t> quote_tokens_;
|
|
437
|
-
std::unordered_set<uint32_t> backtick_tokens_;
|
|
438
|
-
std::unordered_set<uint32_t>
|
|
439
|
-
std::
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
// LFM2-specific tokens
|
|
440
|
+
int brace_depth_ = 0;
|
|
441
|
+
|
|
442
|
+
std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
|
|
443
|
+
std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
|
|
444
|
+
std::unordered_set<uint32_t> open_brace_tokens_;
|
|
445
|
+
std::unordered_set<uint32_t> close_brace_tokens_;
|
|
446
|
+
std::unordered_set<uint32_t> colon_tokens_;
|
|
447
|
+
std::unordered_set<uint32_t> comma_tokens_;
|
|
448
|
+
std::unordered_set<uint32_t> name_key_tokens_;
|
|
449
|
+
std::unordered_set<uint32_t> args_key_tokens_;
|
|
450
|
+
std::unordered_set<uint32_t> quote_tokens_;
|
|
451
|
+
std::unordered_set<uint32_t> backtick_tokens_;
|
|
452
|
+
std::unordered_set<uint32_t> all_func_name_tokens_;
|
|
453
|
+
std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
|
|
454
|
+
|
|
443
455
|
std::unordered_set<uint32_t> tool_start_tokens_;
|
|
444
456
|
std::unordered_set<uint32_t> tool_end_tokens_;
|
|
445
|
-
std::unordered_set<uint32_t> bracket_open_tokens_;
|
|
446
|
-
std::unordered_set<uint32_t> bracket_close_tokens_;
|
|
447
|
-
std::unordered_set<uint32_t> paren_open_tokens_;
|
|
448
|
-
std::unordered_set<uint32_t> paren_close_tokens_;
|
|
449
|
-
std::unordered_set<uint32_t> equals_tokens_;
|
|
457
|
+
std::unordered_set<uint32_t> bracket_open_tokens_;
|
|
458
|
+
std::unordered_set<uint32_t> bracket_close_tokens_;
|
|
459
|
+
std::unordered_set<uint32_t> paren_open_tokens_;
|
|
460
|
+
std::unordered_set<uint32_t> paren_close_tokens_;
|
|
461
|
+
std::unordered_set<uint32_t> equals_tokens_;
|
|
462
|
+
|
|
463
|
+
std::unordered_set<uint32_t> gemma_call_start_tokens_;
|
|
464
|
+
std::unordered_set<uint32_t> gemma_call_end_tokens_;
|
|
465
|
+
std::unordered_set<uint32_t> gemma_response_start_tokens_;
|
|
466
|
+
std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
|
|
467
|
+
std::unordered_set<uint32_t> escape_tokens_;
|
|
450
468
|
|
|
451
469
|
std::unordered_map<uint32_t, float> current_bias_;
|
|
452
470
|
|