cactus-react-native 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/README.md +212 -27
  2. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  3. package/cpp/HybridCactus.cpp +119 -0
  4. package/cpp/HybridCactus.hpp +13 -0
  5. package/cpp/cactus_ffi.h +24 -0
  6. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +24 -0
  7. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +41 -1
  8. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +66 -48
  9. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
  10. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +102 -21
  11. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +45 -195
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  14. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +24 -0
  15. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +41 -1
  16. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +66 -48
  17. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
  18. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +102 -21
  19. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +45 -195
  20. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  22. package/lib/module/api/Database.js +0 -92
  23. package/lib/module/api/Database.js.map +1 -1
  24. package/lib/module/classes/CactusLM.js +33 -15
  25. package/lib/module/classes/CactusLM.js.map +1 -1
  26. package/lib/module/classes/CactusSTT.js +90 -15
  27. package/lib/module/classes/CactusSTT.js.map +1 -1
  28. package/lib/module/hooks/useCactusLM.js +14 -5
  29. package/lib/module/hooks/useCactusLM.js.map +1 -1
  30. package/lib/module/hooks/useCactusSTT.js +100 -4
  31. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  32. package/lib/module/index.js.map +1 -1
  33. package/lib/module/models.js +336 -0
  34. package/lib/module/models.js.map +1 -0
  35. package/lib/module/native/Cactus.js +37 -0
  36. package/lib/module/native/Cactus.js.map +1 -1
  37. package/lib/module/types/CactusLM.js +2 -0
  38. package/lib/module/types/CactusSTT.js +2 -0
  39. package/lib/module/types/common.js +2 -0
  40. package/lib/module/types/{CactusModel.js.map → common.js.map} +1 -1
  41. package/lib/typescript/src/api/Database.d.ts +0 -6
  42. package/lib/typescript/src/api/Database.d.ts.map +1 -1
  43. package/lib/typescript/src/classes/CactusLM.d.ts +7 -3
  44. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  45. package/lib/typescript/src/classes/CactusSTT.d.ts +13 -4
  46. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  47. package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
  48. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  49. package/lib/typescript/src/hooks/useCactusSTT.d.ts +12 -4
  50. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  51. package/lib/typescript/src/index.d.ts +2 -3
  52. package/lib/typescript/src/index.d.ts.map +1 -1
  53. package/lib/typescript/src/models.d.ts +6 -0
  54. package/lib/typescript/src/models.d.ts.map +1 -0
  55. package/lib/typescript/src/native/Cactus.d.ts +6 -1
  56. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  57. package/lib/typescript/src/specs/Cactus.nitro.d.ts +5 -0
  58. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  59. package/lib/typescript/src/types/CactusLM.d.ts +2 -0
  60. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  61. package/lib/typescript/src/types/CactusSTT.d.ts +20 -0
  62. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  63. package/lib/typescript/src/types/common.d.ts +28 -0
  64. package/lib/typescript/src/types/common.d.ts.map +1 -0
  65. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
  66. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +5 -0
  67. package/package.json +1 -1
  68. package/src/api/Database.ts +0 -133
  69. package/src/classes/CactusLM.ts +49 -17
  70. package/src/classes/CactusSTT.ts +118 -17
  71. package/src/hooks/useCactusLM.ts +25 -5
  72. package/src/hooks/useCactusSTT.ts +117 -5
  73. package/src/index.tsx +6 -2
  74. package/src/models.ts +344 -0
  75. package/src/native/Cactus.ts +55 -0
  76. package/src/specs/Cactus.nitro.ts +5 -0
  77. package/src/types/CactusLM.ts +3 -0
  78. package/src/types/CactusSTT.ts +26 -0
  79. package/src/types/common.ts +28 -0
  80. package/lib/module/types/CactusModel.js +0 -2
  81. package/lib/module/types/CactusSTTModel.js +0 -2
  82. package/lib/module/types/CactusSTTModel.js.map +0 -1
  83. package/lib/typescript/src/types/CactusModel.d.ts +0 -13
  84. package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
  85. package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
  86. package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
  87. package/src/types/CactusModel.ts +0 -15
  88. package/src/types/CactusSTTModel.ts +0 -10
@@ -131,9 +131,12 @@ struct MergeRule {
131
131
  struct ChatMessage {
132
132
  std::string role;
133
133
  std::string content;
134
+ std::string name;
134
135
  std::vector<std::string> images;
135
136
  };
136
137
 
138
+
139
+
137
140
  class Tokenizer {
138
141
  public:
139
142
  virtual ~Tokenizer() = default;
@@ -329,6 +332,8 @@ struct KVCache {
329
332
  struct LayerCache {
330
333
  std::vector<uint8_t> keys;
331
334
  std::vector<uint8_t> values;
335
+ std::vector<float> key_scales;
336
+ std::vector<float> value_scales;
332
337
  };
333
338
 
334
339
  std::vector<LayerCache> layer_caches;
@@ -354,13 +359,11 @@ struct KVCache {
354
359
  const std::vector<size_t>& v_nodes, size_t seq_len,
355
360
  size_t num_layers, size_t kv_heads, size_t head_dim);
356
361
 
357
- // Update KV cache from NPU prefill outputs
358
- // NPU outputs are in shape [num_tokens, num_kv_heads, head_dim]
359
- // This handles transposition to cache format and sliding window
360
362
  void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
361
363
  size_t num_tokens, size_t kv_heads, size_t head_dim);
362
364
 
363
365
  bool is_empty() const { return current_seq_len == 0; }
366
+ bool is_int8() const { return precision == Precision::INT8; }
364
367
  void* get_key_ptr(size_t layer);
365
368
  void* get_value_ptr(size_t layer);
366
369
 
@@ -374,33 +377,44 @@ struct KVCache {
374
377
 
375
378
  CircularView get_key_view(size_t layer);
376
379
  CircularView get_value_view(size_t layer);
380
+
381
+ const int8_t* get_keys_int8(size_t layer) const;
382
+ const int8_t* get_values_int8(size_t layer) const;
383
+ const float* get_key_scales(size_t layer) const;
384
+ const float* get_value_scales(size_t layer) const;
377
385
  };
378
386
 
379
387
  class ToolCallConstrainer {
380
388
  public:
381
389
  enum class State {
382
- START, // -> expect {
383
- EXPECT_FC_KEY, // -> expect "function_call"
384
- EXPECT_FC_COLON, // -> expect :
385
- EXPECT_FC_OPEN_BRACE, // -> expect {
386
- EXPECT_NAME_KEY, // -> expect "name"
387
- EXPECT_NAME_COLON, // -> expect :
388
- EXPECT_NAME_VALUE, // -> expect "<function_name>"
389
- EXPECT_COMMA, // -> expect ,
390
- EXPECT_ARGS_KEY, // -> expect "arguments"
391
- EXPECT_ARGS_COLON, // -> expect :
392
- IN_ARGUMENTS, // -> free JSON, track brace depth
393
- EXPECT_INNER_CLOSE, // -> expect } to close inner object
394
- EXPECT_OUTER_CLOSE, // -> expect } to close outer object
395
- DONE, // complete
396
-
397
- LFM_START, // -> expect <|tool_call_start|>
398
- LFM_EXPECT_BRACKET, // -> expect [
399
- LFM_IN_FUNC_NAME, // -> expect function name
400
- LFM_EXPECT_PAREN, // -> expect (
401
- LFM_IN_ARGUMENTS, // -> arguments until )
402
- LFM_EXPECT_BRACKET_CLOSE, // -> expect ]
403
- LFM_EXPECT_END // -> expect <|tool_call_end|>
390
+ DONE,
391
+
392
+ QWEN_START,
393
+ QWEN_EXPECT_OPEN_BRACE,
394
+ QWEN_EXPECT_NAME_KEY,
395
+ QWEN_EXPECT_NAME_COLON,
396
+ QWEN_EXPECT_NAME_VALUE,
397
+ QWEN_EXPECT_COMMA,
398
+ QWEN_EXPECT_ARGS_KEY,
399
+ QWEN_EXPECT_ARGS_COLON,
400
+ QWEN_IN_ARGUMENTS,
401
+ QWEN_EXPECT_CLOSE_BRACE,
402
+ QWEN_EXPECT_END,
403
+
404
+ LFM_START,
405
+ LFM_EXPECT_BRACKET,
406
+ LFM_IN_FUNC_NAME,
407
+ LFM_EXPECT_PAREN,
408
+ LFM_IN_ARGUMENTS,
409
+ LFM_EXPECT_BRACKET_CLOSE,
410
+ LFM_EXPECT_END,
411
+
412
+ GEMMA_START,
413
+ GEMMA_EXPECT_CALL,
414
+ GEMMA_IN_FUNC_NAME,
415
+ GEMMA_EXPECT_BRACE,
416
+ GEMMA_IN_ARGUMENTS,
417
+ GEMMA_EXPECT_END
404
418
  };
405
419
 
406
420
  void init(Config::ModelType model_type,
@@ -417,36 +431,40 @@ public:
417
431
 
418
432
  private:
419
433
  bool active_ = false;
420
- State state_ = State::START;
434
+ State state_ = State::QWEN_START;
421
435
  Config::ModelType model_type_ = Config::ModelType::QWEN;
422
436
  Tokenizer* tokenizer_ = nullptr;
423
437
 
424
438
  std::vector<std::string> function_names_;
425
439
  std::string generated_text_;
426
- int brace_depth_ = 0; // Track nested braces in arguments
427
-
428
- // Pre-tokenized token sets for each grammar element
429
- std::unordered_set<uint32_t> open_brace_tokens_; // {
430
- std::unordered_set<uint32_t> close_brace_tokens_; // }
431
- std::unordered_set<uint32_t> colon_tokens_; // :
432
- std::unordered_set<uint32_t> comma_tokens_; // ,
433
- std::unordered_set<uint32_t> fc_key_tokens_; // "function_call"
434
- std::unordered_set<uint32_t> name_key_tokens_; // "name"
435
- std::unordered_set<uint32_t> args_key_tokens_; // "arguments"
436
- std::unordered_set<uint32_t> quote_tokens_; // "
437
- std::unordered_set<uint32_t> backtick_tokens_; // ` (to block markdown code fences)
438
- std::unordered_set<uint32_t> response_starter_tokens_; // Common response starters to block (I, I'm, Sorry, etc.)
439
- std::unordered_set<uint32_t> all_func_name_tokens_; // All function name tokens combined
440
- std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_; // Full token sequence per function
441
-
442
- // LFM2-specific tokens
440
+ int brace_depth_ = 0;
441
+
442
+ std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
443
+ std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
444
+ std::unordered_set<uint32_t> open_brace_tokens_;
445
+ std::unordered_set<uint32_t> close_brace_tokens_;
446
+ std::unordered_set<uint32_t> colon_tokens_;
447
+ std::unordered_set<uint32_t> comma_tokens_;
448
+ std::unordered_set<uint32_t> name_key_tokens_;
449
+ std::unordered_set<uint32_t> args_key_tokens_;
450
+ std::unordered_set<uint32_t> quote_tokens_;
451
+ std::unordered_set<uint32_t> backtick_tokens_;
452
+ std::unordered_set<uint32_t> all_func_name_tokens_;
453
+ std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
454
+
443
455
  std::unordered_set<uint32_t> tool_start_tokens_;
444
456
  std::unordered_set<uint32_t> tool_end_tokens_;
445
- std::unordered_set<uint32_t> bracket_open_tokens_; // [
446
- std::unordered_set<uint32_t> bracket_close_tokens_; // ]
447
- std::unordered_set<uint32_t> paren_open_tokens_; // (
448
- std::unordered_set<uint32_t> paren_close_tokens_; // )
449
- std::unordered_set<uint32_t> equals_tokens_; // =
457
+ std::unordered_set<uint32_t> bracket_open_tokens_;
458
+ std::unordered_set<uint32_t> bracket_close_tokens_;
459
+ std::unordered_set<uint32_t> paren_open_tokens_;
460
+ std::unordered_set<uint32_t> paren_close_tokens_;
461
+ std::unordered_set<uint32_t> equals_tokens_;
462
+
463
+ std::unordered_set<uint32_t> gemma_call_start_tokens_;
464
+ std::unordered_set<uint32_t> gemma_call_end_tokens_;
465
+ std::unordered_set<uint32_t> gemma_response_start_tokens_;
466
+ std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
467
+ std::unordered_set<uint32_t> escape_tokens_;
450
468
 
451
469
  std::unordered_map<uint32_t, float> current_bias_;
452
470