cactus-react-native 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. package/README.md +212 -27
  2. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  3. package/cpp/HybridCactus.cpp +119 -0
  4. package/cpp/HybridCactus.hpp +13 -0
  5. package/cpp/cactus_ffi.h +24 -0
  6. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +24 -0
  7. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +41 -1
  8. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +66 -48
  9. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
  10. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +102 -21
  11. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +45 -195
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  14. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +24 -0
  15. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +41 -1
  16. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +66 -48
  17. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
  18. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +102 -21
  19. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +45 -195
  20. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  22. package/lib/module/api/Database.js +0 -92
  23. package/lib/module/api/Database.js.map +1 -1
  24. package/lib/module/classes/CactusLM.js +33 -15
  25. package/lib/module/classes/CactusLM.js.map +1 -1
  26. package/lib/module/classes/CactusSTT.js +90 -15
  27. package/lib/module/classes/CactusSTT.js.map +1 -1
  28. package/lib/module/hooks/useCactusLM.js +14 -5
  29. package/lib/module/hooks/useCactusLM.js.map +1 -1
  30. package/lib/module/hooks/useCactusSTT.js +100 -4
  31. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  32. package/lib/module/index.js.map +1 -1
  33. package/lib/module/models.js +336 -0
  34. package/lib/module/models.js.map +1 -0
  35. package/lib/module/native/Cactus.js +37 -0
  36. package/lib/module/native/Cactus.js.map +1 -1
  37. package/lib/module/types/CactusLM.js +2 -0
  38. package/lib/module/types/CactusSTT.js +2 -0
  39. package/lib/module/types/common.js +2 -0
  40. package/lib/module/types/{CactusModel.js.map → common.js.map} +1 -1
  41. package/lib/typescript/src/api/Database.d.ts +0 -6
  42. package/lib/typescript/src/api/Database.d.ts.map +1 -1
  43. package/lib/typescript/src/classes/CactusLM.d.ts +7 -3
  44. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  45. package/lib/typescript/src/classes/CactusSTT.d.ts +13 -4
  46. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  47. package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
  48. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  49. package/lib/typescript/src/hooks/useCactusSTT.d.ts +12 -4
  50. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  51. package/lib/typescript/src/index.d.ts +2 -3
  52. package/lib/typescript/src/index.d.ts.map +1 -1
  53. package/lib/typescript/src/models.d.ts +6 -0
  54. package/lib/typescript/src/models.d.ts.map +1 -0
  55. package/lib/typescript/src/native/Cactus.d.ts +6 -1
  56. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  57. package/lib/typescript/src/specs/Cactus.nitro.d.ts +5 -0
  58. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  59. package/lib/typescript/src/types/CactusLM.d.ts +2 -0
  60. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  61. package/lib/typescript/src/types/CactusSTT.d.ts +20 -0
  62. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  63. package/lib/typescript/src/types/common.d.ts +28 -0
  64. package/lib/typescript/src/types/common.d.ts.map +1 -0
  65. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
  66. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +5 -0
  67. package/package.json +1 -1
  68. package/src/api/Database.ts +0 -133
  69. package/src/classes/CactusLM.ts +49 -17
  70. package/src/classes/CactusSTT.ts +118 -17
  71. package/src/hooks/useCactusLM.ts +25 -5
  72. package/src/hooks/useCactusSTT.ts +117 -5
  73. package/src/index.tsx +6 -2
  74. package/src/models.ts +344 -0
  75. package/src/native/Cactus.ts +55 -0
  76. package/src/specs/Cactus.nitro.ts +5 -0
  77. package/src/types/CactusLM.ts +3 -0
  78. package/src/types/CactusSTT.ts +26 -0
  79. package/src/types/common.ts +28 -0
  80. package/lib/module/types/CactusModel.js +0 -2
  81. package/lib/module/types/CactusSTTModel.js +0 -2
  82. package/lib/module/types/CactusSTTModel.js.map +0 -1
  83. package/lib/typescript/src/types/CactusModel.d.ts +0 -13
  84. package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
  85. package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
  86. package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
  87. package/src/types/CactusModel.ts +0 -15
  88. package/src/types/CactusSTTModel.ts +0 -10
@@ -67,6 +67,30 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
67
67
  size_t pcm_buffer_size
68
68
  );
69
69
 
70
+ typedef void* cactus_stream_transcribe_t;
71
+
72
+ CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_init(cactus_model_t model);
73
+
74
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_insert(
75
+ cactus_stream_transcribe_t stream,
76
+ const uint8_t* pcm_buffer,
77
+ size_t pcm_buffer_size
78
+ );
79
+
80
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_process(
81
+ cactus_stream_transcribe_t stream,
82
+ char* response_buffer,
83
+ size_t buffer_size,
84
+ const char* options_json
85
+ );
86
+
87
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_finalize(
88
+ cactus_stream_transcribe_t stream,
89
+ char* response_buffer,
90
+ size_t buffer_size
91
+ );
92
+
93
+ CACTUS_FFI_EXPORT void cactus_stream_transcribe_destroy(cactus_stream_transcribe_t stream);
70
94
 
71
95
  CACTUS_FFI_EXPORT int cactus_embed(
72
96
  cactus_model_t model,
@@ -63,6 +63,14 @@ struct ToolFunction {
63
63
  std::unordered_map<std::string, std::string> parameters;
64
64
  };
65
65
 
66
+ } // namespace ffi
67
+ } // namespace cactus
68
+
69
+ #include "gemma_tools.h"
70
+
71
+ namespace cactus {
72
+ namespace ffi {
73
+
66
74
  inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
67
75
  std::string sanitized_msg = error_message;
68
76
  for (auto& c : sanitized_msg) {
@@ -303,11 +311,43 @@ inline void parse_function_calls_from_response(const std::string& response_text,
303
311
  regular_response = response_text;
304
312
  function_calls.clear();
305
313
 
314
+ gemma::parse_function_calls(regular_response, function_calls);
315
+
316
+ // Parse Qwen-style function calls: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
317
+ const std::string QWEN_TOOL_START = "<tool_call>";
318
+ const std::string QWEN_TOOL_END = "</tool_call>";
319
+ size_t qwen_start_pos = 0;
320
+
321
+ while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
322
+ size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
323
+ size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
324
+
325
+ if (qwen_end_pos != std::string::npos) {
326
+ std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
327
+
328
+ size_t first = json_content.find_first_not_of(" \t\n\r");
329
+ size_t last = json_content.find_last_not_of(" \t\n\r");
330
+ if (first != std::string::npos && last != std::string::npos) {
331
+ json_content = json_content.substr(first, last - first + 1);
332
+ }
333
+
334
+ if (json_content.size() > 2 && json_content[0] == '{' &&
335
+ json_content.find("\"name\"") != std::string::npos) {
336
+ function_calls.push_back(json_content);
337
+ }
338
+
339
+ regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
306
346
  const std::string TOOL_CALL_START = "<|tool_call_start|>";
307
347
  const std::string TOOL_CALL_END = "<|tool_call_end|>";
308
348
  size_t tool_start_pos = 0;
309
349
 
310
- while ((tool_start_pos = response_text.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
350
+ while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
311
351
  size_t content_start = tool_start_pos + TOOL_CALL_START.length();
312
352
  size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
313
353
 
@@ -131,9 +131,12 @@ struct MergeRule {
131
131
  struct ChatMessage {
132
132
  std::string role;
133
133
  std::string content;
134
+ std::string name;
134
135
  std::vector<std::string> images;
135
136
  };
136
137
 
138
+
139
+
137
140
  class Tokenizer {
138
141
  public:
139
142
  virtual ~Tokenizer() = default;
@@ -329,6 +332,8 @@ struct KVCache {
329
332
  struct LayerCache {
330
333
  std::vector<uint8_t> keys;
331
334
  std::vector<uint8_t> values;
335
+ std::vector<float> key_scales;
336
+ std::vector<float> value_scales;
332
337
  };
333
338
 
334
339
  std::vector<LayerCache> layer_caches;
@@ -354,13 +359,11 @@ struct KVCache {
354
359
  const std::vector<size_t>& v_nodes, size_t seq_len,
355
360
  size_t num_layers, size_t kv_heads, size_t head_dim);
356
361
 
357
- // Update KV cache from NPU prefill outputs
358
- // NPU outputs are in shape [num_tokens, num_kv_heads, head_dim]
359
- // This handles transposition to cache format and sliding window
360
362
  void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
361
363
  size_t num_tokens, size_t kv_heads, size_t head_dim);
362
364
 
363
365
  bool is_empty() const { return current_seq_len == 0; }
366
+ bool is_int8() const { return precision == Precision::INT8; }
364
367
  void* get_key_ptr(size_t layer);
365
368
  void* get_value_ptr(size_t layer);
366
369
 
@@ -374,33 +377,44 @@ struct KVCache {
374
377
 
375
378
  CircularView get_key_view(size_t layer);
376
379
  CircularView get_value_view(size_t layer);
380
+
381
+ const int8_t* get_keys_int8(size_t layer) const;
382
+ const int8_t* get_values_int8(size_t layer) const;
383
+ const float* get_key_scales(size_t layer) const;
384
+ const float* get_value_scales(size_t layer) const;
377
385
  };
378
386
 
379
387
  class ToolCallConstrainer {
380
388
  public:
381
389
  enum class State {
382
- START, // -> expect {
383
- EXPECT_FC_KEY, // -> expect "function_call"
384
- EXPECT_FC_COLON, // -> expect :
385
- EXPECT_FC_OPEN_BRACE, // -> expect {
386
- EXPECT_NAME_KEY, // -> expect "name"
387
- EXPECT_NAME_COLON, // -> expect :
388
- EXPECT_NAME_VALUE, // -> expect "<function_name>"
389
- EXPECT_COMMA, // -> expect ,
390
- EXPECT_ARGS_KEY, // -> expect "arguments"
391
- EXPECT_ARGS_COLON, // -> expect :
392
- IN_ARGUMENTS, // -> free JSON, track brace depth
393
- EXPECT_INNER_CLOSE, // -> expect } to close inner object
394
- EXPECT_OUTER_CLOSE, // -> expect } to close outer object
395
- DONE, // complete
396
-
397
- LFM_START, // -> expect <|tool_call_start|>
398
- LFM_EXPECT_BRACKET, // -> expect [
399
- LFM_IN_FUNC_NAME, // -> expect function name
400
- LFM_EXPECT_PAREN, // -> expect (
401
- LFM_IN_ARGUMENTS, // -> arguments until )
402
- LFM_EXPECT_BRACKET_CLOSE, // -> expect ]
403
- LFM_EXPECT_END // -> expect <|tool_call_end|>
390
+ DONE,
391
+
392
+ QWEN_START,
393
+ QWEN_EXPECT_OPEN_BRACE,
394
+ QWEN_EXPECT_NAME_KEY,
395
+ QWEN_EXPECT_NAME_COLON,
396
+ QWEN_EXPECT_NAME_VALUE,
397
+ QWEN_EXPECT_COMMA,
398
+ QWEN_EXPECT_ARGS_KEY,
399
+ QWEN_EXPECT_ARGS_COLON,
400
+ QWEN_IN_ARGUMENTS,
401
+ QWEN_EXPECT_CLOSE_BRACE,
402
+ QWEN_EXPECT_END,
403
+
404
+ LFM_START,
405
+ LFM_EXPECT_BRACKET,
406
+ LFM_IN_FUNC_NAME,
407
+ LFM_EXPECT_PAREN,
408
+ LFM_IN_ARGUMENTS,
409
+ LFM_EXPECT_BRACKET_CLOSE,
410
+ LFM_EXPECT_END,
411
+
412
+ GEMMA_START,
413
+ GEMMA_EXPECT_CALL,
414
+ GEMMA_IN_FUNC_NAME,
415
+ GEMMA_EXPECT_BRACE,
416
+ GEMMA_IN_ARGUMENTS,
417
+ GEMMA_EXPECT_END
404
418
  };
405
419
 
406
420
  void init(Config::ModelType model_type,
@@ -417,36 +431,40 @@ public:
417
431
 
418
432
  private:
419
433
  bool active_ = false;
420
- State state_ = State::START;
434
+ State state_ = State::QWEN_START;
421
435
  Config::ModelType model_type_ = Config::ModelType::QWEN;
422
436
  Tokenizer* tokenizer_ = nullptr;
423
437
 
424
438
  std::vector<std::string> function_names_;
425
439
  std::string generated_text_;
426
- int brace_depth_ = 0; // Track nested braces in arguments
427
-
428
- // Pre-tokenized token sets for each grammar element
429
- std::unordered_set<uint32_t> open_brace_tokens_; // {
430
- std::unordered_set<uint32_t> close_brace_tokens_; // }
431
- std::unordered_set<uint32_t> colon_tokens_; // :
432
- std::unordered_set<uint32_t> comma_tokens_; // ,
433
- std::unordered_set<uint32_t> fc_key_tokens_; // "function_call"
434
- std::unordered_set<uint32_t> name_key_tokens_; // "name"
435
- std::unordered_set<uint32_t> args_key_tokens_; // "arguments"
436
- std::unordered_set<uint32_t> quote_tokens_; // "
437
- std::unordered_set<uint32_t> backtick_tokens_; // ` (to block markdown code fences)
438
- std::unordered_set<uint32_t> response_starter_tokens_; // Common response starters to block (I, I'm, Sorry, etc.)
439
- std::unordered_set<uint32_t> all_func_name_tokens_; // All function name tokens combined
440
- std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_; // Full token sequence per function
441
-
442
- // LFM2-specific tokens
440
+ int brace_depth_ = 0;
441
+
442
+ std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
443
+ std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
444
+ std::unordered_set<uint32_t> open_brace_tokens_;
445
+ std::unordered_set<uint32_t> close_brace_tokens_;
446
+ std::unordered_set<uint32_t> colon_tokens_;
447
+ std::unordered_set<uint32_t> comma_tokens_;
448
+ std::unordered_set<uint32_t> name_key_tokens_;
449
+ std::unordered_set<uint32_t> args_key_tokens_;
450
+ std::unordered_set<uint32_t> quote_tokens_;
451
+ std::unordered_set<uint32_t> backtick_tokens_;
452
+ std::unordered_set<uint32_t> all_func_name_tokens_;
453
+ std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
454
+
443
455
  std::unordered_set<uint32_t> tool_start_tokens_;
444
456
  std::unordered_set<uint32_t> tool_end_tokens_;
445
- std::unordered_set<uint32_t> bracket_open_tokens_; // [
446
- std::unordered_set<uint32_t> bracket_close_tokens_; // ]
447
- std::unordered_set<uint32_t> paren_open_tokens_; // (
448
- std::unordered_set<uint32_t> paren_close_tokens_; // )
449
- std::unordered_set<uint32_t> equals_tokens_; // =
457
+ std::unordered_set<uint32_t> bracket_open_tokens_;
458
+ std::unordered_set<uint32_t> bracket_close_tokens_;
459
+ std::unordered_set<uint32_t> paren_open_tokens_;
460
+ std::unordered_set<uint32_t> paren_close_tokens_;
461
+ std::unordered_set<uint32_t> equals_tokens_;
462
+
463
+ std::unordered_set<uint32_t> gemma_call_start_tokens_;
464
+ std::unordered_set<uint32_t> gemma_call_end_tokens_;
465
+ std::unordered_set<uint32_t> gemma_response_start_tokens_;
466
+ std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
467
+ std::unordered_set<uint32_t> escape_tokens_;
450
468
 
451
469
  std::unordered_map<uint32_t, float> current_bias_;
452
470