cactus-react-native 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/android/src/main/CMakeLists.txt +0 -4
  2. package/android/src/main/java/com/cactus/Cactus.java +179 -42
  3. package/android/src/main/java/com/cactus/LlamaContext.java +22 -0
  4. package/android/src/main/jni.cpp +53 -1
  5. package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
  13. package/android/src/newarch/java/com/cactus/CactusModule.java +20 -0
  14. package/android/src/oldarch/java/com/cactus/CactusModule.java +20 -0
  15. package/ios/CMakeLists.txt +2 -0
  16. package/ios/Cactus.mm +80 -0
  17. package/ios/CactusContext.h +6 -0
  18. package/ios/CactusContext.mm +27 -0
  19. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +18 -0
  20. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +39 -0
  21. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  22. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +18 -0
  23. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +39 -0
  24. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  25. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +18 -0
  26. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +39 -0
  27. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
  28. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +18 -0
  29. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +39 -0
  30. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  31. package/lib/commonjs/NativeCactus.js +1 -0
  32. package/lib/commonjs/NativeCactus.js.map +1 -1
  33. package/lib/commonjs/index.js +29 -0
  34. package/lib/commonjs/index.js.map +1 -1
  35. package/lib/module/NativeCactus.js +2 -0
  36. package/lib/module/NativeCactus.js.map +1 -1
  37. package/lib/module/index.js +29 -0
  38. package/lib/module/index.js.map +1 -1
  39. package/lib/typescript/NativeCactus.d.ts +10 -0
  40. package/lib/typescript/NativeCactus.d.ts.map +1 -1
  41. package/lib/typescript/index.d.ts +17 -1
  42. package/lib/typescript/index.d.ts.map +1 -1
  43. package/package.json +1 -1
  44. package/src/NativeCactus.ts +22 -0
  45. package/src/index.ts +36 -0
package/ios/Cactus.mm CHANGED
@@ -643,4 +643,84 @@ RCT_EXPORT_METHOD(releaseVocoder:(double)contextId
643
643
  }
644
644
  #endif
645
645
 
646
+ // New conversation management methods
647
// Bridge entry point for a single-turn text reply.
// Rejects with "llama_error" when the context id is unknown or the context
// reports that it is predicting; otherwise hands the work to llamaDQueue and
// resolves with the NSString returned by -generateResponse:maxTokens:.
// An NSException raised during generation is reported as "llama_cpp_error".
RCT_EXPORT_METHOD(generateResponse:(double)contextId
                  userMessage:(NSString *)userMessage
                  maxTokens:(double)maxTokens
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *ctx = llamaContexts[@(contextId)];

    // Decide up front whether the request can be accepted at all.
    NSString *refusal = nil;
    if (!ctx) {
        refusal = @"Context not found";
    } else if ([ctx isPredicting]) {
        refusal = @"Context is busy";
    }
    if (refusal) {
        reject(@"llama_error", refusal, nil);
        return;
    }

    // The bridge delivers numbers as double; the context API takes an int.
    const int tokenLimit = (int)maxTokens;
    dispatch_async(llamaDQueue, ^{
        @try {
            @autoreleasepool {
                resolve([ctx generateResponse:userMessage maxTokens:tokenLimit]);
            }
        } @catch (NSException *failure) {
            reject(@"llama_cpp_error", failure.reason, nil);
        }
    });
}
673
+
674
// Bridge entry point for a conversation turn that also reports metrics.
// Rejects with "llama_error" when the context id is unknown or the context
// reports that it is predicting; otherwise hands the work to llamaDQueue and
// resolves with the NSDictionary returned by -continueConversation:maxTokens:.
// An NSException raised during generation is reported as "llama_cpp_error".
RCT_EXPORT_METHOD(continueConversation:(double)contextId
                  userMessage:(NSString *)userMessage
                  maxTokens:(double)maxTokens
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *ctx = llamaContexts[@(contextId)];

    // Decide up front whether the request can be accepted at all.
    NSString *refusal = nil;
    if (!ctx) {
        refusal = @"Context not found";
    } else if ([ctx isPredicting]) {
        refusal = @"Context is busy";
    }
    if (refusal) {
        reject(@"llama_error", refusal, nil);
        return;
    }

    // The bridge delivers numbers as double; the context API takes an int.
    const int tokenLimit = (int)maxTokens;
    dispatch_async(llamaDQueue, ^{
        @try {
            @autoreleasepool {
                resolve([ctx continueConversation:userMessage maxTokens:tokenLimit]);
            }
        } @catch (NSException *failure) {
            reject(@"llama_cpp_error", failure.reason, nil);
        }
    });
}
700
+
701
// Clears the conversation held by the given context.
// Rejects with "llama_error" when the context id is unknown or the context
// reports that it is predicting, mirroring generateResponse and
// continueConversation. The clear itself runs on llamaDQueue, the queue those
// generation methods use, rather than on the caller's thread, so it is not
// executed directly alongside a generation block.
// NOTE(review): this relies on llamaDQueue being a serial queue and on
// -isPredicting being true for the whole duration of a generation - confirm.
// Resolves with nil once the context has been cleared; an NSException raised
// while clearing is reported as "llama_cpp_error".
RCT_EXPORT_METHOD(clearConversation:(double)contextId
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
    if (context == nil) {
        reject(@"llama_error", @"Context not found", nil);
        return;
    }
    if ([context isPredicting]) {
        reject(@"llama_error", @"Context is busy", nil);
        return;
    }
    dispatch_async(llamaDQueue, ^{
        @try {
            @autoreleasepool {
                [context clearConversation];
                resolve(nil);
            }
        } @catch (NSException *exception) {
            reject(@"llama_cpp_error", exception.reason, nil);
        }
    });
}
713
+
714
// Reports whether the given context currently has an active conversation.
// Rejects with "llama_error" when the context id is unknown; otherwise
// resolves with the boxed BOOL returned by -isConversationActive.
RCT_EXPORT_METHOD(isConversationActive:(double)contextId
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *ctx = llamaContexts[@(contextId)];
    if (!ctx) {
        reject(@"llama_error", @"Context not found", nil);
        return;
    }

    BOOL active = [ctx isConversationActive];
    resolve(@(active));
}
725
+
646
726
  @end
@@ -74,6 +74,12 @@
74
74
  - (NSArray *)decodeAudioTokens:(NSArray *)tokens;
75
75
  - (void)releaseVocoder;
76
76
 
77
+ // New conversation management methods
78
+ - (NSString *)generateResponse:(NSString *)userMessage maxTokens:(int)maxTokens;
79
+ - (NSDictionary *)continueConversation:(NSString *)userMessage maxTokens:(int)maxTokens;
80
+ - (void)clearConversation;
81
+ - (BOOL)isConversationActive;
82
+
77
83
  - (void)invalidate;
78
84
 
79
85
  @end
@@ -961,6 +961,33 @@
961
961
  llama->releaseVocoder();
962
962
  }
963
963
 
964
+ // New conversation management methods
965
// Forwards one user message to the C++ context and returns the generated text.
// @param userMessage Text passed to llama->generateResponse(); a nil message
//                    is forwarded as an empty string.
// @param maxTokens   Token limit passed through unchanged.
// @return The generated text. Never nil: if the generated bytes are not valid
//         UTF-8, an empty string is returned instead.
- (NSString *)generateResponse:(NSString *)userMessage maxTokens:(int)maxTokens {
    // Messaging nil yields NULL here, and building a std::string from NULL is
    // undefined behavior, so substitute an empty C string.
    const char *messageUTF8 = [userMessage UTF8String];
    std::string result = llama->generateResponse(messageUTF8 ? messageUTF8 : "", maxTokens);
    llama->is_predicting = false;

    // +stringWithUTF8String: returns nil when the bytes are not valid UTF-8.
    NSString *text = [NSString stringWithUTF8String:result.c_str()];
    return text ?: @"";
}
970
+
971
// Forwards one user message to the C++ context and returns the reply together
// with the metrics carried by cactus::conversation_result.
// @param userMessage Text passed to llama->continueConversation(); a nil
//                    message is forwarded as an empty string.
// @param maxTokens   Token limit passed through unchanged.
// @return Dictionary with keys "text", "time_to_first_token", "total_time"
//         (both the .count() of std::chrono::milliseconds values) and
//         "tokens_generated".
- (NSDictionary *)continueConversation:(NSString *)userMessage maxTokens:(int)maxTokens {
    // Messaging nil yields NULL here, and building a std::string from NULL is
    // undefined behavior, so substitute an empty C string.
    const char *messageUTF8 = [userMessage UTF8String];
    cactus::conversation_result result =
        llama->continueConversation(messageUTF8 ? messageUTF8 : "", maxTokens);
    llama->is_predicting = false;

    // +stringWithUTF8String: returns nil when the bytes are not valid UTF-8,
    // and a nil value inside a dictionary literal raises an exception. Fall
    // back to an empty string so the metrics are still delivered.
    NSString *text = [NSString stringWithUTF8String:result.text.c_str()];

    return @{
        @"text": text ?: @"",
        @"time_to_first_token": @(result.time_to_first_token.count()),
        @"total_time": @(result.total_time.count()),
        @"tokens_generated": @(result.tokens_generated)
    };
}
982
+
983
// Clears the conversation by delegating to the C++ context's
// clearConversation(); nothing is returned.
- (void)clearConversation {
    auto *engine = llama;
    engine->clearConversation();
}
986
+
987
// Returns YES when the C++ context's isConversationActive() reports true,
// NO otherwise.
- (BOOL)isConversationActive {
    const bool active = llama->isConversationActive();
    return active ? YES : NO;
}
990
+
964
991
  - (void)invalidate {
965
992
  delete llama;
966
993
  // llama_backend_free();
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include <sstream>
5
5
  #include <iostream>
6
+ #include <chrono>
6
7
  #include "chat.h"
7
8
  #include "common.h"
8
9
  #include "ggml.h"
@@ -48,6 +49,13 @@ struct completion_token_output
48
49
  llama_token tok;
49
50
  };
50
51
 
52
// Value returned by cactus_context::continueConversation(): the generated
// text plus timing and token-count metrics. Every member has a default so a
// default-constructed result never exposes indeterminate values.
struct conversation_result {
    std::string text;                                  // generated reply text
    std::chrono::milliseconds time_to_first_token{0};  // presumably latency until the first token - confirm
    std::chrono::milliseconds total_time{0};           // presumably duration of the whole call - confirm
    int tokens_generated = 0;                          // count of generated tokens
};
58
+
51
59
  struct cactus_tokenize_result {
52
60
  std::vector<llama_token> tokens;
53
61
  bool has_media = false;
@@ -112,6 +120,10 @@ struct cactus_context {
112
120
  bool has_vocoder = false;
113
121
  std::vector<llama_token> audio_tokens;
114
122
 
123
+ // Conversation management state
124
+ bool conversation_active = false;
125
+ std::string last_chat_template = "";
126
+
115
127
  ~cactus_context();
116
128
 
117
129
  void rewind();
@@ -180,6 +192,12 @@ struct cactus_context {
180
192
  std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
181
193
  std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
182
194
  void releaseVocoder();
195
+
196
+ // High-level conversation management API
197
+ std::string generateResponse(const std::string &user_message, int max_tokens = 200);
198
+ conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
199
+ void clearConversation();
200
+ bool isConversationActive() const;
183
201
  };
184
202
 
185
203
  extern bool cactus_verbose;
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
127
127
  cactus_completion_result_c_t* result
128
128
  );
129
129
 
130
+ // **MULTIMODAL COMPLETION**
131
+ CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
132
+ cactus_context_handle_t handle,
133
+ const cactus_completion_params_c_t* params,
134
+ const char** media_paths,
135
+ int media_count,
136
+ cactus_completion_result_c_t* result
137
+ );
138
+
130
139
  CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
131
140
 
132
141
  CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
207
216
  CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
208
217
  CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
209
218
 
219
+ // **ADVANCED: Chat with Jinja and Tools Support**
220
+ typedef struct {
221
+ char* prompt;
222
+ char* json_schema;
223
+ char* tools;
224
+ char* tool_choice;
225
+ bool parallel_tool_calls;
226
+ } cactus_chat_result_c_t;
227
+
228
+ CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
229
+ cactus_context_handle_t handle,
230
+ const char* messages,
231
+ const char* chat_template,
232
+ const char* json_schema,
233
+ const char* tools,
234
+ bool parallel_tool_calls,
235
+ const char* tool_choice
236
+ );
237
+
210
238
  // **HIGH PRIORITY: Context Management**
211
239
  CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
212
240
  CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
213
241
 
242
+ // **COMPLETION CONTROL**
243
+ CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
244
+ CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
245
+ CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
246
+ CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
247
+
248
+ // **TOKEN PROCESSING**
249
+ CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
250
+ CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
251
+
214
252
  // **HIGH PRIORITY: Model Information**
215
253
  CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
216
254
  CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
221
259
  // Memory management functions
222
260
  CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
223
261
  CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
262
+ CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
224
263
 
225
264
  #ifdef __cplusplus
226
265
  }
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include <sstream>
5
5
  #include <iostream>
6
+ #include <chrono>
6
7
  #include "chat.h"
7
8
  #include "common.h"
8
9
  #include "ggml.h"
@@ -48,6 +49,13 @@ struct completion_token_output
48
49
  llama_token tok;
49
50
  };
50
51
 
52
// Value returned by cactus_context::continueConversation(): the generated
// text plus timing and token-count metrics. Every member has a default so a
// default-constructed result never exposes indeterminate values.
struct conversation_result {
    std::string text;                                  // generated reply text
    std::chrono::milliseconds time_to_first_token{0};  // presumably latency until the first token - confirm
    std::chrono::milliseconds total_time{0};           // presumably duration of the whole call - confirm
    int tokens_generated = 0;                          // count of generated tokens
};
58
+
51
59
  struct cactus_tokenize_result {
52
60
  std::vector<llama_token> tokens;
53
61
  bool has_media = false;
@@ -112,6 +120,10 @@ struct cactus_context {
112
120
  bool has_vocoder = false;
113
121
  std::vector<llama_token> audio_tokens;
114
122
 
123
+ // Conversation management state
124
+ bool conversation_active = false;
125
+ std::string last_chat_template = "";
126
+
115
127
  ~cactus_context();
116
128
 
117
129
  void rewind();
@@ -180,6 +192,12 @@ struct cactus_context {
180
192
  std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
181
193
  std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
182
194
  void releaseVocoder();
195
+
196
+ // High-level conversation management API
197
+ std::string generateResponse(const std::string &user_message, int max_tokens = 200);
198
+ conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
199
+ void clearConversation();
200
+ bool isConversationActive() const;
183
201
  };
184
202
 
185
203
  extern bool cactus_verbose;
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
127
127
  cactus_completion_result_c_t* result
128
128
  );
129
129
 
130
+ // **MULTIMODAL COMPLETION**
131
+ CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
132
+ cactus_context_handle_t handle,
133
+ const cactus_completion_params_c_t* params,
134
+ const char** media_paths,
135
+ int media_count,
136
+ cactus_completion_result_c_t* result
137
+ );
138
+
130
139
  CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
131
140
 
132
141
  CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
207
216
  CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
208
217
  CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
209
218
 
219
+ // **ADVANCED: Chat with Jinja and Tools Support**
220
+ typedef struct {
221
+ char* prompt;
222
+ char* json_schema;
223
+ char* tools;
224
+ char* tool_choice;
225
+ bool parallel_tool_calls;
226
+ } cactus_chat_result_c_t;
227
+
228
+ CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
229
+ cactus_context_handle_t handle,
230
+ const char* messages,
231
+ const char* chat_template,
232
+ const char* json_schema,
233
+ const char* tools,
234
+ bool parallel_tool_calls,
235
+ const char* tool_choice
236
+ );
237
+
210
238
  // **HIGH PRIORITY: Context Management**
211
239
  CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
212
240
  CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
213
241
 
242
+ // **COMPLETION CONTROL**
243
+ CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
244
+ CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
245
+ CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
246
+ CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
247
+
248
+ // **TOKEN PROCESSING**
249
+ CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
250
+ CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
251
+
214
252
  // **HIGH PRIORITY: Model Information**
215
253
  CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
216
254
  CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
221
259
  // Memory management functions
222
260
  CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
223
261
  CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
262
+ CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
224
263
 
225
264
  #ifdef __cplusplus
226
265
  }
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include <sstream>
5
5
  #include <iostream>
6
+ #include <chrono>
6
7
  #include "chat.h"
7
8
  #include "common.h"
8
9
  #include "ggml.h"
@@ -48,6 +49,13 @@ struct completion_token_output
48
49
  llama_token tok;
49
50
  };
50
51
 
52
// Value returned by cactus_context::continueConversation(): the generated
// text plus timing and token-count metrics. Every member has a default so a
// default-constructed result never exposes indeterminate values.
struct conversation_result {
    std::string text;                                  // generated reply text
    std::chrono::milliseconds time_to_first_token{0};  // presumably latency until the first token - confirm
    std::chrono::milliseconds total_time{0};           // presumably duration of the whole call - confirm
    int tokens_generated = 0;                          // count of generated tokens
};
58
+
51
59
  struct cactus_tokenize_result {
52
60
  std::vector<llama_token> tokens;
53
61
  bool has_media = false;
@@ -112,6 +120,10 @@ struct cactus_context {
112
120
  bool has_vocoder = false;
113
121
  std::vector<llama_token> audio_tokens;
114
122
 
123
+ // Conversation management state
124
+ bool conversation_active = false;
125
+ std::string last_chat_template = "";
126
+
115
127
  ~cactus_context();
116
128
 
117
129
  void rewind();
@@ -180,6 +192,12 @@ struct cactus_context {
180
192
  std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
181
193
  std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
182
194
  void releaseVocoder();
195
+
196
+ // High-level conversation management API
197
+ std::string generateResponse(const std::string &user_message, int max_tokens = 200);
198
+ conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
199
+ void clearConversation();
200
+ bool isConversationActive() const;
183
201
  };
184
202
 
185
203
  extern bool cactus_verbose;
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
127
127
  cactus_completion_result_c_t* result
128
128
  );
129
129
 
130
+ // **MULTIMODAL COMPLETION**
131
+ CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
132
+ cactus_context_handle_t handle,
133
+ const cactus_completion_params_c_t* params,
134
+ const char** media_paths,
135
+ int media_count,
136
+ cactus_completion_result_c_t* result
137
+ );
138
+
130
139
  CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
131
140
 
132
141
  CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
207
216
  CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
208
217
  CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
209
218
 
219
+ // **ADVANCED: Chat with Jinja and Tools Support**
220
+ typedef struct {
221
+ char* prompt;
222
+ char* json_schema;
223
+ char* tools;
224
+ char* tool_choice;
225
+ bool parallel_tool_calls;
226
+ } cactus_chat_result_c_t;
227
+
228
+ CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
229
+ cactus_context_handle_t handle,
230
+ const char* messages,
231
+ const char* chat_template,
232
+ const char* json_schema,
233
+ const char* tools,
234
+ bool parallel_tool_calls,
235
+ const char* tool_choice
236
+ );
237
+
210
238
  // **HIGH PRIORITY: Context Management**
211
239
  CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
212
240
  CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
213
241
 
242
+ // **COMPLETION CONTROL**
243
+ CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
244
+ CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
245
+ CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
246
+ CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
247
+
248
+ // **TOKEN PROCESSING**
249
+ CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
250
+ CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
251
+
214
252
  // **HIGH PRIORITY: Model Information**
215
253
  CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
216
254
  CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
221
259
  // Memory management functions
222
260
  CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
223
261
  CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
262
+ CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
224
263
 
225
264
  #ifdef __cplusplus
226
265
  }
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include <sstream>
5
5
  #include <iostream>
6
+ #include <chrono>
6
7
  #include "chat.h"
7
8
  #include "common.h"
8
9
  #include "ggml.h"
@@ -48,6 +49,13 @@ struct completion_token_output
48
49
  llama_token tok;
49
50
  };
50
51
 
52
// Value returned by cactus_context::continueConversation(): the generated
// text plus timing and token-count metrics. Every member has a default so a
// default-constructed result never exposes indeterminate values.
struct conversation_result {
    std::string text;                                  // generated reply text
    std::chrono::milliseconds time_to_first_token{0};  // presumably latency until the first token - confirm
    std::chrono::milliseconds total_time{0};           // presumably duration of the whole call - confirm
    int tokens_generated = 0;                          // count of generated tokens
};
58
+
51
59
  struct cactus_tokenize_result {
52
60
  std::vector<llama_token> tokens;
53
61
  bool has_media = false;
@@ -112,6 +120,10 @@ struct cactus_context {
112
120
  bool has_vocoder = false;
113
121
  std::vector<llama_token> audio_tokens;
114
122
 
123
+ // Conversation management state
124
+ bool conversation_active = false;
125
+ std::string last_chat_template = "";
126
+
115
127
  ~cactus_context();
116
128
 
117
129
  void rewind();
@@ -180,6 +192,12 @@ struct cactus_context {
180
192
  std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
181
193
  std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
182
194
  void releaseVocoder();
195
+
196
+ // High-level conversation management API
197
+ std::string generateResponse(const std::string &user_message, int max_tokens = 200);
198
+ conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
199
+ void clearConversation();
200
+ bool isConversationActive() const;
183
201
  };
184
202
 
185
203
  extern bool cactus_verbose;
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
127
127
  cactus_completion_result_c_t* result
128
128
  );
129
129
 
130
+ // **MULTIMODAL COMPLETION**
131
+ CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
132
+ cactus_context_handle_t handle,
133
+ const cactus_completion_params_c_t* params,
134
+ const char** media_paths,
135
+ int media_count,
136
+ cactus_completion_result_c_t* result
137
+ );
138
+
130
139
  CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
131
140
 
132
141
  CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
207
216
  CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
208
217
  CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
209
218
 
219
+ // **ADVANCED: Chat with Jinja and Tools Support**
220
+ typedef struct {
221
+ char* prompt;
222
+ char* json_schema;
223
+ char* tools;
224
+ char* tool_choice;
225
+ bool parallel_tool_calls;
226
+ } cactus_chat_result_c_t;
227
+
228
+ CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
229
+ cactus_context_handle_t handle,
230
+ const char* messages,
231
+ const char* chat_template,
232
+ const char* json_schema,
233
+ const char* tools,
234
+ bool parallel_tool_calls,
235
+ const char* tool_choice
236
+ );
237
+
210
238
  // **HIGH PRIORITY: Context Management**
211
239
  CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
212
240
  CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
213
241
 
242
+ // **COMPLETION CONTROL**
243
+ CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
244
+ CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
245
+ CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
246
+ CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
247
+
248
+ // **TOKEN PROCESSING**
249
+ CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
250
+ CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
251
+
214
252
  // **HIGH PRIORITY: Model Information**
215
253
  CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
216
254
  CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
221
259
  // Memory management functions
222
260
  CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
223
261
  CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
262
+ CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
224
263
 
225
264
  #ifdef __cplusplus
226
265
  }
@@ -6,5 +6,6 @@ Object.defineProperty(exports, "__esModule", {
6
6
  exports.default = void 0;
7
7
  var _reactNative = require("react-native");
8
8
  // New TTS/Audio types
9
+ // New conversation management types
9
10
  var _default = exports.default = _reactNative.TurboModuleRegistry.get('Cactus');
10
11
  //# sourceMappingURL=NativeCactus.js.map
@@ -1 +1 @@
1
- {"version":3,"names":["_reactNative","require","_default","exports","default","TurboModuleRegistry","get"],"sourceRoot":"../../src","sources":["NativeCactus.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAkSA;AAAA,IAAAC,QAAA,GAAAC,OAAA,CAAAC,OAAA,GA2LeC,gCAAmB,CAACC,GAAG,CAAO,QAAQ,CAAC","ignoreList":[]}
1
+ {"version":3,"names":["_reactNative","require","_default","exports","default","TurboModuleRegistry","get"],"sourceRoot":"../../src","sources":["NativeCactus.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAkSA;AAiBA;AAAA,IAAAC,QAAA,GAAAC,OAAA,CAAAC,OAAA,GAgMeC,gCAAmB,CAACC,GAAG,CAAO,QAAQ,CAAC","ignoreList":[]}
@@ -216,6 +216,35 @@ class LlamaContext {
216
216
  stopCompletion() {
217
217
  return _NativeCactus.default.stopCompletion(this.id);
218
218
  }
219
+
220
+ // New conversation management methods
221
+ /**
222
+ * Generate a response to a user message (simple text return)
223
+ */
224
+ async generateResponse(userMessage, maxTokens = 200) {
225
+ return _NativeCactus.default.generateResponse(this.id, userMessage, maxTokens);
226
+ }
227
+
228
+ /**
229
+ * Continue conversation with detailed timing information
230
+ */
231
+ async continueConversation(userMessage, maxTokens = 200) {
232
+ return _NativeCactus.default.continueConversation(this.id, userMessage, maxTokens);
233
+ }
234
+
235
+ /**
236
+ * Clear conversation history and reset KV cache
237
+ */
238
+ async clearConversation() {
239
+ return _NativeCactus.default.clearConversation(this.id);
240
+ }
241
+
242
+ /**
243
+ * Check if conversation is currently active
244
+ */
245
+ async isConversationActive() {
246
+ return _NativeCactus.default.isConversationActive(this.id);
247
+ }
219
248
  tokenize(text) {
220
249
  return _NativeCactus.default.tokenize(this.id, text);
221
250
  }