cactus-react-native 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +0 -4
- package/android/src/main/java/com/cactus/Cactus.java +179 -42
- package/android/src/main/java/com/cactus/LlamaContext.java +22 -0
- package/android/src/main/jni.cpp +53 -1
- package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
- package/android/src/newarch/java/com/cactus/CactusModule.java +20 -0
- package/android/src/oldarch/java/com/cactus/CactusModule.java +20 -0
- package/ios/CMakeLists.txt +2 -0
- package/ios/Cactus.mm +80 -0
- package/ios/CactusContext.h +6 -0
- package/ios/CactusContext.mm +27 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +18 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +39 -0
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +18 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +39 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +18 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +39 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +18 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +39 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/lib/commonjs/NativeCactus.js +1 -0
- package/lib/commonjs/NativeCactus.js.map +1 -1
- package/lib/commonjs/index.js +29 -0
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeCactus.js +2 -0
- package/lib/module/NativeCactus.js.map +1 -1
- package/lib/module/index.js +29 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeCactus.d.ts +10 -0
- package/lib/typescript/NativeCactus.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +17 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeCactus.ts +22 -0
- package/src/index.ts +36 -0
package/ios/Cactus.mm
CHANGED
|
@@ -643,4 +643,84 @@ RCT_EXPORT_METHOD(releaseVocoder:(double)contextId
|
|
|
643
643
|
}
|
|
644
644
|
#endif
|
|
645
645
|
|
|
646
|
+
// New conversation management methods
|
|
647
|
+
/**
 * Generates a reply to `userMessage` on the context registered under
 * `contextId` and resolves the promise with the generated text.
 *
 * Rejects with `llama_error` when the context id is unknown, when no message
 * was supplied, or when the context reports a prediction in progress; rejects
 * with `llama_cpp_error` when generation throws.
 */
RCT_EXPORT_METHOD(generateResponse:(double)contextId
                  userMessage:(NSString *)userMessage
                  maxTokens:(double)maxTokens
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
    if (context == nil) {
        reject(@"llama_error", @"Context not found", nil);
        return;
    }
    // A null/undefined message from JS arrives as nil; fail the promise here
    // instead of handing a nil string to the native layer.
    if (userMessage == nil) {
        reject(@"llama_error", @"User message is required", nil);
        return;
    }
    if ([context isPredicting]) {
        reject(@"llama_error", @"Context is busy", nil);
        return;
    }
    dispatch_async(llamaDQueue, ^{
        @try {
            @autoreleasepool {
                NSString *result = [context generateResponse:userMessage maxTokens:(int)maxTokens];
                resolve(result);
            }
        } @catch (NSException *exception) {
            reject(@"llama_cpp_error", exception.reason, nil);
        } @catch (...) {
            // Anything that is not an NSException (for example a C++ exception
            // raised by the native library) would otherwise escape this block
            // and terminate the process.
            reject(@"llama_cpp_error", @"Unknown native error", nil);
        }
    });
}
|
|
673
|
+
|
|
674
|
+
/**
 * Continues the conversation on the context registered under `contextId` with
 * `userMessage` and resolves the promise with the dictionary returned by
 * -[CactusContext continueConversation:maxTokens:].
 *
 * Rejects with `llama_error` when the context id is unknown, when no message
 * was supplied, or when the context reports a prediction in progress; rejects
 * with `llama_cpp_error` when generation throws.
 */
RCT_EXPORT_METHOD(continueConversation:(double)contextId
                  userMessage:(NSString *)userMessage
                  maxTokens:(double)maxTokens
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
    if (context == nil) {
        reject(@"llama_error", @"Context not found", nil);
        return;
    }
    // A null/undefined message from JS arrives as nil; fail the promise here
    // instead of handing a nil string to the native layer.
    if (userMessage == nil) {
        reject(@"llama_error", @"User message is required", nil);
        return;
    }
    if ([context isPredicting]) {
        reject(@"llama_error", @"Context is busy", nil);
        return;
    }
    dispatch_async(llamaDQueue, ^{
        @try {
            @autoreleasepool {
                NSDictionary *result = [context continueConversation:userMessage maxTokens:(int)maxTokens];
                resolve(result);
            }
        } @catch (NSException *exception) {
            reject(@"llama_cpp_error", exception.reason, nil);
        } @catch (...) {
            // Anything that is not an NSException (for example a C++ exception
            // raised by the native library) would otherwise escape this block
            // and terminate the process.
            reject(@"llama_cpp_error", @"Unknown native error", nil);
        }
    });
}
|
|
700
|
+
|
|
701
|
+
/**
 * Clears the conversation state of the context registered under `contextId`
 * and resolves the promise with nil once that has happened.
 *
 * Rejects with `llama_error` when the context id is unknown and with
 * `llama_cpp_error` when clearing throws.
 */
RCT_EXPORT_METHOD(clearConversation:(double)contextId
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    CactusContext *context = llamaContexts[[NSNumber numberWithDouble:contextId]];
    if (context == nil) {
        reject(@"llama_error", @"Context not found", nil);
        return;
    }
    // Run on the same queue the generation methods use so the context is not
    // reset from the bridge thread while a generation block is still touching it.
    // NOTE(review): this orders the calls only if llamaDQueue is serial - confirm.
    dispatch_async(llamaDQueue, ^{
        @try {
            [context clearConversation];
            resolve(nil);
        } @catch (NSException *exception) {
            reject(@"llama_cpp_error", exception.reason, nil);
        } @catch (...) {
            // Non-NSException throwables (e.g. C++ exceptions) must not escape the block.
            reject(@"llama_cpp_error", @"Unknown native error", nil);
        }
    });
}
|
|
713
|
+
|
|
714
|
+
/**
 * Resolves the promise with a boxed BOOL taken from
 * -[CactusContext isConversationActive] for the context registered under
 * `contextId`; rejects with `llama_error` when no such context exists.
 */
RCT_EXPORT_METHOD(isConversationActive:(double)contextId
                  withResolver:(RCTPromiseResolveBlock)resolve
                  withRejecter:(RCTPromiseRejectBlock)reject)
{
    NSNumber *contextKey = @(contextId);
    CactusContext *target = llamaContexts[contextKey];
    if (!target) {
        reject(@"llama_error", @"Context not found", nil);
        return;
    }

    BOOL active = [target isConversationActive];
    resolve(@(active));
}
|
|
725
|
+
|
|
646
726
|
@end
|
package/ios/CactusContext.h
CHANGED
|
@@ -74,6 +74,12 @@
|
|
|
74
74
|
- (NSArray *)decodeAudioTokens:(NSArray *)tokens;
|
|
75
75
|
- (void)releaseVocoder;
|
|
76
76
|
|
|
77
|
+
// New conversation management methods
|
|
78
|
+
- (NSString *)generateResponse:(NSString *)userMessage maxTokens:(int)maxTokens;
|
|
79
|
+
- (NSDictionary *)continueConversation:(NSString *)userMessage maxTokens:(int)maxTokens;
|
|
80
|
+
- (void)clearConversation;
|
|
81
|
+
- (BOOL)isConversationActive;
|
|
82
|
+
|
|
77
83
|
- (void)invalidate;
|
|
78
84
|
|
|
79
85
|
@end
|
package/ios/CactusContext.mm
CHANGED
|
@@ -961,6 +961,33 @@
|
|
|
961
961
|
llama->releaseVocoder();
|
|
962
962
|
}
|
|
963
963
|
|
|
964
|
+
// New conversation management methods
|
|
965
|
+
/**
 * Passes `userMessage` and `maxTokens` to the native context's
 * generateResponse() and returns the produced text.
 *
 * @param userMessage Message to answer; nil is treated as an empty message.
 * @param maxTokens   Forwarded unchanged to the native call.
 * @return The generated text, or an empty string when the native bytes are not
 *         valid UTF-8. Never nil.
 */
- (NSString *)generateResponse:(NSString *)userMessage maxTokens:(int)maxTokens {
    // -UTF8String yields NULL for a nil receiver, and constructing a
    // std::string from NULL is undefined behaviour.
    const char *messageUTF8 = [userMessage UTF8String];
    std::string message = messageUTF8 != NULL ? messageUTF8 : "";

    std::string result;
    try {
        result = llama->generateResponse(message, maxTokens);
    } catch (...) {
        // Apply the same flag reset as the success path before propagating,
        // so a failed call does not leave the flag set.
        llama->is_predicting = false;
        throw;
    }
    llama->is_predicting = false;

    // +stringWithUTF8String: returns nil for malformed UTF-8 (for instance a
    // multi-byte character cut off when generation stops); never hand nil back.
    NSString *text = [NSString stringWithUTF8String:result.c_str()];
    return text ?: @"";
}
|
|
970
|
+
|
|
971
|
+
/**
 * Passes `userMessage` and `maxTokens` to the native context's
 * continueConversation() and converts the result into a dictionary.
 *
 * @param userMessage Message to answer; nil is treated as an empty message.
 * @param maxTokens   Forwarded unchanged to the native call.
 * @return Dictionary with keys `text` (NSString), `time_to_first_token` and
 *         `total_time` (NSNumber holding the millisecond counts) and
 *         `tokens_generated` (NSNumber).
 */
- (NSDictionary *)continueConversation:(NSString *)userMessage maxTokens:(int)maxTokens {
    // -UTF8String yields NULL for a nil receiver, and constructing a
    // std::string from NULL is undefined behaviour.
    const char *messageUTF8 = [userMessage UTF8String];
    std::string message = messageUTF8 != NULL ? messageUTF8 : "";

    cactus::conversation_result result;
    try {
        result = llama->continueConversation(message, maxTokens);
    } catch (...) {
        // Apply the same flag reset as the success path before propagating,
        // so a failed call does not leave the flag set.
        llama->is_predicting = false;
        throw;
    }
    llama->is_predicting = false;

    // +stringWithUTF8String: returns nil for malformed UTF-8, and a nil value
    // inside a dictionary literal raises NSInvalidArgumentException.
    NSString *text = [NSString stringWithUTF8String:result.text.c_str()];

    return @{
        @"text": text ?: @"",
        @"time_to_first_token": @(result.time_to_first_token.count()),
        @"total_time": @(result.total_time.count()),
        @"tokens_generated": @(result.tokens_generated)
    };
}
|
|
982
|
+
|
|
983
|
+
/** Forwards to clearConversation() on the wrapped native context. */
- (void)clearConversation {
    auto *nativeContext = llama;
    nativeContext->clearConversation();
}
|
|
986
|
+
|
|
987
|
+
/** Returns YES when the wrapped native context's isConversationActive() is true. */
- (BOOL)isConversationActive {
    const bool active = llama->isConversationActive();
    return active ? YES : NO;
}
|
|
990
|
+
|
|
964
991
|
- (void)invalidate {
|
|
965
992
|
delete llama;
|
|
966
993
|
// llama_backend_free();
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <iostream>
|
|
6
|
+
#include <chrono>
|
|
6
7
|
#include "chat.h"
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "ggml.h"
|
|
@@ -48,6 +49,13 @@ struct completion_token_output
|
|
|
48
49
|
llama_token tok;
|
|
49
50
|
};
|
|
50
51
|
|
|
52
|
+
// Value returned by cactus_context::continueConversation().
// Numeric members are zero-initialised so a default-constructed result never
// exposes indeterminate timings or counts.
struct conversation_result {
    std::string text;                                  // generated text
    std::chrono::milliseconds time_to_first_token{0};  // milliseconds
    std::chrono::milliseconds total_time{0};           // milliseconds
    int tokens_generated = 0;                          // number of tokens generated
};
|
|
58
|
+
|
|
51
59
|
struct cactus_tokenize_result {
|
|
52
60
|
std::vector<llama_token> tokens;
|
|
53
61
|
bool has_media = false;
|
|
@@ -112,6 +120,10 @@ struct cactus_context {
|
|
|
112
120
|
bool has_vocoder = false;
|
|
113
121
|
std::vector<llama_token> audio_tokens;
|
|
114
122
|
|
|
123
|
+
// Conversation management state
|
|
124
|
+
bool conversation_active = false;
|
|
125
|
+
std::string last_chat_template = "";
|
|
126
|
+
|
|
115
127
|
~cactus_context();
|
|
116
128
|
|
|
117
129
|
void rewind();
|
|
@@ -180,6 +192,12 @@ struct cactus_context {
|
|
|
180
192
|
std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
|
|
181
193
|
std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
|
|
182
194
|
void releaseVocoder();
|
|
195
|
+
|
|
196
|
+
// High-level conversation management API
|
|
197
|
+
std::string generateResponse(const std::string &user_message, int max_tokens = 200);
|
|
198
|
+
conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
|
|
199
|
+
void clearConversation();
|
|
200
|
+
bool isConversationActive() const;
|
|
183
201
|
};
|
|
184
202
|
|
|
185
203
|
extern bool cactus_verbose;
|
|
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
|
|
|
127
127
|
cactus_completion_result_c_t* result
|
|
128
128
|
);
|
|
129
129
|
|
|
130
|
+
// **MULTIMODAL COMPLETION**
|
|
131
|
+
CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
|
|
132
|
+
cactus_context_handle_t handle,
|
|
133
|
+
const cactus_completion_params_c_t* params,
|
|
134
|
+
const char** media_paths,
|
|
135
|
+
int media_count,
|
|
136
|
+
cactus_completion_result_c_t* result
|
|
137
|
+
);
|
|
138
|
+
|
|
130
139
|
CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
|
|
131
140
|
|
|
132
141
|
CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
|
|
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
|
|
|
207
216
|
CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
|
|
208
217
|
CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
|
|
209
218
|
|
|
219
|
+
// **ADVANCED: Chat with Jinja and Tools Support**
|
|
220
|
+
typedef struct {
|
|
221
|
+
char* prompt;
|
|
222
|
+
char* json_schema;
|
|
223
|
+
char* tools;
|
|
224
|
+
char* tool_choice;
|
|
225
|
+
bool parallel_tool_calls;
|
|
226
|
+
} cactus_chat_result_c_t;
|
|
227
|
+
|
|
228
|
+
CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
|
|
229
|
+
cactus_context_handle_t handle,
|
|
230
|
+
const char* messages,
|
|
231
|
+
const char* chat_template,
|
|
232
|
+
const char* json_schema,
|
|
233
|
+
const char* tools,
|
|
234
|
+
bool parallel_tool_calls,
|
|
235
|
+
const char* tool_choice
|
|
236
|
+
);
|
|
237
|
+
|
|
210
238
|
// **HIGH PRIORITY: Context Management**
|
|
211
239
|
CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
|
|
212
240
|
CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
|
|
213
241
|
|
|
242
|
+
// **COMPLETION CONTROL**
|
|
243
|
+
CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
|
|
244
|
+
CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
|
|
245
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
|
|
246
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
|
|
247
|
+
|
|
248
|
+
// **TOKEN PROCESSING**
|
|
249
|
+
CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
|
|
250
|
+
CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
|
|
251
|
+
|
|
214
252
|
// **HIGH PRIORITY: Model Information**
|
|
215
253
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
|
|
216
254
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
|
|
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
|
|
|
221
259
|
// Memory management functions
|
|
222
260
|
CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
|
|
223
261
|
CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
|
|
262
|
+
CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
|
|
224
263
|
|
|
225
264
|
#ifdef __cplusplus
|
|
226
265
|
}
|
|
Binary file
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <iostream>
|
|
6
|
+
#include <chrono>
|
|
6
7
|
#include "chat.h"
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "ggml.h"
|
|
@@ -48,6 +49,13 @@ struct completion_token_output
|
|
|
48
49
|
llama_token tok;
|
|
49
50
|
};
|
|
50
51
|
|
|
52
|
+
// Value returned by cactus_context::continueConversation().
// Numeric members are zero-initialised so a default-constructed result never
// exposes indeterminate timings or counts.
struct conversation_result {
    std::string text;                                  // generated text
    std::chrono::milliseconds time_to_first_token{0};  // milliseconds
    std::chrono::milliseconds total_time{0};           // milliseconds
    int tokens_generated = 0;                          // number of tokens generated
};
|
|
58
|
+
|
|
51
59
|
struct cactus_tokenize_result {
|
|
52
60
|
std::vector<llama_token> tokens;
|
|
53
61
|
bool has_media = false;
|
|
@@ -112,6 +120,10 @@ struct cactus_context {
|
|
|
112
120
|
bool has_vocoder = false;
|
|
113
121
|
std::vector<llama_token> audio_tokens;
|
|
114
122
|
|
|
123
|
+
// Conversation management state
|
|
124
|
+
bool conversation_active = false;
|
|
125
|
+
std::string last_chat_template = "";
|
|
126
|
+
|
|
115
127
|
~cactus_context();
|
|
116
128
|
|
|
117
129
|
void rewind();
|
|
@@ -180,6 +192,12 @@ struct cactus_context {
|
|
|
180
192
|
std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
|
|
181
193
|
std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
|
|
182
194
|
void releaseVocoder();
|
|
195
|
+
|
|
196
|
+
// High-level conversation management API
|
|
197
|
+
std::string generateResponse(const std::string &user_message, int max_tokens = 200);
|
|
198
|
+
conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
|
|
199
|
+
void clearConversation();
|
|
200
|
+
bool isConversationActive() const;
|
|
183
201
|
};
|
|
184
202
|
|
|
185
203
|
extern bool cactus_verbose;
|
package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h
CHANGED
|
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
|
|
|
127
127
|
cactus_completion_result_c_t* result
|
|
128
128
|
);
|
|
129
129
|
|
|
130
|
+
// **MULTIMODAL COMPLETION**
|
|
131
|
+
CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
|
|
132
|
+
cactus_context_handle_t handle,
|
|
133
|
+
const cactus_completion_params_c_t* params,
|
|
134
|
+
const char** media_paths,
|
|
135
|
+
int media_count,
|
|
136
|
+
cactus_completion_result_c_t* result
|
|
137
|
+
);
|
|
138
|
+
|
|
130
139
|
CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
|
|
131
140
|
|
|
132
141
|
CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
|
|
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
|
|
|
207
216
|
CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
|
|
208
217
|
CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
|
|
209
218
|
|
|
219
|
+
// **ADVANCED: Chat with Jinja and Tools Support**
|
|
220
|
+
typedef struct {
|
|
221
|
+
char* prompt;
|
|
222
|
+
char* json_schema;
|
|
223
|
+
char* tools;
|
|
224
|
+
char* tool_choice;
|
|
225
|
+
bool parallel_tool_calls;
|
|
226
|
+
} cactus_chat_result_c_t;
|
|
227
|
+
|
|
228
|
+
CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
|
|
229
|
+
cactus_context_handle_t handle,
|
|
230
|
+
const char* messages,
|
|
231
|
+
const char* chat_template,
|
|
232
|
+
const char* json_schema,
|
|
233
|
+
const char* tools,
|
|
234
|
+
bool parallel_tool_calls,
|
|
235
|
+
const char* tool_choice
|
|
236
|
+
);
|
|
237
|
+
|
|
210
238
|
// **HIGH PRIORITY: Context Management**
|
|
211
239
|
CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
|
|
212
240
|
CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
|
|
213
241
|
|
|
242
|
+
// **COMPLETION CONTROL**
|
|
243
|
+
CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
|
|
244
|
+
CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
|
|
245
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
|
|
246
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
|
|
247
|
+
|
|
248
|
+
// **TOKEN PROCESSING**
|
|
249
|
+
CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
|
|
250
|
+
CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
|
|
251
|
+
|
|
214
252
|
// **HIGH PRIORITY: Model Information**
|
|
215
253
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
|
|
216
254
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
|
|
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
|
|
|
221
259
|
// Memory management functions
|
|
222
260
|
CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
|
|
223
261
|
CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
|
|
262
|
+
CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
|
|
224
263
|
|
|
225
264
|
#ifdef __cplusplus
|
|
226
265
|
}
|
|
Binary file
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <iostream>
|
|
6
|
+
#include <chrono>
|
|
6
7
|
#include "chat.h"
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "ggml.h"
|
|
@@ -48,6 +49,13 @@ struct completion_token_output
|
|
|
48
49
|
llama_token tok;
|
|
49
50
|
};
|
|
50
51
|
|
|
52
|
+
// Value returned by cactus_context::continueConversation().
// Numeric members are zero-initialised so a default-constructed result never
// exposes indeterminate timings or counts.
struct conversation_result {
    std::string text;                                  // generated text
    std::chrono::milliseconds time_to_first_token{0};  // milliseconds
    std::chrono::milliseconds total_time{0};           // milliseconds
    int tokens_generated = 0;                          // number of tokens generated
};
|
|
58
|
+
|
|
51
59
|
struct cactus_tokenize_result {
|
|
52
60
|
std::vector<llama_token> tokens;
|
|
53
61
|
bool has_media = false;
|
|
@@ -112,6 +120,10 @@ struct cactus_context {
|
|
|
112
120
|
bool has_vocoder = false;
|
|
113
121
|
std::vector<llama_token> audio_tokens;
|
|
114
122
|
|
|
123
|
+
// Conversation management state
|
|
124
|
+
bool conversation_active = false;
|
|
125
|
+
std::string last_chat_template = "";
|
|
126
|
+
|
|
115
127
|
~cactus_context();
|
|
116
128
|
|
|
117
129
|
void rewind();
|
|
@@ -180,6 +192,12 @@ struct cactus_context {
|
|
|
180
192
|
std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
|
|
181
193
|
std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
|
|
182
194
|
void releaseVocoder();
|
|
195
|
+
|
|
196
|
+
// High-level conversation management API
|
|
197
|
+
std::string generateResponse(const std::string &user_message, int max_tokens = 200);
|
|
198
|
+
conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
|
|
199
|
+
void clearConversation();
|
|
200
|
+
bool isConversationActive() const;
|
|
183
201
|
};
|
|
184
202
|
|
|
185
203
|
extern bool cactus_verbose;
|
|
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
|
|
|
127
127
|
cactus_completion_result_c_t* result
|
|
128
128
|
);
|
|
129
129
|
|
|
130
|
+
// **MULTIMODAL COMPLETION**
|
|
131
|
+
CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
|
|
132
|
+
cactus_context_handle_t handle,
|
|
133
|
+
const cactus_completion_params_c_t* params,
|
|
134
|
+
const char** media_paths,
|
|
135
|
+
int media_count,
|
|
136
|
+
cactus_completion_result_c_t* result
|
|
137
|
+
);
|
|
138
|
+
|
|
130
139
|
CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
|
|
131
140
|
|
|
132
141
|
CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
|
|
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
|
|
|
207
216
|
CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
|
|
208
217
|
CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
|
|
209
218
|
|
|
219
|
+
// **ADVANCED: Chat with Jinja and Tools Support**
|
|
220
|
+
typedef struct {
|
|
221
|
+
char* prompt;
|
|
222
|
+
char* json_schema;
|
|
223
|
+
char* tools;
|
|
224
|
+
char* tool_choice;
|
|
225
|
+
bool parallel_tool_calls;
|
|
226
|
+
} cactus_chat_result_c_t;
|
|
227
|
+
|
|
228
|
+
CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
|
|
229
|
+
cactus_context_handle_t handle,
|
|
230
|
+
const char* messages,
|
|
231
|
+
const char* chat_template,
|
|
232
|
+
const char* json_schema,
|
|
233
|
+
const char* tools,
|
|
234
|
+
bool parallel_tool_calls,
|
|
235
|
+
const char* tool_choice
|
|
236
|
+
);
|
|
237
|
+
|
|
210
238
|
// **HIGH PRIORITY: Context Management**
|
|
211
239
|
CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
|
|
212
240
|
CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
|
|
213
241
|
|
|
242
|
+
// **COMPLETION CONTROL**
|
|
243
|
+
CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
|
|
244
|
+
CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
|
|
245
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
|
|
246
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
|
|
247
|
+
|
|
248
|
+
// **TOKEN PROCESSING**
|
|
249
|
+
CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
|
|
250
|
+
CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
|
|
251
|
+
|
|
214
252
|
// **HIGH PRIORITY: Model Information**
|
|
215
253
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
|
|
216
254
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
|
|
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
|
|
|
221
259
|
// Memory management functions
|
|
222
260
|
CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
|
|
223
261
|
CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
|
|
262
|
+
CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
|
|
224
263
|
|
|
225
264
|
#ifdef __cplusplus
|
|
226
265
|
}
|
|
Binary file
|
package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <iostream>
|
|
6
|
+
#include <chrono>
|
|
6
7
|
#include "chat.h"
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "ggml.h"
|
|
@@ -48,6 +49,13 @@ struct completion_token_output
|
|
|
48
49
|
llama_token tok;
|
|
49
50
|
};
|
|
50
51
|
|
|
52
|
+
// Value returned by cactus_context::continueConversation().
// Numeric members are zero-initialised so a default-constructed result never
// exposes indeterminate timings or counts.
struct conversation_result {
    std::string text;                                  // generated text
    std::chrono::milliseconds time_to_first_token{0};  // milliseconds
    std::chrono::milliseconds total_time{0};           // milliseconds
    int tokens_generated = 0;                          // number of tokens generated
};
|
|
58
|
+
|
|
51
59
|
struct cactus_tokenize_result {
|
|
52
60
|
std::vector<llama_token> tokens;
|
|
53
61
|
bool has_media = false;
|
|
@@ -112,6 +120,10 @@ struct cactus_context {
|
|
|
112
120
|
bool has_vocoder = false;
|
|
113
121
|
std::vector<llama_token> audio_tokens;
|
|
114
122
|
|
|
123
|
+
// Conversation management state
|
|
124
|
+
bool conversation_active = false;
|
|
125
|
+
std::string last_chat_template = "";
|
|
126
|
+
|
|
115
127
|
~cactus_context();
|
|
116
128
|
|
|
117
129
|
void rewind();
|
|
@@ -180,6 +192,12 @@ struct cactus_context {
|
|
|
180
192
|
std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
|
|
181
193
|
std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
|
|
182
194
|
void releaseVocoder();
|
|
195
|
+
|
|
196
|
+
// High-level conversation management API
|
|
197
|
+
std::string generateResponse(const std::string &user_message, int max_tokens = 200);
|
|
198
|
+
conversation_result continueConversation(const std::string &user_message, int max_tokens = 200);
|
|
199
|
+
void clearConversation();
|
|
200
|
+
bool isConversationActive() const;
|
|
183
201
|
};
|
|
184
202
|
|
|
185
203
|
extern bool cactus_verbose;
|
package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h
CHANGED
|
@@ -127,6 +127,15 @@ CACTUS_FFI_EXPORT int cactus_completion_c(
|
|
|
127
127
|
cactus_completion_result_c_t* result
|
|
128
128
|
);
|
|
129
129
|
|
|
130
|
+
// **MULTIMODAL COMPLETION**
|
|
131
|
+
CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
|
|
132
|
+
cactus_context_handle_t handle,
|
|
133
|
+
const cactus_completion_params_c_t* params,
|
|
134
|
+
const char** media_paths,
|
|
135
|
+
int media_count,
|
|
136
|
+
cactus_completion_result_c_t* result
|
|
137
|
+
);
|
|
138
|
+
|
|
130
139
|
CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
|
|
131
140
|
|
|
132
141
|
CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
|
|
@@ -207,10 +216,39 @@ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cac
|
|
|
207
216
|
CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
|
|
208
217
|
CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template);
|
|
209
218
|
|
|
219
|
+
// **ADVANCED: Chat with Jinja and Tools Support**
|
|
220
|
+
typedef struct {
|
|
221
|
+
char* prompt;
|
|
222
|
+
char* json_schema;
|
|
223
|
+
char* tools;
|
|
224
|
+
char* tool_choice;
|
|
225
|
+
bool parallel_tool_calls;
|
|
226
|
+
} cactus_chat_result_c_t;
|
|
227
|
+
|
|
228
|
+
CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
|
|
229
|
+
cactus_context_handle_t handle,
|
|
230
|
+
const char* messages,
|
|
231
|
+
const char* chat_template,
|
|
232
|
+
const char* json_schema,
|
|
233
|
+
const char* tools,
|
|
234
|
+
bool parallel_tool_calls,
|
|
235
|
+
const char* tool_choice
|
|
236
|
+
);
|
|
237
|
+
|
|
210
238
|
// **HIGH PRIORITY: Context Management**
|
|
211
239
|
CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
|
|
212
240
|
CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle);
|
|
213
241
|
|
|
242
|
+
// **COMPLETION CONTROL**
|
|
243
|
+
CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
|
|
244
|
+
CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
|
|
245
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
|
|
246
|
+
CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
|
|
247
|
+
|
|
248
|
+
// **TOKEN PROCESSING**
|
|
249
|
+
CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text);
|
|
250
|
+
CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type);
|
|
251
|
+
|
|
214
252
|
// **HIGH PRIORITY: Model Information**
|
|
215
253
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
|
|
216
254
|
CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
|
|
@@ -221,6 +259,7 @@ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t hand
|
|
|
221
259
|
// Memory management functions
|
|
222
260
|
CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
|
|
223
261
|
CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
|
|
262
|
+
CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
|
|
224
263
|
|
|
225
264
|
#ifdef __cplusplus
|
|
226
265
|
}
|
|
Binary file
|
|
@@ -6,5 +6,6 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.default = void 0;
|
|
7
7
|
var _reactNative = require("react-native");
|
|
8
8
|
// New TTS/Audio types
|
|
9
|
+
// New conversation management types
|
|
9
10
|
var _default = exports.default = _reactNative.TurboModuleRegistry.get('Cactus');
|
|
10
11
|
//# sourceMappingURL=NativeCactus.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_reactNative","require","_default","exports","default","TurboModuleRegistry","get"],"sourceRoot":"../../src","sources":["NativeCactus.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAkSA;AAAA,IAAAC,QAAA,GAAAC,OAAA,CAAAC,OAAA,
|
|
1
|
+
{"version":3,"names":["_reactNative","require","_default","exports","default","TurboModuleRegistry","get"],"sourceRoot":"../../src","sources":["NativeCactus.ts"],"mappings":";;;;;;AACA,IAAAA,YAAA,GAAAC,OAAA;AAkSA;AAiBA;AAAA,IAAAC,QAAA,GAAAC,OAAA,CAAAC,OAAA,GAgMeC,gCAAmB,CAACC,GAAG,CAAO,QAAQ,CAAC","ignoreList":[]}
|
package/lib/commonjs/index.js
CHANGED
|
@@ -216,6 +216,35 @@ class LlamaContext {
|
|
|
216
216
|
stopCompletion() {
|
|
217
217
|
return _NativeCactus.default.stopCompletion(this.id);
|
|
218
218
|
}
|
|
219
|
+
|
|
220
|
+
// New conversation management methods
|
|
221
|
+
/**
|
|
222
|
+
* Generate a response to a user message (simple text return)
|
|
223
|
+
*/
|
|
224
|
+
async generateResponse(userMessage, maxTokens = 200) {
|
|
225
|
+
return _NativeCactus.default.generateResponse(this.id, userMessage, maxTokens);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* Continue conversation with detailed timing information
|
|
230
|
+
*/
|
|
231
|
+
async continueConversation(userMessage, maxTokens = 200) {
|
|
232
|
+
return _NativeCactus.default.continueConversation(this.id, userMessage, maxTokens);
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Clear conversation history and reset KV cache
|
|
237
|
+
*/
|
|
238
|
+
async clearConversation() {
|
|
239
|
+
return _NativeCactus.default.clearConversation(this.id);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Check if conversation is currently active
|
|
244
|
+
*/
|
|
245
|
+
async isConversationActive() {
|
|
246
|
+
return _NativeCactus.default.isConversationActive(this.id);
|
|
247
|
+
}
|
|
219
248
|
tokenize(text) {
|
|
220
249
|
return _NativeCactus.default.tokenize(this.id, text);
|
|
221
250
|
}
|