cactus-react-native 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. package/LICENSE.txt +20 -0
  2. package/README.md +3 -1
  3. package/android/src/main/CMakeLists.txt +58 -23
  4. package/android/src/main/java/com/cactus/Cactus.java +484 -16
  5. package/android/src/main/java/com/cactus/LlamaContext.java +199 -0
  6. package/android/src/main/jni.cpp +325 -10
  7. package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/cactus/CactusModule.java +79 -7
  16. package/android/src/oldarch/java/com/cactus/CactusModule.java +70 -0
  17. package/cactus-react-native.podspec +0 -3
  18. package/ios/CMakeLists.txt +58 -36
  19. package/ios/Cactus.mm +243 -2
  20. package/ios/CactusContext.h +22 -0
  21. package/ios/CactusContext.mm +176 -1
  22. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +92 -5
  23. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +268 -0
  24. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/chat.h +2 -0
  25. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/common.h +42 -51
  26. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-backend.h +4 -4
  27. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-common.h +12 -6
  28. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpp.h +1 -1
  29. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu.h +5 -0
  30. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-impl.h +52 -18
  31. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-metal-impl.h +106 -14
  32. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-opt.h +49 -28
  33. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml.h +87 -106
  34. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-arch.h +16 -0
  35. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-batch.h +2 -1
  36. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-chat.h +7 -2
  37. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-context.h +44 -33
  38. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-cparams.h +1 -0
  39. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-graph.h +83 -17
  40. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-hparams.h +44 -2
  41. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-kv-cache.h +407 -179
  42. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-memory.h +13 -2
  43. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-loader.h +5 -3
  44. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-saver.h +37 -0
  45. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model.h +24 -2
  46. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-vocab.h +6 -0
  47. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama.h +102 -142
  48. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/chat-template.hpp +23 -11
  49. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/minja.hpp +186 -127
  50. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
  51. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  52. package/ios/cactus.xcframework/ios-arm64/cactus.framework/ggml-llama.metallib +0 -0
  53. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +92 -5
  54. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +268 -0
  55. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +2 -0
  56. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/common.h +42 -51
  57. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +4 -4
  58. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +12 -6
  59. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +1 -1
  60. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +5 -0
  61. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +52 -18
  62. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +106 -14
  63. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +49 -28
  64. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +87 -106
  65. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +16 -0
  66. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +2 -1
  67. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +7 -2
  68. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +44 -33
  69. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +1 -0
  70. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +83 -17
  71. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +44 -2
  72. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +407 -179
  73. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +13 -2
  74. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +5 -3
  75. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-saver.h +37 -0
  76. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +24 -2
  77. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +6 -0
  78. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +102 -142
  79. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +23 -11
  80. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +186 -127
  81. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
  82. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
  83. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  84. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
  85. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +92 -5
  86. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +268 -0
  87. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/chat.h +2 -0
  88. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/common.h +42 -51
  89. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-backend.h +4 -4
  90. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-common.h +12 -6
  91. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpp.h +1 -1
  92. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu.h +5 -0
  93. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-impl.h +52 -18
  94. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-metal-impl.h +106 -14
  95. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-opt.h +49 -28
  96. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml.h +87 -106
  97. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-arch.h +16 -0
  98. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-batch.h +2 -1
  99. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-chat.h +7 -2
  100. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-context.h +44 -33
  101. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-cparams.h +1 -0
  102. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-graph.h +83 -17
  103. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-hparams.h +44 -2
  104. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-kv-cache.h +407 -179
  105. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-memory.h +13 -2
  106. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-loader.h +5 -3
  107. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-saver.h +37 -0
  108. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model.h +24 -2
  109. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-vocab.h +6 -0
  110. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama.h +102 -142
  111. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/chat-template.hpp +23 -11
  112. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/minja.hpp +186 -127
  113. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Info.plist +0 -0
  114. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
  115. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/ggml-llama.metallib +0 -0
  116. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +92 -5
  117. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +268 -0
  118. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +2 -0
  119. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/common.h +42 -51
  120. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +4 -4
  121. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +12 -6
  122. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +1 -1
  123. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +5 -0
  124. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +52 -18
  125. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +106 -14
  126. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +49 -28
  127. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +87 -106
  128. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +16 -0
  129. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +2 -1
  130. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +7 -2
  131. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +44 -33
  132. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +1 -0
  133. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +83 -17
  134. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +44 -2
  135. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +407 -179
  136. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +13 -2
  137. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +5 -3
  138. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-saver.h +37 -0
  139. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +24 -2
  140. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +6 -0
  141. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +102 -142
  142. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +23 -11
  143. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +186 -127
  144. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
  145. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
  146. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  147. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
  148. package/lib/commonjs/NativeCactus.js +1 -0
  149. package/lib/commonjs/NativeCactus.js.map +1 -1
  150. package/lib/commonjs/index.js +112 -0
  151. package/lib/commonjs/index.js.map +1 -1
  152. package/lib/commonjs/tools.js +118 -0
  153. package/lib/commonjs/tools.js.map +1 -0
  154. package/lib/module/NativeCactus.js +3 -0
  155. package/lib/module/NativeCactus.js.map +1 -1
  156. package/lib/module/index.js +87 -1
  157. package/lib/module/index.js.map +1 -1
  158. package/lib/module/tools.js +110 -0
  159. package/lib/module/tools.js.map +1 -0
  160. package/lib/typescript/NativeCactus.d.ts +30 -1
  161. package/lib/typescript/NativeCactus.d.ts.map +1 -1
  162. package/lib/typescript/index.d.ts +21 -2
  163. package/lib/typescript/index.d.ts.map +1 -1
  164. package/lib/typescript/tools.d.ts +38 -0
  165. package/lib/typescript/tools.d.ts.map +1 -0
  166. package/package.json +6 -3
  167. package/src/NativeCactus.ts +62 -1
  168. package/src/index.ts +113 -2
  169. package/src/tools.ts +127 -0
  170. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
  171. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
  172. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
  173. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
  174. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/sgemm.h +0 -14
  175. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
  176. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
  177. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
  178. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
  179. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/sgemm.h +0 -14
  180. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
  181. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
  182. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
  183. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
  184. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/sgemm.h +0 -14
  185. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-aarch64.h +0 -8
  186. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-impl.h +0 -531
  187. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-quants.h +0 -63
  188. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-traits.h +0 -38
  189. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/sgemm.h +0 -14
@@ -82,7 +82,7 @@
82
82
  BOOL isAsset = [params[@"is_model_asset"] boolValue];
83
83
  NSString *path = modelPath;
84
84
  if (isAsset) path = [[NSBundle mainBundle] pathForResource:modelPath ofType:nil];
85
- defaultParams.model = [path UTF8String];
85
+ defaultParams.model.path = [path UTF8String];
86
86
 
87
87
  NSString *chatTemplate = params[@"chat_template"];
88
88
  if (chatTemplate) {
@@ -700,6 +700,25 @@
700
700
  return result;
701
701
  }
702
702
 
703
+ - (NSArray *)tokenize:(NSString *)text withMediaPaths:(NSArray *)mediaPaths {
704
+ std::vector<std::string> media_paths_vector;
705
+ if (mediaPaths) {
706
+ for (NSString *mediaPath in mediaPaths) {
707
+ media_paths_vector.push_back([mediaPath UTF8String]);
708
+ }
709
+ }
710
+
711
+ cactus::cactus_tokenize_result tokenize_result = llama->tokenize([text UTF8String], media_paths_vector);
712
+
713
+ // Return just the tokens array to match the method signature
714
+ NSMutableArray *tokens = [[NSMutableArray alloc] init];
715
+ for (const auto &tok : tokenize_result.tokens) {
716
+ [tokens addObject:@(tok)];
717
+ }
718
+
719
+ return tokens;
720
+ }
721
+
703
722
  - (NSString *)detokenize:(NSArray *)tokens {
704
723
  std::vector<llama_token> toks;
705
724
  for (NSNumber *tok in tokens) {
@@ -827,9 +846,165 @@
827
846
  return result;
828
847
  }
829
848
 
849
+ // New Multimodal Methods
850
+ - (BOOL)initMultimodal:(NSString *)mmprojPath useGpu:(BOOL)useGpu {
851
+ return llama->initMultimodal([mmprojPath UTF8String], useGpu);
852
+ }
853
+
854
+ - (BOOL)isMultimodalEnabled {
855
+ return llama->isMultimodalEnabled();
856
+ }
857
+
858
+ - (BOOL)isMultimodalSupportVision {
859
+ return llama->isMultimodalSupportVision();
860
+ }
861
+
862
+ - (BOOL)isMultimodalSupportAudio {
863
+ return llama->isMultimodalSupportAudio();
864
+ }
865
+
866
+ - (void)releaseMultimodal {
867
+ llama->releaseMultimodal();
868
+ }
869
+
870
+ - (NSDictionary *)multimodalCompletion:(NSString *)prompt
871
+ withMediaPaths:(NSArray *)mediaPaths
872
+ params:(NSDictionary *)params
873
+ onToken:(void (^)(NSMutableDictionary *tokenResult))onToken {
874
+
875
+ if (!llama->isMultimodalEnabled()) {
876
+ @throw [NSException exceptionWithName:@"LlamaException" reason:@"Multimodal is not enabled" userInfo:nil];
877
+ }
878
+
879
+ llama->rewind();
880
+
881
+ llama->params.prompt = [prompt UTF8String];
882
+ llama->params.sampling.seed = params[@"seed"] ? [params[@"seed"] intValue] : -1;
883
+
884
+ // Set all completion parameters (similar to existing completion method)
885
+ if (params[@"n_threads"]) {
886
+ int nThreads = [params[@"n_threads"] intValue];
887
+ const int maxThreads = (int) [[NSProcessInfo processInfo] processorCount];
888
+ const int defaultNThreads = nThreads == 4 ? 2 : MIN(4, maxThreads);
889
+ llama->params.cpuparams.n_threads = nThreads > 0 ? nThreads : defaultNThreads;
890
+ }
891
+ if (params[@"n_predict"]) llama->params.n_predict = [params[@"n_predict"] intValue];
892
+ if (params[@"ignore_eos"]) llama->params.sampling.ignore_eos = [params[@"ignore_eos"] boolValue];
893
+
894
+ auto & sparams = llama->params.sampling;
895
+ if (params[@"temperature"]) sparams.temp = [params[@"temperature"] doubleValue];
896
+ if (params[@"top_k"]) sparams.top_k = [params[@"top_k"] intValue];
897
+ if (params[@"top_p"]) sparams.top_p = [params[@"top_p"] doubleValue];
898
+
899
+ // Convert media paths
900
+ std::vector<std::string> media_paths_vector;
901
+ if (mediaPaths) {
902
+ for (NSString *mediaPath in mediaPaths) {
903
+ media_paths_vector.push_back([mediaPath UTF8String]);
904
+ }
905
+ }
906
+
907
+ if (!llama->initSampling()) {
908
+ @throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to initialize sampling" userInfo:nil];
909
+ }
910
+
911
+ llama->beginCompletion();
912
+
913
+ @try {
914
+ llama->loadPrompt(media_paths_vector);
915
+ } @catch (NSException *exception) {
916
+ @throw [NSException exceptionWithName:@"LlamaException" reason:exception.reason userInfo:nil];
917
+ }
918
+
919
+ size_t sent_count = 0;
920
+ while (llama->has_next_token && !llama->is_interrupted) {
921
+ const cactus::completion_token_output token_with_probs = llama->doCompletion();
922
+ if (token_with_probs.tok == -1 || llama->incomplete) {
923
+ continue;
924
+ }
925
+
926
+ const std::string token_text = common_token_to_piece(llama->ctx, token_with_probs.tok);
927
+ size_t pos = std::min(sent_count, llama->generated_text.size());
928
+ const std::string str_test = llama->generated_text.substr(pos);
929
+
930
+ size_t stop_pos = llama->findStoppingStrings(str_test, token_text.size(), cactus::STOP_FULL);
931
+
932
+ if (stop_pos == std::string::npos || (!llama->has_next_token && stop_pos > 0)) {
933
+ const std::string to_send = llama->generated_text.substr(pos, std::string::npos);
934
+ sent_count += to_send.size();
935
+
936
+ if (onToken) {
937
+ NSMutableDictionary *tokenResult = [[NSMutableDictionary alloc] init];
938
+ tokenResult[@"token"] = [NSString stringWithUTF8String:to_send.c_str()];
939
+ onToken(tokenResult);
940
+ }
941
+ }
942
+ }
943
+
944
+ llama->is_predicting = false;
945
+
946
+ NSMutableDictionary *result = [[NSMutableDictionary alloc] init];
947
+ result[@"text"] = [NSString stringWithUTF8String:llama->generated_text.c_str()];
948
+ result[@"tokens_predicted"] = @(llama->num_tokens_predicted);
949
+ result[@"tokens_evaluated"] = @(llama->num_prompt_tokens);
950
+ result[@"truncated"] = @(llama->truncated);
951
+ result[@"stopped_eos"] = @(llama->stopped_eos);
952
+ result[@"stopped_word"] = @(llama->stopped_word);
953
+ result[@"stopped_limit"] = @(llama->stopped_limit);
954
+ result[@"stopping_word"] = [NSString stringWithUTF8String:llama->stopping_word.c_str()];
955
+ result[@"tokens_cached"] = @(llama->n_past);
956
+
957
+ return result;
958
+ }
959
+
960
+ - (void)releaseVocoder {
961
+ llama->releaseVocoder();
962
+ }
963
+
830
964
  - (void)invalidate {
831
965
  delete llama;
832
966
  // llama_backend_free();
833
967
  }
834
968
 
969
+ // New TTS/Vocoder Methods
970
+ - (BOOL)initVocoder:(NSString *)vocoderModelPath {
971
+ return llama->initVocoder([vocoderModelPath UTF8String]);
972
+ }
973
+
974
+ - (BOOL)isVocoderEnabled {
975
+ return llama->isVocoderEnabled();
976
+ }
977
+
978
+ - (int)getTTSType {
979
+ return static_cast<int>(llama->getTTSType());
980
+ }
981
+
982
+ - (NSString *)getFormattedAudioCompletion:(NSString *)speakerJsonStr textToSpeak:(NSString *)textToSpeak {
983
+ std::string result = llama->getFormattedAudioCompletion([speakerJsonStr UTF8String], [textToSpeak UTF8String]);
984
+ return [NSString stringWithUTF8String:result.c_str()];
985
+ }
986
+
987
+ - (NSArray *)getAudioCompletionGuideTokens:(NSString *)textToSpeak {
988
+ std::vector<llama_token> tokens = llama->getAudioCompletionGuideTokens([textToSpeak UTF8String]);
989
+ NSMutableArray *result = [[NSMutableArray alloc] init];
990
+ for (const auto &token : tokens) {
991
+ [result addObject:@(token)];
992
+ }
993
+ return result;
994
+ }
995
+
996
+ - (NSArray *)decodeAudioTokens:(NSArray *)tokens {
997
+ std::vector<llama_token> token_vector;
998
+ for (NSNumber *token in tokens) {
999
+ token_vector.push_back([token intValue]);
1000
+ }
1001
+
1002
+ std::vector<float> audio_data = llama->decodeAudioTokens(token_vector);
1003
+ NSMutableArray *result = [[NSMutableArray alloc] init];
1004
+ for (const auto &sample : audio_data) {
1005
+ [result addObject:@(sample)];
1006
+ }
1007
+ return result;
1008
+ }
1009
+
835
1010
  @end
@@ -14,6 +14,8 @@
14
14
  #include <android/log.h>
15
15
  #endif
16
16
 
17
+ struct mtmd_context;
18
+
17
19
  namespace cactus {
18
20
 
19
21
  std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token);
@@ -28,7 +30,12 @@ enum stop_type
28
30
  STOP_PARTIAL,
29
31
  };
30
32
 
31
- // completion token output with probabilities
33
+ enum tts_type {
34
+ TTS_UNKNOWN = -1,
35
+ TTS_OUTETTS_V0_2 = 1,
36
+ TTS_OUTETTS_V0_3 = 2,
37
+ };
38
+
32
39
  struct completion_token_output
33
40
  {
34
41
  struct token_prob
@@ -41,7 +48,14 @@ struct completion_token_output
41
48
  llama_token tok;
42
49
  };
43
50
 
44
- // Main context class
51
+ struct cactus_tokenize_result {
52
+ std::vector<llama_token> tokens;
53
+ bool has_media = false;
54
+ std::vector<std::string> bitmap_hashes;
55
+ std::vector<size_t> chunk_pos;
56
+ std::vector<size_t> chunk_pos_media;
57
+ };
58
+
45
59
  struct cactus_context {
46
60
  bool is_predicting = false;
47
61
  bool is_interrupted = false;
@@ -77,12 +91,37 @@ struct cactus_context {
77
91
 
78
92
  std::vector<common_adapter_lora_info> lora;
79
93
 
94
+ bool context_full = false;
95
+ std::vector<llama_token> guide_tokens;
96
+ bool next_token_uses_guide_token = true;
97
+
98
+ struct cactus_context_mtmd {
99
+ mtmd_context* mtmd_ctx = nullptr;
100
+ };
101
+ cactus_context_mtmd *mtmd_wrapper = nullptr;
102
+ bool has_multimodal = false;
103
+ std::vector<std::string> mtmd_bitmap_past_hashes;
104
+
105
+ struct cactus_context_vocoder {
106
+ common_init_result init_result;
107
+ llama_model *model = nullptr;
108
+ llama_context *ctx = nullptr;
109
+ tts_type type = TTS_UNKNOWN;
110
+ };
111
+ cactus_context_vocoder *vocoder_wrapper = nullptr;
112
+ bool has_vocoder = false;
113
+ std::vector<llama_token> audio_tokens;
114
+
80
115
  ~cactus_context();
81
116
 
82
117
  void rewind();
118
+
83
119
  bool initSampling();
120
+
84
121
  bool loadModel(common_params &params_);
122
+
85
123
  bool validateModelChatTemplate(bool use_jinja, const char *name) const;
124
+
86
125
  common_chat_params getFormattedChatWithJinja(
87
126
  const std::string &messages,
88
127
  const std::string &chat_template,
@@ -91,24 +130,58 @@ struct cactus_context {
91
130
  const bool &parallel_tool_calls,
92
131
  const std::string &tool_choice
93
132
  ) const;
133
+
94
134
  std::string getFormattedChat(
95
135
  const std::string &messages,
96
136
  const std::string &chat_template
97
137
  ) const;
138
+
98
139
  void truncatePrompt(std::vector<llama_token> &prompt_tokens);
140
+
99
141
  void loadPrompt();
142
+
143
+ void loadPrompt(const std::vector<std::string> &media_paths);
144
+
145
+ void setGuideTokens(const std::vector<llama_token> &tokens);
146
+
100
147
  void beginCompletion();
148
+
149
+ void endCompletion();
150
+
101
151
  completion_token_output nextToken();
152
+
102
153
  size_t findStoppingStrings(const std::string &text, const size_t last_token_size, const stop_type type);
154
+
103
155
  completion_token_output doCompletion();
156
+
104
157
  std::vector<float> getEmbedding(common_params &embd_params);
158
+
105
159
  std::string bench(int pp, int tg, int pl, int nr);
160
+
106
161
  int applyLoraAdapters(std::vector<common_adapter_lora_info> lora);
162
+
107
163
  void removeLoraAdapters();
164
+
108
165
  std::vector<common_adapter_lora_info> getLoadedLoraAdapters();
109
- };\
110
166
 
111
- // Logging macros
167
+ cactus_tokenize_result tokenize(const std::string &text, const std::vector<std::string> &media_paths);
168
+
169
+ bool initMultimodal(const std::string &mmproj_path, bool use_gpu);
170
+ bool isMultimodalEnabled() const;
171
+ bool isMultimodalSupportVision() const;
172
+ bool isMultimodalSupportAudio() const;
173
+ void releaseMultimodal();
174
+ void processMedia(const std::string &prompt, const std::vector<std::string> &media_paths);
175
+
176
+ bool initVocoder(const std::string &vocoder_model_path);
177
+ bool isVocoderEnabled() const;
178
+ tts_type getTTSType() const;
179
+ std::string getFormattedAudioCompletion(const std::string &speaker_json_str, const std::string &text_to_speak);
180
+ std::vector<llama_token> getAudioCompletionGuideTokens(const std::string &text_to_speak);
181
+ std::vector<float> decodeAudioTokens(const std::vector<llama_token> &tokens);
182
+ void releaseVocoder();
183
+ };
184
+
112
185
  extern bool cactus_verbose;
113
186
 
114
187
  #if CACTUS_VERBOSE != 1
@@ -125,9 +198,23 @@ extern bool cactus_verbose;
125
198
  #endif
126
199
 
127
200
  #define LOG_ERROR(MSG, ...) log("ERROR", __func__, __LINE__, MSG, ##__VA_ARGS__)
201
+
128
202
  #define LOG_WARNING(MSG, ...) log("WARNING", __func__, __LINE__, MSG, ##__VA_ARGS__)
203
+
129
204
  #define LOG_INFO(MSG, ...) log("INFO", __func__, __LINE__, MSG, ##__VA_ARGS__)
130
205
 
206
+ void log(const char *level, const char *function, int line, const char *format, ...);
207
+
208
+ void llama_batch_clear(llama_batch *batch);
209
+
210
+ void llama_batch_add(llama_batch *batch, llama_token id, llama_pos pos, const std::vector<llama_seq_id>& seq_ids, bool logits);
211
+
212
+ size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b);
213
+
214
+ bool ends_with(const std::string &str, const std::string &suffix);
215
+
216
+ size_t find_partial_stop_string(const std::string &stop, const std::string &text);
217
+
131
218
  } // namespace cactus
132
219
 
133
- #endif /* CACTUS_H */
220
+ #endif /* CACTUS_H */
@@ -0,0 +1,268 @@
1
+ #ifndef CACTUS_FFI_H
2
+ #define CACTUS_FFI_H
3
+
4
+ #include <stdint.h>
5
+ #include <stdbool.h>
6
+ #include <stddef.h>
7
+ /* Symbol-visibility macros. CACTUS_FFI_EXPORT is applied to every function declared in this header; CACTUS_FFI_LOCAL is defined here but not used in this header. */
8
+ #if defined _WIN32 || defined __CYGWIN__ /* Windows / Cygwin: DLL export/import attributes */
9
+ #ifdef CACTUS_FFI_BUILDING_DLL /* export side; presumably defined by the library's own build - confirm */
10
+ #ifdef __GNUC__ /* GNU-compatible compiler: attribute syntax */
11
+ #define CACTUS_FFI_EXPORT __attribute__ ((dllexport))
12
+ #else /* otherwise: __declspec syntax */
13
+ #define CACTUS_FFI_EXPORT __declspec(dllexport)
14
+ #endif /* __GNUC__ */
15
+ #else /* CACTUS_FFI_BUILDING_DLL not defined: import side */
16
+ #ifdef __GNUC__ /* GNU-compatible compiler: attribute syntax */
17
+ #define CACTUS_FFI_EXPORT __attribute__ ((dllimport))
18
+ #else /* otherwise: __declspec syntax */
19
+ #define CACTUS_FFI_EXPORT __declspec(dllimport)
20
+ #endif /* __GNUC__ */
21
+ #endif /* CACTUS_FFI_BUILDING_DLL */
22
+ #define CACTUS_FFI_LOCAL
23
+ #else /* every other platform */
24
+ #if __GNUC__ >= 4 /* an undefined __GNUC__ evaluates to 0 here, selecting the empty fallbacks */
25
+ #define CACTUS_FFI_EXPORT __attribute__ ((visibility ("default")))
26
+ #define CACTUS_FFI_LOCAL __attribute__ ((visibility ("hidden")))
27
+ #else /* no visibility attributes: both macros expand to nothing */
28
+ #define CACTUS_FFI_EXPORT
29
+ #define CACTUS_FFI_LOCAL
30
+ #endif /* __GNUC__ >= 4 */
31
+ #endif /* _WIN32 || __CYGWIN__ */
32
+
33
+ #ifdef __cplusplus
34
+ extern "C" {
35
+ #endif
36
+ // Opaque context handle: a pointer to struct cactus_context_opaque, which this header never defines. Returned by cactus_init_context_c() and taken as the first argument by the other functions in this header.
37
+ typedef struct cactus_context_opaque* cactus_context_handle_t;
38
+
39
+ // Initialization parameters, passed by const pointer to cactus_init_context_c(). NOTE(review): field meanings are not documented in this header; the notes below are inferred from names - confirm against the implementation.
40
+ typedef struct cactus_init_params_c {
41
+ const char* model_path; // presumably the filesystem path of the model file - confirm
42
+ const char* chat_template; // NULL/empty handling is not visible here
43
+
44
+ int32_t n_ctx;
45
+ int32_t n_batch;
46
+ int32_t n_ubatch;
47
+ int32_t n_gpu_layers;
48
+ int32_t n_threads;
49
+ bool use_mmap;
50
+ bool use_mlock;
51
+ bool embedding;
52
+ int32_t pooling_type; // integer code; the accepted values are not defined in this header
53
+ int32_t embd_normalize; // integer code; the accepted values are not defined in this header
54
+ bool flash_attn;
55
+ const char* cache_type_k; // passed as a string; accepted names are not listed in this header
56
+ const char* cache_type_v; // passed as a string; accepted names are not listed in this header
57
+ void (*progress_callback)(float progress); // receives a float progress value; its range, calling thread and NULL handling are not visible here
58
+
59
+ } cactus_init_params_c_t;
60
+ // Completion request, passed by const pointer to cactus_completion_c() and cactus_multimodal_completion_c(). NOTE(review): defaults and valid ranges of the sampling fields are not documented in this header.
61
+ typedef struct cactus_completion_params_c {
62
+ const char* prompt;
63
+ int32_t n_predict;
64
+ int32_t n_threads;
65
+ int32_t seed;
66
+ double temperature;
67
+ int32_t top_k;
68
+ double top_p;
69
+ double min_p;
70
+ double typical_p;
71
+ int32_t penalty_last_n;
72
+ double penalty_repeat;
73
+ double penalty_freq;
74
+ double penalty_present;
75
+ int32_t mirostat;
76
+ double mirostat_tau;
77
+ double mirostat_eta;
78
+ bool ignore_eos;
79
+ int32_t n_probs;
80
+ const char** stop_sequences; // presumably an array of stop_sequence_count C strings - confirm
81
+ int stop_sequence_count; // NOTE(review): plain int, while the other integer fields in this struct are int32_t
82
+ const char* grammar;
83
+ bool (*token_callback)(const char* token_json); // receives a string named token_json; the meaning of the bool return (continue vs. stop) and NULL handling are not visible here
84
+
85
+ } cactus_completion_params_c_t;
86
+
87
+ // Token buffer plus element count. Returned by value from cactus_tokenize_c() and cactus_get_audio_guide_tokens_c(), embedded in cactus_tokenize_result_c_t, and accepted by value by cactus_free_token_array_c().
88
+ typedef struct cactus_token_array_c {
89
+ int32_t* tokens; // NOTE(review): presumably library-allocated and released via cactus_free_token_array_c() - confirm
90
+ int32_t count;
91
+ } cactus_token_array_c_t;
92
+ // Float buffer plus element count. Returned by value from cactus_embedding_c() and cactus_decode_audio_tokens_c(), and accepted by value by cactus_free_float_array_c().
93
+ typedef struct cactus_float_array_c {
94
+ float* values; // NOTE(review): presumably library-allocated and released via cactus_free_float_array_c() - confirm
95
+ int32_t count;
96
+ } cactus_float_array_c_t;
97
+ // Completion output, written through the `result` pointer of cactus_completion_c() and cactus_multimodal_completion_c(). cactus_free_completion_result_members_c() is declared for releasing its members.
97
+ typedef struct cactus_completion_result_c {
98
+ char* text; // non-const pointer; presumably owned by the result until freed - confirm
99
+ int32_t tokens_predicted;
100
+ int32_t tokens_evaluated;
101
+ bool truncated;
102
+ bool stopped_eos;
103
+ bool stopped_word;
104
+ bool stopped_limit;
105
+ char* stopping_word; // non-const pointer; whether it may be NULL when stopped_word is false is not visible here
106
+ } cactus_completion_result_c_t;
108
+ // Tokenization output including media information. Returned by value from cactus_tokenize_with_media_c(); cactus_free_tokenize_result_c() takes a pointer to it.
108
+ typedef struct cactus_tokenize_result_c {
109
+ cactus_token_array_c_t tokens;
110
+ bool has_media;
111
+ char** bitmap_hashes; // presumably bitmap_hash_count strings - confirm
112
+ int bitmap_hash_count; // NOTE(review): the counts in this struct are plain int, whereas cactus_token_array_c_t uses int32_t
113
+ size_t* chunk_positions; // presumably chunk_position_count entries - confirm
114
+ int chunk_position_count;
115
+ size_t* chunk_positions_media; // presumably chunk_position_media_count entries - confirm
116
+ int chunk_position_media_count;
117
+ } cactus_tokenize_result_c_t;
119
+ // Takes the initialization parameters by const pointer and returns a context handle. NOTE(review): the failure value (presumably NULL) is not documented in this header.
120
+ CACTUS_FFI_EXPORT cactus_context_handle_t cactus_init_context_c(const cactus_init_params_c_t* params);
121
+ // Counterpart of cactus_init_context_c(). NOTE(review): presumably destroys the context behind `handle`; NULL tolerance is not visible here.
122
+ CACTUS_FFI_EXPORT void cactus_free_context_c(cactus_context_handle_t handle);
123
+ // Takes a request by const pointer and a caller-supplied `result` to write into. NOTE(review): the int return-code convention is not documented in this header.
124
+ CACTUS_FFI_EXPORT int cactus_completion_c(
125
+ cactus_context_handle_t handle,
126
+ const cactus_completion_params_c_t* params,
127
+ cactus_completion_result_c_t* result
128
+ );
129
+
130
+ // Multimodal completion: same parameter and result types as cactus_completion_c(), plus `media_paths` (presumably `media_count` path strings - confirm). The int return-code convention is not documented in this header.
131
+ CACTUS_FFI_EXPORT int cactus_multimodal_completion_c(
132
+ cactus_context_handle_t handle,
133
+ const cactus_completion_params_c_t* params,
134
+ const char** media_paths,
135
+ int media_count,
136
+ cactus_completion_result_c_t* result
137
+ );
138
+ // NOTE(review): presumably asks an in-flight completion on `handle` to stop; threading guarantees are not visible here.
139
+ CACTUS_FFI_EXPORT void cactus_stop_completion_c(cactus_context_handle_t handle);
140
+ // Returns a cactus_token_array_c_t by value; cactus_free_token_array_c() is the release function declared for that type.
141
+ CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_tokenize_c(cactus_context_handle_t handle, const char* text);
142
+ // Takes `count` token ids and returns a char*. NOTE(review): presumably released with cactus_free_string_c() - confirm.
143
+ CACTUS_FFI_EXPORT char* cactus_detokenize_c(cactus_context_handle_t handle, const int32_t* tokens, int32_t count);
144
+ // Returns a cactus_float_array_c_t by value; cactus_free_float_array_c() is the release function declared for that type.
145
+ CACTUS_FFI_EXPORT cactus_float_array_c_t cactus_embedding_c(cactus_context_handle_t handle, const char* text);
146
+ // Release functions for values returned by this API. NOTE(review): NULL / empty-array tolerance is not documented in this header.
147
+ CACTUS_FFI_EXPORT void cactus_free_string_c(char* str);
148
+
149
+ CACTUS_FFI_EXPORT void cactus_free_token_array_c(cactus_token_array_c_t arr);
150
+
151
+ CACTUS_FFI_EXPORT void cactus_free_float_array_c(cactus_float_array_c_t arr);
152
+ // Takes a pointer to the result. NOTE(review): per its name it frees the members of *result rather than the struct itself - confirm.
153
+ CACTUS_FFI_EXPORT void cactus_free_completion_result_members_c(cactus_completion_result_c_t* result);
154
+ // Tokenizes `text` together with `media_paths` (presumably `media_count` path strings - confirm) and returns the result struct by value.
155
+ CACTUS_FFI_EXPORT cactus_tokenize_result_c_t cactus_tokenize_with_media_c(cactus_context_handle_t handle, const char* text, const char** media_paths, int media_count);
156
+ // Takes a pointer to a cactus_tokenize_result_c_t. NOTE(review): whether only the members or also the struct itself are freed is not visible here.
157
+ CACTUS_FFI_EXPORT void cactus_free_tokenize_result_c(cactus_tokenize_result_c_t* result);
158
+ // Takes `count` token ids by const pointer. NOTE(review): whether the array is copied or must outlive the call is not visible here.
159
+ CACTUS_FFI_EXPORT void cactus_set_guide_tokens_c(cactus_context_handle_t handle, const int32_t* tokens, int32_t count);
160
+ // Returns int, whereas the C++ initMultimodal() declared in cactus.h returns bool. NOTE(review): the int convention (e.g. 0 = success) is not documented in this header.
161
+ CACTUS_FFI_EXPORT int cactus_init_multimodal_c(cactus_context_handle_t handle, const char* mmproj_path, bool use_gpu);
162
+ // Boolean queries; cactus.h declares C++ members isMultimodalEnabled(), isMultimodalSupportVision() and isMultimodalSupportAudio() with the same return type.
163
+ CACTUS_FFI_EXPORT bool cactus_is_multimodal_enabled_c(cactus_context_handle_t handle);
164
+
165
+ CACTUS_FFI_EXPORT bool cactus_supports_vision_c(cactus_context_handle_t handle);
166
+
167
+ CACTUS_FFI_EXPORT bool cactus_supports_audio_c(cactus_context_handle_t handle);
168
+ // NOTE(review): presumably the counterpart of cactus_init_multimodal_c(); behaviour when multimodal was never initialized is not visible here.
169
+ CACTUS_FFI_EXPORT void cactus_release_multimodal_c(cactus_context_handle_t handle);
170
+ // Returns int, whereas the C++ initVocoder() declared in cactus.h returns bool. NOTE(review): the int convention is not documented in this header.
171
+ CACTUS_FFI_EXPORT int cactus_init_vocoder_c(cactus_context_handle_t handle, const char* vocoder_model_path);
172
+
173
+ CACTUS_FFI_EXPORT bool cactus_is_vocoder_enabled_c(cactus_context_handle_t handle);
174
+ // Returns int; the C++ getTTSType() in cactus.h returns a tts_type, so this is presumably that enum's integer value - confirm.
175
+ CACTUS_FFI_EXPORT int cactus_get_tts_type_c(cactus_context_handle_t handle);
176
+ // Returns a char*. NOTE(review): presumably released with cactus_free_string_c() - confirm.
177
+ CACTUS_FFI_EXPORT char* cactus_get_formatted_audio_completion_c(cactus_context_handle_t handle, const char* speaker_json_str, const char* text_to_speak);
178
+ // Returns a cactus_token_array_c_t by value; cactus_free_token_array_c() is the release function declared for that type.
179
+ CACTUS_FFI_EXPORT cactus_token_array_c_t cactus_get_audio_guide_tokens_c(cactus_context_handle_t handle, const char* text_to_speak);
180
+ // Takes `count` token ids and returns a cactus_float_array_c_t by value; cactus_free_float_array_c() is the release function declared for that type.
181
+ CACTUS_FFI_EXPORT cactus_float_array_c_t cactus_decode_audio_tokens_c(cactus_context_handle_t handle, const int32_t* tokens, int32_t count);
182
+ // NOTE(review): presumably the counterpart of cactus_init_vocoder_c(); confirm.
183
+ CACTUS_FFI_EXPORT void cactus_release_vocoder_c(cactus_context_handle_t handle);
184
+
185
+ // LoRA adapter and benchmark types
186
+ // One LoRA adapter entry: a path string and a float scale. Element type of cactus_lora_adapters_c_t.
187
+ typedef struct {
188
+ const char* path;
189
+ float scale;
190
+ } cactus_lora_adapter_c_t;
191
+ // Adapter list. Taken by const pointer by cactus_apply_lora_adapters_c(), returned by value from cactus_get_loaded_lora_adapters_c(), and passed by pointer to cactus_free_lora_adapters_c().
192
+ typedef struct {
193
+ cactus_lora_adapter_c_t* adapters; // presumably `count` elements - confirm
194
+ int32_t count;
195
+ } cactus_lora_adapters_c_t;
196
+ // Benchmark result, returned by value from cactus_bench_c(); cactus_free_bench_result_members_c() is declared for releasing its members.
197
+ typedef struct {
198
+ char* model_name; // non-const pointer; presumably freed by cactus_free_bench_result_members_c() - confirm
199
+ int64_t model_size; // unit not documented in this header
200
+ int64_t model_params;
201
+ double pp_avg; // NOTE(review): pp/tg presumably correspond to the `pp` and `tg` arguments of cactus_bench_c(); units are not documented here
202
+ double pp_std;
203
+ double tg_avg;
204
+ double tg_std;
205
+ } cactus_bench_result_c_t;
206
+
207
+ // Benchmarking. NOTE(review): the meaning of pp, tg, pl and nr is not documented in this header - confirm against the implementation.
208
+ CACTUS_FFI_EXPORT cactus_bench_result_c_t cactus_bench_c(cactus_context_handle_t handle, int pp, int tg, int pl, int nr);
209
+
210
+ // LoRA adapter support. The int return-code convention of cactus_apply_lora_adapters_c() is not documented in this header.
211
+ CACTUS_FFI_EXPORT int cactus_apply_lora_adapters_c(cactus_context_handle_t handle, const cactus_lora_adapters_c_t* adapters);
212
+ CACTUS_FFI_EXPORT void cactus_remove_lora_adapters_c(cactus_context_handle_t handle);
213
+ CACTUS_FFI_EXPORT cactus_lora_adapters_c_t cactus_get_loaded_lora_adapters_c(cactus_context_handle_t handle); // returned by value; cactus_free_lora_adapters_c() is the release function declared for this type
214
+
215
+ // Chat template support.
216
+ CACTUS_FFI_EXPORT bool cactus_validate_chat_template_c(cactus_context_handle_t handle, bool use_jinja, const char* name);
217
+ CACTUS_FFI_EXPORT char* cactus_get_formatted_chat_c(cactus_context_handle_t handle, const char* messages, const char* chat_template); // returns char*; presumably released with cactus_free_string_c() - confirm. The expected format of `messages` is not documented here.
218
+
219
+ // Result of Jinja/tools chat formatting: returned by value from cactus_get_formatted_chat_with_jinja_c(); cactus_free_chat_result_members_c() is declared for releasing its members.
220
+ typedef struct {
221
+ char* prompt;
222
+ char* json_schema; // NOTE(review): whether the string members may be NULL is not visible here
223
+ char* tools;
224
+ char* tool_choice;
225
+ bool parallel_tool_calls;
226
+ } cactus_chat_result_c_t;
227
+ // Returns a cactus_chat_result_c_t by value. NOTE(review): the expected format of the string arguments and which of them may be NULL are not documented in this header.
228
+ CACTUS_FFI_EXPORT cactus_chat_result_c_t cactus_get_formatted_chat_with_jinja_c(
229
+ cactus_context_handle_t handle,
230
+ const char* messages,
231
+ const char* chat_template,
232
+ const char* json_schema,
233
+ const char* tools,
234
+ bool parallel_tool_calls,
235
+ const char* tool_choice
236
+ );
237
+
238
+ // Context management.
239
+ CACTUS_FFI_EXPORT void cactus_rewind_c(cactus_context_handle_t handle);
240
+ CACTUS_FFI_EXPORT bool cactus_init_sampling_c(cactus_context_handle_t handle); // returns bool; presumably true on success - confirm
241
+
242
+ // Stepwise completion control. These take only the handle (plus media paths), unlike cactus_completion_c(); how the prompt and sampling parameters reach the context is not visible in this header.
243
+ CACTUS_FFI_EXPORT void cactus_begin_completion_c(cactus_context_handle_t handle);
244
+ CACTUS_FFI_EXPORT void cactus_end_completion_c(cactus_context_handle_t handle);
245
+ CACTUS_FFI_EXPORT void cactus_load_prompt_c(cactus_context_handle_t handle);
246
+ CACTUS_FFI_EXPORT void cactus_load_prompt_with_media_c(cactus_context_handle_t handle, const char** media_paths, int media_count);
247
+
248
+ // Token processing.
249
+ CACTUS_FFI_EXPORT int cactus_do_completion_step_c(cactus_context_handle_t handle, char** token_text); // `token_text` is an out-parameter; ownership of *token_text and the int return convention are not documented here
250
+ CACTUS_FFI_EXPORT size_t cactus_find_stopping_strings_c(cactus_context_handle_t handle, const char* text, size_t last_token_size, int stop_type); // accepted `stop_type` values and the "not found" return value are not defined in this header
251
+
252
+ // Model information getters.
253
+ CACTUS_FFI_EXPORT int32_t cactus_get_n_ctx_c(cactus_context_handle_t handle);
254
+ CACTUS_FFI_EXPORT int32_t cactus_get_n_embd_c(cactus_context_handle_t handle);
255
+ CACTUS_FFI_EXPORT char* cactus_get_model_desc_c(cactus_context_handle_t handle); // returns char*; presumably released with cactus_free_string_c() - confirm
256
+ CACTUS_FFI_EXPORT int64_t cactus_get_model_size_c(cactus_context_handle_t handle); // unit not documented in this header
257
+ CACTUS_FFI_EXPORT int64_t cactus_get_model_params_c(cactus_context_handle_t handle);
258
+
259
+ // Release functions for the struct types above; each takes a pointer. NOTE(review): for the two *_members_c functions the name implies only the members are freed, not the struct itself - confirm.
260
+ CACTUS_FFI_EXPORT void cactus_free_bench_result_members_c(cactus_bench_result_c_t* result);
261
+ CACTUS_FFI_EXPORT void cactus_free_lora_adapters_c(cactus_lora_adapters_c_t* adapters);
262
+ CACTUS_FFI_EXPORT void cactus_free_chat_result_members_c(cactus_chat_result_c_t* result);
263
+
264
+ #ifdef __cplusplus
265
+ } /* extern "C" */
266
+ #endif /* __cplusplus */
267
+
268
+ #endif /* CACTUS_FFI_H */
@@ -3,6 +3,7 @@
3
3
  #pragma once
4
4
 
5
5
  #include "common.h"
6
+ #include <chrono>
6
7
  #include <string>
7
8
  #include <vector>
8
9
  #include "minja/chat-template.hpp"
@@ -79,6 +80,7 @@ struct common_chat_templates_inputs {
79
80
  common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
80
81
  bool parallel_tool_calls = false;
81
82
  bool extract_reasoning = true;
83
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
82
84
  };
83
85
 
84
86
  struct common_chat_params {