cactus-react-native 1.2.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/README.md +765 -33
  2. package/android/CMakeLists.txt +4 -3
  3. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusFileSystem.kt +20 -1
  4. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
  6. package/cpp/HybridCactus.cpp +231 -19
  7. package/cpp/HybridCactus.hpp +25 -3
  8. package/cpp/HybridCactusIndex.cpp +325 -0
  9. package/cpp/HybridCactusIndex.hpp +43 -0
  10. package/cpp/HybridCactusUtil.cpp +3 -3
  11. package/cpp/HybridCactusUtil.hpp +2 -1
  12. package/cpp/cactus_ffi.h +107 -2
  13. package/cpp/cactus_util.h +1 -1
  14. package/ios/HybridCactusFileSystem.swift +23 -2
  15. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +2 -0
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +107 -2
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +656 -0
  18. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/{ffi_utils.h → cactus_utils.h} +145 -18
  19. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +135 -7
  20. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
  21. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +193 -26
  22. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +54 -195
  23. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
  24. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
  25. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  26. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +2 -0
  27. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +107 -2
  28. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +656 -0
  29. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/{ffi_utils.h → cactus_utils.h} +145 -18
  30. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +135 -7
  31. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
  32. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +193 -26
  33. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +54 -195
  34. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
  35. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Info.plist +0 -0
  36. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/_CodeSignature/CodeResources +1 -1
  37. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  38. package/ios/cactus_util.xcframework/Info.plist +4 -4
  39. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +1 -1
  40. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +27 -0
  41. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
  42. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
  43. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +1 -1
  44. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +27 -0
  45. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
  46. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +3 -3
  47. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
  48. package/lib/module/api/Database.js +12 -95
  49. package/lib/module/api/Database.js.map +1 -1
  50. package/lib/module/classes/CactusIndex.js +45 -0
  51. package/lib/module/classes/CactusIndex.js.map +1 -0
  52. package/lib/module/classes/CactusLM.js +65 -17
  53. package/lib/module/classes/CactusLM.js.map +1 -1
  54. package/lib/module/classes/CactusSTT.js +104 -17
  55. package/lib/module/classes/CactusSTT.js.map +1 -1
  56. package/lib/module/config/CactusConfig.js +2 -0
  57. package/lib/module/config/CactusConfig.js.map +1 -1
  58. package/lib/module/constants/packageVersion.js +1 -1
  59. package/lib/module/hooks/useCactusIndex.js +175 -0
  60. package/lib/module/hooks/useCactusIndex.js.map +1 -0
  61. package/lib/module/hooks/useCactusLM.js +68 -7
  62. package/lib/module/hooks/useCactusLM.js.map +1 -1
  63. package/lib/module/hooks/useCactusSTT.js +102 -6
  64. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  65. package/lib/module/index.js +2 -0
  66. package/lib/module/index.js.map +1 -1
  67. package/lib/module/models.js +336 -0
  68. package/lib/module/models.js.map +1 -0
  69. package/lib/module/native/Cactus.js +61 -13
  70. package/lib/module/native/Cactus.js.map +1 -1
  71. package/lib/module/native/CactusFileSystem.js +3 -0
  72. package/lib/module/native/CactusFileSystem.js.map +1 -1
  73. package/lib/module/native/CactusIndex.js +32 -0
  74. package/lib/module/native/CactusIndex.js.map +1 -0
  75. package/lib/module/native/CactusUtil.js +16 -3
  76. package/lib/module/native/CactusUtil.js.map +1 -1
  77. package/lib/module/native/index.js +1 -0
  78. package/lib/module/native/index.js.map +1 -1
  79. package/lib/module/specs/CactusIndex.nitro.js +4 -0
  80. package/lib/module/specs/CactusIndex.nitro.js.map +1 -0
  81. package/lib/module/telemetry/Telemetry.js +3 -1
  82. package/lib/module/telemetry/Telemetry.js.map +1 -1
  83. package/lib/module/types/CactusIndex.js +2 -0
  84. package/lib/module/types/{CactusModel.js.map → CactusIndex.js.map} +1 -1
  85. package/lib/module/types/CactusLM.js +2 -0
  86. package/lib/module/types/CactusSTT.js +2 -0
  87. package/lib/module/types/common.js +2 -0
  88. package/lib/module/types/{CactusSTTModel.js.map → common.js.map} +1 -1
  89. package/lib/typescript/src/api/Database.d.ts +4 -7
  90. package/lib/typescript/src/api/Database.d.ts.map +1 -1
  91. package/lib/typescript/src/classes/CactusIndex.d.ts +15 -0
  92. package/lib/typescript/src/classes/CactusIndex.d.ts.map +1 -0
  93. package/lib/typescript/src/classes/CactusLM.d.ts +12 -5
  94. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  95. package/lib/typescript/src/classes/CactusSTT.d.ts +15 -5
  96. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  97. package/lib/typescript/src/config/CactusConfig.d.ts +1 -0
  98. package/lib/typescript/src/config/CactusConfig.d.ts.map +1 -1
  99. package/lib/typescript/src/constants/packageVersion.d.ts +1 -1
  100. package/lib/typescript/src/hooks/useCactusIndex.d.ts +14 -0
  101. package/lib/typescript/src/hooks/useCactusIndex.d.ts.map +1 -0
  102. package/lib/typescript/src/hooks/useCactusLM.d.ts +6 -4
  103. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  104. package/lib/typescript/src/hooks/useCactusSTT.d.ts +13 -5
  105. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  106. package/lib/typescript/src/index.d.ts +6 -4
  107. package/lib/typescript/src/index.d.ts.map +1 -1
  108. package/lib/typescript/src/models.d.ts +6 -0
  109. package/lib/typescript/src/models.d.ts.map +1 -0
  110. package/lib/typescript/src/native/Cactus.d.ts +10 -3
  111. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  112. package/lib/typescript/src/native/CactusFileSystem.d.ts +1 -0
  113. package/lib/typescript/src/native/CactusFileSystem.d.ts.map +1 -1
  114. package/lib/typescript/src/native/CactusIndex.d.ts +12 -0
  115. package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -0
  116. package/lib/typescript/src/native/CactusUtil.d.ts.map +1 -1
  117. package/lib/typescript/src/native/index.d.ts +1 -0
  118. package/lib/typescript/src/native/index.d.ts.map +1 -1
  119. package/lib/typescript/src/specs/Cactus.nitro.d.ts +9 -2
  120. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  121. package/lib/typescript/src/specs/CactusFileSystem.nitro.d.ts +1 -0
  122. package/lib/typescript/src/specs/CactusFileSystem.nitro.d.ts.map +1 -1
  123. package/lib/typescript/src/specs/CactusIndex.nitro.d.ts +24 -0
  124. package/lib/typescript/src/specs/CactusIndex.nitro.d.ts.map +1 -0
  125. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +1 -1
  126. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +1 -1
  127. package/lib/typescript/src/types/CactusIndex.d.ts +34 -0
  128. package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -0
  129. package/lib/typescript/src/types/CactusLM.d.ts +19 -0
  130. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  131. package/lib/typescript/src/types/CactusSTT.d.ts +21 -1
  132. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  133. package/lib/typescript/src/types/common.d.ts +28 -0
  134. package/lib/typescript/src/types/common.d.ts.map +1 -0
  135. package/nitro.json +3 -0
  136. package/nitrogen/generated/android/c++/JDeviceInfo.hpp +1 -1
  137. package/nitrogen/generated/android/c++/JFunc_void_double.hpp +1 -1
  138. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +1 -1
  139. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +1 -1
  140. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +1 -1
  141. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +1 -1
  142. package/nitrogen/generated/android/c++/JHybridCactusFileSystemSpec.cpp +17 -1
  143. package/nitrogen/generated/android/c++/JHybridCactusFileSystemSpec.hpp +2 -1
  144. package/nitrogen/generated/android/c++/JHybridCactusImageSpec.cpp +1 -1
  145. package/nitrogen/generated/android/c++/JHybridCactusImageSpec.hpp +1 -1
  146. package/nitrogen/generated/android/cactus+autolinking.cmake +2 -1
  147. package/nitrogen/generated/android/cactus+autolinking.gradle +1 -1
  148. package/nitrogen/generated/android/cactusOnLoad.cpp +11 -1
  149. package/nitrogen/generated/android/cactusOnLoad.hpp +1 -1
  150. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +1 -1
  151. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/Func_void_double.kt +1 -1
  152. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +1 -1
  153. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +1 -1
  154. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusFileSystemSpec.kt +5 -1
  155. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusImageSpec.kt +1 -1
  156. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/cactusOnLoad.kt +1 -1
  157. package/nitrogen/generated/ios/Cactus+autolinking.rb +1 -1
  158. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +1 -1
  159. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +1 -1
  160. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +1 -1
  161. package/nitrogen/generated/ios/CactusAutolinking.mm +11 -1
  162. package/nitrogen/generated/ios/CactusAutolinking.swift +1 -1
  163. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +1 -1
  164. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +1 -1
  165. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +1 -1
  166. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +1 -1
  167. package/nitrogen/generated/ios/c++/HybridCactusFileSystemSpecSwift.cpp +1 -1
  168. package/nitrogen/generated/ios/c++/HybridCactusFileSystemSpecSwift.hpp +9 -1
  169. package/nitrogen/generated/ios/c++/HybridCactusImageSpecSwift.cpp +1 -1
  170. package/nitrogen/generated/ios/c++/HybridCactusImageSpecSwift.hpp +1 -1
  171. package/nitrogen/generated/ios/swift/DeviceInfo.swift +1 -1
  172. package/nitrogen/generated/ios/swift/Func_void.swift +1 -1
  173. package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +1 -1
  174. package/nitrogen/generated/ios/swift/Func_void_bool.swift +1 -1
  175. package/nitrogen/generated/ios/swift/Func_void_double.swift +1 -1
  176. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +1 -1
  177. package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +1 -1
  178. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +1 -1
  179. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +1 -1
  180. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +1 -1
  181. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +1 -1
  182. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +1 -1
  183. package/nitrogen/generated/ios/swift/HybridCactusFileSystemSpec.swift +2 -1
  184. package/nitrogen/generated/ios/swift/HybridCactusFileSystemSpec_cxx.swift +20 -1
  185. package/nitrogen/generated/ios/swift/HybridCactusImageSpec.swift +1 -1
  186. package/nitrogen/generated/ios/swift/HybridCactusImageSpec_cxx.swift +1 -1
  187. package/nitrogen/generated/shared/c++/CactusIndexGetResult.hpp +84 -0
  188. package/nitrogen/generated/shared/c++/CactusIndexQueryResult.hpp +79 -0
  189. package/nitrogen/generated/shared/c++/DeviceInfo.hpp +1 -1
  190. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +1 -1
  191. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +1 -1
  192. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +1 -1
  193. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +1 -1
  194. package/nitrogen/generated/shared/c++/HybridCactusFileSystemSpec.cpp +2 -1
  195. package/nitrogen/generated/shared/c++/HybridCactusFileSystemSpec.hpp +2 -1
  196. package/nitrogen/generated/shared/c++/HybridCactusImageSpec.cpp +1 -1
  197. package/nitrogen/generated/shared/c++/HybridCactusImageSpec.hpp +1 -1
  198. package/nitrogen/generated/shared/c++/HybridCactusIndexSpec.cpp +27 -0
  199. package/nitrogen/generated/shared/c++/HybridCactusIndexSpec.hpp +76 -0
  200. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +8 -1
  201. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +11 -3
  202. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +1 -1
  203. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +2 -2
  204. package/package.json +2 -2
  205. package/src/api/Database.ts +14 -135
  206. package/src/classes/CactusIndex.ts +58 -0
  207. package/src/classes/CactusLM.ts +87 -19
  208. package/src/classes/CactusSTT.ts +134 -20
  209. package/src/config/CactusConfig.ts +3 -0
  210. package/src/constants/packageVersion.ts +1 -1
  211. package/src/hooks/useCactusIndex.ts +195 -0
  212. package/src/hooks/useCactusLM.ts +88 -8
  213. package/src/hooks/useCactusSTT.ts +119 -7
  214. package/src/index.tsx +22 -2
  215. package/src/models.ts +344 -0
  216. package/src/native/Cactus.ts +95 -13
  217. package/src/native/CactusFileSystem.ts +4 -0
  218. package/src/native/CactusIndex.ts +54 -0
  219. package/src/native/CactusUtil.ts +19 -3
  220. package/src/native/index.ts +1 -0
  221. package/src/specs/Cactus.nitro.ts +18 -2
  222. package/src/specs/CactusFileSystem.nitro.ts +2 -0
  223. package/src/specs/CactusIndex.nitro.ts +31 -0
  224. package/src/specs/CactusUtil.nitro.ts +1 -1
  225. package/src/telemetry/Telemetry.ts +1 -1
  226. package/src/types/CactusIndex.ts +40 -0
  227. package/src/types/CactusLM.ts +24 -0
  228. package/src/types/CactusSTT.ts +27 -1
  229. package/src/types/common.ts +28 -0
  230. package/android/src/main/jniLibs/arm64-v8a/libcactus_util.so +0 -0
  231. package/lib/module/types/CactusModel.js +0 -2
  232. package/lib/module/types/CactusSTTModel.js +0 -2
  233. package/lib/typescript/src/types/CactusModel.d.ts +0 -13
  234. package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
  235. package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
  236. package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
  237. package/src/types/CactusModel.ts +0 -15
  238. package/src/types/CactusSTTModel.ts +0 -10
@@ -1,5 +1,5 @@
1
- #ifndef CACTUS_FFI_UTILS_H
2
- #define CACTUS_FFI_UTILS_H
1
+ #ifndef CACTUS_UTILS_H
2
+ #define CACTUS_UTILS_H
3
3
 
4
4
  #include "../engine/engine.h"
5
5
  #include <string>
@@ -12,16 +12,65 @@
12
12
  #include <iostream>
13
13
  #include <filesystem>
14
14
  #include <cctype>
15
+ #include <memory>
16
+ #include <atomic>
17
+ #include <mutex>
18
+ #include <random>
19
+
20
+ #ifdef __APPLE__
21
+ #include <uuid/uuid.h>
22
+ #endif
23
+
24
+ struct CactusModelHandle {
25
+ std::unique_ptr<cactus::engine::Model> model;
26
+ std::atomic<bool> should_stop;
27
+ std::vector<uint32_t> processed_tokens;
28
+ std::mutex model_mutex;
29
+ std::string model_name;
30
+
31
+ CactusModelHandle() : should_stop(false) {}
32
+ };
33
+
34
+ extern std::string last_error_message;
35
+
36
+ bool matches_stop_sequence(const std::vector<uint32_t>& generated_tokens,
37
+ const std::vector<std::vector<uint32_t>>& stop_sequences);
15
38
 
16
39
  namespace cactus {
17
40
  namespace ffi {
18
41
 
42
+ #ifndef CACTUS_VERSION
43
+ #define CACTUS_VERSION "unknown"
44
+ #endif
45
+
46
+ inline const char* getVersion() {
47
+ return CACTUS_VERSION;
48
+ }
49
+
50
+ inline std::string generateUUID() {
51
+ #ifdef __APPLE__
52
+ uuid_t uuid;
53
+ uuid_generate_random(uuid);
54
+ char uuid_str[37];
55
+ uuid_unparse_lower(uuid, uuid_str);
56
+ return std::string(uuid_str);
57
+ #endif
58
+ }
59
+
19
60
  struct ToolFunction {
20
61
  std::string name;
21
62
  std::string description;
22
63
  std::unordered_map<std::string, std::string> parameters;
23
64
  };
24
65
 
66
+ } // namespace ffi
67
+ } // namespace cactus
68
+
69
+ #include "gemma_tools.h"
70
+
71
+ namespace cactus {
72
+ namespace ffi {
73
+
25
74
  inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
26
75
  std::string sanitized_msg = error_message;
27
76
  for (auto& c : sanitized_msg) {
@@ -175,49 +224,58 @@ inline std::vector<ToolFunction> parse_tools_json(const std::string& json) {
175
224
  return tools;
176
225
  }
177
226
 
178
- inline void parse_options_json(const std::string& json,
179
- float& temperature, float& top_p,
227
+ inline void parse_options_json(const std::string& json,
228
+ float& temperature, float& top_p,
180
229
  size_t& top_k, size_t& max_tokens,
181
- std::vector<std::string>& stop_sequences) {
230
+ std::vector<std::string>& stop_sequences,
231
+ bool& force_tools) {
182
232
  temperature = 0.0f;
183
- top_p = 0.0f;
184
- top_k = 0;
185
- max_tokens = 100;
233
+ top_p = 0.0f;
234
+ top_k = 0;
235
+ max_tokens = 100;
236
+ force_tools = false;
186
237
  stop_sequences.clear();
187
-
238
+
188
239
  if (json.empty()) return;
189
-
240
+
190
241
  size_t pos = json.find("\"temperature\"");
191
242
  if (pos != std::string::npos) {
192
243
  pos = json.find(':', pos) + 1;
193
244
  temperature = std::stof(json.substr(pos));
194
245
  }
195
-
246
+
196
247
  pos = json.find("\"top_p\"");
197
248
  if (pos != std::string::npos) {
198
249
  pos = json.find(':', pos) + 1;
199
250
  top_p = std::stof(json.substr(pos));
200
251
  }
201
-
252
+
202
253
  pos = json.find("\"top_k\"");
203
254
  if (pos != std::string::npos) {
204
255
  pos = json.find(':', pos) + 1;
205
256
  top_k = std::stoul(json.substr(pos));
206
257
  }
207
-
258
+
208
259
  pos = json.find("\"max_tokens\"");
209
260
  if (pos != std::string::npos) {
210
261
  pos = json.find(':', pos) + 1;
211
262
  max_tokens = std::stoul(json.substr(pos));
212
263
  }
213
-
264
+
265
+ pos = json.find("\"force_tools\"");
266
+ if (pos != std::string::npos) {
267
+ pos = json.find(':', pos) + 1;
268
+ while (pos < json.length() && std::isspace(json[pos])) pos++;
269
+ force_tools = (json.substr(pos, 4) == "true");
270
+ }
271
+
214
272
  pos = json.find("\"stop_sequences\"");
215
273
  if (pos != std::string::npos) {
216
274
  pos = json.find('[', pos);
217
275
  if (pos != std::string::npos) {
218
276
  size_t end_pos = json.find(']', pos);
219
277
  size_t seq_pos = json.find('"', pos);
220
-
278
+
221
279
  while (seq_pos != std::string::npos && seq_pos < end_pos) {
222
280
  size_t seq_start = seq_pos + 1;
223
281
  size_t seq_end = json.find('"', seq_start);
@@ -234,7 +292,7 @@ inline std::string format_tools_for_prompt(const std::vector<ToolFunction>& tool
234
292
  if (tools.empty()) return "";
235
293
  std::string formatted_tools_json;
236
294
  for (size_t i = 0; i < tools.size(); i++) {
237
- if (i > 0) formatted_tools_json += ",\n";
295
+ if (i > 0) formatted_tools_json += "\n";
238
296
  formatted_tools_json += "{\"type\":\"function\",\"function\":{\"name\":\""
239
297
  + tools[i].name
240
298
  + "\",\"description\":\""
@@ -253,11 +311,43 @@ inline void parse_function_calls_from_response(const std::string& response_text,
253
311
  regular_response = response_text;
254
312
  function_calls.clear();
255
313
 
314
+ gemma::parse_function_calls(regular_response, function_calls);
315
+
316
+ // Parse Qwen-style function calls: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
317
+ const std::string QWEN_TOOL_START = "<tool_call>";
318
+ const std::string QWEN_TOOL_END = "</tool_call>";
319
+ size_t qwen_start_pos = 0;
320
+
321
+ while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
322
+ size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
323
+ size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
324
+
325
+ if (qwen_end_pos != std::string::npos) {
326
+ std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
327
+
328
+ size_t first = json_content.find_first_not_of(" \t\n\r");
329
+ size_t last = json_content.find_last_not_of(" \t\n\r");
330
+ if (first != std::string::npos && last != std::string::npos) {
331
+ json_content = json_content.substr(first, last - first + 1);
332
+ }
333
+
334
+ if (json_content.size() > 2 && json_content[0] == '{' &&
335
+ json_content.find("\"name\"") != std::string::npos) {
336
+ function_calls.push_back(json_content);
337
+ }
338
+
339
+ regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
256
346
  const std::string TOOL_CALL_START = "<|tool_call_start|>";
257
347
  const std::string TOOL_CALL_END = "<|tool_call_end|>";
258
348
  size_t tool_start_pos = 0;
259
349
 
260
- while ((tool_start_pos = response_text.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
350
+ while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
261
351
  size_t content_start = tool_start_pos + TOOL_CALL_START.length();
262
352
  size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
263
353
 
@@ -398,4 +488,41 @@ inline std::string construct_response_json(const std::string& regular_response,
398
488
  } // namespace ffi
399
489
  } // namespace cactus
400
490
 
401
- #endif // CACTUS_FFI_UTILS_H
491
+ #ifdef __cplusplus
492
+ extern "C" {
493
+ #endif
494
+
495
+ const char* cactus_get_last_error();
496
+
497
+ __attribute__((weak))
498
+ const char* register_app(const char* encrypted_data);
499
+
500
+ __attribute__((weak))
501
+ const char* get_device_id(const char* current_token);
502
+
503
+ #ifdef __cplusplus
504
+ }
505
+ #endif
506
+
507
+ #ifdef __cplusplus
508
+ extern "C" {
509
+
510
+ __attribute__((weak))
511
+ inline const char* register_app(const char* encrypted_data) {
512
+ (void)encrypted_data;
513
+ static thread_local std::string uuid_storage;
514
+ uuid_storage = cactus::ffi::generateUUID();
515
+ return uuid_storage.c_str();
516
+ }
517
+
518
+ __attribute__((weak))
519
+ inline const char* get_device_id(const char* current_token) {
520
+ (void)current_token;
521
+ static thread_local std::string uuid_storage;
522
+ uuid_storage = cactus::ffi::generateUUID();
523
+ return uuid_storage.c_str();
524
+ }
525
+ }
526
+ #endif
527
+
528
+ #endif // CACTUS_UTILS_H
@@ -3,6 +3,7 @@
3
3
  #include <vector>
4
4
  #include <string>
5
5
  #include <unordered_map>
6
+ #include <unordered_set>
6
7
  #include <memory>
7
8
  #include <cstdint>
8
9
 
@@ -32,6 +33,9 @@ extern "C" {
32
33
  class CactusGraph;
33
34
 
34
35
  namespace cactus {
36
+ namespace npu {
37
+ class NPUPrefill;
38
+ }
35
39
  namespace engine {
36
40
 
37
41
  class Siglip2Preprocessor;
@@ -127,9 +131,12 @@ struct MergeRule {
127
131
  struct ChatMessage {
128
132
  std::string role;
129
133
  std::string content;
134
+ std::string name;
130
135
  std::vector<std::string> images;
131
136
  };
132
137
 
138
+
139
+
133
140
  class Tokenizer {
134
141
  public:
135
142
  virtual ~Tokenizer() = default;
@@ -325,6 +332,8 @@ struct KVCache {
325
332
  struct LayerCache {
326
333
  std::vector<uint8_t> keys;
327
334
  std::vector<uint8_t> values;
335
+ std::vector<float> key_scales;
336
+ std::vector<float> value_scales;
328
337
  };
329
338
 
330
339
  std::vector<LayerCache> layer_caches;
@@ -349,7 +358,12 @@ struct KVCache {
349
358
  void update_from_graph(CactusGraph* gb, const std::vector<size_t>& k_nodes,
350
359
  const std::vector<size_t>& v_nodes, size_t seq_len,
351
360
  size_t num_layers, size_t kv_heads, size_t head_dim);
361
+
362
+ void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
363
+ size_t num_tokens, size_t kv_heads, size_t head_dim);
364
+
352
365
  bool is_empty() const { return current_seq_len == 0; }
366
+ bool is_int8() const { return precision == Precision::INT8; }
353
367
  void* get_key_ptr(size_t layer);
354
368
  void* get_value_ptr(size_t layer);
355
369
 
@@ -363,6 +377,100 @@ struct KVCache {
363
377
 
364
378
  CircularView get_key_view(size_t layer);
365
379
  CircularView get_value_view(size_t layer);
380
+
381
+ const int8_t* get_keys_int8(size_t layer) const;
382
+ const int8_t* get_values_int8(size_t layer) const;
383
+ const float* get_key_scales(size_t layer) const;
384
+ const float* get_value_scales(size_t layer) const;
385
+ };
386
+
387
+ class ToolCallConstrainer {
388
+ public:
389
+ enum class State {
390
+ DONE,
391
+
392
+ QWEN_START,
393
+ QWEN_EXPECT_OPEN_BRACE,
394
+ QWEN_EXPECT_NAME_KEY,
395
+ QWEN_EXPECT_NAME_COLON,
396
+ QWEN_EXPECT_NAME_VALUE,
397
+ QWEN_EXPECT_COMMA,
398
+ QWEN_EXPECT_ARGS_KEY,
399
+ QWEN_EXPECT_ARGS_COLON,
400
+ QWEN_IN_ARGUMENTS,
401
+ QWEN_EXPECT_CLOSE_BRACE,
402
+ QWEN_EXPECT_END,
403
+
404
+ LFM_START,
405
+ LFM_EXPECT_BRACKET,
406
+ LFM_IN_FUNC_NAME,
407
+ LFM_EXPECT_PAREN,
408
+ LFM_IN_ARGUMENTS,
409
+ LFM_EXPECT_BRACKET_CLOSE,
410
+ LFM_EXPECT_END,
411
+
412
+ GEMMA_START,
413
+ GEMMA_EXPECT_CALL,
414
+ GEMMA_IN_FUNC_NAME,
415
+ GEMMA_EXPECT_BRACE,
416
+ GEMMA_IN_ARGUMENTS,
417
+ GEMMA_EXPECT_END
418
+ };
419
+
420
+ void init(Config::ModelType model_type,
421
+ const std::vector<std::string>& function_names,
422
+ Tokenizer* tokenizer);
423
+
424
+ const std::unordered_map<uint32_t, float>& get_bias() const { return current_bias_; }
425
+
426
+ void update(uint32_t token_id, const std::string& decoded_text);
427
+
428
+ void reset();
429
+
430
+ bool is_active() const { return active_; }
431
+
432
+ private:
433
+ bool active_ = false;
434
+ State state_ = State::QWEN_START;
435
+ Config::ModelType model_type_ = Config::ModelType::QWEN;
436
+ Tokenizer* tokenizer_ = nullptr;
437
+
438
+ std::vector<std::string> function_names_;
439
+ std::string generated_text_;
440
+ int brace_depth_ = 0;
441
+
442
+ std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
443
+ std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
444
+ std::unordered_set<uint32_t> open_brace_tokens_;
445
+ std::unordered_set<uint32_t> close_brace_tokens_;
446
+ std::unordered_set<uint32_t> colon_tokens_;
447
+ std::unordered_set<uint32_t> comma_tokens_;
448
+ std::unordered_set<uint32_t> name_key_tokens_;
449
+ std::unordered_set<uint32_t> args_key_tokens_;
450
+ std::unordered_set<uint32_t> quote_tokens_;
451
+ std::unordered_set<uint32_t> backtick_tokens_;
452
+ std::unordered_set<uint32_t> all_func_name_tokens_;
453
+ std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
454
+
455
+ std::unordered_set<uint32_t> tool_start_tokens_;
456
+ std::unordered_set<uint32_t> tool_end_tokens_;
457
+ std::unordered_set<uint32_t> bracket_open_tokens_;
458
+ std::unordered_set<uint32_t> bracket_close_tokens_;
459
+ std::unordered_set<uint32_t> paren_open_tokens_;
460
+ std::unordered_set<uint32_t> paren_close_tokens_;
461
+ std::unordered_set<uint32_t> equals_tokens_;
462
+
463
+ std::unordered_set<uint32_t> gemma_call_start_tokens_;
464
+ std::unordered_set<uint32_t> gemma_call_end_tokens_;
465
+ std::unordered_set<uint32_t> gemma_response_start_tokens_;
466
+ std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
467
+ std::unordered_set<uint32_t> escape_tokens_;
468
+
469
+ std::unordered_map<uint32_t, float> current_bias_;
470
+
471
+ void compute_bias();
472
+ void tokenize_grammar_elements();
473
+ void add_tokens_for_string(const std::string& str, std::unordered_set<uint32_t>& token_set);
366
474
  };
367
475
 
368
476
  class Model {
@@ -386,26 +494,40 @@ public:
386
494
  virtual bool init(CactusGraph* external_graph, const std::string& model_folder, size_t context_size,
387
495
  const std::string& system_prompt = "", bool do_warmup = true);
388
496
 
389
- virtual uint32_t generate(const std::vector<uint32_t>& tokens, float temperature = -1.0f, float top_p = -1.0f,
390
- size_t top_k = 0, const std::string& profile_file = "", bool prefill_only = false);
497
+ virtual uint32_t decode(const std::vector<uint32_t>& tokens, float temperature = -1.0f, float top_p = -1.0f,
498
+ size_t top_k = 0, const std::string& profile_file = "");
499
+
500
+ virtual void prefill(const std::vector<uint32_t>& tokens, size_t chunk_size = 256, const std::string& profile_file = "");
391
501
 
392
- virtual uint32_t generate_with_images(const std::vector<uint32_t>& tokens, const std::vector<std::string>& image_paths,
502
+ virtual uint32_t decode_with_images(const std::vector<uint32_t>& tokens, const std::vector<std::string>& image_paths,
393
503
  float temperature = -1.0f, float top_p = -1.0f,
394
504
  size_t top_k = 0, const std::string& profile_file = "");
395
-
396
- virtual uint32_t generate_with_audio(const std::vector<uint32_t>& tokens, const std::vector<float>& mel_bins, float temperature = 0.0f, float top_p = 0.0f,
505
+
506
+ virtual uint32_t decode_with_audio(const std::vector<uint32_t>& tokens, const std::vector<float>& mel_bins, float temperature = 0.0f, float top_p = 0.0f,
397
507
  size_t top_k = 0, const std::string& profile_file = "");
398
508
 
399
- std::vector<float> get_embeddings(const std::vector<uint32_t>& tokens, bool pooled = true, const std::string& profile_file = "");
509
+ std::vector<float> get_embeddings(const std::vector<uint32_t>& tokens, bool pooled = true, bool normalize = false, const std::string& profile_file = "");
400
510
 
401
511
  virtual std::vector<float> get_image_embeddings(const std::string& image_path);
402
512
 
403
513
  virtual std::vector<float> get_audio_embeddings(const std::vector<float>& mel_bins);
404
514
 
405
515
  virtual void reset_cache() { kv_cache_.reset(); }
406
-
516
+
517
+ double score_tokens_window_logprob(const std::vector<uint32_t>& tokens, size_t start, size_t end, size_t context, size_t* tokens_scored);
518
+
519
+
520
+
407
521
  void set_cache_window(size_t window_size, size_t sink_size = 4) { kv_cache_.set_window_size(window_size, sink_size); }
408
522
 
523
+ bool load_npu_prefill(const std::string& model_path);
524
+ bool has_npu_prefill() const;
525
+ size_t get_prefill_chunk_size() const;
526
+
527
+ void set_tool_constraints(const std::vector<std::string>& function_names);
528
+ void clear_tool_constraints();
529
+ void update_tool_constraints(uint32_t token_id);
530
+
409
531
  void* graph_handle_;
410
532
 
411
533
  protected:
@@ -449,6 +571,12 @@ protected:
449
571
  bool init_internal(CactusGraph* gb, const std::string& model_folder, size_t context_size,
450
572
  const std::string& system_prompt, bool do_warmup);
451
573
  bool owns_graph_;
574
+
575
+ std::unique_ptr<npu::NPUPrefill> npu_prefill_;
576
+ void prefill_npu(const std::vector<uint32_t>& tokens);
577
+ virtual std::vector<__fp16> get_token_embeddings(const std::vector<uint32_t>& tokens);
578
+
579
+ ToolCallConstrainer tool_constrainer_;
452
580
  };
453
581
 
454
582
  std::unique_ptr<Model> create_model(const std::string& model_folder);