cactus-react-native 1.4.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. package/Cactus.podspec +1 -1
  2. package/README.md +465 -174
  3. package/android/CMakeLists.txt +24 -5
  4. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcurl.a +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libmbedcrypto.a +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libmbedtls.a +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libmbedx509.a +0 -0
  9. package/cpp/HybridCactus.cpp +157 -6
  10. package/cpp/HybridCactus.hpp +20 -3
  11. package/cpp/cactus_ffi.h +65 -30
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +0 -1
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +65 -30
  14. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +357 -122
  15. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +184 -63
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +153 -27
  18. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +90 -178
  19. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +276 -151
  20. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus.h +0 -1
  22. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +65 -30
  23. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +357 -122
  24. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +184 -63
  25. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
  26. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +153 -27
  27. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +90 -178
  28. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +276 -151
  29. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  30. package/lib/module/classes/CactusLM.js +43 -58
  31. package/lib/module/classes/CactusLM.js.map +1 -1
  32. package/lib/module/classes/CactusSTT.js +64 -38
  33. package/lib/module/classes/CactusSTT.js.map +1 -1
  34. package/lib/module/classes/CactusVAD.js +95 -0
  35. package/lib/module/classes/CactusVAD.js.map +1 -0
  36. package/lib/module/hooks/useCactusLM.js +23 -15
  37. package/lib/module/hooks/useCactusLM.js.map +1 -1
  38. package/lib/module/hooks/useCactusSTT.js +85 -28
  39. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  40. package/lib/module/hooks/useCactusVAD.js +171 -0
  41. package/lib/module/hooks/useCactusVAD.js.map +1 -0
  42. package/lib/module/index.js +2 -3
  43. package/lib/module/index.js.map +1 -1
  44. package/lib/module/modelRegistry.js +52 -0
  45. package/lib/module/modelRegistry.js.map +1 -0
  46. package/lib/module/native/Cactus.js +107 -8
  47. package/lib/module/native/Cactus.js.map +1 -1
  48. package/lib/module/native/CactusIndex.js.map +1 -1
  49. package/lib/module/native/index.js +0 -3
  50. package/lib/module/native/index.js.map +1 -1
  51. package/lib/module/types/CactusLM.js +2 -0
  52. package/lib/module/types/CactusSTT.js +2 -0
  53. package/lib/module/types/CactusVAD.js +4 -0
  54. package/lib/module/types/{CactusModel.js.map → CactusVAD.js.map} +1 -1
  55. package/lib/module/types/common.js +2 -0
  56. package/lib/module/types/{CactusSTTModel.js.map → common.js.map} +1 -1
  57. package/lib/typescript/src/classes/CactusLM.d.ts +8 -6
  58. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  59. package/lib/typescript/src/classes/CactusSTT.d.ts +11 -6
  60. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  61. package/lib/typescript/src/classes/CactusVAD.d.ts +20 -0
  62. package/lib/typescript/src/classes/CactusVAD.d.ts.map +1 -0
  63. package/lib/typescript/src/hooks/useCactusLM.d.ts +3 -3
  64. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  65. package/lib/typescript/src/hooks/useCactusSTT.d.ts +11 -5
  66. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  67. package/lib/typescript/src/hooks/useCactusVAD.d.ts +15 -0
  68. package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +1 -0
  69. package/lib/typescript/src/index.d.ts +7 -6
  70. package/lib/typescript/src/index.d.ts.map +1 -1
  71. package/lib/typescript/src/modelRegistry.d.ts +5 -0
  72. package/lib/typescript/src/modelRegistry.d.ts.map +1 -0
  73. package/lib/typescript/src/native/Cactus.d.ts +12 -6
  74. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  75. package/lib/typescript/src/native/CactusIndex.d.ts +2 -2
  76. package/lib/typescript/src/native/CactusIndex.d.ts.map +1 -1
  77. package/lib/typescript/src/native/index.d.ts +0 -3
  78. package/lib/typescript/src/native/index.d.ts.map +1 -1
  79. package/lib/typescript/src/specs/Cactus.nitro.d.ts +6 -1
  80. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  81. package/lib/typescript/src/types/CactusIndex.d.ts +2 -2
  82. package/lib/typescript/src/types/CactusIndex.d.ts.map +1 -1
  83. package/lib/typescript/src/types/CactusLM.d.ts +19 -9
  84. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  85. package/lib/typescript/src/types/CactusSTT.d.ts +45 -4
  86. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  87. package/lib/typescript/src/types/CactusVAD.d.ts +34 -0
  88. package/lib/typescript/src/types/CactusVAD.d.ts.map +1 -0
  89. package/lib/typescript/src/types/common.d.ts +23 -0
  90. package/lib/typescript/src/types/common.d.ts.map +1 -0
  91. package/nitro.json +0 -11
  92. package/nitrogen/generated/android/cactus+autolinking.cmake +0 -5
  93. package/nitrogen/generated/android/cactusOnLoad.cpp +0 -30
  94. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.cpp +0 -50
  95. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Bridge.hpp +9 -147
  96. package/nitrogen/generated/ios/Cactus-Swift-Cxx-Umbrella.hpp +0 -13
  97. package/nitrogen/generated/ios/CactusAutolinking.mm +0 -26
  98. package/nitrogen/generated/ios/CactusAutolinking.swift +0 -30
  99. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
  100. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +6 -1
  101. package/package.json +3 -3
  102. package/src/classes/CactusLM.ts +59 -74
  103. package/src/classes/CactusSTT.ts +92 -49
  104. package/src/classes/CactusVAD.ts +129 -0
  105. package/src/hooks/useCactusLM.ts +26 -9
  106. package/src/hooks/useCactusSTT.ts +105 -44
  107. package/src/hooks/useCactusVAD.ts +215 -0
  108. package/src/index.tsx +20 -10
  109. package/src/modelRegistry.ts +65 -0
  110. package/src/native/Cactus.ts +130 -14
  111. package/src/native/CactusIndex.ts +2 -2
  112. package/src/native/index.ts +0 -3
  113. package/src/specs/Cactus.nitro.ts +11 -2
  114. package/src/types/CactusIndex.ts +2 -2
  115. package/src/types/CactusLM.ts +20 -9
  116. package/src/types/CactusSTT.ts +50 -4
  117. package/src/types/CactusVAD.ts +39 -0
  118. package/src/types/common.ts +23 -0
  119. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusCrypto.kt +0 -46
  120. package/android/src/main/java/com/margelo/nitro/cactus/HybridCactusDeviceInfo.kt +0 -27
  121. package/android/src/main/jniLibs/arm64-v8a/libcactus_util.a +0 -0
  122. package/cpp/HybridCactusUtil.cpp +0 -47
  123. package/cpp/HybridCactusUtil.hpp +0 -27
  124. package/cpp/cactus_util.h +0 -25
  125. package/ios/HybridCactusCrypto.swift +0 -37
  126. package/ios/HybridCactusDeviceInfo.swift +0 -32
  127. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_telemetry.h +0 -656
  128. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_telemetry.h +0 -656
  129. package/ios/cactus_util.xcframework/Info.plist +0 -39
  130. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/cactus_util.h +0 -25
  131. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/database.h +0 -27
  132. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/ios_utils.h +0 -10
  133. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Headers/logging.h +0 -25
  134. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/Info.plist +0 -0
  135. package/ios/cactus_util.xcframework/ios-arm64/cactus_util.framework/cactus_util +0 -0
  136. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/cactus_util.h +0 -25
  137. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/database.h +0 -27
  138. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/ios_utils.h +0 -10
  139. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Headers/logging.h +0 -25
  140. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/Info.plist +0 -0
  141. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/_CodeSignature/CodeResources +0 -135
  142. package/ios/cactus_util.xcframework/ios-arm64-simulator/cactus_util.framework/cactus_util +0 -0
  143. package/lib/module/api/Database.js +0 -137
  144. package/lib/module/api/Database.js.map +0 -1
  145. package/lib/module/api/RemoteLM.js +0 -201
  146. package/lib/module/api/RemoteLM.js.map +0 -1
  147. package/lib/module/config/CactusConfig.js +0 -12
  148. package/lib/module/config/CactusConfig.js.map +0 -1
  149. package/lib/module/native/CactusCrypto.js +0 -10
  150. package/lib/module/native/CactusCrypto.js.map +0 -1
  151. package/lib/module/native/CactusDeviceInfo.js +0 -13
  152. package/lib/module/native/CactusDeviceInfo.js.map +0 -1
  153. package/lib/module/native/CactusUtil.js +0 -36
  154. package/lib/module/native/CactusUtil.js.map +0 -1
  155. package/lib/module/specs/CactusCrypto.nitro.js +0 -4
  156. package/lib/module/specs/CactusCrypto.nitro.js.map +0 -1
  157. package/lib/module/specs/CactusDeviceInfo.nitro.js +0 -4
  158. package/lib/module/specs/CactusDeviceInfo.nitro.js.map +0 -1
  159. package/lib/module/specs/CactusUtil.nitro.js +0 -4
  160. package/lib/module/specs/CactusUtil.nitro.js.map +0 -1
  161. package/lib/module/telemetry/Telemetry.js +0 -154
  162. package/lib/module/telemetry/Telemetry.js.map +0 -1
  163. package/lib/module/types/CactusModel.js +0 -2
  164. package/lib/module/types/CactusSTTModel.js +0 -2
  165. package/lib/typescript/src/api/Database.d.ts +0 -18
  166. package/lib/typescript/src/api/Database.d.ts.map +0 -1
  167. package/lib/typescript/src/api/RemoteLM.d.ts +0 -14
  168. package/lib/typescript/src/api/RemoteLM.d.ts.map +0 -1
  169. package/lib/typescript/src/config/CactusConfig.d.ts +0 -7
  170. package/lib/typescript/src/config/CactusConfig.d.ts.map +0 -1
  171. package/lib/typescript/src/native/CactusCrypto.d.ts +0 -5
  172. package/lib/typescript/src/native/CactusCrypto.d.ts.map +0 -1
  173. package/lib/typescript/src/native/CactusDeviceInfo.d.ts +0 -7
  174. package/lib/typescript/src/native/CactusDeviceInfo.d.ts.map +0 -1
  175. package/lib/typescript/src/native/CactusUtil.d.ts +0 -6
  176. package/lib/typescript/src/native/CactusUtil.d.ts.map +0 -1
  177. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts +0 -8
  178. package/lib/typescript/src/specs/CactusCrypto.nitro.d.ts.map +0 -1
  179. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts +0 -16
  180. package/lib/typescript/src/specs/CactusDeviceInfo.nitro.d.ts.map +0 -1
  181. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts +0 -10
  182. package/lib/typescript/src/specs/CactusUtil.nitro.d.ts.map +0 -1
  183. package/lib/typescript/src/telemetry/Telemetry.d.ts +0 -34
  184. package/lib/typescript/src/telemetry/Telemetry.d.ts.map +0 -1
  185. package/lib/typescript/src/types/CactusModel.d.ts +0 -13
  186. package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
  187. package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
  188. package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
  189. package/nitrogen/generated/android/c++/JDeviceInfo.hpp +0 -74
  190. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.cpp +0 -65
  191. package/nitrogen/generated/android/c++/JHybridCactusCryptoSpec.hpp +0 -65
  192. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.cpp +0 -85
  193. package/nitrogen/generated/android/c++/JHybridCactusDeviceInfoSpec.hpp +0 -66
  194. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/DeviceInfo.kt +0 -50
  195. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusCryptoSpec.kt +0 -58
  196. package/nitrogen/generated/android/kotlin/com/margelo/nitro/cactus/HybridCactusDeviceInfoSpec.kt +0 -62
  197. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.cpp +0 -11
  198. package/nitrogen/generated/ios/c++/HybridCactusCryptoSpecSwift.hpp +0 -77
  199. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.cpp +0 -11
  200. package/nitrogen/generated/ios/c++/HybridCactusDeviceInfoSpecSwift.hpp +0 -88
  201. package/nitrogen/generated/ios/swift/DeviceInfo.swift +0 -98
  202. package/nitrogen/generated/ios/swift/Func_void_DeviceInfo.swift +0 -47
  203. package/nitrogen/generated/ios/swift/Func_void_std__optional_std__string_.swift +0 -54
  204. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec.swift +0 -57
  205. package/nitrogen/generated/ios/swift/HybridCactusCryptoSpec_cxx.swift +0 -139
  206. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec.swift +0 -58
  207. package/nitrogen/generated/ios/swift/HybridCactusDeviceInfoSpec_cxx.swift +0 -164
  208. package/nitrogen/generated/shared/c++/DeviceInfo.hpp +0 -92
  209. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.cpp +0 -21
  210. package/nitrogen/generated/shared/c++/HybridCactusCryptoSpec.hpp +0 -63
  211. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.cpp +0 -22
  212. package/nitrogen/generated/shared/c++/HybridCactusDeviceInfoSpec.hpp +0 -67
  213. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.cpp +0 -23
  214. package/nitrogen/generated/shared/c++/HybridCactusUtilSpec.hpp +0 -66
  215. package/src/api/Database.ts +0 -188
  216. package/src/api/RemoteLM.ts +0 -273
  217. package/src/config/CactusConfig.ts +0 -11
  218. package/src/native/CactusCrypto.ts +0 -11
  219. package/src/native/CactusDeviceInfo.ts +0 -18
  220. package/src/native/CactusUtil.ts +0 -43
  221. package/src/specs/CactusCrypto.nitro.ts +0 -6
  222. package/src/specs/CactusDeviceInfo.nitro.ts +0 -15
  223. package/src/specs/CactusUtil.nitro.ts +0 -8
  224. package/src/telemetry/Telemetry.ts +0 -236
  225. package/src/types/CactusModel.ts +0 -15
  226. package/src/types/CactusSTTModel.ts +0 -10
package/README.md CHANGED
@@ -15,7 +15,7 @@ npm install cactus-react-native react-native-nitro-modules
15
15
  Get started with Cactus in just a few lines of code:
16
16
 
17
17
  ```typescript
18
- import { CactusLM, type Message } from 'cactus-react-native';
18
+ import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
19
19
 
20
20
  // Create a new instance
21
21
  const cactusLM = new CactusLM();
@@ -26,7 +26,7 @@ await cactusLM.download({
26
26
  });
27
27
 
28
28
  // Generate a completion
29
- const messages: Message[] = [
29
+ const messages: CactusLMMessage[] = [
30
30
  { role: 'user', content: 'What is the capital of France?' }
31
31
  ];
32
32
 
@@ -78,6 +78,32 @@ const App = () => {
78
78
 
79
79
  ## Language Model
80
80
 
81
+ ### Model Options
82
+
83
+ Choose the model quantization and enable NPU acceleration with Pro models.
84
+
85
+ ```typescript
86
+ import { CactusLM } from 'cactus-react-native';
87
+
88
+ // Use int8 for better accuracy (default)
89
+ const cactusLM = new CactusLM({
90
+ model: 'lfm2-vl-450m',
91
+ options: {
92
+ quantization: 'int8', // 'int4' or 'int8'
93
+ pro: false
94
+ }
95
+ });
96
+
97
+ // Use pro models for NPU acceleration
98
+ const cactusPro = new CactusLM({
99
+ model: 'lfm2-vl-450m',
100
+ options: {
101
+ quantization: 'int8',
102
+ pro: true
103
+ }
104
+ });
105
+ ```
106
+
81
107
  ### Completion
82
108
 
83
109
  Generate text responses from the model by providing a conversation history.
@@ -85,11 +111,11 @@ Generate text responses from the model by providing a conversation history.
85
111
  #### Class
86
112
 
87
113
  ```typescript
88
- import { CactusLM, type Message } from 'cactus-react-native';
114
+ import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
89
115
 
90
116
  const cactusLM = new CactusLM();
91
117
 
92
- const messages: Message[] = [{ role: 'user', content: 'Hello, World!' }];
118
+ const messages: CactusLMMessage[] = [{ role: 'user', content: 'Hello, World!' }];
93
119
  const onToken = (token: string) => { console.log('Token:', token) };
94
120
 
95
121
  const result = await cactusLM.complete({ messages, onToken });
@@ -99,13 +125,13 @@ console.log('Completion result:', result);
99
125
  #### Hook
100
126
 
101
127
  ```tsx
102
- import { useCactusLM, type Message } from 'cactus-react-native';
128
+ import { useCactusLM, type CactusLMMessage } from 'cactus-react-native';
103
129
 
104
130
  const App = () => {
105
131
  const cactusLM = useCactusLM();
106
132
 
107
133
  const handleComplete = async () => {
108
- const messages: Message[] = [{ role: 'user', content: 'Hello, World!' }];
134
+ const messages: CactusLMMessage[] = [{ role: 'user', content: 'Hello, World!' }];
109
135
 
110
136
  const result = await cactusLM.complete({ messages });
111
137
  console.log('Completion result:', result);
@@ -127,12 +153,12 @@ Vision allows you to pass images along with text prompts, enabling the model to
127
153
  #### Class
128
154
 
129
155
  ```typescript
130
- import { CactusLM, type Message } from 'cactus-react-native';
156
+ import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
131
157
 
132
158
  // Vision-capable model
133
159
  const cactusLM = new CactusLM({ model: 'lfm2-vl-450m' });
134
160
 
135
- const messages: Message[] = [
161
+ const messages: CactusLMMessage[] = [
136
162
  {
137
163
  role: 'user',
138
164
  content: "What's in the image?",
@@ -147,14 +173,14 @@ console.log('Response:', result.response);
147
173
  #### Hook
148
174
 
149
175
  ```tsx
150
- import { useCactusLM, type Message } from 'cactus-react-native';
176
+ import { useCactusLM, type CactusLMMessage } from 'cactus-react-native';
151
177
 
152
178
  const App = () => {
153
179
  // Vision-capable model
154
180
  const cactusLM = useCactusLM({ model: 'lfm2-vl-450m' });
155
181
 
156
182
  const handleAnalyze = async () => {
157
- const messages: Message[] = [
183
+ const messages: CactusLMMessage[] = [
158
184
  {
159
185
  role: 'user',
160
186
  content: "What's in the image?",
@@ -181,9 +207,9 @@ Enable the model to generate function calls by defining available tools and thei
181
207
  #### Class
182
208
 
183
209
  ```typescript
184
- import { CactusLM, type Message, type Tool } from 'cactus-react-native';
210
+ import { CactusLM, type CactusLMMessage, type CactusLMTool } from 'cactus-react-native';
185
211
 
186
- const tools: Tool[] = [
212
+ const tools: CactusLMTool[] = [
187
213
  {
188
214
  name: 'get_weather',
189
215
  description: 'Get current weather for a location',
@@ -202,7 +228,7 @@ const tools: Tool[] = [
202
228
 
203
229
  const cactusLM = new CactusLM();
204
230
 
205
- const messages: Message[] = [
231
+ const messages: CactusLMMessage[] = [
206
232
  { role: 'user', content: "What's the weather in San Francisco?" },
207
233
  ];
208
234
 
@@ -214,9 +240,9 @@ console.log('Function calls:', result.functionCalls);
214
240
  #### Hook
215
241
 
216
242
  ```tsx
217
- import { useCactusLM, type Message, type Tool } from 'cactus-react-native';
243
+ import { useCactusLM, type CactusLMMessage, type CactusLMTool } from 'cactus-react-native';
218
244
 
219
- const tools: Tool[] = [
245
+ const tools: CactusLMTool[] = [
220
246
  {
221
247
  name: 'get_weather',
222
248
  description: 'Get current weather for a location',
@@ -237,7 +263,7 @@ const App = () => {
237
263
  const cactusLM = useCactusLM();
238
264
 
239
265
  const handleComplete = async () => {
240
- const messages: Message[] = [
266
+ const messages: CactusLMMessage[] = [
241
267
  { role: 'user', content: "What's the weather in San Francisco?" },
242
268
  ];
243
269
 
@@ -257,13 +283,13 @@ RAG allows you to provide a corpus of documents that the model can reference dur
257
283
  #### Class
258
284
 
259
285
  ```typescript
260
- import { CactusLM, type Message } from 'cactus-react-native';
286
+ import { CactusLM, type CactusLMMessage } from 'cactus-react-native';
261
287
 
262
288
  const cactusLM = new CactusLM({
263
289
  corpusDir: 'path/to/your/corpus', // Directory containing .txt files
264
290
  });
265
291
 
266
- const messages: Message[] = [
292
+ const messages: CactusLMMessage[] = [
267
293
  { role: 'user', content: 'What information is in the documents?' },
268
294
  ];
269
295
 
@@ -274,7 +300,7 @@ console.log(result.response);
274
300
  #### Hook
275
301
 
276
302
  ```tsx
277
- import { useCactusLM, type Message } from 'cactus-react-native';
303
+ import { useCactusLM, type CactusLMMessage } from 'cactus-react-native';
278
304
 
279
305
  const App = () => {
280
306
  const cactusLM = useCactusLM({
@@ -282,7 +308,7 @@ const App = () => {
282
308
  });
283
309
 
284
310
  const handleAsk = async () => {
285
- const messages: Message[] = [
311
+ const messages: CactusLMMessage[] = [
286
312
  { role: 'user', content: 'What information is in the documents?' },
287
313
  ];
288
314
 
@@ -442,60 +468,9 @@ const App = () => {
442
468
  };
443
469
  ```
444
470
 
445
- ### Hybrid Mode (Cloud Fallback)
446
-
447
- The CactusLM supports a hybrid completion mode that falls back to a cloud-based LLM provider `OpenRouter` if local inference fails.
448
-
449
- #### Class
450
-
451
- ```typescript
452
- import { CactusLM, type Message } from 'cactus-react-native';
453
-
454
- const cactusLM = new CactusLM();
455
-
456
- const messages: Message[] = [
457
- { role: 'user', content: 'Hello, World!' }
458
- ];
459
-
460
- // Falls back to remote if local fails
461
- const result = await cactusLM.complete({
462
- messages,
463
- mode: 'hybrid'
464
- });
465
- ```
466
-
467
- #### Hook
468
-
469
- ```tsx
470
- import { useCactusLM, type Message } from 'cactus-react-native';
471
-
472
- const App = () => {
473
- const cactusLM = useCactusLM();
474
-
475
- const handleComplete = async () => {
476
- const messages: Message[] = [
477
- { role: 'user', content: 'Hello, World!' }
478
- ];
479
-
480
- // Falls back to remote if local fails
481
- await cactusLM.complete({
482
- messages,
483
- mode: 'hybrid'
484
- });
485
- };
486
-
487
- return (
488
- <>
489
- <Button title="Complete" onPress={handleComplete} />
490
- <Text>{cactusLM.completion}</Text>
491
- </>
492
- );
493
- };
494
- ```
495
-
496
471
  ## Speech-to-Text (STT)
497
472
 
498
- The `CactusSTT` class provides audio transcription and audio embedding capabilities using Whisper models.
473
+ The `CactusSTT` class provides audio transcription and audio embedding capabilities using speech-to-text models such as Whisper and Moonshine.
499
474
 
500
475
  ### Transcription
501
476
 
@@ -508,8 +483,6 @@ import { CactusSTT } from 'cactus-react-native';
508
483
 
509
484
  const cactusSTT = new CactusSTT({ model: 'whisper-small' });
510
485
 
511
- await cactusSTT.init();
512
-
513
486
  // Transcribe from file path
514
487
  const result = await cactusSTT.transcribe({
515
488
  audio: 'path/to/audio.wav',
@@ -559,6 +532,66 @@ const App = () => {
559
532
  };
560
533
  ```
561
534
 
535
+ ### Streaming Transcription
536
+
537
+ Transcribe audio in real time with incremental results. Each call to `streamTranscribeProcess` feeds an audio chunk and returns the currently confirmed and pending text.
538
+
539
+ #### Class
540
+
541
+ ```typescript
542
+ import { CactusSTT } from 'cactus-react-native';
543
+
544
+ const cactusSTT = new CactusSTT({ model: 'whisper-small' });
545
+
546
+ await cactusSTT.streamTranscribeStart({
547
+ confirmationThreshold: 0.99, // confidence required to confirm text
548
+ minChunkSize: 32000, // minimum samples before processing
549
+ });
550
+
551
+ const audioChunk: number[] = [/* PCM samples as bytes */];
552
+ const result = await cactusSTT.streamTranscribeProcess({ audio: audioChunk });
553
+
554
+ console.log('Confirmed:', result.confirmed);
555
+ console.log('Pending:', result.pending);
556
+
557
+ const final = await cactusSTT.streamTranscribeStop();
558
+ console.log('Final confirmed:', final.confirmed);
559
+ ```
560
+
561
+ #### Hook
562
+
563
+ ```tsx
564
+ import { useCactusSTT } from 'cactus-react-native';
565
+
566
+ const App = () => {
567
+ const cactusSTT = useCactusSTT({ model: 'whisper-small' });
568
+
569
+ const handleStart = async () => {
570
+ await cactusSTT.streamTranscribeStart({ confirmationThreshold: 0.99 });
571
+ };
572
+
573
+ const handleChunk = async (audioChunk: number[]) => {
574
+ const result = await cactusSTT.streamTranscribeProcess({ audio: audioChunk });
575
+ console.log('Confirmed:', result.confirmed);
576
+ console.log('Pending:', result.pending);
577
+ };
578
+
579
+ const handleStop = async () => {
580
+ const final = await cactusSTT.streamTranscribeStop();
581
+ console.log('Final:', final.confirmed);
582
+ };
583
+
584
+ return (
585
+ <>
586
+ <Button onPress={handleStart} title="Start" />
587
+ <Button onPress={handleStop} title="Stop" />
588
+ <Text>{cactusSTT.streamTranscribeConfirmed}</Text>
589
+ <Text>{cactusSTT.streamTranscribePending}</Text>
590
+ </>
591
+ );
592
+ };
593
+ ```
594
+
562
595
  ### Audio Embedding
563
596
 
564
597
  Generate embeddings from audio files for audio understanding.
@@ -570,8 +603,6 @@ import { CactusSTT } from 'cactus-react-native';
570
603
 
571
604
  const cactusSTT = new CactusSTT();
572
605
 
573
- await cactusSTT.init();
574
-
575
606
  const result = await cactusSTT.audioEmbed({
576
607
  audioPath: 'path/to/audio.wav'
577
608
  });
@@ -600,6 +631,50 @@ const App = () => {
600
631
  };
601
632
  ```
602
633
 
634
+ ## Voice Activity Detection (VAD)
635
+
636
+ The `CactusVAD` class detects speech segments in audio, returning timestamped intervals where speech is present.
637
+
638
+ ### Class
639
+
640
+ ```typescript
641
+ import { CactusVAD } from 'cactus-react-native';
642
+
643
+ const cactusVAD = new CactusVAD({ model: 'silero-vad' });
644
+
645
+ const result = await cactusVAD.vad({
646
+ audio: 'path/to/audio.wav',
647
+ options: {
648
+ threshold: 0.5,
649
+ minSpeechDurationMs: 250,
650
+ minSilenceDurationMs: 100,
651
+ }
652
+ });
653
+
654
+ console.log('Speech segments:', result.segments);
655
+ // [{ start: 0, end: 16000 }, { start: 32000, end: 48000 }, ...]
656
+ console.log('Total time (ms):', result.totalTime);
657
+ ```
658
+
659
+ ### Hook
660
+
661
+ ```tsx
662
+ import { useCactusVAD } from 'cactus-react-native';
663
+
664
+ const App = () => {
665
+ const cactusVAD = useCactusVAD({ model: 'silero-vad' });
666
+
667
+ const handleVAD = async () => {
668
+ const result = await cactusVAD.vad({
669
+ audio: 'path/to/audio.wav',
670
+ });
671
+ console.log('Speech segments:', result.segments);
672
+ };
673
+
674
+ return <Button title="Detect Speech" onPress={handleVAD} />;
675
+ };
676
+ ```
677
+
603
678
  ## Vector Index
604
679
 
605
680
  The `CactusIndex` class provides a vector database for storing and querying embeddings with metadata, enabling similarity search and retrieval.
@@ -854,9 +929,12 @@ const App = () => {
854
929
  **`new CactusLM(params?: CactusLMParams)`**
855
930
 
856
931
  **Parameters:**
857
- - `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6'`).
858
- - `contextSize` - Context window size (default: `2048`).
932
+ - `model` - Model slug or absolute path to a model file (default: `'qwen3-0.6b'`).
859
933
  - `corpusDir` - Directory containing text files for RAG (default: `undefined`).
934
+ - `cacheIndex` - Whether to cache the RAG corpus index on disk (default: `false`).
935
+ - `options` - Model options for quantization and NPU acceleration:
936
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int8'`).
937
+ - `pro` - Enable NPU-accelerated models (default: `false`).
860
938
 
861
939
  #### Methods
862
940
 
@@ -876,17 +954,21 @@ Initializes the model and prepares it for inference. Safe to call multiple times
876
954
  Performs text completion with optional streaming and tool support. Automatically calls `init()` if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
877
955
 
878
956
  **Parameters:**
879
- - `messages` - Array of `Message` objects.
957
+ - `messages` - Array of `CactusLMMessage` objects.
880
958
  - `options` - Generation options:
881
- - `temperature` - Sampling temperature (default: model-optimized).
882
- - `topP` - Nucleus sampling threshold (default: model-optimized).
883
- - `topK` - Top-K sampling limit (default: model-optimized).
959
+ - `temperature` - Sampling temperature.
960
+ - `topP` - Nucleus sampling threshold.
961
+ - `topK` - Top-K sampling limit.
884
962
  - `maxTokens` - Maximum number of tokens to generate (default: `512`).
885
- - `stopSequences` - Array of strings to stop generation (default: `undefined`).
963
+ - `stopSequences` - Array of strings to stop generation.
886
964
  - `forceTools` - Force the model to call one of the provided tools (default: `false`).
887
- - `tools` - Array of `Tool` objects for function calling (default: `undefined`).
965
+ - `telemetryEnabled` - Enable telemetry for this request (default: `true`).
966
+ - `confidenceThreshold` - Confidence threshold below which cloud handoff is triggered (default: `0.7`).
967
+ - `toolRagTopK` - Number of tools to select via RAG when the tool list is large (default: `2`).
968
+ - `includeStopSequences` - Whether to include stop sequences in the response (default: `false`).
969
+ - `useVad` - Whether to use VAD preprocessing (default: `true`).
970
+ - `tools` - Array of `CactusLMTool` objects for function calling.
888
971
  - `onToken` - Callback for streaming tokens.
889
- - `mode` - Completion mode: `'local'` | `'hybrid'` (default: `'local'`)
890
972
 
891
973
  **`tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>`**
892
974
 
@@ -897,7 +979,7 @@ Converts text into tokens using the model's tokenizer.
897
979
 
898
980
  **`scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult>`**
899
981
 
900
- Calculates perplexity scores for a window of tokens within a sequence.
982
+ Calculates the log-probability score for a window of tokens within a sequence.
901
983
 
902
984
  **Parameters:**
903
985
  - `tokens` - Array of token IDs.
@@ -934,16 +1016,16 @@ Releases all resources associated with the model. Automatically calls `stop()` f
934
1016
 
935
1017
  **`getModels(): Promise<CactusModel[]>`**
936
1018
 
937
- Fetches available models from the database and checks their download status.
1019
+ Returns available models.
938
1020
 
939
1021
  ### useCactusLM Hook
940
1022
 
941
- The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When model parameters (`model`, `contextSize`, or `corpusDir`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1023
+ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When any of the model parameters (`model`, `corpusDir`, `cacheIndex`, or `options`) changes, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
942
1024
 
943
1025
  #### State
944
1026
 
945
1027
  - `completion: string` - Current generated text. Automatically accumulated during streaming. Cleared before each new completion and when calling `reset()` or `destroy()`.
946
- - `isGenerating: boolean` - Whether the model is currently generating (completion or embedding). Both operations share this flag.
1028
+ - `isGenerating: boolean` - Whether the model is currently running an operation. Shared by `complete`, `tokenize`, `scoreWindow`, `embed`, and `imageEmbed`.
947
1029
  - `isInitializing: boolean` - Whether the model is initializing.
948
1030
  - `isDownloaded: boolean` - Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
949
1031
  - `isDownloading: boolean` - Whether the model is being downloaded.
@@ -956,13 +1038,13 @@ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When m
956
1038
  - `init(): Promise<void>` - Initializes the model for inference. Sets `isInitializing` to `true` during initialization.
957
1039
  - `complete(params: CactusLMCompleteParams): Promise<CactusLMCompleteResult>` - Generates text completions. Automatically accumulates tokens in the `completion` state during streaming. Sets `isGenerating` to `true` while generating. Clears `completion` before starting.
958
1040
  - `tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>` - Converts text into tokens. Sets `isGenerating` to `true` during operation.
959
- - `scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult>` - Calculates perplexity scores for a window of tokens. Sets `isGenerating` to `true` during operation.
1041
+ - `scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult>` - Calculates log-probability scores for a window of tokens. Sets `isGenerating` to `true` during operation.
960
1042
  - `embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult>` - Generates embeddings for the given text. Sets `isGenerating` to `true` during operation.
961
1043
  - `imageEmbed(params: CactusLMImageEmbedParams): Promise<CactusLMImageEmbedResult>` - Generates embeddings for the given image. Sets `isGenerating` to `true` while generating.
962
1044
  - `stop(): Promise<void>` - Stops ongoing generation. Clears any errors.
963
1045
  - `reset(): Promise<void>` - Resets the model's internal state, clearing cached context. Also clears the `completion` state.
964
1046
  - `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `completion` state. Automatically called when the component unmounts.
965
- - `getModels(): Promise<CactusModel[]>` - Fetches available models from the database and checks their download status.
1047
+ - `getModels(): Promise<CactusModel[]>` - Returns available models.
966
1048
 
967
1049
  ### CactusSTT Class
968
1050
 
@@ -971,8 +1053,10 @@ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When m
971
1053
  **`new CactusSTT(params?: CactusSTTParams)`**
972
1054
 
973
1055
  **Parameters:**
974
- - `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6'`).
975
- - `contextSize` - Context window size (default: `2048`).
1056
+ - `model` - Model slug or absolute path to a model file (default: `'whisper-small'`).
1057
+ - `options` - Model options for quantization and NPU acceleration:
1058
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int8'`).
1059
+ - `pro` - Enable NPU-accelerated models (default: `false`).
976
1060
 
977
1061
  #### Methods
978
1062
 
@@ -992,16 +1076,41 @@ Initializes the model and prepares it for inference. Safe to call multiple times
992
1076
  Transcribes audio to text with optional streaming support. Accepts either a file path or raw PCM audio samples. Automatically calls `init()` if not already initialized. Throws an error if a generation is already in progress.
993
1077
 
994
1078
  **Parameters:**
995
- - `audio` - Path to the audio file or raw PCM samples.
1079
+ - `audio` - Path to the audio file or raw PCM samples as a byte array.
996
1080
  - `prompt` - Optional prompt to guide transcription (default: `'<|startoftranscript|><|en|><|transcribe|><|notimestamps|>'`).
997
1081
  - `options` - Transcription options:
998
- - `temperature` - Sampling temperature (default: model-optimized).
999
- - `topP` - Nucleus sampling threshold (default: model-optimized).
1000
- - `topK` - Top-K sampling limit (default: model-optimized).
1001
- - `maxTokens` - Maximum number of tokens to generate (default: `512`).
1002
- - `stopSequences` - Array of strings to stop generation (default: `undefined`).
1082
+ - `temperature` - Sampling temperature.
1083
+ - `topP` - Nucleus sampling threshold.
1084
+ - `topK` - Top-K sampling limit.
1085
+ - `maxTokens` - Maximum number of tokens to generate (default: `384`).
1086
+ - `stopSequences` - Array of strings to stop generation.
1087
+ - `useVad` - Whether to apply VAD to strip silence before transcription (default: `true`).
1088
+ - `telemetryEnabled` - Enable telemetry for this request (default: `true`).
1089
+ - `confidenceThreshold` - Confidence threshold for quality assessment (default: `0.7`).
1090
+ - `cloudHandoffThreshold` - Max entropy threshold above which cloud handoff is triggered.
1091
+ - `includeStopSequences` - Whether to include stop sequences in the response (default: `false`).
1003
1092
  - `onToken` - Callback for streaming tokens.
1004
1093
 
1094
+ **`streamTranscribeStart(options?: CactusSTTStreamTranscribeStartOptions): Promise<void>`**
1095
+
1096
+ Starts a streaming transcription session. Automatically calls `init()` if not already initialized. If a session is already active, returns immediately.
1097
+
1098
+ **Parameters:**
1099
+ - `confirmationThreshold` - Fuzzy match ratio required to confirm a transcription segment (default: `0.99`).
1100
+ - `minChunkSize` - Minimum number of audio samples before processing (default: `32000`).
1101
+ - `telemetryEnabled` - Enable telemetry for this session (default: `true`).
1102
+
1103
+ **`streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>`**
1104
+
1105
+ Feeds audio samples into the streaming session and returns the current transcription state. Throws an error if no session is active.
1106
+
1107
+ **Parameters:**
1108
+ - `audio` - PCM audio samples as a byte array.
1109
+
1110
+ **`streamTranscribeStop(): Promise<CactusSTTStreamTranscribeStopResult>`**
1111
+
1112
+ Stops the streaming session and returns the final confirmed transcription text. Throws an error if no session is active.
1113
+
1005
1114
  **`audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult>`**
1006
1115
 
1007
1116
  Generates embeddings for the given audio file. Automatically calls `init()` if not already initialized. Throws an error if a generation is already in progress.
@@ -1019,20 +1128,23 @@ Resets the model's internal state. Automatically calls `stop()` first.
1019
1128
 
1020
1129
  **`destroy(): Promise<void>`**
1021
1130
 
1022
- Releases all resources associated with the model. Automatically calls `stop()` first. Safe to call even if the model is not initialized.
1131
+ Releases all resources associated with the model. Stops any active streaming session. Automatically calls `stop()` first. Safe to call even if the model is not initialized.
1023
1132
 
1024
- **`getModels(): Promise<CactusSTTModel[]>`**
1133
+ **`getModels(): Promise<CactusModel[]>`**
1025
1134
 
1026
- Fetches available STT models from the database and checks their download status.
1135
+ Returns available speech-to-text models.
1027
1136
 
1028
1137
  ### useCactusSTT Hook
1029
1138
 
1030
- The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When model parameters (`model`, `contextSize`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1139
+ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When either of the model parameters (`model` or `options`) changes, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1031
1140
 
1032
1141
  #### State
1033
1142
 
1034
1143
  - `transcription: string` - Current transcription text. Automatically accumulated during streaming. Cleared before each new transcription and when calling `reset()` or `destroy()`.
1035
- - `isGenerating: boolean` - Whether the model is currently generating (transcription or embedding). Both operations share this flag.
1144
+ - `streamTranscribeConfirmed: string` - Accumulated confirmed text from the active streaming session. Updated after each successful `streamTranscribeProcess` call and finalized by `streamTranscribeStop`.
1145
+ - `streamTranscribePending: string` - Uncommitted (in-progress) text from the current audio chunk. Cleared when the session stops.
1146
+ - `isGenerating: boolean` - Whether the model is currently transcribing or embedding. Both operations share this flag.
1147
+ - `isStreamTranscribing: boolean` - Whether a streaming transcription session is currently active.
1036
1148
  - `isInitializing: boolean` - Whether the model is initializing.
1037
1149
  - `isDownloaded: boolean` - Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
1038
1150
  - `isDownloading: boolean` - Whether the model is being downloaded.
@@ -1045,10 +1157,84 @@ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When
1045
1157
  - `init(): Promise<void>` - Initializes the model for inference. Sets `isInitializing` to `true` during initialization.
1046
1158
  - `transcribe(params: CactusSTTTranscribeParams): Promise<CactusSTTTranscribeResult>` - Transcribes audio to text. Automatically accumulates tokens in the `transcription` state during streaming. Sets `isGenerating` to `true` while generating. Clears `transcription` before starting.
1047
1159
  - `audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult>` - Generates embeddings for the given audio. Sets `isGenerating` to `true` during operation.
1160
+ - `streamTranscribeStart(options?: CactusSTTStreamTranscribeStartOptions): Promise<void>` - Starts a streaming transcription session. If a session is already active, returns immediately. Clears `streamTranscribeConfirmed` and `streamTranscribePending` before starting. Sets `isStreamTranscribing` to `true`.
1161
+ - `streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>` - Feeds audio and returns incremental results. Appends confirmed text to `streamTranscribeConfirmed` and updates `streamTranscribePending`.
1162
+ - `streamTranscribeStop(): Promise<CactusSTTStreamTranscribeStopResult>` - Stops the session and returns the final result. Sets `isStreamTranscribing` to `false`. Appends final confirmed text to `streamTranscribeConfirmed` and clears `streamTranscribePending`.
1048
1163
  - `stop(): Promise<void>` - Stops ongoing generation. Clears any errors.
1049
1164
  - `reset(): Promise<void>` - Resets the model's internal state. Also clears the `transcription` state.
1050
- - `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `transcription` state. Automatically called when the component unmounts.
1051
- - `getModels(): Promise<CactusSTTModel[]>` - Fetches available STT models from the database and checks their download status.
1165
+ - `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `transcription`, `streamTranscribeConfirmed`, and `streamTranscribePending` state. Automatically called when the component unmounts.
1166
+ - `getModels(): Promise<CactusModel[]>` - Returns available speech-to-text models.
1167
+
1168
+ ### CactusVAD Class
1169
+
1170
+ #### Constructor
1171
+
1172
+ **`new CactusVAD(params?: CactusVADParams)`**
1173
+
1174
+ **Parameters:**
1175
+ - `model` - Model slug or absolute path to a VAD model file (default: `'silero-vad'`).
1176
+ - `options` - Model options:
1177
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int8'`).
1178
+ - `pro` - Enable NPU-accelerated models (default: `false`).
1179
+
1180
+ #### Methods
1181
+
1182
+ **`download(params?: CactusVADDownloadParams): Promise<void>`**
1183
+
1184
+ Downloads the VAD model. If the model is already downloaded, returns immediately with progress `1`. Throws an error if a download is already in progress.
1185
+
1186
+ **Parameters:**
1187
+ - `onProgress` - Callback for download progress (0-1).
1188
+
1189
+ **`init(): Promise<void>`**
1190
+
1191
+ Initializes the VAD model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
1192
+
1193
+ **`vad(params: CactusVADVadParams): Promise<CactusVADResult>`**
1194
+
1195
+ Runs voice activity detection on the given audio. Automatically calls `init()` if not already initialized.
1196
+
1197
+ **Parameters:**
1198
+ - `audio` - Path to the audio file or raw PCM samples as a byte array.
1199
+ - `options` - VAD options:
1200
+ - `threshold` - Speech probability threshold (default: model default).
1201
+ - `negThreshold` - Silence probability threshold.
1202
+ - `minSpeechDurationMs` - Minimum speech segment duration in ms.
1203
+ - `maxSpeechDurationS` - Maximum speech segment duration in seconds.
1204
+ - `minSilenceDurationMs` - Minimum silence duration in ms before ending a segment.
1205
+ - `speechPadMs` - Padding added to each speech segment in ms.
1206
+ - `windowSizeSamples` - Processing window size in samples.
1207
+ - `samplingRate` - Audio sampling rate.
1208
+ - `minSilenceAtMaxSpeech` - Minimum silence duration considered as a split point once a segment reaches `maxSpeechDurationS`.
1209
+ - `useMaxPossSilAtMaxSpeech` - Whether to split at the longest available silence when a segment reaches `maxSpeechDurationS`.
1210
+
1211
+ **`destroy(): Promise<void>`**
1212
+
1213
+ Releases all resources associated with the model. Safe to call even if the model is not initialized.
1214
+
1215
+ **`getModels(): Promise<CactusModel[]>`**
1216
+
1217
+ Returns available VAD models.
1218
+
1219
+ ### useCactusVAD Hook
1220
+
1221
+ The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When either of the model parameters (`model` or `options`) changes, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1222
+
1223
+ #### State
1224
+
1225
+ - `isInitializing: boolean` - Whether the model is initializing.
1226
+ - `isDownloaded: boolean` - Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
1227
+ - `isDownloading: boolean` - Whether the model is being downloaded.
1228
+ - `downloadProgress: number` - Download progress (0-1). Reset to `0` after download completes.
1229
+ - `error: string | null` - Last error message, or `null`.
1230
+
1231
+ #### Methods
1232
+
1233
+ - `download(params?: CactusVADDownloadParams): Promise<void>` - Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
1234
+ - `init(): Promise<void>` - Initializes the model.
1235
+ - `vad(params: CactusVADVadParams): Promise<CactusVADResult>` - Runs voice activity detection.
1236
+ - `destroy(): Promise<void>` - Releases all resources. Automatically called when the component unmounts.
1237
+ - `getModels(): Promise<CactusModel[]>` - Returns available VAD models.
1052
1238
 
1053
1239
  ### CactusIndex Class
1054
1240
 
@@ -1135,8 +1321,9 @@ The `useCactusIndex` hook manages a `CactusIndex` instance with reactive state.
1135
1321
  ```typescript
1136
1322
  interface CactusLMParams {
1137
1323
  model?: string;
1138
- contextSize?: number;
1139
1324
  corpusDir?: string;
1325
+ cacheIndex?: boolean;
1326
+ options?: CactusModelOptions;
1140
1327
  }
1141
1328
  ```
1142
1329
 
@@ -1148,33 +1335,38 @@ interface CactusLMDownloadParams {
1148
1335
  }
1149
1336
  ```
1150
1337
 
1151
- ### Message
1338
+ ### CactusLMMessage
1152
1339
 
1153
1340
  ```typescript
1154
- interface Message {
1341
+ interface CactusLMMessage {
1155
1342
  role: 'user' | 'assistant' | 'system';
1156
1343
  content?: string;
1157
1344
  images?: string[];
1158
1345
  }
1159
1346
  ```
1160
1347
 
1161
- ### CompleteOptions
1348
+ ### CactusLMCompleteOptions
1162
1349
 
1163
1350
  ```typescript
1164
- interface CompleteOptions {
1351
+ interface CactusLMCompleteOptions {
1165
1352
  temperature?: number;
1166
1353
  topP?: number;
1167
1354
  topK?: number;
1168
1355
  maxTokens?: number;
1169
1356
  stopSequences?: string[];
1170
1357
  forceTools?: boolean;
1358
+ telemetryEnabled?: boolean;
1359
+ confidenceThreshold?: number;
1360
+ toolRagTopK?: number;
1361
+ includeStopSequences?: boolean;
1362
+ useVad?: boolean;
1171
1363
  }
1172
1364
  ```
1173
1365
 
1174
- ### Tool
1366
+ ### CactusLMTool
1175
1367
 
1176
1368
  ```typescript
1177
- interface Tool {
1369
+ interface CactusLMTool {
1178
1370
  name: string;
1179
1371
  description: string;
1180
1372
  parameters: {
@@ -1194,11 +1386,10 @@ interface Tool {
1194
1386
 
1195
1387
  ```typescript
1196
1388
  interface CactusLMCompleteParams {
1197
- messages: Message[];
1198
- options?: CompleteOptions;
1199
- tools?: Tool[];
1389
+ messages: CactusLMMessage[];
1390
+ options?: CactusLMCompleteOptions;
1391
+ tools?: CactusLMTool[];
1200
1392
  onToken?: (token: string) => void;
1201
- mode?: 'local' | 'hybrid';
1202
1393
  }
1203
1394
  ```
1204
1395
 
@@ -1212,12 +1403,16 @@ interface CactusLMCompleteResult {
1212
1403
  name: string;
1213
1404
  arguments: { [key: string]: any };
1214
1405
  }[];
1406
+ cloudHandoff?: boolean;
1407
+ confidence?: number;
1215
1408
  timeToFirstTokenMs: number;
1216
1409
  totalTimeMs: number;
1217
- tokensPerSecond: number;
1218
1410
  prefillTokens: number;
1411
+ prefillTps: number;
1219
1412
  decodeTokens: number;
1413
+ decodeTps: number;
1220
1414
  totalTokens: number;
1415
+ ramUsageMb?: number;
1221
1416
  }
1222
1417
  ```
1223
1418
 
@@ -1293,28 +1488,31 @@ interface CactusLMImageEmbedResult {
1293
1488
 
1294
1489
  ```typescript
1295
1490
  interface CactusModel {
1296
- name: string;
1297
- slug: string;
1298
- quantization: number;
1299
- sizeMb: number;
1300
- downloadUrl: string;
1301
- supportsToolCalling: boolean;
1302
- supportsVision: boolean;
1303
- supportsCompletion: boolean;
1304
- createdAt: Date;
1305
- isDownloaded: boolean;
1491
+ quantization: {
1492
+ int4: {
1493
+ sizeMb: number;
1494
+ url: string;
1495
+ pro?: {
1496
+ apple: string;
1497
+ };
1498
+ };
1499
+ int8: {
1500
+ sizeMb: number;
1501
+ url: string;
1502
+ pro?: {
1503
+ apple: string;
1504
+ };
1505
+ };
1506
+ };
1306
1507
  }
1307
1508
  ```
1308
1509
 
1309
- ### CactusSTTModel
1510
+ ### CactusModelOptions
1310
1511
 
1311
1512
  ```typescript
1312
- interface CactusSTTModel {
1313
- slug: string;
1314
- sizeMb: number;
1315
- downloadUrl: string;
1316
- createdAt: Date;
1317
- isDownloaded: boolean;
1513
+ interface CactusModelOptions {
1514
+ quantization?: 'int4' | 'int8';
1515
+ pro?: boolean;
1318
1516
  }
1319
1517
  ```
1320
1518
 
@@ -1323,7 +1521,7 @@ interface CactusSTTModel {
1323
1521
  ```typescript
1324
1522
  interface CactusSTTParams {
1325
1523
  model?: string;
1326
- contextSize?: number;
1524
+ options?: CactusModelOptions;
1327
1525
  }
1328
1526
  ```
1329
1527
 
@@ -1333,18 +1531,22 @@ interface CactusSTTParams {
1333
1531
  interface CactusSTTDownloadParams {
1334
1532
  onProgress?: (progress: number) => void;
1335
1533
  }
1336
-
1337
1534
  ```
1338
1535
 
1339
- ### TranscribeOptions
1536
+ ### CactusSTTTranscribeOptions
1340
1537
 
1341
- ```ts
1342
- interface TranscribeOptions {
1538
+ ```typescript
1539
+ interface CactusSTTTranscribeOptions {
1343
1540
  temperature?: number;
1344
1541
  topP?: number;
1345
1542
  topK?: number;
1346
1543
  maxTokens?: number;
1347
1544
  stopSequences?: string[];
1545
+ useVad?: boolean;
1546
+ telemetryEnabled?: boolean;
1547
+ confidenceThreshold?: number;
1548
+ cloudHandoffThreshold?: number;
1549
+ includeStopSequences?: boolean;
1348
1550
  }
1349
1551
  ```
1350
1552
 
@@ -1354,7 +1556,7 @@ interface TranscribeOptions {
1354
1556
  interface CactusSTTTranscribeParams {
1355
1557
  audio: string | number[];
1356
1558
  prompt?: string;
1357
- options?: TranscribeOptions;
1559
+ options?: CactusSTTTranscribeOptions;
1358
1560
  onToken?: (token: string) => void;
1359
1561
  }
1360
1562
  ```
@@ -1365,14 +1567,17 @@ interface CactusSTTTranscribeParams {
1365
1567
  interface CactusSTTTranscribeResult {
1366
1568
  success: boolean;
1367
1569
  response: string;
1570
+ cloudHandoff?: boolean;
1571
+ confidence?: number;
1368
1572
  timeToFirstTokenMs: number;
1369
1573
  totalTimeMs: number;
1370
- tokensPerSecond: number;
1371
1574
  prefillTokens: number;
1575
+ prefillTps: number;
1372
1576
  decodeTokens: number;
1577
+ decodeTps: number;
1373
1578
  totalTokens: number;
1579
+ ramUsageMb?: number;
1374
1580
  }
1375
-
1376
1581
  ```
1377
1582
 
1378
1583
  ### CactusSTTAudioEmbedParams
@@ -1391,6 +1596,119 @@ interface CactusSTTAudioEmbedResult {
1391
1596
  }
1392
1597
  ```
1393
1598
 
1599
+ ### CactusSTTStreamTranscribeStartOptions
1600
+
1601
+ ```typescript
1602
+ interface CactusSTTStreamTranscribeStartOptions {
1603
+ confirmationThreshold?: number;
1604
+ minChunkSize?: number;
1605
+ telemetryEnabled?: boolean;
1606
+ }
1607
+ ```
1608
+
1609
+ ### CactusSTTStreamTranscribeProcessParams
1610
+
1611
+ ```typescript
1612
+ interface CactusSTTStreamTranscribeProcessParams {
1613
+ audio: number[];
1614
+ }
1615
+ ```
1616
+
1617
+ ### CactusSTTStreamTranscribeProcessResult
1618
+
1619
+ ```typescript
1620
+ interface CactusSTTStreamTranscribeProcessResult {
1621
+ success: boolean;
1622
+ confirmed: string;
1623
+ pending: string;
1624
+ bufferDurationMs?: number;
1625
+ confidence?: number;
1626
+ cloudHandoff?: boolean;
1627
+ cloudResult?: string;
1628
+ cloudJobId?: number;
1629
+ cloudResultJobId?: number;
1630
+ timeToFirstTokenMs?: number;
1631
+ totalTimeMs?: number;
1632
+ prefillTokens?: number;
1633
+ prefillTps?: number;
1634
+ decodeTokens?: number;
1635
+ decodeTps?: number;
1636
+ totalTokens?: number;
1637
+ ramUsageMb?: number;
1638
+ }
1639
+ ```
1640
+
1641
+ ### CactusSTTStreamTranscribeStopResult
1642
+
1643
+ ```typescript
1644
+ interface CactusSTTStreamTranscribeStopResult {
1645
+ success: boolean;
1646
+ confirmed: string;
1647
+ }
1648
+ ```
1649
+
1650
+ ### CactusVADParams
1651
+
1652
+ ```typescript
1653
+ interface CactusVADParams {
1654
+ model?: string;
1655
+ options?: CactusModelOptions;
1656
+ }
1657
+ ```
1658
+
1659
+ ### CactusVADDownloadParams
1660
+
1661
+ ```typescript
1662
+ interface CactusVADDownloadParams {
1663
+ onProgress?: (progress: number) => void;
1664
+ }
1665
+ ```
1666
+
1667
+ ### CactusVADOptions
1668
+
1669
+ ```typescript
1670
+ interface CactusVADOptions {
1671
+ threshold?: number;
1672
+ negThreshold?: number;
1673
+ minSpeechDurationMs?: number;
1674
+ maxSpeechDurationS?: number;
1675
+ minSilenceDurationMs?: number;
1676
+ speechPadMs?: number;
1677
+ windowSizeSamples?: number;
1678
+ samplingRate?: number;
1679
+ minSilenceAtMaxSpeech?: number;
1680
+ useMaxPossSilAtMaxSpeech?: boolean;
1681
+ }
1682
+ ```
1683
+
1684
+ ### CactusVADSegment
1685
+
1686
+ ```typescript
1687
+ interface CactusVADSegment {
1688
+ start: number;
1689
+ end: number;
1690
+ }
1691
+ ```
1692
+
1693
+ ### CactusVADResult
1694
+
1695
+ ```typescript
1696
+ interface CactusVADResult {
1697
+ segments: CactusVADSegment[];
1698
+ totalTime: number;
1699
+ ramUsage: number;
1700
+ }
1701
+ ```
1702
+
1703
+ ### CactusVADVadParams
1704
+
1705
+ ```typescript
1706
+ interface CactusVADVadParams {
1707
+ audio: string | number[];
1708
+ options?: CactusVADOptions;
1709
+ }
1710
+ ```
1711
+
1394
1712
  ### CactusIndexParams
1395
1713
 
1396
1714
  ```typescript
@@ -1429,10 +1747,10 @@ interface CactusIndexGetResult {
1429
1747
  }
1430
1748
  ```
1431
1749
 
1432
- ### IndexQueryOptions
1750
+ ### CactusIndexQueryOptions
1433
1751
 
1434
1752
  ```typescript
1435
- interface IndexQueryOptions {
1753
+ interface CactusIndexQueryOptions {
1436
1754
  topK?: number;
1437
1755
  scoreThreshold?: number;
1438
1756
  }
@@ -1443,7 +1761,7 @@ interface IndexQueryOptions {
1443
1761
  ```typescript
1444
1762
  interface CactusIndexQueryParams {
1445
1763
  embeddings: number[][];
1446
- options?: IndexQueryOptions;
1764
+ options?: CactusIndexQueryOptions;
1447
1765
  }
1448
1766
  ```
1449
1767
 
@@ -1464,38 +1782,11 @@ interface CactusIndexDeleteParams {
1464
1782
  }
1465
1783
  ```
1466
1784
 
1467
- ## Configuration
1468
-
1469
- ### Telemetry
1470
-
1471
- Cactus offers powerful telemetry for all your projects. Create a token on the [Cactus dashboard](https://www.cactuscompute.com/dashboard).
1472
-
1473
- ```typescript
1474
- import { CactusConfig } from 'cactus-react-native';
1475
-
1476
- // Enable Telemetry for your project
1477
- CactusConfig.telemetryToken = 'your-telemetry-token-here';
1478
-
1479
- // Disable telemetry
1480
- CactusConfig.isTelemetryEnabled = false;
1481
- ```
1482
-
1483
- ### Hybrid Mode
1484
-
1485
- Enable cloud fallback.
1486
-
1487
- ```typescript
1488
- import { CactusConfig } from 'cactus-react-native';
1489
-
1490
- // Set your Cactus token for hybrid mode
1491
- CactusConfig.cactusToken = 'your-cactus-token-here';
1492
- ```
1493
-
1494
1785
  ## Performance Tips
1495
1786
 
1496
1787
  - **Model Selection** - Choose smaller models for faster inference on mobile devices.
1497
- - **Context Size** - Reduce the context size to lower memory usage.
1498
1788
  - **Memory Management** - Always call `destroy()` when you're done with models to free up resources.
1789
+ - **VAD** - Keep `useVad` enabled (the default) when transcribing audio that contains silence; stripping non-speech regions speeds up transcription.
1499
1790
 
1500
1791
  ## Example App
1501
1792