cactus-react-native 1.10.3 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +199 -40
  2. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  3. package/cpp/HybridCactus.cpp +131 -2
  4. package/cpp/HybridCactus.hpp +15 -0
  5. package/cpp/cactus_ffi.h +240 -2
  6. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +240 -2
  7. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +940 -109
  8. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +175 -25
  9. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +48 -21
  10. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +79 -7
  11. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +122 -9
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +191 -2
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  14. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +240 -2
  15. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +940 -109
  16. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +175 -25
  17. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +48 -21
  18. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +79 -7
  19. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +122 -9
  20. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +191 -2
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  22. package/lib/module/classes/{CactusVAD.js → CactusAudio.js} +19 -6
  23. package/lib/module/classes/CactusAudio.js.map +1 -0
  24. package/lib/module/classes/CactusLM.js +25 -0
  25. package/lib/module/classes/CactusLM.js.map +1 -1
  26. package/lib/module/hooks/{useCactusVAD.js → useCactusAudio.js} +50 -20
  27. package/lib/module/hooks/useCactusAudio.js.map +1 -0
  28. package/lib/module/index.js +2 -2
  29. package/lib/module/index.js.map +1 -1
  30. package/lib/module/modelRegistry.js +5 -3
  31. package/lib/module/modelRegistry.js.map +1 -1
  32. package/lib/module/native/Cactus.js +81 -2
  33. package/lib/module/native/Cactus.js.map +1 -1
  34. package/lib/module/types/CactusAudio.js +4 -0
  35. package/lib/module/types/{CactusVAD.js.map → CactusAudio.js.map} +1 -1
  36. package/lib/typescript/src/classes/CactusAudio.d.ts +22 -0
  37. package/lib/typescript/src/classes/CactusAudio.d.ts.map +1 -0
  38. package/lib/typescript/src/classes/CactusLM.d.ts +2 -1
  39. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  40. package/lib/typescript/src/hooks/useCactusAudio.d.ts +17 -0
  41. package/lib/typescript/src/hooks/useCactusAudio.d.ts.map +1 -0
  42. package/lib/typescript/src/index.d.ts +4 -4
  43. package/lib/typescript/src/index.d.ts.map +1 -1
  44. package/lib/typescript/src/native/Cactus.d.ts +9 -3
  45. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  46. package/lib/typescript/src/specs/Cactus.nitro.d.ts +3 -0
  47. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  48. package/lib/typescript/src/types/CactusAudio.d.ts +63 -0
  49. package/lib/typescript/src/types/CactusAudio.d.ts.map +1 -0
  50. package/lib/typescript/src/types/CactusLM.d.ts +15 -0
  51. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  52. package/lib/typescript/src/types/CactusSTT.d.ts +1 -0
  53. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  54. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +3 -0
  55. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +3 -0
  56. package/package.json +1 -1
  57. package/src/classes/{CactusVAD.ts → CactusAudio.ts} +32 -13
  58. package/src/classes/CactusLM.ts +36 -0
  59. package/src/hooks/{useCactusVAD.ts → useCactusAudio.ts} +65 -28
  60. package/src/index.tsx +16 -9
  61. package/src/modelRegistry.ts +20 -6
  62. package/src/native/Cactus.ts +118 -3
  63. package/src/specs/Cactus.nitro.ts +16 -0
  64. package/src/types/CactusAudio.ts +73 -0
  65. package/src/types/CactusLM.ts +17 -0
  66. package/src/types/CactusSTT.ts +1 -0
  67. package/lib/module/classes/CactusVAD.js.map +0 -1
  68. package/lib/module/hooks/useCactusVAD.js.map +0 -1
  69. package/lib/module/types/CactusVAD.js +0 -4
  70. package/lib/typescript/src/classes/CactusVAD.d.ts +0 -20
  71. package/lib/typescript/src/classes/CactusVAD.d.ts.map +0 -1
  72. package/lib/typescript/src/hooks/useCactusVAD.d.ts +0 -15
  73. package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +0 -1
  74. package/lib/typescript/src/types/CactusVAD.d.ts +0 -34
  75. package/lib/typescript/src/types/CactusVAD.d.ts.map +0 -1
  76. package/src/types/CactusVAD.ts +0 -39
package/README.md CHANGED
@@ -649,18 +649,18 @@ console.log('Language:', result.language); // e.g. 'en'
649
649
  console.log('Confidence:', result.confidence);
650
650
  ```
651
651
 
652
- ## Voice Activity Detection (VAD)
652
+ ## Audio Processing
653
653
 
654
- The `CactusVAD` class detects speech segments in audio, returning timestamped intervals where speech is present.
654
+ The `CactusAudio` class provides voice activity detection (VAD), speaker diarization, and speaker embedding extraction.
655
655
 
656
- ### Class
656
+ ### Voice Activity Detection
657
657
 
658
658
  ```typescript
659
- import { CactusVAD } from 'cactus-react-native';
659
+ import { CactusAudio } from 'cactus-react-native';
660
660
 
661
- const cactusVAD = new CactusVAD({ model: 'silero-vad' });
661
+ const cactusAudio = new CactusAudio({ model: 'silero-vad' });
662
662
 
663
- const result = await cactusVAD.vad({
663
+ const result = await cactusAudio.vad({
664
664
  audio: 'path/to/audio.wav',
665
665
  options: {
666
666
  threshold: 0.5,
@@ -674,22 +674,68 @@ console.log('Speech segments:', result.segments);
674
674
  console.log('Total time (ms):', result.totalTime);
675
675
  ```
676
676
 
677
+ ### Speaker Diarization
678
+
679
+ ```typescript
680
+ import { CactusAudio } from 'cactus-react-native';
681
+
682
+ const cactusAudio = new CactusAudio({ model: 'silero-vad' });
683
+
684
+ const result = await cactusAudio.diarize({
685
+ audio: 'path/to/audio.wav',
686
+ options: {
687
+ numSpeakers: 2,
688
+ minSpeakers: 1,
689
+ maxSpeakers: 4,
690
+ }
691
+ });
692
+
693
+ console.log('Number of speakers:', result.numSpeakers);
694
+ console.log('Scores:', result.scores);
695
+ ```
696
+
697
+ ### Speaker Embedding
698
+
699
+ ```typescript
700
+ import { CactusAudio } from 'cactus-react-native';
701
+
702
+ const cactusAudio = new CactusAudio({ model: 'silero-vad' });
703
+
704
+ const result = await cactusAudio.embedSpeaker({
705
+ audio: 'path/to/audio.wav',
706
+ });
707
+
708
+ console.log('Speaker embedding:', result.embedding);
709
+ ```
710
+
677
711
  ### Hook
678
712
 
679
713
  ```tsx
680
- import { useCactusVAD } from 'cactus-react-native';
714
+ import { useCactusAudio } from 'cactus-react-native';
681
715
 
682
716
  const App = () => {
683
- const cactusVAD = useCactusVAD({ model: 'silero-vad' });
717
+ const cactusAudio = useCactusAudio({ model: 'silero-vad' });
684
718
 
685
719
  const handleVAD = async () => {
686
- const result = await cactusVAD.vad({
720
+ const result = await cactusAudio.vad({
687
721
  audio: 'path/to/audio.wav',
688
722
  });
689
723
  console.log('Speech segments:', result.segments);
690
724
  };
691
725
 
692
- return <Button title="Detect Speech" onPress={handleVAD} />;
726
+ const handleDiarize = async () => {
727
+ const result = await cactusAudio.diarize({
728
+ audio: 'path/to/audio.wav',
729
+ });
730
+ console.log('Speakers:', result.numSpeakers);
731
+ };
732
+
733
+ return (
734
+ <>
735
+ <Button title="Detect Speech" onPress={handleVAD} />
736
+ <Button title="Diarize" onPress={handleDiarize} />
737
+ </>
738
+ );
693
739
  };
694
740
  ```
695
741
 
@@ -985,9 +1031,19 @@ Performs text completion with optional streaming and tool support. Automatically
985
1031
  - `toolRagTopK` - Number of tools to select via RAG when tool list is large (default: `2`).
986
1032
  - `includeStopSequences` - Whether to include stop sequences in the response (default: `false`).
987
1033
  - `useVad` - Whether to use VAD preprocessing (default: `true`).
1034
+ - `enableThinking` - Whether to enable thinking/reasoning output if supported by the model (default: unset).
988
1035
  - `tools` - Array of `CactusLMTool` objects for function calling.
989
1036
  - `onToken` - Callback for streaming tokens.
990
1037
 
1038
+ **`prefill(params: CactusLMPrefillParams): Promise<CactusLMPrefillResult>`**
1039
+
1040
+ Runs prompt prefill without generating any output tokens. Useful for measuring prefill performance or warming up the model's KV cache. Automatically calls `init()` if not already initialized. Throws an error if a generation is already in progress.
1041
+
1042
+ **Parameters:**
1043
+ - `messages` - Array of `CactusLMMessage` objects.
1044
+ - `options` - Same options as `complete`.
1045
+ - `tools` - Array of `CactusLMTool` objects.
1046
+
991
1047
  **`tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>`**
992
1048
 
993
1049
  Converts text into tokens using the model's tokenizer.
@@ -1038,7 +1094,7 @@ Returns available models.
1038
1094
 
1039
1095
  **`getModelName(): string`**
1040
1096
 
1041
- Returns the model slug or path the instance was created with.
1097
+ Returns the computed model identifier including quantization and pro suffix (e.g., `'qwen3-0.6b-int8'`, `'lfm2-vl-450m-int4-pro'`).
1042
1098
 
1043
1099
  ### useCactusLM Hook
1044
1100
 
@@ -1121,6 +1177,7 @@ Starts a streaming transcription session. Automatically calls `init()` if not al
1121
1177
  - `confirmationThreshold` - Fuzzy match ratio required to confirm a transcription segment (default: `0.99`).
1122
1178
  - `minChunkSize` - Minimum number of audio samples before processing (default: `32000`).
1123
1179
  - `telemetryEnabled` - Enable telemetry for this session (default: `true`).
1180
+ - `language` - Language code for transcription (e.g., `'en'`, `'es'`, `'fr'`). If not set, language is auto-detected.
1124
1181
 
1125
1182
  **`streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>`**
1126
1183
 
@@ -1167,7 +1224,7 @@ Returns available speech-to-text models.
1167
1224
 
1168
1225
  **`getModelName(): string`**
1169
1226
 
1170
- Returns the model slug or path the instance was created with.
1227
+ Returns the computed model identifier including quantization and pro suffix (e.g., `'whisper-small-int8'`).
1171
1228
 
1172
1229
  ### useCactusSTT Hook
1173
1230
 
@@ -1200,32 +1257,32 @@ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When
1200
1257
  - `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `transcription`, `streamTranscribeConfirmed`, and `streamTranscribePending` state. Automatically called when the component unmounts.
1201
1258
  - `getModels(): Promise<CactusModel[]>` - Returns available speech-to-text models.
1202
1259
 
1203
- ### CactusVAD Class
1260
+ ### CactusAudio Class
1204
1261
 
1205
1262
  #### Constructor
1206
1263
 
1207
- **`new CactusVAD(params?: CactusVADParams)`**
1264
+ **`new CactusAudio(params?: CactusAudioParams)`**
1208
1265
 
1209
1266
  **Parameters:**
1210
- - `model` - Model slug or absolute path to a VAD model file (default: `'silero-vad'`).
1267
+ - `model` - Model slug or absolute path to an audio model file (default: `'silero-vad'`).
1211
1268
  - `options` - Model options:
1212
1269
  - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int8'`).
1213
1270
  - `pro` - Enable NPU-accelerated models (default: `false`).
1214
1271
 
1215
1272
  #### Methods
1216
1273
 
1217
- **`download(params?: CactusVADDownloadParams): Promise<void>`**
1274
+ **`download(params?: CactusAudioDownloadParams): Promise<void>`**
1218
1275
 
1219
- Downloads the VAD model. If the model is already downloaded, returns immediately with progress `1`. Throws an error if a download is already in progress.
1276
+ Downloads the audio model. If the model is already downloaded, returns immediately with progress `1`. Throws an error if a download is already in progress.
1220
1277
 
1221
1278
  **Parameters:**
1222
1279
  - `onProgress` - Callback for download progress (0-1).
1223
1280
 
1224
1281
  **`init(): Promise<void>`**
1225
1282
 
1226
- Initializes the VAD model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
1283
+ Initializes the audio model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
1227
1284
 
1228
- **`vad(params: CactusVADVadParams): Promise<CactusVADResult>`**
1285
+ **`vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>`**
1229
1286
 
1230
1287
  Runs voice activity detection on the given audio. Automatically calls `init()` if not already initialized.
1231
1288
 
@@ -1243,21 +1300,41 @@ Runs voice activity detection on the given audio. Automatically calls `init()` i
1243
1300
  - `minSilenceAtMaxSpeech` - Minimum silence at max speech duration.
1244
1301
  - `useMaxPossSilAtMaxSpeech` - Whether to use maximum possible silence at max speech.
1245
1302
 
1303
+ **`diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>`**
1304
+
1305
+ Runs speaker diarization on the given audio. Automatically calls `init()` if not already initialized.
1306
+
1307
+ **Parameters:**
1308
+ - `audio` - Path to the audio file or raw PCM samples as a byte array.
1309
+ - `options` - Diarize options:
1310
+ - `stepMs` - Step size in milliseconds.
1311
+ - `threshold` - Diarization threshold.
1312
+ - `numSpeakers` - Expected number of speakers.
1313
+ - `minSpeakers` - Minimum number of speakers.
1314
+ - `maxSpeakers` - Maximum number of speakers.
1315
+
1316
+ **`embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>`**
1317
+
1318
+ Extracts a speaker embedding vector from the given audio. Automatically calls `init()` if not already initialized.
1319
+
1320
+ **Parameters:**
1321
+ - `audio` - Path to the audio file or raw PCM samples as a byte array.
1322
+
1246
1323
  **`destroy(): Promise<void>`**
1247
1324
 
1248
1325
  Releases all resources associated with the model. Safe to call even if the model is not initialized.
1249
1326
 
1250
1327
  **`getModels(): Promise<CactusModel[]>`**
1251
1328
 
1252
- Returns available VAD models.
1329
+ Returns available audio models.
1253
1330
 
1254
1331
  **`getModelName(): string`**
1255
1332
 
1256
- Returns the model slug or path the instance was created with.
1333
+ Returns the computed model identifier including quantization and pro suffix (e.g., `'silero-vad-int8'`).
1257
1334
 
1258
- ### useCactusVAD Hook
1335
+ ### useCactusAudio Hook
1259
1336
 
1260
- The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When model parameters (`model`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1337
+ The `useCactusAudio` hook manages a `CactusAudio` instance with reactive state. When model parameters (`model`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1261
1338
 
1262
1339
  #### State
1263
1340
 
@@ -1269,11 +1346,13 @@ The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When
1269
1346
 
1270
1347
  #### Methods
1271
1348
 
1272
- - `download(params?: CactusVADDownloadParams): Promise<void>` - Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
1349
+ - `download(params?: CactusAudioDownloadParams): Promise<void>` - Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
1273
1350
  - `init(): Promise<void>` - Initializes the model.
1274
- - `vad(params: CactusVADVadParams): Promise<CactusVADResult>` - Runs voice activity detection.
1351
+ - `vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>` - Runs voice activity detection.
1352
+ - `diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>` - Runs speaker diarization.
1353
+ - `embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>` - Extracts a speaker embedding.
1275
1354
  - `destroy(): Promise<void>` - Releases all resources. Automatically called when the component unmounts.
1276
- - `getModels(): Promise<CactusModel[]>` - Returns available VAD models.
1355
+ - `getModels(): Promise<CactusModel[]>` - Returns available audio models.
1277
1356
 
1278
1357
  ### CactusIndex Class
1279
1358
 
@@ -1413,6 +1492,7 @@ interface CactusLMCompleteOptions {
1413
1492
  toolRagTopK?: number;
1414
1493
  includeStopSequences?: boolean;
1415
1494
  useVad?: boolean;
1495
+ enableThinking?: boolean;
1416
1496
  }
1417
1497
  ```
1418
1498
 
@@ -1446,12 +1526,36 @@ interface CactusLMCompleteParams {
1446
1526
  }
1447
1527
  ```
1448
1528
 
1529
+ ### CactusLMPrefillParams
1530
+
1531
+ ```typescript
1532
+ interface CactusLMPrefillParams {
1533
+ messages: CactusLMMessage[];
1534
+ options?: CactusLMCompleteOptions;
1535
+ tools?: CactusLMTool[];
1536
+ }
1537
+ ```
1538
+
1539
+ ### CactusLMPrefillResult
1540
+
1541
+ ```typescript
1542
+ interface CactusLMPrefillResult {
1543
+ success: boolean;
1544
+ error: string | null;
1545
+ prefillTokens: number;
1546
+ prefillTps: number;
1547
+ totalTimeMs: number;
1548
+ ramUsageMb: number;
1549
+ }
1550
+ ```
1551
+
1449
1552
  ### CactusLMCompleteResult
1450
1553
 
1451
1554
  ```typescript
1452
1555
  interface CactusLMCompleteResult {
1453
1556
  success: boolean;
1454
1557
  response: string;
1558
+ thinking?: string;
1455
1559
  functionCalls?: {
1456
1560
  name: string;
1457
1561
  arguments: { [key: string]: any };
@@ -1658,6 +1762,7 @@ interface CactusSTTStreamTranscribeStartOptions {
1658
1762
  confirmationThreshold?: number;
1659
1763
  minChunkSize?: number;
1660
1764
  telemetryEnabled?: boolean;
1765
+ language?: string;
1661
1766
  }
1662
1767
  ```
1663
1768
 
@@ -1728,27 +1833,27 @@ interface CactusSTTDetectLanguageResult {
1728
1833
  }
1729
1834
  ```
1730
1835
 
1731
- ### CactusVADParams
1836
+ ### CactusAudioParams
1732
1837
 
1733
1838
  ```typescript
1734
- interface CactusVADParams {
1839
+ interface CactusAudioParams {
1735
1840
  model?: string;
1736
1841
  options?: CactusModelOptions;
1737
1842
  }
1738
1843
  ```
1739
1844
 
1740
- ### CactusVADDownloadParams
1845
+ ### CactusAudioDownloadParams
1741
1846
 
1742
1847
  ```typescript
1743
- interface CactusVADDownloadParams {
1848
+ interface CactusAudioDownloadParams {
1744
1849
  onProgress?: (progress: number) => void;
1745
1850
  }
1746
1851
  ```
1747
1852
 
1748
- ### CactusVADOptions
1853
+ ### CactusAudioVADOptions
1749
1854
 
1750
1855
  ```typescript
1751
- interface CactusVADOptions {
1856
+ interface CactusAudioVADOptions {
1752
1857
  threshold?: number;
1753
1858
  negThreshold?: number;
1754
1859
  minSpeechDurationMs?: number;
@@ -1762,31 +1867,85 @@ interface CactusVADOptions {
1762
1867
  }
1763
1868
  ```
1764
1869
 
1765
- ### CactusVADSegment
1870
+ ### CactusAudioVADSegment
1766
1871
 
1767
1872
  ```typescript
1768
- interface CactusVADSegment {
1873
+ interface CactusAudioVADSegment {
1769
1874
  start: number;
1770
1875
  end: number;
1771
1876
  }
1772
1877
  ```
1773
1878
 
1774
- ### CactusVADResult
1879
+ ### CactusAudioVADResult
1775
1880
 
1776
1881
  ```typescript
1777
- interface CactusVADResult {
1778
- segments: CactusVADSegment[];
1882
+ interface CactusAudioVADResult {
1883
+ segments: CactusAudioVADSegment[];
1779
1884
  totalTime: number;
1780
1885
  ramUsage: number;
1781
1886
  }
1782
1887
  ```
1783
1888
 
1784
- ### CactusVADVadParams
1889
+ ### CactusAudioVADParams
1785
1890
 
1786
1891
  ```typescript
1787
- interface CactusVADVadParams {
1892
+ interface CactusAudioVADParams {
1788
1893
  audio: string | number[];
1789
- options?: CactusVADOptions;
1894
+ options?: CactusAudioVADOptions;
1895
+ }
1896
+ ```
1897
+
1898
+ ### CactusAudioDiarizeOptions
1899
+
1900
+ ```typescript
1901
+ interface CactusAudioDiarizeOptions {
1902
+ stepMs?: number;
1903
+ threshold?: number;
1904
+ numSpeakers?: number;
1905
+ minSpeakers?: number;
1906
+ maxSpeakers?: number;
1907
+ }
1908
+ ```
1909
+
1910
+ ### CactusAudioDiarizeParams
1911
+
1912
+ ```typescript
1913
+ interface CactusAudioDiarizeParams {
1914
+ audio: string | number[];
1915
+ options?: CactusAudioDiarizeOptions;
1916
+ }
1917
+ ```
1918
+
1919
+ ### CactusAudioDiarizeResult
1920
+
1921
+ ```typescript
1922
+ interface CactusAudioDiarizeResult {
1923
+ success: boolean;
1924
+ error: string | null;
1925
+ numSpeakers: number;
1926
+ scores: number[];
1927
+ totalTimeMs: number;
1928
+ ramUsageMb: number;
1929
+ }
1930
+ ```
1931
+
1932
+ ### CactusAudioEmbedSpeakerParams
1933
+
1934
+ ```typescript
1935
+ interface CactusAudioEmbedSpeakerParams {
1936
+ audio: string | number[];
1937
+ }
1938
+ ```
1939
+
1940
+ ### CactusAudioEmbedSpeakerResult
1941
+
1942
+ ```typescript
1943
+ interface CactusAudioEmbedSpeakerResult {
1944
+ success: boolean;
1945
+ error: string | null;
1946
+ embedding: number[];
1947
+ totalTimeMs: number;
1948
+ ramUsageMb: number;
1790
1949
  }
1791
1950
  ```
1792
1951
 
package/cpp/HybridCactus.cpp CHANGED
@@ -65,7 +65,8 @@ std::shared_ptr<Promise<std::string>> HybridCactus::complete(
65
65
  responseBuffer.data(), responseBufferSize,
66
66
  optionsJson ? optionsJson->c_str() : nullptr,
67
67
  toolsJson ? toolsJson->c_str() : nullptr,
68
- cactusTokenCallback, &callbackCtx);
68
+ cactusTokenCallback, &callbackCtx,
69
+ nullptr, 0);
69
70
 
70
71
  if (result < 0) {
71
72
  throw std::runtime_error("Cactus complete failed: " +
@@ -79,6 +80,38 @@ std::shared_ptr<Promise<std::string>> HybridCactus::complete(
79
80
  });
80
81
  }
81
82
 
// HybridCactus::prefill: asynchronous wrapper around the native
// cactus_prefill call. The work runs inside Promise<std::string>::async and
// the promise resolves with the text the native call left in the response
// buffer. A missing model or a negative native return code is reported by
// throwing std::runtime_error from inside the task.
83
+ std::shared_ptr<Promise<std::string>> HybridCactus::prefill(
84
+ const std::string &messagesJson, double responseBufferSize,
85
+ const std::optional<std::string> &optionsJson,
86
+ const std::optional<std::string> &toolsJson) {
// All arguments are captured by value; `this` is captured as a raw pointer.
// NOTE(review): presumably the hybrid object outlives the task - confirm.
87
+ return Promise<std::string>::async([this, messagesJson, responseBufferSize,
88
+ optionsJson,
89
+ toolsJson]() -> std::string {
// _modelMutex is held until the lambda returns, i.e. across the native call.
90
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
91
+
92
+ if (!this->_model) {
93
+ throw std::runtime_error("Cactus model is not initialized");
94
+ }
95
+
96
+ std::string responseBuffer;
// responseBufferSize is a double and is converted implicitly to a size here.
// NOTE(review): negative, NaN or very large values are not validated.
97
+ responseBuffer.resize(responseBufferSize);
98
+
// Absent optional JSON strings are forwarded as nullptr.
// NOTE(review): the meaning of the trailing (nullptr, 0) pair is defined by
// cactus_ffi.h, which is not visible in this diff.
99
+ int result = cactus_prefill(this->_model, messagesJson.c_str(),
100
+ responseBuffer.data(), responseBufferSize,
101
+ optionsJson ? optionsJson->c_str() : nullptr,
102
+ toolsJson ? toolsJson->c_str() : nullptr,
103
+ nullptr, 0);
104
+
// A negative return code is treated as failure; the message text comes from
// cactus_get_last_error().
105
+ if (result < 0) {
106
+ throw std::runtime_error("Cactus prefill failed: " +
107
+ std::string(cactus_get_last_error()));
108
+ }
109
+
// Shrink the buffer to the first NUL so only the written text is returned.
110
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
111
+ return responseBuffer;
112
+ });
113
+ }
114
+
82
115
  std::shared_ptr<Promise<std::vector<double>>>
83
116
  HybridCactus::tokenize(const std::string &text) {
84
117
  return Promise<std::vector<double>>::async([this,
@@ -488,6 +521,102 @@ HybridCactus::audioEmbed(const std::string &audioPath,
488
521
  });
489
522
  }
490
523
 
// HybridCactus::diarize: asynchronous wrapper around the native
// cactus_diarize call. `audio` is a variant holding either a string or a
// vector of doubles; the README in this diff describes these as an audio
// file path or raw PCM samples as a byte array. The promise resolves with
// the text the native call left in the response buffer. A missing model or a
// negative native return code throws std::runtime_error from inside the task.
524
+ std::shared_ptr<Promise<std::string>> HybridCactus::diarize(
525
+ const std::variant<std::vector<double>, std::string> &audio,
526
+ double responseBufferSize,
527
+ const std::optional<std::string> &optionsJson) {
// Arguments are captured by value (the audio variant is copied); `this` is
// captured as a raw pointer.
// NOTE(review): presumably the hybrid object outlives the task - confirm.
528
+ return Promise<std::string>::async(
529
+ [this, audio, responseBufferSize, optionsJson]() -> std::string {
// _modelMutex is held until the lambda returns, i.e. across the native call.
530
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
531
+
532
+ if (!this->_model) {
533
+ throw std::runtime_error("Cactus model is not initialized");
534
+ }
535
+
536
+ std::string responseBuffer;
// responseBufferSize is a double and is converted implicitly to a size here.
// NOTE(review): negative, NaN or very large values are not validated.
537
+ responseBuffer.resize(responseBufferSize);
538
+
539
+ int result;
// String alternative: the string is passed as the second argument and the
// trailing buffer pair is (nullptr, 0).
540
+ if (std::holds_alternative<std::string>(audio)) {
541
+ result = cactus_diarize(
542
+ this->_model, std::get<std::string>(audio).c_str(),
543
+ responseBuffer.data(), responseBufferSize,
544
+ optionsJson ? optionsJson->c_str() : nullptr, nullptr, 0);
545
+ } else {
546
+ const auto &audioDoubles = std::get<std::vector<double>>(audio);
547
+
// Each sample is clamped to [0, 255] and truncated to one byte.
// NOTE(review): a NaN sample passes through std::clamp unchanged, and
// converting NaN to uint8_t is undefined behaviour - consider rejecting it.
// The same conversion loop also appears in embedSpeaker.
548
+ std::vector<uint8_t> audioBytes;
549
+ audioBytes.reserve(audioDoubles.size());
550
+ for (double d : audioDoubles) {
551
+ d = std::clamp(d, 0.0, 255.0);
552
+ audioBytes.emplace_back(static_cast<uint8_t>(d));
553
+ }
554
+
// Vector alternative: nullptr takes the place of the string and the bytes go
// in the trailing pointer/length pair.
// NOTE(review): an empty vector yields size 0 and possibly a null data
// pointer, so the native call receives neither a path nor samples.
555
+ result = cactus_diarize(
556
+ this->_model, nullptr,
557
+ responseBuffer.data(), responseBufferSize,
558
+ optionsJson ? optionsJson->c_str() : nullptr,
559
+ audioBytes.data(), audioBytes.size());
560
+ }
561
+
// A negative return code is treated as failure; the message text comes from
// cactus_get_last_error().
562
+ if (result < 0) {
563
+ throw std::runtime_error("Cactus diarize failed: " +
564
+ std::string(cactus_get_last_error()));
565
+ }
566
+
// Shrink the buffer to the first NUL so only the written text is returned.
567
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
568
+ return responseBuffer;
569
+ });
570
+ }
571
+
// HybridCactus::embedSpeaker: asynchronous wrapper around the native
// cactus_embed_speaker call. `audio` is a variant holding either a string or
// a vector of doubles; the README in this diff describes these as an audio
// file path or raw PCM samples as a byte array. The promise resolves with
// the text the native call left in the response buffer. A missing model or a
// negative native return code throws std::runtime_error from inside the task.
// NOTE(review): the README lists no options for embedSpeaker, yet this
// method accepts optionsJson and forwards it - confirm which is intended.
572
+ std::shared_ptr<Promise<std::string>> HybridCactus::embedSpeaker(
573
+ const std::variant<std::vector<double>, std::string> &audio,
574
+ double responseBufferSize,
575
+ const std::optional<std::string> &optionsJson) {
// Arguments are captured by value (the audio variant is copied); `this` is
// captured as a raw pointer.
// NOTE(review): presumably the hybrid object outlives the task - confirm.
576
+ return Promise<std::string>::async(
577
+ [this, audio, responseBufferSize, optionsJson]() -> std::string {
// _modelMutex is held until the lambda returns, i.e. across the native call.
578
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
579
+
580
+ if (!this->_model) {
581
+ throw std::runtime_error("Cactus model is not initialized");
582
+ }
583
+
584
+ std::string responseBuffer;
// responseBufferSize is a double and is converted implicitly to a size here.
// NOTE(review): negative, NaN or very large values are not validated.
585
+ responseBuffer.resize(responseBufferSize);
586
+
587
+ int result;
// String alternative: the string is passed as the second argument and the
// trailing buffer pair is (nullptr, 0).
588
+ if (std::holds_alternative<std::string>(audio)) {
589
+ result = cactus_embed_speaker(
590
+ this->_model, std::get<std::string>(audio).c_str(),
591
+ responseBuffer.data(), responseBufferSize,
592
+ optionsJson ? optionsJson->c_str() : nullptr, nullptr, 0);
593
+ } else {
594
+ const auto &audioDoubles = std::get<std::vector<double>>(audio);
595
+
// Each sample is clamped to [0, 255] and truncated to one byte.
// NOTE(review): a NaN sample passes through std::clamp unchanged, and
// converting NaN to uint8_t is undefined behaviour - consider rejecting it.
// The same conversion loop also appears in diarize.
596
+ std::vector<uint8_t> audioBytes;
597
+ audioBytes.reserve(audioDoubles.size());
598
+ for (double d : audioDoubles) {
599
+ d = std::clamp(d, 0.0, 255.0);
600
+ audioBytes.emplace_back(static_cast<uint8_t>(d));
601
+ }
602
+
// Vector alternative: nullptr takes the place of the string and the bytes go
// in the trailing pointer/length pair.
// NOTE(review): an empty vector yields size 0 and possibly a null data
// pointer, so the native call receives neither a path nor samples.
603
+ result = cactus_embed_speaker(
604
+ this->_model, nullptr,
605
+ responseBuffer.data(), responseBufferSize,
606
+ optionsJson ? optionsJson->c_str() : nullptr,
607
+ audioBytes.data(), audioBytes.size());
608
+ }
609
+
// A negative return code is treated as failure; the message text comes from
// cactus_get_last_error().
610
+ if (result < 0) {
611
+ throw std::runtime_error("Cactus embed speaker failed: " +
612
+ std::string(cactus_get_last_error()));
613
+ }
614
+
// Shrink the buffer to the first NUL so only the written text is returned.
615
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
616
+ return responseBuffer;
617
+ });
618
+ }
619
+
491
620
  std::shared_ptr<Promise<void>> HybridCactus::reset() {
492
621
  return Promise<void>::async([this]() -> void {
493
622
  std::lock_guard<std::mutex> lock(this->_modelMutex);
@@ -525,7 +654,7 @@ std::shared_ptr<Promise<void>> HybridCactus::destroy() {
// HybridCactus::setTelemetryEnvironment: asynchronously forwards the
// framework tag "react-native", the given cache directory and a version
// literal to cactus_set_telemetry_environment. Only cacheDir is captured (by
// value); no member state is read and _modelMutex is not taken.
525
654
  std::shared_ptr<Promise<void>>
526
655
  HybridCactus::setTelemetryEnvironment(const std::string &cacheDir) {
527
656
  return Promise<void>::async([cacheDir]() -> void {
// NOTE(review): the version string is a hand-maintained literal. The removed
// line reported "1.10.0" although this diff starts from package 1.10.3, so
// keep it in sync with package.json on every release.
528
- cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.10.0");
657
+ cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.12.0");
529
658
  });
530
659
  }
531
660
 
package/cpp/HybridCactus.hpp CHANGED
@@ -24,6 +24,11 @@ public:
24
24
  double /* tokenId */)>> &callback)
25
25
  override;
26
26
 
// Declaration of the prefill override. It takes the messages as a JSON
// string, a response buffer size (a double) and optional options/tools JSON
// strings, and returns a promise of a string.
27
+ std::shared_ptr<Promise<std::string>> prefill(
28
+ const std::string &messagesJson, double responseBufferSize,
29
+ const std::optional<std::string> &optionsJson,
30
+ const std::optional<std::string> &toolsJson) override;
31
+
27
32
  std::shared_ptr<Promise<std::vector<double>>>
28
33
  tokenize(const std::string &text) override;
29
34
 
@@ -67,6 +72,16 @@ public:
67
72
  std::shared_ptr<Promise<std::vector<double>>>
68
73
  audioEmbed(const std::string &audioPath, double embeddingBufferSize) override;
69
74
 
// Declaration of the diarize override. `audio` is a variant of
// std::vector<double> or std::string; the call also takes a response buffer
// size (a double) and an optional options JSON string, and returns a promise
// of a string.
75
+ std::shared_ptr<Promise<std::string>>
76
+ diarize(const std::variant<std::vector<double>, std::string> &audio,
77
+ double responseBufferSize,
78
+ const std::optional<std::string> &optionsJson) override;
79
+
// Declaration of the embedSpeaker override. The parameter list matches
// diarize: an audio variant (std::vector<double> or std::string), a response
// buffer size (a double) and an optional options JSON string. It returns a
// promise of a string.
80
+ std::shared_ptr<Promise<std::string>>
81
+ embedSpeaker(const std::variant<std::vector<double>, std::string> &audio,
82
+ double responseBufferSize,
83
+ const std::optional<std::string> &optionsJson) override;
84
+
70
85
  std::shared_ptr<Promise<void>> reset() override;
71
86
 
72
87
  std::shared_ptr<Promise<void>> stop() override;