cactus-react-native 1.10.3 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +199 -40
- package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
- package/cpp/HybridCactus.cpp +131 -2
- package/cpp/HybridCactus.hpp +15 -0
- package/cpp/cactus_ffi.h +240 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +240 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +940 -109
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +175 -25
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +48 -21
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +79 -7
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +122 -9
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +191 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +240 -2
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +940 -109
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +175 -25
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +48 -21
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +79 -7
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +122 -9
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +191 -2
- package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
- package/lib/module/classes/{CactusVAD.js → CactusAudio.js} +19 -6
- package/lib/module/classes/CactusAudio.js.map +1 -0
- package/lib/module/classes/CactusLM.js +25 -0
- package/lib/module/classes/CactusLM.js.map +1 -1
- package/lib/module/hooks/{useCactusVAD.js → useCactusAudio.js} +50 -20
- package/lib/module/hooks/useCactusAudio.js.map +1 -0
- package/lib/module/index.js +2 -2
- package/lib/module/index.js.map +1 -1
- package/lib/module/modelRegistry.js +5 -3
- package/lib/module/modelRegistry.js.map +1 -1
- package/lib/module/native/Cactus.js +81 -2
- package/lib/module/native/Cactus.js.map +1 -1
- package/lib/module/types/CactusAudio.js +4 -0
- package/lib/module/types/{CactusVAD.js.map → CactusAudio.js.map} +1 -1
- package/lib/typescript/src/classes/CactusAudio.d.ts +22 -0
- package/lib/typescript/src/classes/CactusAudio.d.ts.map +1 -0
- package/lib/typescript/src/classes/CactusLM.d.ts +2 -1
- package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/hooks/useCactusAudio.d.ts +17 -0
- package/lib/typescript/src/hooks/useCactusAudio.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +4 -4
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/native/Cactus.d.ts +9 -3
- package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
- package/lib/typescript/src/specs/Cactus.nitro.d.ts +3 -0
- package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusAudio.d.ts +63 -0
- package/lib/typescript/src/types/CactusAudio.d.ts.map +1 -0
- package/lib/typescript/src/types/CactusLM.d.ts +15 -0
- package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
- package/lib/typescript/src/types/CactusSTT.d.ts +1 -0
- package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
- package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +3 -0
- package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +3 -0
- package/package.json +1 -1
- package/src/classes/{CactusVAD.ts → CactusAudio.ts} +32 -13
- package/src/classes/CactusLM.ts +36 -0
- package/src/hooks/{useCactusVAD.ts → useCactusAudio.ts} +65 -28
- package/src/index.tsx +16 -9
- package/src/modelRegistry.ts +20 -6
- package/src/native/Cactus.ts +118 -3
- package/src/specs/Cactus.nitro.ts +16 -0
- package/src/types/CactusAudio.ts +73 -0
- package/src/types/CactusLM.ts +17 -0
- package/src/types/CactusSTT.ts +1 -0
- package/lib/module/classes/CactusVAD.js.map +0 -1
- package/lib/module/hooks/useCactusVAD.js.map +0 -1
- package/lib/module/types/CactusVAD.js +0 -4
- package/lib/typescript/src/classes/CactusVAD.d.ts +0 -20
- package/lib/typescript/src/classes/CactusVAD.d.ts.map +0 -1
- package/lib/typescript/src/hooks/useCactusVAD.d.ts +0 -15
- package/lib/typescript/src/hooks/useCactusVAD.d.ts.map +0 -1
- package/lib/typescript/src/types/CactusVAD.d.ts +0 -34
- package/lib/typescript/src/types/CactusVAD.d.ts.map +0 -1
- package/src/types/CactusVAD.ts +0 -39
package/README.md
CHANGED
|
@@ -649,18 +649,18 @@ console.log('Language:', result.language); // e.g. 'en'
|
|
|
649
649
|
console.log('Confidence:', result.confidence);
|
|
650
650
|
```
|
|
651
651
|
|
|
652
|
-
##
|
|
652
|
+
## Audio Processing
|
|
653
653
|
|
|
654
|
-
The `
|
|
654
|
+
The `CactusAudio` class provides voice activity detection (VAD), speaker diarization, and speaker embedding extraction.
|
|
655
655
|
|
|
656
|
-
###
|
|
656
|
+
### Voice Activity Detection
|
|
657
657
|
|
|
658
658
|
```typescript
|
|
659
|
-
import {
|
|
659
|
+
import { CactusAudio } from 'cactus-react-native';
|
|
660
660
|
|
|
661
|
-
const
|
|
661
|
+
const cactusAudio = new CactusAudio({ model: 'silero-vad' });
|
|
662
662
|
|
|
663
|
-
const result = await
|
|
663
|
+
const result = await cactusAudio.vad({
|
|
664
664
|
audio: 'path/to/audio.wav',
|
|
665
665
|
options: {
|
|
666
666
|
threshold: 0.5,
|
|
@@ -674,22 +674,68 @@ console.log('Speech segments:', result.segments);
|
|
|
674
674
|
console.log('Total time (ms):', result.totalTime);
|
|
675
675
|
```
|
|
676
676
|
|
|
677
|
+
### Speaker Diarization
|
|
678
|
+
|
|
679
|
+
```typescript
|
|
680
|
+
import { CactusAudio } from 'cactus-react-native';
|
|
681
|
+
|
|
682
|
+
const cactusAudio = new CactusAudio({ model: 'silero-vad' });
|
|
683
|
+
|
|
684
|
+
const result = await cactusAudio.diarize({
|
|
685
|
+
audio: 'path/to/audio.wav',
|
|
686
|
+
options: {
|
|
687
|
+
numSpeakers: 2,
|
|
688
|
+
minSpeakers: 1,
|
|
689
|
+
maxSpeakers: 4,
|
|
690
|
+
}
|
|
691
|
+
});
|
|
692
|
+
|
|
693
|
+
console.log('Number of speakers:', result.numSpeakers);
|
|
694
|
+
console.log('Scores:', result.scores);
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
### Speaker Embedding
|
|
698
|
+
|
|
699
|
+
```typescript
|
|
700
|
+
import { CactusAudio } from 'cactus-react-native';
|
|
701
|
+
|
|
702
|
+
const cactusAudio = new CactusAudio({ model: 'silero-vad' });
|
|
703
|
+
|
|
704
|
+
const result = await cactusAudio.embedSpeaker({
|
|
705
|
+
audio: 'path/to/audio.wav',
|
|
706
|
+
});
|
|
707
|
+
|
|
708
|
+
console.log('Speaker embedding:', result.embedding);
|
|
709
|
+
```
|
|
710
|
+
|
|
677
711
|
### Hook
|
|
678
712
|
|
|
679
713
|
```tsx
|
|
680
|
-
import {
|
|
714
|
+
import { useCactusAudio } from 'cactus-react-native';
|
|
681
715
|
|
|
682
716
|
const App = () => {
|
|
683
|
-
const
|
|
717
|
+
const cactusAudio = useCactusAudio({ model: 'silero-vad' });
|
|
684
718
|
|
|
685
719
|
const handleVAD = async () => {
|
|
686
|
-
const result = await
|
|
720
|
+
const result = await cactusAudio.vad({
|
|
687
721
|
audio: 'path/to/audio.wav',
|
|
688
722
|
});
|
|
689
723
|
console.log('Speech segments:', result.segments);
|
|
690
724
|
};
|
|
691
725
|
|
|
692
|
-
|
|
726
|
+
const handleDiarize = async () => {
|
|
727
|
+
const result = await cactusAudio.diarize({
|
|
728
|
+
audio: 'path/to/audio.wav',
|
|
729
|
+
});
|
|
730
|
+
console.log('Speakers:', result.numSpeakers);
|
|
731
|
+
};
|
|
732
|
+
|
|
733
|
+
return (
|
|
734
|
+
<>
|
|
735
|
+
<Button title="Detect Speech" onPress={handleVAD} />
|
|
736
|
+
<Button title="Diarize" onPress={handleDiarize} />
|
|
737
|
+
</>
|
|
738
|
+
);
|
|
693
739
|
};
|
|
694
740
|
```
|
|
695
741
|
|
|
@@ -985,9 +1031,19 @@ Performs text completion with optional streaming and tool support. Automatically
|
|
|
985
1031
|
- `toolRagTopK` - Number of tools to select via RAG when tool list is large (default: `2`).
|
|
986
1032
|
- `includeStopSequences` - Whether to include stop sequences in the response (default: `false`).
|
|
987
1033
|
- `useVad` - Whether to use VAD preprocessing (default: `true`).
|
|
1034
|
+
- `enableThinking` - Whether to enable thinking/reasoning output if supported by the model (default: unset).
|
|
988
1035
|
- `tools` - Array of `CactusLMTool` objects for function calling.
|
|
989
1036
|
- `onToken` - Callback for streaming tokens.
|
|
990
1037
|
|
|
1038
|
+
**`prefill(params: CactusLMPrefillParams): Promise<CactusLMPrefillResult>`**
|
|
1039
|
+
|
|
1040
|
+
Runs prompt prefill without generating any output tokens. Useful for measuring prefill performance or warming up the model's KV cache. Automatically calls `init()` if not already initialized. Throws an error if a generation is already in progress.
|
|
1041
|
+
|
|
1042
|
+
**Parameters:**
|
|
1043
|
+
- `messages` - Array of `CactusLMMessage` objects.
|
|
1044
|
+
- `options` - Same options as `complete`.
|
|
1045
|
+
- `tools` - Array of `CactusLMTool` objects.
|
|
1046
|
+
|
|
991
1047
|
**`tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>`**
|
|
992
1048
|
|
|
993
1049
|
Converts text into tokens using the model's tokenizer.
|
|
@@ -1038,7 +1094,7 @@ Returns available models.
|
|
|
1038
1094
|
|
|
1039
1095
|
**`getModelName(): string`**
|
|
1040
1096
|
|
|
1041
|
-
Returns the model
|
|
1097
|
+
Returns the computed model identifier including quantization and pro suffix (e.g., `'qwen3-0.6b-int8'`, `'lfm2-vl-450m-int4-pro'`).
|
|
1042
1098
|
|
|
1043
1099
|
### useCactusLM Hook
|
|
1044
1100
|
|
|
@@ -1121,6 +1177,7 @@ Starts a streaming transcription session. Automatically calls `init()` if not al
|
|
|
1121
1177
|
- `confirmationThreshold` - Fuzzy match ratio required to confirm a transcription segment (default: `0.99`).
|
|
1122
1178
|
- `minChunkSize` - Minimum number of audio samples before processing (default: `32000`).
|
|
1123
1179
|
- `telemetryEnabled` - Enable telemetry for this session (default: `true`).
|
|
1180
|
+
- `language` - Language code for transcription (e.g., `'en'`, `'es'`, `'fr'`). If not set, language is auto-detected.
|
|
1124
1181
|
|
|
1125
1182
|
**`streamTranscribeProcess(params: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>`**
|
|
1126
1183
|
|
|
@@ -1167,7 +1224,7 @@ Returns available speech-to-text models.
|
|
|
1167
1224
|
|
|
1168
1225
|
**`getModelName(): string`**
|
|
1169
1226
|
|
|
1170
|
-
Returns the model
|
|
1227
|
+
Returns the computed model identifier including quantization and pro suffix (e.g., `'whisper-small-int8'`).
|
|
1171
1228
|
|
|
1172
1229
|
### useCactusSTT Hook
|
|
1173
1230
|
|
|
@@ -1200,32 +1257,32 @@ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When
|
|
|
1200
1257
|
- `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `transcription`, `streamTranscribeConfirmed`, and `streamTranscribePending` state. Automatically called when the component unmounts.
|
|
1201
1258
|
- `getModels(): Promise<CactusModel[]>` - Returns available speech-to-text models.
|
|
1202
1259
|
|
|
1203
|
-
###
|
|
1260
|
+
### CactusAudio Class
|
|
1204
1261
|
|
|
1205
1262
|
#### Constructor
|
|
1206
1263
|
|
|
1207
|
-
**`new
|
|
1264
|
+
**`new CactusAudio(params?: CactusAudioParams)`**
|
|
1208
1265
|
|
|
1209
1266
|
**Parameters:**
|
|
1210
|
-
- `model` - Model slug or absolute path to
|
|
1267
|
+
- `model` - Model slug or absolute path to an audio model file (default: `'silero-vad'`).
|
|
1211
1268
|
- `options` - Model options:
|
|
1212
1269
|
- `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int8'`).
|
|
1213
1270
|
- `pro` - Enable NPU-accelerated models (default: `false`).
|
|
1214
1271
|
|
|
1215
1272
|
#### Methods
|
|
1216
1273
|
|
|
1217
|
-
**`download(params?:
|
|
1274
|
+
**`download(params?: CactusAudioDownloadParams): Promise<void>`**
|
|
1218
1275
|
|
|
1219
|
-
Downloads the
|
|
1276
|
+
Downloads the audio model. If the model is already downloaded, returns immediately with progress `1`. Throws an error if a download is already in progress.
|
|
1220
1277
|
|
|
1221
1278
|
**Parameters:**
|
|
1222
1279
|
- `onProgress` - Callback for download progress (0-1).
|
|
1223
1280
|
|
|
1224
1281
|
**`init(): Promise<void>`**
|
|
1225
1282
|
|
|
1226
|
-
Initializes the
|
|
1283
|
+
Initializes the audio model. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
|
|
1227
1284
|
|
|
1228
|
-
**`vad(params:
|
|
1285
|
+
**`vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>`**
|
|
1229
1286
|
|
|
1230
1287
|
Runs voice activity detection on the given audio. Automatically calls `init()` if not already initialized.
|
|
1231
1288
|
|
|
@@ -1243,21 +1300,41 @@ Runs voice activity detection on the given audio. Automatically calls `init()` i
|
|
|
1243
1300
|
- `minSilenceAtMaxSpeech` - Minimum silence at max speech duration.
|
|
1244
1301
|
- `useMaxPossSilAtMaxSpeech` - Whether to use maximum possible silence at max speech.
|
|
1245
1302
|
|
|
1303
|
+
**`diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>`**
|
|
1304
|
+
|
|
1305
|
+
Runs speaker diarization on the given audio. Automatically calls `init()` if not already initialized.
|
|
1306
|
+
|
|
1307
|
+
**Parameters:**
|
|
1308
|
+
- `audio` - Path to the audio file or raw PCM samples as a byte array.
|
|
1309
|
+
- `options` - Diarize options:
|
|
1310
|
+
- `stepMs` - Step size in milliseconds.
|
|
1311
|
+
- `threshold` - Diarization threshold.
|
|
1312
|
+
- `numSpeakers` - Expected number of speakers.
|
|
1313
|
+
- `minSpeakers` - Minimum number of speakers.
|
|
1314
|
+
- `maxSpeakers` - Maximum number of speakers.
|
|
1315
|
+
|
|
1316
|
+
**`embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>`**
|
|
1317
|
+
|
|
1318
|
+
Extracts a speaker embedding vector from the given audio. Automatically calls `init()` if not already initialized.
|
|
1319
|
+
|
|
1320
|
+
**Parameters:**
|
|
1321
|
+
- `audio` - Path to the audio file or raw PCM samples as a byte array.
|
|
1322
|
+
|
|
1246
1323
|
**`destroy(): Promise<void>`**
|
|
1247
1324
|
|
|
1248
1325
|
Releases all resources associated with the model. Safe to call even if the model is not initialized.
|
|
1249
1326
|
|
|
1250
1327
|
**`getModels(): Promise<CactusModel[]>`**
|
|
1251
1328
|
|
|
1252
|
-
Returns available
|
|
1329
|
+
Returns available audio models.
|
|
1253
1330
|
|
|
1254
1331
|
**`getModelName(): string`**
|
|
1255
1332
|
|
|
1256
|
-
Returns the model
|
|
1333
|
+
Returns the computed model identifier including quantization and pro suffix (e.g., `'silero-vad-int8'`).
|
|
1257
1334
|
|
|
1258
|
-
###
|
|
1335
|
+
### useCactusAudio Hook
|
|
1259
1336
|
|
|
1260
|
-
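The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When model parameters (`model`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.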
|
|
1337
|
+
The `useCactusAudio` hook manages a `CactusAudio` instance with reactive state. When model parameters (`model`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
|
|
1261
1338
|
|
|
1262
1339
|
#### State
|
|
1263
1340
|
|
|
@@ -1269,11 +1346,13 @@ The `useCactusVAD` hook manages a `CactusVAD` instance with reactive state. When
|
|
|
1269
1346
|
|
|
1270
1347
|
#### Methods
|
|
1271
1348
|
|
|
1272
|
-
- `download(params?:
|
|
1349
|
+
- `download(params?: CactusAudioDownloadParams): Promise<void>` - Downloads the model. Updates `isDownloading` and `downloadProgress` state during download. Sets `isDownloaded` to `true` on success.
|
|
1273
1350
|
- `init(): Promise<void>` - Initializes the model.
|
|
1274
|
-
- `vad(params:
|
|
1351
|
+
- `vad(params: CactusAudioVADParams): Promise<CactusAudioVADResult>` - Runs voice activity detection.
|
|
1352
|
+
- `diarize(params: CactusAudioDiarizeParams): Promise<CactusAudioDiarizeResult>` - Runs speaker diarization.
|
|
1353
|
+
- `embedSpeaker(params: CactusAudioEmbedSpeakerParams): Promise<CactusAudioEmbedSpeakerResult>` - Extracts a speaker embedding.
|
|
1275
1354
|
- `destroy(): Promise<void>` - Releases all resources. Automatically called when the component unmounts.
|
|
1276
|
-
- `getModels(): Promise<CactusModel[]>` - Returns available
|
|
1355
|
+
- `getModels(): Promise<CactusModel[]>` - Returns available audio models.
|
|
1277
1356
|
|
|
1278
1357
|
### CactusIndex Class
|
|
1279
1358
|
|
|
@@ -1413,6 +1492,7 @@ interface CactusLMCompleteOptions {
|
|
|
1413
1492
|
toolRagTopK?: number;
|
|
1414
1493
|
includeStopSequences?: boolean;
|
|
1415
1494
|
useVad?: boolean;
|
|
1495
|
+
enableThinking?: boolean;
|
|
1416
1496
|
}
|
|
1417
1497
|
```
|
|
1418
1498
|
|
|
@@ -1446,12 +1526,36 @@ interface CactusLMCompleteParams {
|
|
|
1446
1526
|
}
|
|
1447
1527
|
```
|
|
1448
1528
|
|
|
1529
|
+
### CactusLMPrefillParams
|
|
1530
|
+
|
|
1531
|
+
```typescript
|
|
1532
|
+
interface CactusLMPrefillParams {
|
|
1533
|
+
messages: CactusLMMessage[];
|
|
1534
|
+
options?: CactusLMCompleteOptions;
|
|
1535
|
+
tools?: CactusLMTool[];
|
|
1536
|
+
}
|
|
1537
|
+
```
|
|
1538
|
+
|
|
1539
|
+
### CactusLMPrefillResult
|
|
1540
|
+
|
|
1541
|
+
```typescript
|
|
1542
|
+
interface CactusLMPrefillResult {
|
|
1543
|
+
success: boolean;
|
|
1544
|
+
error: string | null;
|
|
1545
|
+
prefillTokens: number;
|
|
1546
|
+
prefillTps: number;
|
|
1547
|
+
totalTimeMs: number;
|
|
1548
|
+
ramUsageMb: number;
|
|
1549
|
+
}
|
|
1550
|
+
```
|
|
1551
|
+
|
|
1449
1552
|
### CactusLMCompleteResult
|
|
1450
1553
|
|
|
1451
1554
|
```typescript
|
|
1452
1555
|
interface CactusLMCompleteResult {
|
|
1453
1556
|
success: boolean;
|
|
1454
1557
|
response: string;
|
|
1558
|
+
thinking?: string;
|
|
1455
1559
|
functionCalls?: {
|
|
1456
1560
|
name: string;
|
|
1457
1561
|
arguments: { [key: string]: any };
|
|
@@ -1658,6 +1762,7 @@ interface CactusSTTStreamTranscribeStartOptions {
|
|
|
1658
1762
|
confirmationThreshold?: number;
|
|
1659
1763
|
minChunkSize?: number;
|
|
1660
1764
|
telemetryEnabled?: boolean;
|
|
1765
|
+
language?: string;
|
|
1661
1766
|
}
|
|
1662
1767
|
```
|
|
1663
1768
|
|
|
@@ -1728,27 +1833,27 @@ interface CactusSTTDetectLanguageResult {
|
|
|
1728
1833
|
}
|
|
1729
1834
|
```
|
|
1730
1835
|
|
|
1731
|
-
###
|
|
1836
|
+
### CactusAudioParams
|
|
1732
1837
|
|
|
1733
1838
|
```typescript
|
|
1734
|
-
interface
|
|
1839
|
+
interface CactusAudioParams {
|
|
1735
1840
|
model?: string;
|
|
1736
1841
|
options?: CactusModelOptions;
|
|
1737
1842
|
}
|
|
1738
1843
|
```
|
|
1739
1844
|
|
|
1740
|
-
###
|
|
1845
|
+
### CactusAudioDownloadParams
|
|
1741
1846
|
|
|
1742
1847
|
```typescript
|
|
1743
|
-
interface
|
|
1848
|
+
interface CactusAudioDownloadParams {
|
|
1744
1849
|
onProgress?: (progress: number) => void;
|
|
1745
1850
|
}
|
|
1746
1851
|
```
|
|
1747
1852
|
|
|
1748
|
-
###
|
|
1853
|
+
### CactusAudioVADOptions
|
|
1749
1854
|
|
|
1750
1855
|
```typescript
|
|
1751
|
-
interface
|
|
1856
|
+
interface CactusAudioVADOptions {
|
|
1752
1857
|
threshold?: number;
|
|
1753
1858
|
negThreshold?: number;
|
|
1754
1859
|
minSpeechDurationMs?: number;
|
|
@@ -1762,31 +1867,85 @@ interface CactusVADOptions {
|
|
|
1762
1867
|
}
|
|
1763
1868
|
```
|
|
1764
1869
|
|
|
1765
|
-
###
|
|
1870
|
+
### CactusAudioVADSegment
|
|
1766
1871
|
|
|
1767
1872
|
```typescript
|
|
1768
|
-
interface
|
|
1873
|
+
interface CactusAudioVADSegment {
|
|
1769
1874
|
start: number;
|
|
1770
1875
|
end: number;
|
|
1771
1876
|
}
|
|
1772
1877
|
```
|
|
1773
1878
|
|
|
1774
|
-
###
|
|
1879
|
+
### CactusAudioVADResult
|
|
1775
1880
|
|
|
1776
1881
|
```typescript
|
|
1777
|
-
interface
|
|
1778
|
-
segments:
|
|
1882
|
+
interface CactusAudioVADResult {
|
|
1883
|
+
segments: CactusAudioVADSegment[];
|
|
1779
1884
|
totalTime: number;
|
|
1780
1885
|
ramUsage: number;
|
|
1781
1886
|
}
|
|
1782
1887
|
```
|
|
1783
1888
|
|
|
1784
|
-
###
|
|
1889
|
+
### CactusAudioVADParams
|
|
1785
1890
|
|
|
1786
1891
|
```typescript
|
|
1787
|
-
interface
|
|
1892
|
+
interface CactusAudioVADParams {
|
|
1788
1893
|
audio: string | number[];
|
|
1789
|
-
options?:
|
|
1894
|
+
options?: CactusAudioVADOptions;
|
|
1895
|
+
}
|
|
1896
|
+
```
|
|
1897
|
+
|
|
1898
|
+
### CactusAudioDiarizeOptions
|
|
1899
|
+
|
|
1900
|
+
```typescript
|
|
1901
|
+
interface CactusAudioDiarizeOptions {
|
|
1902
|
+
stepMs?: number;
|
|
1903
|
+
threshold?: number;
|
|
1904
|
+
numSpeakers?: number;
|
|
1905
|
+
minSpeakers?: number;
|
|
1906
|
+
maxSpeakers?: number;
|
|
1907
|
+
}
|
|
1908
|
+
```
|
|
1909
|
+
|
|
1910
|
+
### CactusAudioDiarizeParams
|
|
1911
|
+
|
|
1912
|
+
```typescript
|
|
1913
|
+
interface CactusAudioDiarizeParams {
|
|
1914
|
+
audio: string | number[];
|
|
1915
|
+
options?: CactusAudioDiarizeOptions;
|
|
1916
|
+
}
|
|
1917
|
+
```
|
|
1918
|
+
|
|
1919
|
+
### CactusAudioDiarizeResult
|
|
1920
|
+
|
|
1921
|
+
```typescript
|
|
1922
|
+
interface CactusAudioDiarizeResult {
|
|
1923
|
+
success: boolean;
|
|
1924
|
+
error: string | null;
|
|
1925
|
+
numSpeakers: number;
|
|
1926
|
+
scores: number[];
|
|
1927
|
+
totalTimeMs: number;
|
|
1928
|
+
ramUsageMb: number;
|
|
1929
|
+
}
|
|
1930
|
+
```
|
|
1931
|
+
|
|
1932
|
+
### CactusAudioEmbedSpeakerParams
|
|
1933
|
+
|
|
1934
|
+
```typescript
|
|
1935
|
+
interface CactusAudioEmbedSpeakerParams {
|
|
1936
|
+
audio: string | number[];
|
|
1937
|
+
}
|
|
1938
|
+
```
|
|
1939
|
+
|
|
1940
|
+
### CactusAudioEmbedSpeakerResult
|
|
1941
|
+
|
|
1942
|
+
```typescript
|
|
1943
|
+
interface CactusAudioEmbedSpeakerResult {
|
|
1944
|
+
success: boolean;
|
|
1945
|
+
error: string | null;
|
|
1946
|
+
embedding: number[];
|
|
1947
|
+
totalTimeMs: number;
|
|
1948
|
+
ramUsageMb: number;
|
|
1790
1949
|
}
|
|
1791
1950
|
```
|
|
1792
1951
|
|
|
Binary file
|
package/cpp/HybridCactus.cpp
CHANGED
|
@@ -65,7 +65,8 @@ std::shared_ptr<Promise<std::string>> HybridCactus::complete(
|
|
|
65
65
|
responseBuffer.data(), responseBufferSize,
|
|
66
66
|
optionsJson ? optionsJson->c_str() : nullptr,
|
|
67
67
|
toolsJson ? toolsJson->c_str() : nullptr,
|
|
68
|
-
cactusTokenCallback, &callbackCtx
|
|
68
|
+
cactusTokenCallback, &callbackCtx,
|
|
69
|
+
nullptr, 0);
|
|
69
70
|
|
|
70
71
|
if (result < 0) {
|
|
71
72
|
throw std::runtime_error("Cactus complete failed: " +
|
|
@@ -79,6 +80,38 @@ std::shared_ptr<Promise<std::string>> HybridCactus::complete(
|
|
|
79
80
|
});
|
|
80
81
|
}
|
|
81
82
|
|
|
83
|
+
// Runs prompt prefill on the loaded model via cactus_prefill and resolves with
// the text the native call wrote into the response buffer.
//
// messagesJson        JSON string forwarded unchanged to cactus_prefill.
// responseBufferSize  Capacity, in bytes, of the buffer handed to the native
//                     call. Arrives as a double from the JS bridge, so it is
//                     validated before being converted to an integer size.
// optionsJson         Optional JSON string; nullptr is passed when absent.
// toolsJson           Optional JSON string; nullptr is passed when absent.
//
// The promise rejects with std::runtime_error when the model is not
// initialized, when responseBufferSize is not a usable size, or when
// cactus_prefill returns a negative value (the message then carries
// cactus_get_last_error()).
std::shared_ptr<Promise<std::string>> HybridCactus::prefill(
    const std::string &messagesJson, double responseBufferSize,
    const std::optional<std::string> &optionsJson,
    const std::optional<std::string> &toolsJson) {
  return Promise<std::string>::async([this, messagesJson, responseBufferSize,
                                      optionsJson,
                                      toolsJson]() -> std::string {
    // Hold the model lock for the whole native call.
    std::lock_guard<std::mutex> lock(this->_modelMutex);

    if (!this->_model) {
      throw std::runtime_error("Cactus model is not initialized");
    }

    std::string responseBuffer;

    // Converting a negative, NaN or out-of-range double to an unsigned size is
    // undefined behaviour, so reject such values up front. The negated
    // comparison is deliberate: it is also true for NaN.
    if (!(responseBufferSize >= 0.0) ||
        responseBufferSize > static_cast<double>(responseBuffer.max_size())) {
      throw std::runtime_error(
          "Cactus prefill failed: invalid response buffer size");
    }
    const auto bufferSize = static_cast<std::size_t>(responseBufferSize);
    responseBuffer.resize(bufferSize);

    // NOTE(review): the trailing (nullptr, 0) pair is forwarded exactly as
    // before; presumably an optional extra input buffer - confirm against
    // cactus_ffi.h.
    const int result = cactus_prefill(
        this->_model, messagesJson.c_str(), responseBuffer.data(), bufferSize,
        optionsJson ? optionsJson->c_str() : nullptr,
        toolsJson ? toolsJson->c_str() : nullptr, nullptr, 0);

    if (result < 0) {
      throw std::runtime_error("Cactus prefill failed: " +
                               std::string(cactus_get_last_error()));
    }

    // Trim the buffer down to the NUL-terminated text that was written.
    responseBuffer.resize(strlen(responseBuffer.c_str()));
    return responseBuffer;
  });
}
|
|
114
|
+
|
|
82
115
|
std::shared_ptr<Promise<std::vector<double>>>
|
|
83
116
|
HybridCactus::tokenize(const std::string &text) {
|
|
84
117
|
return Promise<std::vector<double>>::async([this,
|
|
@@ -488,6 +521,102 @@ HybridCactus::audioEmbed(const std::string &audioPath,
|
|
|
488
521
|
});
|
|
489
522
|
}
|
|
490
523
|
|
|
524
|
+
// Runs speaker diarization on the loaded model via cactus_diarize and resolves
// with the text the native call wrote into the response buffer.
//
// audio               Either a file path (std::string alternative) or raw
//                     audio data given as doubles, where each element is
//                     converted to one byte (clamped to [0, 255]).
// responseBufferSize  Capacity, in bytes, of the buffer handed to the native
//                     call. Arrives as a double from the JS bridge, so it is
//                     validated before being converted to an integer size.
// optionsJson         Optional JSON string; nullptr is passed when absent.
//
// The promise rejects with std::runtime_error when the model is not
// initialized, when responseBufferSize is not a usable size, or when
// cactus_diarize returns a negative value (the message then carries
// cactus_get_last_error()).
std::shared_ptr<Promise<std::string>> HybridCactus::diarize(
    const std::variant<std::vector<double>, std::string> &audio,
    double responseBufferSize,
    const std::optional<std::string> &optionsJson) {
  return Promise<std::string>::async(
      [this, audio, responseBufferSize, optionsJson]() -> std::string {
        // Hold the model lock for the whole native call.
        std::lock_guard<std::mutex> lock(this->_modelMutex);

        if (!this->_model) {
          throw std::runtime_error("Cactus model is not initialized");
        }

        std::string responseBuffer;

        // Converting a negative, NaN or out-of-range double to an unsigned
        // size is undefined behaviour, so reject such values up front. The
        // negated comparison is deliberate: it is also true for NaN.
        if (!(responseBufferSize >= 0.0) ||
            responseBufferSize >
                static_cast<double>(responseBuffer.max_size())) {
          throw std::runtime_error(
              "Cactus diarize failed: invalid response buffer size");
        }
        const auto bufferSize = static_cast<std::size_t>(responseBufferSize);
        responseBuffer.resize(bufferSize);

        const char *options = optionsJson ? optionsJson->c_str() : nullptr;

        int result = 0;
        if (const auto *audioPath = std::get_if<std::string>(&audio)) {
          // File input: pass the path and no in-memory buffer.
          result = cactus_diarize(this->_model, audioPath->c_str(),
                                  responseBuffer.data(), bufferSize, options,
                                  nullptr, 0);
        } else {
          const auto &audioDoubles = std::get<std::vector<double>>(audio);

          std::vector<uint8_t> audioBytes;
          audioBytes.reserve(audioDoubles.size());
          for (const double sample : audioDoubles) {
            // std::clamp returns NaN unchanged and casting NaN to an integer
            // is undefined; `sample >= 0.0` is false for NaN, so NaN and
            // negative values both become 0.
            const double bounded =
                (sample >= 0.0) ? std::clamp(sample, 0.0, 255.0) : 0.0;
            audioBytes.push_back(static_cast<uint8_t>(bounded));
          }

          // In-memory input: no path, pass the converted bytes instead.
          result = cactus_diarize(this->_model, nullptr, responseBuffer.data(),
                                  bufferSize, options, audioBytes.data(),
                                  audioBytes.size());
        }

        if (result < 0) {
          throw std::runtime_error("Cactus diarize failed: " +
                                   std::string(cactus_get_last_error()));
        }

        // Trim the buffer down to the NUL-terminated text that was written.
        responseBuffer.resize(strlen(responseBuffer.c_str()));
        return responseBuffer;
      });
}
|
|
571
|
+
|
|
572
|
+
// Extracts a speaker embedding on the loaded model via cactus_embed_speaker
// and resolves with the text the native call wrote into the response buffer.
//
// audio               Either a file path (std::string alternative) or raw
//                     audio data given as doubles, where each element is
//                     converted to one byte (clamped to [0, 255]).
// responseBufferSize  Capacity, in bytes, of the buffer handed to the native
//                     call. Arrives as a double from the JS bridge, so it is
//                     validated before being converted to an integer size.
// optionsJson         Optional JSON string; nullptr is passed when absent.
//
// The promise rejects with std::runtime_error when the model is not
// initialized, when responseBufferSize is not a usable size, or when
// cactus_embed_speaker returns a negative value (the message then carries
// cactus_get_last_error()).
std::shared_ptr<Promise<std::string>> HybridCactus::embedSpeaker(
    const std::variant<std::vector<double>, std::string> &audio,
    double responseBufferSize,
    const std::optional<std::string> &optionsJson) {
  return Promise<std::string>::async(
      [this, audio, responseBufferSize, optionsJson]() -> std::string {
        // Hold the model lock for the whole native call.
        std::lock_guard<std::mutex> lock(this->_modelMutex);

        if (!this->_model) {
          throw std::runtime_error("Cactus model is not initialized");
        }

        std::string responseBuffer;

        // Converting a negative, NaN or out-of-range double to an unsigned
        // size is undefined behaviour, so reject such values up front. The
        // negated comparison is deliberate: it is also true for NaN.
        if (!(responseBufferSize >= 0.0) ||
            responseBufferSize >
                static_cast<double>(responseBuffer.max_size())) {
          throw std::runtime_error(
              "Cactus embed speaker failed: invalid response buffer size");
        }
        const auto bufferSize = static_cast<std::size_t>(responseBufferSize);
        responseBuffer.resize(bufferSize);

        const char *options = optionsJson ? optionsJson->c_str() : nullptr;

        int result = 0;
        if (const auto *audioPath = std::get_if<std::string>(&audio)) {
          // File input: pass the path and no in-memory buffer.
          result = cactus_embed_speaker(this->_model, audioPath->c_str(),
                                        responseBuffer.data(), bufferSize,
                                        options, nullptr, 0);
        } else {
          const auto &audioDoubles = std::get<std::vector<double>>(audio);

          std::vector<uint8_t> audioBytes;
          audioBytes.reserve(audioDoubles.size());
          for (const double sample : audioDoubles) {
            // std::clamp returns NaN unchanged and casting NaN to an integer
            // is undefined; `sample >= 0.0` is false for NaN, so NaN and
            // negative values both become 0.
            const double bounded =
                (sample >= 0.0) ? std::clamp(sample, 0.0, 255.0) : 0.0;
            audioBytes.push_back(static_cast<uint8_t>(bounded));
          }

          // In-memory input: no path, pass the converted bytes instead.
          result = cactus_embed_speaker(this->_model, nullptr,
                                        responseBuffer.data(), bufferSize,
                                        options, audioBytes.data(),
                                        audioBytes.size());
        }

        if (result < 0) {
          throw std::runtime_error("Cactus embed speaker failed: " +
                                   std::string(cactus_get_last_error()));
        }

        // Trim the buffer down to the NUL-terminated text that was written.
        responseBuffer.resize(strlen(responseBuffer.c_str()));
        return responseBuffer;
      });
}
|
|
619
|
+
|
|
491
620
|
std::shared_ptr<Promise<void>> HybridCactus::reset() {
|
|
492
621
|
return Promise<void>::async([this]() -> void {
|
|
493
622
|
std::lock_guard<std::mutex> lock(this->_modelMutex);
|
|
@@ -525,7 +654,7 @@ std::shared_ptr<Promise<void>> HybridCactus::destroy() {
|
|
|
525
654
|
std::shared_ptr<Promise<void>>
|
|
526
655
|
HybridCactus::setTelemetryEnvironment(const std::string &cacheDir) {
|
|
527
656
|
return Promise<void>::async([cacheDir]() -> void {
|
|
528
|
-
cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.
|
|
657
|
+
cactus_set_telemetry_environment("react-native", cacheDir.c_str(), "1.12.0");
|
|
529
658
|
});
|
|
530
659
|
}
|
|
531
660
|
|
package/cpp/HybridCactus.hpp
CHANGED
|
@@ -24,6 +24,11 @@ public:
|
|
|
24
24
|
double /* tokenId */)>> &callback)
|
|
25
25
|
override;
|
|
26
26
|
|
|
27
|
+
// Runs prompt prefill on the loaded model and resolves with the text written
// into a response buffer of `responseBufferSize` bytes. `optionsJson` and
// `toolsJson` are optional JSON strings. The implementation rejects the
// promise when the model is not initialized or the native call fails.
std::shared_ptr<Promise<std::string>> prefill(
|
|
28
|
+
const std::string &messagesJson, double responseBufferSize,
|
|
29
|
+
const std::optional<std::string> &optionsJson,
|
|
30
|
+
const std::optional<std::string> &toolsJson) override;
|
|
31
|
+
|
|
27
32
|
std::shared_ptr<Promise<std::vector<double>>>
|
|
28
33
|
tokenize(const std::string &text) override;
|
|
29
34
|
|
|
@@ -67,6 +72,16 @@ public:
|
|
|
67
72
|
std::shared_ptr<Promise<std::vector<double>>>
|
|
68
73
|
audioEmbed(const std::string &audioPath, double embeddingBufferSize) override;
|
|
69
74
|
|
|
75
|
+
// Runs speaker diarization and resolves with the text written into a response
// buffer of `responseBufferSize` bytes. `audio` is either a file path
// (std::string) or raw audio data as doubles, one byte value per element.
// `optionsJson` is an optional JSON string. The implementation rejects the
// promise when the model is not initialized or the native call fails.
std::shared_ptr<Promise<std::string>>
|
|
76
|
+
diarize(const std::variant<std::vector<double>, std::string> &audio,
|
|
77
|
+
double responseBufferSize,
|
|
78
|
+
const std::optional<std::string> &optionsJson) override;
|
|
79
|
+
|
|
80
|
+
// Extracts a speaker embedding and resolves with the text written into a
// response buffer of `responseBufferSize` bytes. `audio` is either a file path
// (std::string) or raw audio data as doubles, one byte value per element.
// `optionsJson` is an optional JSON string. The implementation rejects the
// promise when the model is not initialized or the native call fails.
std::shared_ptr<Promise<std::string>>
|
|
81
|
+
embedSpeaker(const std::variant<std::vector<double>, std::string> &audio,
|
|
82
|
+
double responseBufferSize,
|
|
83
|
+
const std::optional<std::string> &optionsJson) override;
|
|
84
|
+
|
|
70
85
|
std::shared_ptr<Promise<void>> reset() override;
|
|
71
86
|
|
|
72
87
|
std::shared_ptr<Promise<void>> stop() override;
|