cactus-react-native 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +212 -27
  2. package/android/src/main/jniLibs/arm64-v8a/libcactus.a +0 -0
  3. package/cpp/HybridCactus.cpp +119 -0
  4. package/cpp/HybridCactus.hpp +13 -0
  5. package/cpp/cactus_ffi.h +24 -0
  6. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +24 -0
  7. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_utils.h +41 -1
  8. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/engine.h +66 -48
  9. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gemma_tools.h +549 -0
  10. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/graph.h +102 -21
  11. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel.h +45 -195
  12. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/kernel_utils.h +399 -140
  13. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  14. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h +24 -0
  15. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h +41 -1
  16. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h +66 -48
  17. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h +549 -0
  18. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/graph.h +102 -21
  19. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel.h +45 -195
  20. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/kernel_utils.h +399 -140
  21. package/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/cactus +0 -0
  22. package/lib/module/api/Database.js +0 -92
  23. package/lib/module/api/Database.js.map +1 -1
  24. package/lib/module/classes/CactusLM.js +33 -15
  25. package/lib/module/classes/CactusLM.js.map +1 -1
  26. package/lib/module/classes/CactusSTT.js +90 -15
  27. package/lib/module/classes/CactusSTT.js.map +1 -1
  28. package/lib/module/hooks/useCactusLM.js +14 -5
  29. package/lib/module/hooks/useCactusLM.js.map +1 -1
  30. package/lib/module/hooks/useCactusSTT.js +100 -4
  31. package/lib/module/hooks/useCactusSTT.js.map +1 -1
  32. package/lib/module/index.js.map +1 -1
  33. package/lib/module/models.js +336 -0
  34. package/lib/module/models.js.map +1 -0
  35. package/lib/module/native/Cactus.js +37 -0
  36. package/lib/module/native/Cactus.js.map +1 -1
  37. package/lib/module/types/CactusLM.js +2 -0
  38. package/lib/module/types/CactusSTT.js +2 -0
  39. package/lib/module/types/common.js +2 -0
  40. package/lib/module/types/{CactusModel.js.map → common.js.map} +1 -1
  41. package/lib/typescript/src/api/Database.d.ts +0 -6
  42. package/lib/typescript/src/api/Database.d.ts.map +1 -1
  43. package/lib/typescript/src/classes/CactusLM.d.ts +7 -3
  44. package/lib/typescript/src/classes/CactusLM.d.ts.map +1 -1
  45. package/lib/typescript/src/classes/CactusSTT.d.ts +13 -4
  46. package/lib/typescript/src/classes/CactusSTT.d.ts.map +1 -1
  47. package/lib/typescript/src/hooks/useCactusLM.d.ts +2 -2
  48. package/lib/typescript/src/hooks/useCactusLM.d.ts.map +1 -1
  49. package/lib/typescript/src/hooks/useCactusSTT.d.ts +12 -4
  50. package/lib/typescript/src/hooks/useCactusSTT.d.ts.map +1 -1
  51. package/lib/typescript/src/index.d.ts +2 -3
  52. package/lib/typescript/src/index.d.ts.map +1 -1
  53. package/lib/typescript/src/models.d.ts +6 -0
  54. package/lib/typescript/src/models.d.ts.map +1 -0
  55. package/lib/typescript/src/native/Cactus.d.ts +6 -1
  56. package/lib/typescript/src/native/Cactus.d.ts.map +1 -1
  57. package/lib/typescript/src/specs/Cactus.nitro.d.ts +5 -0
  58. package/lib/typescript/src/specs/Cactus.nitro.d.ts.map +1 -1
  59. package/lib/typescript/src/types/CactusLM.d.ts +2 -0
  60. package/lib/typescript/src/types/CactusLM.d.ts.map +1 -1
  61. package/lib/typescript/src/types/CactusSTT.d.ts +20 -0
  62. package/lib/typescript/src/types/CactusSTT.d.ts.map +1 -1
  63. package/lib/typescript/src/types/common.d.ts +28 -0
  64. package/lib/typescript/src/types/common.d.ts.map +1 -0
  65. package/nitrogen/generated/shared/c++/HybridCactusSpec.cpp +5 -0
  66. package/nitrogen/generated/shared/c++/HybridCactusSpec.hpp +5 -0
  67. package/package.json +1 -1
  68. package/src/api/Database.ts +0 -133
  69. package/src/classes/CactusLM.ts +49 -17
  70. package/src/classes/CactusSTT.ts +118 -17
  71. package/src/hooks/useCactusLM.ts +25 -5
  72. package/src/hooks/useCactusSTT.ts +117 -5
  73. package/src/index.tsx +6 -2
  74. package/src/models.ts +344 -0
  75. package/src/native/Cactus.ts +55 -0
  76. package/src/specs/Cactus.nitro.ts +5 -0
  77. package/src/types/CactusLM.ts +3 -0
  78. package/src/types/CactusSTT.ts +26 -0
  79. package/src/types/common.ts +28 -0
  80. package/lib/module/types/CactusModel.js +0 -2
  81. package/lib/module/types/CactusSTTModel.js +0 -2
  82. package/lib/module/types/CactusSTTModel.js.map +0 -1
  83. package/lib/typescript/src/types/CactusModel.d.ts +0 -13
  84. package/lib/typescript/src/types/CactusModel.d.ts.map +0 -1
  85. package/lib/typescript/src/types/CactusSTTModel.d.ts +0 -8
  86. package/lib/typescript/src/types/CactusSTTModel.d.ts.map +0 -1
  87. package/src/types/CactusModel.ts +0 -15
  88. package/src/types/CactusSTTModel.ts +0 -10
package/README.md CHANGED
@@ -78,6 +78,32 @@ const App = () => {
78
78
 
79
79
  ## Language Model
80
80
 
81
+ ### Model Options
82
+
83
+ Choose model quantization and NPU acceleration with Pro models.
84
+
85
+ ```typescript
86
+ import { CactusLM } from 'cactus-react-native';
87
+
88
+ // Use int4 for faster performance and smaller file size
89
+ const cactusLM = new CactusLM({
90
+ model: 'lfm2-vl-450m',
91
+ options: {
92
+ quantization: 'int4', // 'int4' or 'int8'
93
+ pro: false
94
+ }
95
+ });
96
+
97
+ // Use pro models for NPU acceleration
98
+ const cactusPro = new CactusLM({
99
+ model: 'lfm2-vl-450m',
100
+ options: {
101
+ quantization: 'int4',
102
+ pro: true
103
+ }
104
+ });
105
+ ```
106
+
81
107
  ### Completion
82
108
 
83
109
  Generate text responses from the model by providing a conversation history.
@@ -559,6 +585,60 @@ const App = () => {
559
585
  };
560
586
  ```
561
587
 
588
+ ### Streaming Transcription
589
+
590
+ Transcribe audio in real-time with incremental results.
591
+
592
+ #### Class
593
+
594
+ ```typescript
595
+ import { CactusSTT } from 'cactus-react-native';
596
+
597
+ const cactusSTT = new CactusSTT({ model: 'whisper-small' });
598
+
599
+ await cactusSTT.streamTranscribeInit();
600
+
601
+ const audioChunk: number[] = [/* PCM samples */];
602
+ await cactusSTT.streamTranscribeInsert({ audio: audioChunk });
603
+
604
+ const result = await cactusSTT.streamTranscribeProcess({
605
+ options: { confirmationThreshold: 0.95 }
606
+ });
607
+
608
+ console.log('Confirmed:', result.confirmed);
609
+ console.log('Pending:', result.pending);
610
+
611
+ const final = await cactusSTT.streamTranscribeFinalize();
612
+ await cactusSTT.streamTranscribeDestroy();
613
+ ```
614
+
615
+ #### Hook
616
+
617
+ ```tsx
618
+ import { useCactusSTT } from 'cactus-react-native';
619
+
620
+ const App = () => {
621
+ const cactusSTT = useCactusSTT({ model: 'whisper-small' });
622
+
623
+ const handleStream = async () => {
624
+ await cactusSTT.streamTranscribeInit();
625
+
626
+ const audioChunk: number[] = [/* PCM samples */];
627
+ await cactusSTT.streamTranscribeInsert({ audio: audioChunk });
628
+
629
+ await cactusSTT.streamTranscribeProcess();
630
+ };
631
+
632
+ return (
633
+ <>
634
+ <Button onPress={handleStream} title="Stream" />
635
+ <Text>{cactusSTT.streamTranscribeConfirmed}</Text>
636
+ <Text>{cactusSTT.streamTranscribePending}</Text>
637
+ </>
638
+ );
639
+ };
640
+ ```
641
+
562
642
  ### Audio Embedding
563
643
 
564
644
  Generate embeddings from audio files for audio understanding.
@@ -854,9 +934,12 @@ const App = () => {
854
934
  **`new CactusLM(params?: CactusLMParams)`**
855
935
 
856
936
  **Parameters:**
857
- - `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6'`).
937
+ - `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6b'`).
858
938
  - `contextSize` - Context window size (default: `2048`).
859
939
  - `corpusDir` - Directory containing text files for RAG (default: `undefined`).
940
+ - `options` - Model options for quantization and NPU acceleration:
941
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int4'`).
942
+ - `pro` - Enable NPU-accelerated models (default: `false`).
860
943
 
861
944
  #### Methods
862
945
 
@@ -932,13 +1015,13 @@ Resets the model's internal state, clearing any cached context. Automatically ca
932
1015
 
933
1016
  Releases all resources associated with the model. Automatically calls `stop()` first. Safe to call even if the model is not initialized.
934
1017
 
935
- **`getModels(): Promise<CactusModel[]>`**
1018
+ **`getModels(): CactusModel[]`**
936
1019
 
937
- Fetches available models from the database and checks their download status.
1020
+ Returns available models.
938
1021
 
939
1022
  ### useCactusLM Hook
940
1023
 
941
- The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When model parameters (`model`, `contextSize`, or `corpusDir`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1024
+ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When model parameters (`model`, `contextSize`, `corpusDir`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
942
1025
 
943
1026
  #### State
944
1027
 
@@ -962,7 +1045,7 @@ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When m
962
1045
  - `stop(): Promise<void>` - Stops ongoing generation. Clears any errors.
963
1046
  - `reset(): Promise<void>` - Resets the model's internal state, clearing cached context. Also clears the `completion` state.
964
1047
  - `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `completion` state. Automatically called when the component unmounts.
965
- - `getModels(): Promise<CactusModel[]>` - Fetches available models from the database and checks their download status.
1048
+ - `getModels(): CactusModel[]` - Returns available models.
966
1049
 
967
1050
  ### CactusSTT Class
968
1051
 
@@ -971,8 +1054,11 @@ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When m
971
1054
  **`new CactusSTT(params?: CactusSTTParams)`**
972
1055
 
973
1056
  **Parameters:**
974
- - `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6'`).
1057
+ - `model` - Model slug or absolute path to Cactus model (default: `'whisper-small'`).
975
1058
  - `contextSize` - Context window size (default: `2048`).
1059
+ - `options` - Model options for quantization and NPU acceleration:
1060
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int4'`).
1061
+ - `pro` - Enable NPU-accelerated models (default: `false`).
976
1062
 
977
1063
  #### Methods
978
1064
 
@@ -1009,6 +1095,33 @@ Generates embeddings for the given audio file. Automatically calls `init()` if n
1009
1095
  **Parameters:**
1010
1096
  - `audioPath` - Path to the audio file.
1011
1097
 
1098
+ **`streamTranscribeInit(): Promise<void>`**
1099
+
1100
+ Initializes a streaming transcription session. Automatically calls `init()` if not already initialized.
1101
+
1102
+ **`streamTranscribeInsert(params: CactusSTTStreamTranscribeInsertParams): Promise<void>`**
1103
+
1104
+ Inserts PCM audio samples into the streaming buffer.
1105
+
1106
+ **Parameters:**
1107
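+ - `audio` - Array of PCM audio bytes. Each value is clamped to `0`–`255` and converted to `uint8_t` before it is passed to the native streaming buffer.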
1108
+
1109
+ **`streamTranscribeProcess(params?: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>`**
1110
+
1111
+ Processes accumulated audio and returns incremental transcription results.
1112
+
1113
+ **Parameters:**
1114
+ - `options` - Processing options:
1115
+ - `confirmationThreshold` - Confidence threshold for confirming text.
1116
+
1117
+ **`streamTranscribeFinalize(): Promise<CactusSTTStreamTranscribeFinalizeResult>`**
1118
+
1119
+ Finalizes the streaming session and returns remaining transcription text.
1120
+
1121
+ **`streamTranscribeDestroy(): Promise<void>`**
1122
+
1123
+ Destroys the streaming session and releases resources.
1124
+
1012
1125
  **`stop(): Promise<void>`**
1013
1126
 
1014
1127
  Stops ongoing transcription or embedding generation.
@@ -1021,18 +1134,21 @@ Resets the model's internal state. Automatically calls `stop()` first.
1021
1134
 
1022
1135
  Releases all resources associated with the model. Automatically calls `stop()` first. Safe to call even if the model is not initialized.
1023
1136
 
1024
- **`getModels(): Promise<CactusSTTModel[]>`**
1137
+ **`getModels(): CactusModel[]`**
1025
1138
 
1026
- Fetches available STT models from the database and checks their download status.
1139
+ Returns available speech-to-text models.
1027
1140
 
1028
1141
  ### useCactusSTT Hook
1029
1142
 
1030
- The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When model parameters (`model`, `contextSize`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1143
+ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When model parameters (`model`, `contextSize`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
1031
1144
 
1032
1145
  #### State
1033
1146
 
1034
1147
  - `transcription: string` - Current transcription text. Automatically accumulated during streaming. Cleared before each new transcription and when calling `reset()` or `destroy()`.
1148
+ - `streamTranscribeConfirmed: string` - Accumulated confirmed text from streaming transcription.
1149
+ - `streamTranscribePending: string` - Current pending text from streaming transcription.
1035
1150
  - `isGenerating: boolean` - Whether the model is currently generating (transcription or embedding). Both operations share this flag.
1151
+ - `isStreamTranscribing: boolean` - Whether a streaming transcription session is active.
1036
1152
  - `isInitializing: boolean` - Whether the model is initializing.
1037
1153
  - `isDownloaded: boolean` - Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
1038
1154
  - `isDownloading: boolean` - Whether the model is being downloaded.
@@ -1045,10 +1161,15 @@ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When
1045
1161
  - `init(): Promise<void>` - Initializes the model for inference. Sets `isInitializing` to `true` during initialization.
1046
1162
  - `transcribe(params: CactusSTTTranscribeParams): Promise<CactusSTTTranscribeResult>` - Transcribes audio to text. Automatically accumulates tokens in the `transcription` state during streaming. Sets `isGenerating` to `true` while generating. Clears `transcription` before starting.
1047
1163
  - `audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult>` - Generates embeddings for the given audio. Sets `isGenerating` to `true` during operation.
1164
+ - `streamTranscribeInit(): Promise<void>` - Initializes a streaming transcription session. Sets `isStreamTranscribing` to `true`.
1165
+ - `streamTranscribeInsert(params: CactusSTTStreamTranscribeInsertParams): Promise<void>` - Inserts audio chunks into the streaming buffer.
1166
+ - `streamTranscribeProcess(params?: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>` - Processes audio and returns results. Automatically accumulates confirmed text in `streamTranscribeConfirmed` and updates `streamTranscribePending`.
1167
+ - `streamTranscribeFinalize(): Promise<CactusSTTStreamTranscribeFinalizeResult>` - Finalizes streaming and returns remaining text.
1168
+ - `streamTranscribeDestroy(): Promise<void>` - Destroys the streaming session. Sets `isStreamTranscribing` to `false`.
1048
1169
  - `stop(): Promise<void>` - Stops ongoing generation. Clears any errors.
1049
1170
  - `reset(): Promise<void>` - Resets the model's internal state. Also clears the `transcription` state.
1050
1171
  - `destroy(): Promise<void>` - Releases all resources associated with the model. Clears the `transcription` state. Automatically called when the component unmounts.
1051
- - `getModels(): Promise<CactusSTTModel[]>` - Fetches available STT models from the database and checks their download status.
1172
+ - `getModels(): CactusModel[]` - Returns available speech-to-text models.
1052
1173
 
1053
1174
  ### CactusIndex Class
1054
1175
 
@@ -1137,6 +1258,7 @@ interface CactusLMParams {
1137
1258
  model?: string;
1138
1259
  contextSize?: number;
1139
1260
  corpusDir?: string;
1261
+ options?: ModelOptions;
1140
1262
  }
1141
1263
  ```
1142
1264
 
@@ -1293,28 +1415,36 @@ interface CactusLMImageEmbedResult {
1293
1415
 
1294
1416
  ```typescript
1295
1417
  interface CactusModel {
1296
- name: string;
1297
- slug: string;
1298
- quantization: number;
1299
- sizeMb: number;
1300
- downloadUrl: string;
1301
- supportsToolCalling: boolean;
1302
- supportsVision: boolean;
1303
- supportsCompletion: boolean;
1304
- createdAt: Date;
1305
- isDownloaded: boolean;
1418
+ completion: boolean;
1419
+ tools: boolean;
1420
+ vision: boolean;
1421
+ embed: boolean;
1422
+ speech: boolean;
1423
+ quantization: {
1424
+ int4: {
1425
+ sizeMb: number;
1426
+ url: string;
1427
+ pro?: {
1428
+ apple: string;
1429
+ };
1430
+ };
1431
+ int8: {
1432
+ sizeMb: number;
1433
+ url: string;
1434
+ pro?: {
1435
+ apple: string;
1436
+ };
1437
+ };
1438
+ };
1306
1439
  }
1307
1440
  ```
1308
1441
 
1309
- ### CactusSTTModel
1442
+ ### ModelOptions
1310
1443
 
1311
1444
  ```typescript
1312
- interface CactusSTTModel {
1313
- slug: string;
1314
- sizeMb: number;
1315
- downloadUrl: string;
1316
- createdAt: Date;
1317
- isDownloaded: boolean;
1445
+ interface ModelOptions {
1446
+ quantization: 'int4' | 'int8';
1447
+ pro: boolean;
1318
1448
  }
1319
1449
  ```
1320
1450
 
@@ -1324,6 +1454,7 @@ interface CactusSTTModel {
1324
1454
  interface CactusSTTParams {
1325
1455
  model?: string;
1326
1456
  contextSize?: number;
1457
+ options?: ModelOptions;
1327
1458
  }
1328
1459
  ```
1329
1460
 
@@ -1391,6 +1522,49 @@ interface CactusSTTAudioEmbedResult {
1391
1522
  }
1392
1523
  ```
1393
1524
 
1525
+ ### CactusSTTStreamTranscribeInsertParams
1526
+
1527
+ ```typescript
1528
+ interface CactusSTTStreamTranscribeInsertParams {
1529
+ audio: number[];
1530
+ }
1531
+ ```
1532
+
1533
+ ### StreamTranscribeProcessOptions
1534
+
1535
+ ```typescript
1536
+ interface StreamTranscribeProcessOptions {
1537
+ confirmationThreshold?: number;
1538
+ }
1539
+ ```
1540
+
1541
+ ### CactusSTTStreamTranscribeProcessParams
1542
+
1543
+ ```typescript
1544
+ interface CactusSTTStreamTranscribeProcessParams {
1545
+ options?: StreamTranscribeProcessOptions;
1546
+ }
1547
+ ```
1548
+
1549
+ ### CactusSTTStreamTranscribeProcessResult
1550
+
1551
+ ```typescript
1552
+ interface CactusSTTStreamTranscribeProcessResult {
1553
+ success: boolean;
1554
+ confirmed: string;
1555
+ pending: string;
1556
+ }
1557
+ ```
1558
+
1559
+ ### CactusSTTStreamTranscribeFinalizeResult
1560
+
1561
+ ```typescript
1562
+ interface CactusSTTStreamTranscribeFinalizeResult {
1563
+ success: boolean;
1564
+ confirmed: string;
1565
+ }
1566
+ ```
1567
+
1394
1568
  ### CactusIndexParams
1395
1569
 
1396
1570
  ```typescript
@@ -1491,6 +1665,17 @@ import { CactusConfig } from 'cactus-react-native';
1491
1665
  CactusConfig.cactusToken = 'your-cactus-token-here';
1492
1666
  ```
1493
1667
 
1668
+ ### Cactus Pro
1669
+
1670
+ Enable NPU-accelerated models for enhanced performance.
1671
+
1672
+ ```typescript
1673
+ import { CactusConfig } from 'cactus-react-native';
1674
+
1675
+ // Set your Cactus Pro key
1676
+ CactusConfig.cactusProKey = 'your-cactus-pro-key-here';
1677
+ ```
1678
+
1494
1679
  ## Performance Tips
1495
1680
 
1496
1681
  - **Model Selection** - Choose smaller models for faster inference on mobile devices.
@@ -325,9 +325,128 @@ std::shared_ptr<Promise<void>> HybridCactus::destroy() {
325
325
  throw std::runtime_error("Cactus model is not initialized");
326
326
  }
327
327
 
328
+ if (this->_streamTranscribe) {
329
+ cactus_stream_transcribe_destroy(this->_streamTranscribe);
330
+ this->_streamTranscribe = nullptr;
331
+ }
332
+
328
333
  cactus_destroy(this->_model);
329
334
  this->_model = nullptr;
330
335
  });
331
336
  }
332
337
 
338
// Starts a streaming transcription session on the currently loaded model.
// The work runs inside Promise<void>::async and holds _modelMutex for the
// whole body of the lambda.
// Throws std::runtime_error when no model is loaded, when a stream handle is
// already set, or when cactus_stream_transcribe_init returns a null handle.
+ std::shared_ptr<Promise<void>> HybridCactus::streamTranscribeInit() {
339
+ return Promise<void>::async([this]() -> void {
340
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
341
+
342
+ if (!this->_model) {
343
+ throw std::runtime_error("Cactus model is not initialized");
344
+ }
345
+
346
// Only one stream handle is kept per instance; a second init is rejected
// instead of replacing the existing handle.
+ if (this->_streamTranscribe) {
347
+ throw std::runtime_error(
348
+ "Cactus stream transcribe is already initialized");
349
+ }
350
+
351
// A null handle is treated as failure; the error text is completed with
// cactus_get_last_error().
+ this->_streamTranscribe = cactus_stream_transcribe_init(this->_model);
352
+ if (!this->_streamTranscribe) {
353
+ throw std::runtime_error("Cactus stream transcribe init failed: " +
354
+ std::string(cactus_get_last_error()));
355
+ }
356
+ });
357
+ }
358
+
359
// Feeds one chunk of audio into the active streaming session.
// `audio` is captured by value, so the async lambda works on its own copy.
// Every element is clamped to [0, 255] and cast to uint8_t before the buffer
// is handed to cactus_stream_transcribe_insert; presumably each element is one
// byte of the PCM buffer rather than one sample - confirm against the JS caller.
// Throws std::runtime_error when no stream handle is set or when the native
// call returns a negative value.
+ std::shared_ptr<Promise<void>>
360
+ HybridCactus::streamTranscribeInsert(const std::vector<double> &audio) {
361
+ return Promise<void>::async([this, audio]() -> void {
362
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
363
+
364
+ if (!this->_streamTranscribe) {
365
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
366
+ }
367
+
368
+ std::vector<uint8_t> audioBytes;
369
+ audioBytes.reserve(audio.size());
370
// NOTE(review): a NaN element is not filtered out before the clamp and the
// cast to uint8_t - confirm callers can never pass NaN, or guard it here.
+ for (double d : audio) {
371
+ d = std::clamp(d, 0.0, 255.0);
372
+ audioBytes.emplace_back(static_cast<uint8_t>(d));
373
+ }
374
+
375
// The size argument is the element count of audioBytes, i.e. a byte count.
+ int result = cactus_stream_transcribe_insert(
376
+ this->_streamTranscribe, audioBytes.data(), audioBytes.size());
377
+
378
+ if (result < 0) {
379
+ throw std::runtime_error("Cactus stream transcribe insert failed: " +
380
+ std::string(cactus_get_last_error()));
381
+ }
382
+ });
383
+ }
384
+
385
// Calls cactus_stream_transcribe_process on the active session and resolves
// with the text the native call wrote into a 32768-byte response buffer
// (presumably a JSON document - confirm against the JS caller).
// `optionsJson` is forwarded as a C string, or as nullptr when it is empty.
// Throws std::runtime_error when no stream handle is set or when the native
// call returns a negative value.
+ std::shared_ptr<Promise<std::string>> HybridCactus::streamTranscribeProcess(
386
+ const std::optional<std::string> &optionsJson) {
387
+ return Promise<std::string>::async([this, optionsJson]() -> std::string {
388
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
389
+
390
+ if (!this->_streamTranscribe) {
391
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
392
+ }
393
+
394
// resize() fills the string with '\0' characters, which the trimming step
// below relies on.
+ std::string responseBuffer;
395
+ responseBuffer.resize(32768);
396
+
397
+ int result = cactus_stream_transcribe_process(
398
+ this->_streamTranscribe, responseBuffer.data(), responseBuffer.size(),
399
+ optionsJson ? optionsJson->c_str() : nullptr);
400
+
401
+ if (result < 0) {
402
+ throw std::runtime_error("Cactus stream transcribe process failed: " +
403
+ std::string(cactus_get_last_error()));
404
+ }
405
+
406
// Shrinks the string to the first '\0', dropping the unused zero-filled
// tail of the buffer so only the native output is returned.
+ // Remove null terminator
407
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
408
+
409
+ return responseBuffer;
410
+ });
411
+ }
412
+
413
// Calls cactus_stream_transcribe_finalize on the active session and resolves
// with the text the native call wrote into a 32768-byte response buffer.
// This function does not reset _streamTranscribe; the handle stays set after
// it returns.
// Throws std::runtime_error when no stream handle is set or when the native
// call returns a negative value.
+ std::shared_ptr<Promise<std::string>> HybridCactus::streamTranscribeFinalize() {
414
+ return Promise<std::string>::async([this]() -> std::string {
415
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
416
+
417
+ if (!this->_streamTranscribe) {
418
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
419
+ }
420
+
421
// resize() fills the string with '\0' characters, which the trimming step
// below relies on.
+ std::string responseBuffer;
422
+ responseBuffer.resize(32768);
423
+
424
+ int result = cactus_stream_transcribe_finalize(
425
+ this->_streamTranscribe, responseBuffer.data(), responseBuffer.size());
426
+
427
+ if (result < 0) {
428
+ throw std::runtime_error("Cactus stream transcribe finalize failed: " +
429
+ std::string(cactus_get_last_error()));
430
+ }
431
+
432
// Shrinks the string to the first '\0', dropping the unused zero-filled
// tail of the buffer so only the native output is returned.
+ // Remove null terminator
433
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
434
+
435
+ return responseBuffer;
436
+ });
437
+ }
438
+
439
// Releases the active streaming session: passes the handle to
// cactus_stream_transcribe_destroy and resets _streamTranscribe to nullptr,
// all while holding _modelMutex.
// Throws std::runtime_error when no stream handle is set, so calling it twice
// in a row fails on the second call.
+ std::shared_ptr<Promise<void>> HybridCactus::streamTranscribeDestroy() {
440
+ return Promise<void>::async([this]() -> void {
441
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
442
+
443
+ if (!this->_streamTranscribe) {
444
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
445
+ }
446
+
447
+ cactus_stream_transcribe_destroy(this->_streamTranscribe);
448
// Clearing the member lets the null checks in the other stream methods
// detect that no session exists.
+ this->_streamTranscribe = nullptr;
449
+ });
450
+ }
451
+
333
452
  } // namespace margelo::nitro::cactus
@@ -38,6 +38,18 @@ public:
38
38
  double /* tokenId */)>> &callback)
39
39
  override;
40
40
 
41
// Streaming transcription overrides. Each returns a promise; the two
// string-returning methods resolve with the text written by the native call.

// Creates the stream handle for the loaded model.
+ std::shared_ptr<Promise<void>> streamTranscribeInit() override;
42
+
43
// Appends audio to the stream; elements are converted to bytes by the
// implementation.
+ std::shared_ptr<Promise<void>>
44
+ streamTranscribeInsert(const std::vector<double> &audio) override;
45
+
46
// Processes the stream; `optionsJson` is optional and forwarded as a C string.
+ std::shared_ptr<Promise<std::string>> streamTranscribeProcess(
47
+ const std::optional<std::string> &optionsJson) override;
48
+
49
// Finalizes the stream and resolves with the native response text.
+ std::shared_ptr<Promise<std::string>> streamTranscribeFinalize() override;
50
+
51
// Destroys the stream handle.
+ std::shared_ptr<Promise<void>> streamTranscribeDestroy() override;
52
+
41
53
  std::shared_ptr<Promise<std::vector<double>>>
42
54
  embed(const std::string &text, double embeddingBufferSize,
43
55
  bool normalize) override;
@@ -56,6 +68,7 @@ public:
56
68
 
57
69
  private:
58
70
  cactus_model_t _model = nullptr;
71
+ cactus_stream_transcribe_t _streamTranscribe = nullptr;
59
72
  size_t _contextSize;
60
73
 
61
74
  std::mutex _modelMutex;
package/cpp/cactus_ffi.h CHANGED
@@ -67,6 +67,30 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
67
67
  size_t pcm_buffer_size
68
68
  );
69
69
 
70
// Opaque handle for one streaming transcription session.
+ typedef void* cactus_stream_transcribe_t;
71
+
72
// Creates a streaming session for `model`. The HybridCactus wrapper treats a
// null return as failure and reads cactus_get_last_error() for the reason.
+ CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_init(cactus_model_t model);
73
+
74
// Inserts `pcm_buffer_size` bytes from `pcm_buffer` into the stream. The
// wrapper treats a negative return as failure.
// NOTE(review): expected sample format and byte order are not stated here -
// confirm against the native library documentation.
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_insert(
75
+ cactus_stream_transcribe_t stream,
76
+ const uint8_t* pcm_buffer,
77
+ size_t pcm_buffer_size
78
+ );
79
+
80
// Writes the processing result as text into `response_buffer` (capacity
// `buffer_size`). The wrapper passes nullptr for `options_json` when no
// options are given and treats a negative return as failure.
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_process(
81
+ cactus_stream_transcribe_t stream,
82
+ char* response_buffer,
83
+ size_t buffer_size,
84
+ const char* options_json
85
+ );
86
+
87
// Writes the final result as text into `response_buffer` (capacity
// `buffer_size`). The wrapper treats a negative return as failure.
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_finalize(
88
+ cactus_stream_transcribe_t stream,
89
+ char* response_buffer,
90
+ size_t buffer_size
91
+ );
92
+
93
// Releases the session handle; it returns nothing, so failures are not
// reported to the caller.
+ CACTUS_FFI_EXPORT void cactus_stream_transcribe_destroy(cactus_stream_transcribe_t stream);
70
94
 
71
95
  CACTUS_FFI_EXPORT int cactus_embed(
72
96
  cactus_model_t model,
@@ -67,6 +67,30 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
67
67
  size_t pcm_buffer_size
68
68
  );
69
69
 
70
// Opaque handle for one streaming transcription session.
+ typedef void* cactus_stream_transcribe_t;
71
+
72
// Creates a streaming session for `model`. The HybridCactus wrapper treats a
// null return as failure and reads cactus_get_last_error() for the reason.
+ CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_init(cactus_model_t model);
73
+
74
// Inserts `pcm_buffer_size` bytes from `pcm_buffer` into the stream. The
// wrapper treats a negative return as failure.
// NOTE(review): expected sample format and byte order are not stated here -
// confirm against the native library documentation.
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_insert(
75
+ cactus_stream_transcribe_t stream,
76
+ const uint8_t* pcm_buffer,
77
+ size_t pcm_buffer_size
78
+ );
79
+
80
// Writes the processing result as text into `response_buffer` (capacity
// `buffer_size`). The wrapper passes nullptr for `options_json` when no
// options are given and treats a negative return as failure.
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_process(
81
+ cactus_stream_transcribe_t stream,
82
+ char* response_buffer,
83
+ size_t buffer_size,
84
+ const char* options_json
85
+ );
86
+
87
// Writes the final result as text into `response_buffer` (capacity
// `buffer_size`). The wrapper treats a negative return as failure.
+ CACTUS_FFI_EXPORT int cactus_stream_transcribe_finalize(
88
+ cactus_stream_transcribe_t stream,
89
+ char* response_buffer,
90
+ size_t buffer_size
91
+ );
92
+
93
// Releases the session handle; it returns nothing, so failures are not
// reported to the caller.
+ CACTUS_FFI_EXPORT void cactus_stream_transcribe_destroy(cactus_stream_transcribe_t stream);
70
94
 
71
95
  CACTUS_FFI_EXPORT int cactus_embed(
72
96
  cactus_model_t model,
@@ -63,6 +63,14 @@ struct ToolFunction {
63
63
  std::unordered_map<std::string, std::string> parameters;
64
64
  };
65
65
 
66
+ } // namespace ffi
67
+ } // namespace cactus
68
+
69
+ #include "gemma_tools.h"
70
+
71
+ namespace cactus {
72
+ namespace ffi {
73
+
66
74
  inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
67
75
  std::string sanitized_msg = error_message;
68
76
  for (auto& c : sanitized_msg) {
@@ -303,11 +311,43 @@ inline void parse_function_calls_from_response(const std::string& response_text,
303
311
  regular_response = response_text;
304
312
  function_calls.clear();
305
313
 
314
+ gemma::parse_function_calls(regular_response, function_calls);
315
+
316
+ // Parse Qwen-style function calls: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
317
+ const std::string QWEN_TOOL_START = "<tool_call>";
318
+ const std::string QWEN_TOOL_END = "</tool_call>";
319
+ size_t qwen_start_pos = 0;
320
+
321
+ while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
322
+ size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
323
+ size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
324
+
325
+ if (qwen_end_pos != std::string::npos) {
326
+ std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
327
+
328
+ size_t first = json_content.find_first_not_of(" \t\n\r");
329
+ size_t last = json_content.find_last_not_of(" \t\n\r");
330
+ if (first != std::string::npos && last != std::string::npos) {
331
+ json_content = json_content.substr(first, last - first + 1);
332
+ }
333
+
334
+ if (json_content.size() > 2 && json_content[0] == '{' &&
335
+ json_content.find("\"name\"") != std::string::npos) {
336
+ function_calls.push_back(json_content);
337
+ }
338
+
339
+ regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
306
346
  const std::string TOOL_CALL_START = "<|tool_call_start|>";
307
347
  const std::string TOOL_CALL_END = "<|tool_call_end|>";
308
348
  size_t tool_start_pos = 0;
309
349
 
310
- while ((tool_start_pos = response_text.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
350
+ while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
311
351
  size_t content_start = tool_start_pos + TOOL_CALL_START.length();
312
352
  size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
313
353