react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
@@ -1,251 +1,474 @@
1
- import { TurboModuleRegistry, type TurboModule } from 'react-native';
2
-
3
- export interface Spec extends TurboModule {
4
- /**
5
- * Test method to verify sherpa-onnx native library is loaded.
6
- */
7
- testSherpaInit(): Promise<string>;
8
-
9
- /**
10
- * Resolve model path based on configuration.
11
- * Handles asset paths, file system paths, and auto-detection.
12
- * Returns an absolute path that can be used by native code.
13
- *
14
- * @param config - Object with 'type' ('asset' | 'file' | 'auto') and 'path' (string)
15
- */
16
- resolveModelPath(config: { type: string; path: string }): Promise<string>;
17
-
18
- /**
19
- * Initialize sherpa-onnx with model directory.
20
- * Expects an absolute path (use resolveModelPath first for asset/file paths).
21
- * @param modelDir - Absolute path to model directory
22
- * @param preferInt8 - Optional: true = prefer int8 models, false = prefer regular models, undefined = try int8 first (default)
23
- * @param modelType - Optional: explicit model type ('transducer', 'paraformer', 'nemo_ctc', 'auto'), undefined = auto (default)
24
- * @returns Object with success boolean and array of detected models (each with type and modelDir)
25
- */
26
- initializeSherpaOnnx(
27
- modelDir: string,
28
- preferInt8?: boolean,
29
- modelType?: string
30
- ): Promise<{
31
- success: boolean;
32
- detectedModels: Array<{ type: string; modelDir: string }>;
33
- }>;
34
-
35
- // ==================== STT Methods ====================
36
-
37
- /**
38
- * Transcribe an audio file.
39
- */
40
- transcribeFile(filePath: string): Promise<string>;
41
-
42
- /**
43
- * Release sherpa-onnx resources.
44
- */
45
- unloadSherpaOnnx(): Promise<void>;
46
-
47
- // ==================== TTS Methods ====================
48
-
49
- /**
50
- * Initialize Text-to-Speech (TTS) with model directory.
51
- * @param modelDir - Absolute path to model directory
52
- * @param modelType - Model type ('vits', 'matcha', 'kokoro', 'kitten', 'zipvoice', 'auto')
53
- * @param numThreads - Number of threads for inference (default: 2)
54
- * @param debug - Enable debug logging (default: false)
55
- * @returns Object with success boolean and array of detected models (each with type and modelDir)
56
- */
57
- initializeTts(
58
- modelDir: string,
59
- modelType: string,
60
- numThreads: number,
61
- debug: boolean,
62
- noiseScale?: number,
63
- noiseScaleW?: number,
64
- lengthScale?: number
65
- ): Promise<{
66
- success: boolean;
67
- detectedModels: Array<{ type: string; modelDir: string }>;
68
- }>;
69
-
70
- /**
71
- * Update TTS model parameters by re-initializing with stored config.
72
- * @param noiseScale - Optional noise scale override
73
- * @param noiseScaleW - Optional noise scale W override
74
- * @param lengthScale - Optional length scale override
75
- * @returns Object with success boolean and array of detected models
76
- */
77
- updateTtsParams(
78
- noiseScale?: number | null,
79
- noiseScaleW?: number | null,
80
- lengthScale?: number | null
81
- ): Promise<{
82
- success: boolean;
83
- detectedModels: Array<{ type: string; modelDir: string }>;
84
- }>;
85
-
86
- /**
87
- * Generate speech from text.
88
- * @param text - Text to convert to speech
89
- * @param sid - Speaker ID for multi-speaker models (default: 0)
90
- * @param speed - Speech speed multiplier (default: 1.0)
91
- * @returns Object with { samples: number[], sampleRate: number }
92
- */
93
- generateTts(
94
- text: string,
95
- sid: number,
96
- speed: number
97
- ): Promise<{ samples: number[]; sampleRate: number }>;
98
-
99
- /**
100
- * Generate speech with subtitle/timestamp metadata.
101
- * @param text - Text to convert to speech
102
- * @param sid - Speaker ID for multi-speaker models (default: 0)
103
- * @param speed - Speech speed multiplier (default: 1.0)
104
- * @returns Object with samples, sampleRate, subtitles, and estimated flag
105
- */
106
- generateTtsWithTimestamps(
107
- text: string,
108
- sid: number,
109
- speed: number
110
- ): Promise<{
111
- samples: number[];
112
- sampleRate: number;
113
- subtitles: Array<{ text: string; start: number; end: number }>;
114
- estimated: boolean;
115
- }>;
116
-
117
- /**
118
- * Generate speech in streaming mode (emits chunk events).
119
- * @param text - Text to convert to speech
120
- * @param sid - Speaker ID for multi-speaker models (default: 0)
121
- * @param speed - Speech speed multiplier (default: 1.0)
122
- */
123
- generateTtsStream(text: string, sid: number, speed: number): Promise<void>;
124
-
125
- /**
126
- * Cancel an ongoing streaming TTS generation.
127
- */
128
- cancelTtsStream(): Promise<void>;
129
-
130
- /**
131
- * Start PCM playback for streaming TTS.
132
- * @param sampleRate - Sample rate in Hz
133
- * @param channels - Number of channels (1 = mono)
134
- */
135
- startTtsPcmPlayer(sampleRate: number, channels: number): Promise<void>;
136
-
137
- /**
138
- * Write PCM samples to the streaming TTS player.
139
- * @param samples - Float PCM samples in range [-1.0, 1.0]
140
- */
141
- writeTtsPcmChunk(samples: number[]): Promise<void>;
142
-
143
- /**
144
- * Stop PCM playback for streaming TTS.
145
- */
146
- stopTtsPcmPlayer(): Promise<void>;
147
-
148
- /**
149
- * Get the sample rate of the initialized TTS model.
150
- * @returns Sample rate in Hz
151
- */
152
- getTtsSampleRate(): Promise<number>;
153
-
154
- /**
155
- * Get the number of speakers/voices available in the model.
156
- * @returns Number of speakers (0 or 1 for single-speaker models)
157
- */
158
- getTtsNumSpeakers(): Promise<number>;
159
-
160
- /**
161
- * Release TTS resources.
162
- */
163
- unloadTts(): Promise<void>;
164
-
165
- /**
166
- * Save TTS audio samples to a WAV file.
167
- * @param samples - Audio samples array
168
- * @param sampleRate - Sample rate in Hz
169
- * @param filePath - Absolute path where to save the WAV file
170
- * @returns The file path where audio was saved
171
- */
172
- saveTtsAudioToFile(
173
- samples: number[],
174
- sampleRate: number,
175
- filePath: string
176
- ): Promise<string>;
177
-
178
- /**
179
- * Save TTS audio samples to a WAV file via Android SAF content URI.
180
- * @param samples - Audio samples array
181
- * @param sampleRate - Sample rate in Hz
182
- * @param directoryUri - Directory content URI (tree or document)
183
- * @param filename - Desired file name (e.g., tts_123.wav)
184
- * @returns The content URI of the saved file
185
- */
186
- saveTtsAudioToContentUri(
187
- samples: number[],
188
- sampleRate: number,
189
- directoryUri: string,
190
- filename: string
191
- ): Promise<string>;
192
-
193
- /**
194
- * Save a text file via Android SAF content URI.
195
- * @param text - Text content to write
196
- * @param directoryUri - Directory content URI (tree or document)
197
- * @param filename - Desired file name (e.g., tts_123.srt)
198
- * @param mimeType - MIME type (e.g., application/x-subrip)
199
- * @returns The content URI of the saved file
200
- */
201
- saveTtsTextToContentUri(
202
- text: string,
203
- directoryUri: string,
204
- filename: string,
205
- mimeType: string
206
- ): Promise<string>;
207
-
208
- /**
209
- * Copy a SAF content URI to a cache file for local playback.
210
- * @param fileUri - Content URI of the saved WAV file
211
- * @param filename - Desired cache filename
212
- * @returns Absolute file path to the cached copy
213
- */
214
- copyTtsContentUriToCache(fileUri: string, filename: string): Promise<string>;
215
-
216
- /**
217
- * Share a TTS audio file (file path or content URI).
218
- * @param fileUri - File path or content URI
219
- * @param mimeType - MIME type (e.g., audio/wav)
220
- */
221
- shareTtsAudio(fileUri: string, mimeType: string): Promise<void>;
222
-
223
- // ==================== Helper Methods ====================
224
-
225
- /**
226
- * List all model folders in the assets/models directory.
227
- * Scans the platform-specific model directory and returns folder names.
228
- *
229
- * @returns Array of model info objects found in assets/models/ (Android) or bundle models/ (iOS)
230
- *
231
- * @example
232
- * ```typescript
233
- * const folders = await listAssetModels();
234
- * // Returns: [{ folder: 'sherpa-onnx-streaming-zipformer-en-2023-06-26', hint: 'stt' }, { folder: 'sherpa-onnx-matcha-icefall-en_US-ljspeech', hint: 'tts' }]
235
- *
236
- * // Then use with resolveModelPath and initialize:
237
- * for (const model of folders) {
238
- * const path = await resolveModelPath({ type: 'asset', path: `models/${model.folder}` });
239
- * const result = await initializeSherpaOnnx(path);
240
- * if (result.success) {
241
- * console.log(`Found models in ${model.folder}:`, result.detectedModels);
242
- * }
243
- * }
244
- * ```
245
- */
246
- listAssetModels(): Promise<
247
- Array<{ folder: string; hint: 'stt' | 'tts' | 'unknown' }>
248
- >;
249
- }
250
-
251
- export default TurboModuleRegistry.getEnforcing<Spec>('SherpaOnnx');
1
+ import { TurboModuleRegistry, type TurboModule } from 'react-native';
2
+
3
+ /** Unified shape for all acceleration backends (QNN, NNAPI, XNNPACK, Core ML). */
4
+ export type AccelerationSupport = {
5
+ providerCompiled: boolean;
6
+ hasAccelerator: boolean;
7
+ canInit: boolean;
8
+ };
9
+
10
+ export interface Spec extends TurboModule {
11
+ /**
12
+ * Test method to verify sherpa-onnx native library is loaded.
13
+ */
14
+ testSherpaInit(): Promise<string>;
15
+
16
+ // ==================== STT Methods ====================
17
+
18
+ /**
19
+ * Initialize Speech-to-Text (STT) with model directory.
20
+ * Expects an absolute path (use resolveModelPath first for asset/file paths).
21
+ * @param instanceId - Unique ID for this engine instance (from createSTT)
22
+ * @param modelDir - Absolute path to model directory
23
+ * @param preferInt8 - Optional: true = prefer int8 models, false = prefer regular models, undefined = try int8 first (default)
24
+ * @param modelType - Optional: explicit model type ('transducer', 'nemo_transducer', 'paraformer', 'nemo_ctc', 'wenet_ctc', 'sense_voice', 'zipformer_ctc', 'whisper', 'funasr_nano', 'fire_red_asr', 'moonshine', 'dolphin', 'canary', 'omnilingual', 'medasr', 'telespeech_ctc', 'auto'), undefined = auto (default)
25
+ * @param debug - Optional: enable debug logging in native layer and sherpa-onnx (default: false)
26
+ * @param hotwordsFile - Optional: path to hotwords file (OfflineRecognizerConfig)
27
+ * @param hotwordsScore - Optional: hotwords score (default in Kotlin: 1.5)
28
+ * @param numThreads - Optional: number of threads for inference (default in Kotlin: 1)
29
+ * @param provider - Optional: provider string e.g. 'cpu' (stored in config only)
30
+ * @param ruleFsts - Optional: path(s) to rule FSTs for ITN (comma-separated)
31
+ * @param ruleFars - Optional: path(s) to rule FARs for ITN (comma-separated)
32
+ * @param dither - Optional: dither for feature extraction (default 0)
33
+ * @param modelOptions - Optional: model-specific options (whisper, senseVoice, canary, funasrNano). Only the block for the loaded model type is applied.
34
+ * @param modelingUnit - Optional: 'cjkchar' | 'bpe' | 'cjkchar+bpe' for hotwords tokenization (OfflineModelConfig.modelingUnit)
35
+ * @param bpeVocab - Optional: path to BPE vocab file (OfflineModelConfig.bpeVocab), used when modelingUnit is bpe or cjkchar+bpe
36
+ * @returns Object with success boolean, array of detected models (each with type and modelDir), and optional modelType and decodingMethod
37
+ */
38
+ initializeStt(
39
+ instanceId: string,
40
+ modelDir: string,
41
+ preferInt8?: boolean,
42
+ modelType?: string,
43
+ debug?: boolean,
44
+ hotwordsFile?: string,
45
+ hotwordsScore?: number,
46
+ numThreads?: number,
47
+ provider?: string,
48
+ ruleFsts?: string,
49
+ ruleFars?: string,
50
+ dither?: number,
51
+ modelOptions?: Object,
52
+ modelingUnit?: string,
53
+ bpeVocab?: string
54
+ ): Promise<{
55
+ success: boolean;
56
+ detectedModels: Array<{ type: string; modelDir: string }>;
57
+ modelType?: string;
58
+ decodingMethod?: string;
59
+ }>;
60
+
61
+ /**
62
+ * Detect STT model type and structure without initializing the recognizer.
63
+ * Uses the same native file-based detection as initializeStt. Useful to show model-specific
64
+ * options before init or to query the type for a given path.
65
+ * @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
66
+ * @param preferInt8 - Optional: true = prefer int8, false = prefer regular, undefined = try int8 first
67
+ * @param modelType - Optional: explicit type or 'auto' (default)
68
+ * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
69
+ */
70
+ detectSttModel(
71
+ modelDir: string,
72
+ preferInt8?: boolean,
73
+ modelType?: string
74
+ ): Promise<{
75
+ success: boolean;
76
+ detectedModels: Array<{ type: string; modelDir: string }>;
77
+ modelType?: string;
78
+ }>;
79
+
80
+ /**
81
+ * Transcribe an audio file. Returns full recognition result (text, tokens, timestamps, lang, emotion, event, durations).
82
+ */
83
+ transcribeFile(
84
+ instanceId: string,
85
+ filePath: string
86
+ ): Promise<{
87
+ text: string;
88
+ tokens: string[];
89
+ timestamps: number[];
90
+ lang: string;
91
+ emotion: string;
92
+ event: string;
93
+ durations: number[];
94
+ }>;
95
+
96
+ /**
97
+ * Transcribe from float PCM samples (e.g. from microphone). Same return type as transcribeFile.
98
+ */
99
+ transcribeSamples(
100
+ instanceId: string,
101
+ samples: number[],
102
+ sampleRate: number
103
+ ): Promise<{
104
+ text: string;
105
+ tokens: string[];
106
+ timestamps: number[];
107
+ lang: string;
108
+ emotion: string;
109
+ event: string;
110
+ durations: number[];
111
+ }>;
112
+
113
+ /**
114
+ * Update recognizer config at runtime (decodingMethod, maxActivePaths, hotwordsFile, hotwordsScore, blankPenalty, ruleFsts, ruleFars).
115
+ */
116
+ setSttConfig(instanceId: string, options: Object): Promise<void>;
117
+
118
+ /**
119
+ * Release STT resources.
120
+ */
121
+ unloadStt(instanceId: string): Promise<void>;
122
+
123
+ // ==================== TTS Methods ====================
124
+
125
+ /**
126
+ * Initialize Text-to-Speech (TTS) with model directory.
127
+ * @param instanceId - Unique ID for this engine instance (from createTTS)
128
+ * @param modelDir - Absolute path to model directory
129
+ * @param modelType - Model type ('vits', 'matcha', 'kokoro', 'kitten', 'pocket', 'zipvoice', 'auto')
130
+ * @param numThreads - Number of threads for inference (default: 2)
131
+ * @param debug - Enable debug logging (default: false)
132
+ * @param noiseScale - Optional noise scale (VITS/Matcha)
133
+ * @param noiseScaleW - Optional noise scale W (VITS)
134
+ * @param lengthScale - Optional length scale (VITS/Matcha/Kokoro/Kitten)
135
+ * @param ruleFsts - Optional path(s) to rule FSTs for TTS (OfflineTtsConfig)
136
+ * @param ruleFars - Optional path(s) to rule FARs for TTS (OfflineTtsConfig)
137
+ * @param maxNumSentences - Optional max sentences per callback (default: 1)
138
+ * @param silenceScale - Optional silence scale on config (default: 0.2)
139
+ * @param provider - Optional execution provider (e.g. 'cpu', 'coreml', 'xnnpack'; default: 'cpu')
140
+ * @returns Object with success boolean, array of detected models (each with type and modelDir), sampleRate, and numSpeakers
141
+ */
142
+ initializeTts(
143
+ instanceId: string,
144
+ modelDir: string,
145
+ modelType: string,
146
+ numThreads: number,
147
+ debug: boolean,
148
+ noiseScale?: number,
149
+ noiseScaleW?: number,
150
+ lengthScale?: number,
151
+ ruleFsts?: string,
152
+ ruleFars?: string,
153
+ maxNumSentences?: number,
154
+ silenceScale?: number,
155
+ provider?: string
156
+ ): Promise<{
157
+ success: boolean;
158
+ detectedModels: Array<{ type: string; modelDir: string }>;
159
+ sampleRate: number;
160
+ numSpeakers: number;
161
+ }>;
162
+
163
+ /**
164
+ * Detect TTS model type and structure without initializing the engine.
165
+ * Uses the same native file-based detection as initializeTts.
166
+ * @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
167
+ * @param modelType - Optional: explicit type or 'auto' (default)
168
+ * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
169
+ */
170
+ detectTtsModel(
171
+ modelDir: string,
172
+ modelType?: string
173
+ ): Promise<{
174
+ success: boolean;
175
+ detectedModels: Array<{ type: string; modelDir: string }>;
176
+ modelType?: string;
177
+ }>;
178
+
179
+ /**
180
+ * Update TTS model parameters by re-initializing with stored config.
181
+ * @param instanceId - Unique ID for this engine instance
182
+ * @param noiseScale - Optional noise scale override
183
+ * @param noiseScaleW - Optional noise scale W override
184
+ * @param lengthScale - Optional length scale override
185
+ * @returns Object with success boolean, array of detected models, sampleRate, and numSpeakers
186
+ */
187
+ updateTtsParams(
188
+ instanceId: string,
189
+ noiseScale?: number | null,
190
+ noiseScaleW?: number | null,
191
+ lengthScale?: number | null
192
+ ): Promise<{
193
+ success: boolean;
194
+ detectedModels: Array<{ type: string; modelDir: string }>;
195
+ sampleRate: number;
196
+ numSpeakers: number;
197
+ }>;
198
+
199
+ /**
200
+ * Generate speech from text.
201
+ * @param instanceId - Unique ID for this engine instance
202
+ * @param text - Text to convert to speech
203
+ * @param options - Generation options (sid, speed, referenceAudio, referenceText, numSteps, silenceScale, extra)
204
+ * @returns Object with { samples: number[], sampleRate: number }
205
+ */
206
+ generateTts(
207
+ instanceId: string,
208
+ text: string,
209
+ options: Object
210
+ ): Promise<{
211
+ samples: number[];
212
+ sampleRate: number;
213
+ }>;
214
+
215
+ /**
216
+ * Generate speech with subtitle/timestamp metadata.
217
+ * @param instanceId - Unique ID for this engine instance
218
+ * @param text - Text to convert to speech
219
+ * @param options - Generation options (sid, speed, referenceAudio, referenceText, numSteps, silenceScale, extra)
220
+ * @returns Object with samples, sampleRate, subtitles, and estimated flag
221
+ */
222
+ generateTtsWithTimestamps(
223
+ instanceId: string,
224
+ text: string,
225
+ options: Object
226
+ ): Promise<{
227
+ samples: number[];
228
+ sampleRate: number;
229
+ subtitles: Array<{ text: string; start: number; end: number }>;
230
+ estimated: boolean;
231
+ }>;
232
+
233
+ /**
234
+ * Generate speech in streaming mode (emits chunk events).
235
+ * @param instanceId - Unique ID for this engine instance
236
+ * @param text - Text to convert to speech
237
+ * @param options - Generation options (sid, speed, referenceAudio, referenceText, numSteps, silenceScale, extra)
238
+ */
239
+ generateTtsStream(
240
+ instanceId: string,
241
+ text: string,
242
+ options: Object
243
+ ): Promise<void>;
244
+
245
+ /**
246
+ * Cancel an ongoing streaming TTS generation.
247
+ * @param instanceId - Unique ID for this engine instance
248
+ */
249
+ cancelTtsStream(instanceId: string): Promise<void>;
250
+
251
+ /**
252
+ * Start PCM playback for streaming TTS.
253
+ * @param instanceId - Unique ID for this engine instance
254
+ * @param sampleRate - Sample rate in Hz
255
+ * @param channels - Number of channels (1 = mono)
256
+ */
257
+ startTtsPcmPlayer(
258
+ instanceId: string,
259
+ sampleRate: number,
260
+ channels: number
261
+ ): Promise<void>;
262
+
263
+ /**
264
+ * Write PCM samples to the streaming TTS player.
265
+ * @param instanceId - Unique ID for this engine instance
266
+ * @param samples - Float PCM samples in range [-1.0, 1.0]
267
+ */
268
+ writeTtsPcmChunk(instanceId: string, samples: number[]): Promise<void>;
269
+
270
+ /**
271
+ * Stop PCM playback for streaming TTS.
272
+ * @param instanceId - Unique ID for this engine instance
273
+ */
274
+ stopTtsPcmPlayer(instanceId: string): Promise<void>;
275
+
276
+ /**
277
+ * Get the sample rate of the initialized TTS model.
278
+ * @param instanceId - Unique ID for this engine instance
279
+ * @returns Sample rate in Hz
280
+ */
281
+ getTtsSampleRate(instanceId: string): Promise<number>;
282
+
283
+ /**
284
+ * Get the number of speakers/voices available in the model.
285
+ * @param instanceId - Unique ID for this engine instance
286
+ * @returns Number of speakers (0 or 1 for single-speaker models)
287
+ */
288
+ getTtsNumSpeakers(instanceId: string): Promise<number>;
289
+
290
+ /**
291
+ * Release TTS resources.
292
+ * @param instanceId - Unique ID for this engine instance
293
+ */
294
+ unloadTts(instanceId: string): Promise<void>;
295
+
296
+ /**
297
+ * Save TTS audio samples to a WAV file.
298
+ * @param samples - Audio samples array
299
+ * @param sampleRate - Sample rate in Hz
300
+ * @param filePath - Absolute path at which to save the WAV file
301
+ * @returns The file path where audio was saved
302
+ */
303
+ saveTtsAudioToFile(
304
+ samples: number[],
305
+ sampleRate: number,
306
+ filePath: string
307
+ ): Promise<string>;
308
+
309
+ /**
310
+ * Save TTS audio samples to a WAV file via Android SAF content URI.
311
+ * @param samples - Audio samples array
312
+ * @param sampleRate - Sample rate in Hz
313
+ * @param directoryUri - Directory content URI (tree or document)
314
+ * @param filename - Desired file name (e.g., tts_123.wav)
315
+ * @returns The content URI of the saved file
316
+ */
317
+ saveTtsAudioToContentUri(
318
+ samples: number[],
319
+ sampleRate: number,
320
+ directoryUri: string,
321
+ filename: string
322
+ ): Promise<string>;
323
+
324
+ /**
325
+ * Save a text file via Android SAF content URI.
326
+ * @param text - Text content to write
327
+ * @param directoryUri - Directory content URI (tree or document)
328
+ * @param filename - Desired file name (e.g., tts_123.srt)
329
+ * @param mimeType - MIME type (e.g., application/x-subrip)
330
+ * @returns The content URI of the saved file
331
+ */
332
+ saveTtsTextToContentUri(
333
+ text: string,
334
+ directoryUri: string,
335
+ filename: string,
336
+ mimeType: string
337
+ ): Promise<string>;
338
+
339
+ /**
340
+ * Copy a SAF content URI to a cache file for local playback.
341
+ * @param fileUri - Content URI of the saved WAV file
342
+ * @param filename - Desired cache filename
343
+ * @returns Absolute file path to the cached copy
344
+ */
345
+ copyTtsContentUriToCache(fileUri: string, filename: string): Promise<string>;
346
+
347
+ /**
348
+ * Share a TTS audio file (file path or content URI).
349
+ * @param fileUri - File path or content URI
350
+ * @param mimeType - MIME type (e.g., audio/wav)
351
+ */
352
+ shareTtsAudio(fileUri: string, mimeType: string): Promise<void>;
353
+
354
+ // ==================== Helper - Assets ====================
355
+
356
+ /**
357
+ * Resolve model path based on configuration.
358
+ * Handles asset paths, file system paths, and auto-detection.
359
+ * Returns an absolute path that can be used by native code.
360
+ *
361
+ * @param config - Object with 'type' ('asset' | 'file' | 'auto') and 'path' (string)
362
+ */
363
+ resolveModelPath(config: { type: string; path: string }): Promise<string>;
364
+
365
+ /**
366
+ * List all model folders in the assets/models directory.
367
+ * Scans the platform-specific model directory and returns each folder name with a type hint ('stt' | 'tts' | 'unknown').
368
+ *
369
+ * @returns Array of model info objects found in assets/models/ (Android) or bundle models/ (iOS)
370
+ *
371
+ * @example
372
+ * ```typescript
373
+ * const folders = await listAssetModels();
374
+ * // Returns: [{ folder: 'sherpa-onnx-streaming-zipformer-en-2023-06-26', hint: 'stt' }, { folder: 'sherpa-onnx-matcha-icefall-en_US-ljspeech', hint: 'tts' }]
375
+ *
376
+ * // Then use with resolveModelPath and initialize:
377
+ * for (const model of folders) {
378
+ * const path = await resolveModelPath({ type: 'asset', path: `models/${model.folder}` });
379
+ * const result = await initializeStt(path);
380
+ * if (result.success) {
381
+ * console.log(`Found models in ${model.folder}:`, result.detectedModels);
382
+ * }
383
+ * }
384
+ * ```
385
+ */
386
+ listAssetModels(): Promise<
387
+ Array<{ folder: string; hint: 'stt' | 'tts' | 'unknown' }>
388
+ >;
389
+
390
+ /**
391
+ * List model folders under a specific filesystem path.
392
+ * When recursive is true, returns relative folder paths under the base path.
393
+ */
394
+ listModelsAtPath(
395
+ path: string,
396
+ recursive: boolean
397
+ ): Promise<Array<{ folder: string; hint: 'stt' | 'tts' | 'unknown' }>>;
398
+
399
+ /**
400
+ * **Play Asset Delivery (PAD):** Returns the filesystem path to the models directory
401
+ * of an Android asset pack, or null if the pack is not available (e.g. not installed).
402
+ * Use this to list and load models that are delivered via PAD instead of bundled app assets.
403
+ */
404
+ getAssetPackPath(packName: string): Promise<string | null>;
405
+
406
+ // ==================== Helper - Extraction ====================
407
+
408
+ /**
409
+ * Extract a .tar.bz2 archive to a target folder.
410
+ * Returns { success, path } or { success, reason }; an optional sha256 field may also be present.
411
+ */
412
+ extractTarBz2(
413
+ sourcePath: string,
414
+ targetPath: string,
415
+ force: boolean
416
+ ): Promise<{
417
+ success: boolean;
418
+ path?: string;
419
+ sha256?: string;
420
+ reason?: string;
421
+ }>;
422
+
423
+ /**
424
+ * Cancel any in-progress tar.bz2 extraction.
425
+ */
426
+ cancelExtractTarBz2(): Promise<void>;
427
+
428
+ /**
429
+ * Compute SHA-256 of a file and return the hex digest.
430
+ */
431
+ computeFileSha256(filePath: string): Promise<string>;
432
+
433
+ // ==================== Helper - Audio conversion ====================
434
+
435
+ /**
436
+ * Convert an arbitrary audio file to the requested format (e.g. "mp3", "flac", "wav").
437
+ * Requires FFmpeg prebuilts when called on Android.
438
+ * For MP3 (libshine), outputSampleRateHz can be 32000, 44100, or 48000; 0 or omitted = 44100.
439
+ * WAV output is always 16 kHz mono (sherpa-onnx). Resolves when conversion succeeds, rejects with an error message on failure.
440
+ */
441
+ convertAudioToFormat(
442
+ inputPath: string,
443
+ outputPath: string,
444
+ format: string,
445
+ outputSampleRateHz?: number
446
+ ): Promise<void>;
447
+
448
+ /**
449
+ * Convert any supported audio file to WAV 16 kHz mono 16-bit PCM.
450
+ * Requires FFmpeg prebuilts when called on Android.
451
+ */
452
+ convertAudioToWav16k(inputPath: string, outputPath: string): Promise<void>;
453
+
454
+ // ==================== Execution Provider Methods ====================
455
+
456
+ /**
457
+ * Return the list of available ONNX Runtime execution providers (e.g. "CPU", "NNAPI", "QNN", "XNNPACK").
458
+ * Requires the ORT Java bridge (libonnxruntime4j_jni.so + OrtEnvironment class) from the onnxruntime AAR.
459
+ */
460
+ getAvailableProviders(): Promise<string[]>;
461
+
462
+ // ==================== Acceleration support (unified format) ====================
463
+
464
+ /**
465
+ * Unified acceleration support: providerCompiled (ORT EP built in), hasAccelerator (NPU/ANE present), canInit (session with EP works).
466
+ * All get*Support methods return this shape. Optional modelBase64: if omitted, the SDK uses an embedded test model for canInit.
467
+ */
468
+ getQnnSupport(modelBase64?: string): Promise<AccelerationSupport>;
469
+ getNnapiSupport(modelBase64?: string): Promise<AccelerationSupport>;
470
+ getXnnpackSupport(modelBase64?: string): Promise<AccelerationSupport>;
471
+ getCoreMlSupport(modelBase64?: string): Promise<AccelerationSupport>;
472
+ }
473
+
474
+ export default TurboModuleRegistry.getEnforcing<Spec>('SherpaOnnx');