react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
package/src/NativeSherpaOnnx.ts
CHANGED
|
@@ -1,251 +1,474 @@
|
|
|
1
|
-
import { TurboModuleRegistry, type TurboModule } from 'react-native';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
*
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
/**
|
|
19
|
-
* Initialize
|
|
20
|
-
* Expects an absolute path (use resolveModelPath first for asset/file paths).
|
|
21
|
-
* @param
|
|
22
|
-
* @param
|
|
23
|
-
* @param
|
|
24
|
-
* @
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
*
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
*
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
*
|
|
132
|
-
* @param
|
|
133
|
-
* @param
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
*
|
|
139
|
-
* @param
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
*
|
|
167
|
-
* @param
|
|
168
|
-
* @
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
*
|
|
181
|
-
* @param
|
|
182
|
-
* @param
|
|
183
|
-
* @param
|
|
184
|
-
* @
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
*
|
|
218
|
-
* @param
|
|
219
|
-
* @param
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
*
|
|
235
|
-
*
|
|
236
|
-
*
|
|
237
|
-
*
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
1
|
+
import { TurboModuleRegistry, type TurboModule } from 'react-native';
|
|
2
|
+
|
|
3
|
+
/** Unified shape for all acceleration backends (QNN, NNAPI, XNNPACK, Core ML). */
|
|
4
|
+
export type AccelerationSupport = {
|
|
5
|
+
/** True when the ONNX Runtime execution provider for this backend is built in. */
providerCompiled: boolean;
|
|
6
|
+
/** True when the matching hardware accelerator (NPU/ANE) is present. */
hasAccelerator: boolean;
|
|
7
|
+
/** True when a session using this execution provider could be initialized. */
canInit: boolean;
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
/**
 * TurboModule contract for the native module registered as 'SherpaOnnx'.
 *
 * Every method is asynchronous and returns a Promise. Engine-scoped methods take
 * an `instanceId` that identifies one STT or TTS engine instance.
 *
 * Option bags are declared as `Object`.
 * NOTE(review): presumably this is because React Native codegen maps `Object`
 * to an untyped dictionary - confirm before narrowing these parameter types.
 */
export interface Spec extends TurboModule {
  /**
   * Test method to verify that the sherpa-onnx native library is loaded.
   * @returns A string reported by the native layer
   */
  testSherpaInit(): Promise<string>;

  // ==================== STT Methods ====================

  /**
   * Initialize Speech-to-Text (STT) with a model directory.
   * Expects an absolute path (use resolveModelPath first for asset/file paths).
   * @param instanceId - Unique ID for this engine instance (from createSTT)
   * @param modelDir - Absolute path to model directory
   * @param preferInt8 - Optional: true = prefer int8 models, false = prefer regular models, undefined = try int8 first (default)
   * @param modelType - Optional: explicit model type ('transducer', 'nemo_transducer', 'paraformer', 'nemo_ctc', 'wenet_ctc', 'sense_voice', 'zipformer_ctc', 'whisper', 'funasr_nano', 'fire_red_asr', 'moonshine', 'dolphin', 'canary', 'omnilingual', 'medasr', 'telespeech_ctc', 'auto'), undefined = auto (default)
   * @param debug - Optional: enable debug logging in native layer and sherpa-onnx (default: false)
   * @param hotwordsFile - Optional: path to hotwords file (OfflineRecognizerConfig)
   * @param hotwordsScore - Optional: hotwords score (default in Kotlin 1.5)
   * @param numThreads - Optional: number of threads for inference (default in Kotlin: 1)
   * @param provider - Optional: provider string e.g. 'cpu' (stored in config only)
   * @param ruleFsts - Optional: path(s) to rule FSTs for ITN (comma-separated)
   * @param ruleFars - Optional: path(s) to rule FARs for ITN (comma-separated)
   * @param dither - Optional: dither for feature extraction (default 0)
   * @param modelOptions - Optional: model-specific options (whisper, senseVoice, canary, funasrNano). Only the block for the loaded model type is applied.
   * @param modelingUnit - Optional: 'cjkchar' | 'bpe' | 'cjkchar+bpe' for hotwords tokenization (OfflineModelConfig.modelingUnit)
   * @param bpeVocab - Optional: path to BPE vocab file (OfflineModelConfig.bpeVocab), used when modelingUnit is bpe or cjkchar+bpe
   * @returns Object with success boolean, array of detected models (each with type and modelDir),
   *          and the optional fields modelType and decodingMethod
   */
  initializeStt(
    instanceId: string,
    modelDir: string,
    preferInt8?: boolean,
    modelType?: string,
    debug?: boolean,
    hotwordsFile?: string,
    hotwordsScore?: number,
    numThreads?: number,
    provider?: string,
    ruleFsts?: string,
    ruleFars?: string,
    dither?: number,
    modelOptions?: Object,
    modelingUnit?: string,
    bpeVocab?: string
  ): Promise<{
    success: boolean;
    detectedModels: Array<{ type: string; modelDir: string }>;
    modelType?: string;
    decodingMethod?: string;
  }>;

  /**
   * Detect STT model type and structure without initializing the recognizer.
   * Uses the same native file-based detection as initializeStt. Useful to show model-specific
   * options before init or to query the type for a given path.
   * @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
   * @param preferInt8 - Optional: true = prefer int8, false = prefer regular, undefined = try int8 first
   * @param modelType - Optional: explicit type or 'auto' (default)
   * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
   */
  detectSttModel(
    modelDir: string,
    preferInt8?: boolean,
    modelType?: string
  ): Promise<{
    success: boolean;
    detectedModels: Array<{ type: string; modelDir: string }>;
    modelType?: string;
  }>;

  /**
   * Transcribe an audio file.
   * @param instanceId - Unique ID for this engine instance
   * @param filePath - Path to the audio file to transcribe
   * @returns Full recognition result (text, tokens, timestamps, lang, emotion, event, durations)
   */
  transcribeFile(
    instanceId: string,
    filePath: string
  ): Promise<{
    text: string;
    tokens: string[];
    timestamps: number[];
    lang: string;
    emotion: string;
    event: string;
    durations: number[];
  }>;

  /**
   * Transcribe from float PCM samples (e.g. from microphone).
   * @param instanceId - Unique ID for this engine instance
   * @param samples - Float PCM samples
   * @param sampleRate - Sample rate of `samples`
   * @returns Same result shape as transcribeFile
   */
  transcribeSamples(
    instanceId: string,
    samples: number[],
    sampleRate: number
  ): Promise<{
    text: string;
    tokens: string[];
    timestamps: number[];
    lang: string;
    emotion: string;
    event: string;
    durations: number[];
  }>;

  /**
   * Update recognizer config at runtime.
   * @param instanceId - Unique ID for this engine instance
   * @param options - Config fields to update (decodingMethod, maxActivePaths, hotwordsFile, hotwordsScore, blankPenalty, ruleFsts, ruleFars)
   */
  setSttConfig(instanceId: string, options: Object): Promise<void>;

  /**
   * Release STT resources.
   * @param instanceId - Unique ID for this engine instance
   */
  unloadStt(instanceId: string): Promise<void>;

  // ==================== TTS Methods ====================

  /**
   * Initialize Text-to-Speech (TTS) with a model directory.
   *
   * modelType, numThreads and debug are required at this layer.
   * NOTE(review): the defaults quoted below are presumably applied by the
   * calling JS wrapper - confirm.
   * @param instanceId - Unique ID for this engine instance (from createTTS)
   * @param modelDir - Absolute path to model directory
   * @param modelType - Model type ('vits', 'matcha', 'kokoro', 'kitten', 'pocket', 'zipvoice', 'auto')
   * @param numThreads - Number of threads for inference (default: 2)
   * @param debug - Enable debug logging (default: false)
   * @param noiseScale - Optional noise scale (VITS/Matcha)
   * @param noiseScaleW - Optional noise scale W (VITS)
   * @param lengthScale - Optional length scale (VITS/Matcha/Kokoro/Kitten)
   * @param ruleFsts - Optional path(s) to rule FSTs for TTS (OfflineTtsConfig)
   * @param ruleFars - Optional path(s) to rule FARs for TTS (OfflineTtsConfig)
   * @param maxNumSentences - Optional max sentences per callback (default: 1)
   * @param silenceScale - Optional silence scale on config (default: 0.2)
   * @param provider - Optional execution provider (e.g. 'cpu', 'coreml', 'xnnpack'; default: 'cpu')
   * @returns Object with success boolean, array of detected models (each with type and modelDir),
   *          sampleRate and numSpeakers
   */
  initializeTts(
    instanceId: string,
    modelDir: string,
    modelType: string,
    numThreads: number,
    debug: boolean,
    noiseScale?: number,
    noiseScaleW?: number,
    lengthScale?: number,
    ruleFsts?: string,
    ruleFars?: string,
    maxNumSentences?: number,
    silenceScale?: number,
    provider?: string
  ): Promise<{
    success: boolean;
    detectedModels: Array<{ type: string; modelDir: string }>;
    sampleRate: number;
    numSpeakers: number;
  }>;

  /**
   * Detect TTS model type and structure without initializing the engine.
   * Uses the same native file-based detection as initializeTts.
   * @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
   * @param modelType - Optional: explicit type or 'auto' (default)
   * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
   */
  detectTtsModel(
    modelDir: string,
    modelType?: string
  ): Promise<{
    success: boolean;
    detectedModels: Array<{ type: string; modelDir: string }>;
    modelType?: string;
  }>;

  /**
   * Update TTS model parameters by re-initializing with stored config.
   * @param instanceId - Unique ID for this engine instance
   * @param noiseScale - Optional noise scale override
   * @param noiseScaleW - Optional noise scale W override
   * @param lengthScale - Optional length scale override
   * @returns Object with success boolean, array of detected models, sampleRate and numSpeakers
   */
  updateTtsParams(
    instanceId: string,
    noiseScale?: number | null,
    noiseScaleW?: number | null,
    lengthScale?: number | null
  ): Promise<{
    success: boolean;
    detectedModels: Array<{ type: string; modelDir: string }>;
    sampleRate: number;
    numSpeakers: number;
  }>;

  /**
   * Generate speech from text.
   * @param instanceId - Unique ID for this engine instance
   * @param text - Text to convert to speech
   * @param options - Generation options (sid, speed, referenceAudio, referenceText, numSteps, silenceScale, extra)
   * @returns Object with { samples: number[], sampleRate: number }
   */
  generateTts(
    instanceId: string,
    text: string,
    options: Object
  ): Promise<{
    samples: number[];
    sampleRate: number;
  }>;

  /**
   * Generate speech with subtitle/timestamp metadata.
   * @param instanceId - Unique ID for this engine instance
   * @param text - Text to convert to speech
   * @param options - Generation options (sid, speed, referenceAudio, referenceText, numSteps, silenceScale, extra)
   * @returns Object with samples, sampleRate, subtitles (each with text, start, end), and estimated flag
   */
  generateTtsWithTimestamps(
    instanceId: string,
    text: string,
    options: Object
  ): Promise<{
    samples: number[];
    sampleRate: number;
    subtitles: Array<{ text: string; start: number; end: number }>;
    estimated: boolean;
  }>;

  /**
   * Generate speech in streaming mode (emits chunk events).
   * @param instanceId - Unique ID for this engine instance
   * @param text - Text to convert to speech
   * @param options - Generation options (sid, speed, referenceAudio, referenceText, numSteps, silenceScale, extra)
   */
  generateTtsStream(
    instanceId: string,
    text: string,
    options: Object
  ): Promise<void>;

  /**
   * Cancel an ongoing streaming TTS generation.
   * @param instanceId - Unique ID for this engine instance
   */
  cancelTtsStream(instanceId: string): Promise<void>;

  /**
   * Start PCM playback for streaming TTS.
   * @param instanceId - Unique ID for this engine instance
   * @param sampleRate - Sample rate in Hz
   * @param channels - Number of channels (1 = mono)
   */
  startTtsPcmPlayer(
    instanceId: string,
    sampleRate: number,
    channels: number
  ): Promise<void>;

  /**
   * Write PCM samples to the streaming TTS player.
   * @param instanceId - Unique ID for this engine instance
   * @param samples - Float PCM samples in range [-1.0, 1.0]
   */
  writeTtsPcmChunk(instanceId: string, samples: number[]): Promise<void>;

  /**
   * Stop PCM playback for streaming TTS.
   * @param instanceId - Unique ID for this engine instance
   */
  stopTtsPcmPlayer(instanceId: string): Promise<void>;

  /**
   * Get the sample rate of the initialized TTS model.
   * @param instanceId - Unique ID for this engine instance
   * @returns Sample rate in Hz
   */
  getTtsSampleRate(instanceId: string): Promise<number>;

  /**
   * Get the number of speakers/voices available in the model.
   * @param instanceId - Unique ID for this engine instance
   * @returns Number of speakers (0 or 1 for single-speaker models)
   */
  getTtsNumSpeakers(instanceId: string): Promise<number>;

  /**
   * Release TTS resources.
   * @param instanceId - Unique ID for this engine instance
   */
  unloadTts(instanceId: string): Promise<void>;

  /**
   * Save TTS audio samples to a WAV file.
   * @param samples - Audio samples array
   * @param sampleRate - Sample rate in Hz
   * @param filePath - Absolute path where to save the WAV file
   * @returns The file path where audio was saved
   */
  saveTtsAudioToFile(
    samples: number[],
    sampleRate: number,
    filePath: string
  ): Promise<string>;

  /**
   * Save TTS audio samples to a WAV file via Android SAF content URI.
   * @param samples - Audio samples array
   * @param sampleRate - Sample rate in Hz
   * @param directoryUri - Directory content URI (tree or document)
   * @param filename - Desired file name (e.g., tts_123.wav)
   * @returns The content URI of the saved file
   */
  saveTtsAudioToContentUri(
    samples: number[],
    sampleRate: number,
    directoryUri: string,
    filename: string
  ): Promise<string>;

  /**
   * Save a text file via Android SAF content URI.
   * @param text - Text content to write
   * @param directoryUri - Directory content URI (tree or document)
   * @param filename - Desired file name (e.g., tts_123.srt)
   * @param mimeType - MIME type (e.g., application/x-subrip)
   * @returns The content URI of the saved file
   */
  saveTtsTextToContentUri(
    text: string,
    directoryUri: string,
    filename: string,
    mimeType: string
  ): Promise<string>;

  /**
   * Copy a SAF content URI to a cache file for local playback.
   * @param fileUri - Content URI of the saved WAV file
   * @param filename - Desired cache filename
   * @returns Absolute file path to the cached copy
   */
  copyTtsContentUriToCache(fileUri: string, filename: string): Promise<string>;

  /**
   * Share a TTS audio file (file path or content URI).
   * @param fileUri - File path or content URI
   * @param mimeType - MIME type (e.g., audio/wav)
   */
  shareTtsAudio(fileUri: string, mimeType: string): Promise<void>;

  // ==================== Helper - Assets ====================

  /**
   * Resolve model path based on configuration.
   * Handles asset paths, file system paths, and auto-detection.
   *
   * @param config - Object with 'type' ('asset' | 'file' | 'auto') and 'path' (string)
   * @returns An absolute path that can be used by native code
   */
  resolveModelPath(config: { type: string; path: string }): Promise<string>;

  /**
   * List all model folders in the assets/models directory.
   * Scans the platform-specific model directory and returns folder names.
   *
   * @returns Array of model info objects found in assets/models/ (Android) or bundle models/ (iOS)
   *
   * @example
   * ```typescript
   * const folders = await listAssetModels();
   * // Returns: [{ folder: 'sherpa-onnx-streaming-zipformer-en-2023-06-26', hint: 'stt' }, { folder: 'sherpa-onnx-matcha-icefall-en_US-ljspeech', hint: 'tts' }]
   *
   * // Then use with resolveModelPath and initialize:
   * for (const model of folders) {
   *   if (model.hint === 'tts') continue; // skip folders hinted as TTS models
   *   const path = await resolveModelPath({ type: 'asset', path: `models/${model.folder}` });
   *   // instanceId: unique ID for the engine instance (first argument of initializeStt)
   *   const result = await initializeStt(instanceId, path);
   *   if (result.success) {
   *     console.log(`Found models in ${model.folder}:`, result.detectedModels);
   *   }
   * }
   * ```
   */
  listAssetModels(): Promise<
    Array<{ folder: string; hint: 'stt' | 'tts' | 'unknown' }>
  >;

  /**
   * List model folders under a specific filesystem path.
   * When recursive is true, returns relative folder paths under the base path.
   * @param path - Base filesystem path to scan
   * @param recursive - Whether to return relative folder paths found below the base path
   * @returns Array of model info objects ({ folder, hint })
   */
  listModelsAtPath(
    path: string,
    recursive: boolean
  ): Promise<Array<{ folder: string; hint: 'stt' | 'tts' | 'unknown' }>>;

  /**
   * **Play Asset Delivery (PAD):** Returns the filesystem path to the models directory
   * of an Android asset pack, or null if the pack is not available (e.g. not installed).
   * Use this to list and load models that are delivered via PAD instead of bundled app assets.
   * @param packName - Name of the asset pack
   * @returns Filesystem path to the pack's models directory, or null when unavailable
   */
  getAssetPackPath(packName: string): Promise<string | null>;

  // ==================== Helper - Extraction ====================

  /**
   * Extract a .tar.bz2 archive to a target folder.
   * @param sourcePath - Path to the .tar.bz2 archive
   * @param targetPath - Folder to extract into
   * @param force - NOTE(review): presumably forces extraction over an existing target - confirm against the native implementation
   * @returns { success, path } or { success, reason }; the result may also carry the optional
   *          `sha256` field (NOTE(review): presumably the archive digest - confirm)
   */
  extractTarBz2(
    sourcePath: string,
    targetPath: string,
    force: boolean
  ): Promise<{
    success: boolean;
    path?: string;
    sha256?: string;
    reason?: string;
  }>;

  /**
   * Cancel any in-progress tar.bz2 extraction.
   */
  cancelExtractTarBz2(): Promise<void>;

  /**
   * Compute SHA-256 of a file and return the hex digest.
   * @param filePath - Path to the file to hash
   * @returns Hex-encoded SHA-256 digest
   */
  computeFileSha256(filePath: string): Promise<string>;

  // ==================== Helper - Audio conversion ====================

  /**
   * Convert arbitrary audio file to requested format (e.g. "mp3", "flac", "wav").
   * Requires FFmpeg prebuilts when called on Android.
   * For MP3 (libshine), outputSampleRateHz can be 32000, 44100, or 48000; 0 or omitted = 44100.
   * WAV output is always 16 kHz mono (sherpa-onnx).
   * Resolves when conversion succeeds, rejects with an error message on failure.
   * @param inputPath - Path to the source audio file
   * @param outputPath - Path where the converted file is written
   * @param format - Target format (e.g. "mp3", "flac", "wav")
   * @param outputSampleRateHz - Optional output sample rate in Hz (see MP3 note above)
   */
  convertAudioToFormat(
    inputPath: string,
    outputPath: string,
    format: string,
    outputSampleRateHz?: number
  ): Promise<void>;

  /**
   * Convert any supported audio file to WAV 16 kHz mono 16-bit PCM.
   * Requires FFmpeg prebuilts when called on Android.
   * @param inputPath - Path to the source audio file
   * @param outputPath - Path where the WAV file is written
   */
  convertAudioToWav16k(inputPath: string, outputPath: string): Promise<void>;

  // ==================== Execution Provider Methods ====================

  /**
   * Return the list of available ONNX Runtime execution providers (e.g. "CPU", "NNAPI", "QNN", "XNNPACK").
   * Requires the ORT Java bridge (libonnxruntime4j_jni.so + OrtEnvironment class) from the onnxruntime AAR.
   */
  getAvailableProviders(): Promise<string[]>;

  // ==================== Acceleration support (unified format) ====================

  /**
   * QNN acceleration support.
   * Unified shape: providerCompiled (ORT EP built in), hasAccelerator (NPU/ANE present), canInit (session with EP works).
   * All get*Support methods return this shape.
   * @param modelBase64 - Optional: if omitted, SDK uses embedded test model for canInit
   */
  getQnnSupport(modelBase64?: string): Promise<AccelerationSupport>;

  /**
   * NNAPI acceleration support (same unified shape as getQnnSupport).
   * @param modelBase64 - Optional: if omitted, SDK uses embedded test model for canInit
   */
  getNnapiSupport(modelBase64?: string): Promise<AccelerationSupport>;

  /**
   * XNNPACK acceleration support (same unified shape as getQnnSupport).
   * @param modelBase64 - Optional: if omitted, SDK uses embedded test model for canInit
   */
  getXnnpackSupport(modelBase64?: string): Promise<AccelerationSupport>;

  /**
   * Core ML acceleration support (same unified shape as getQnnSupport).
   * @param modelBase64 - Optional: if omitted, SDK uses embedded test model for canInit
   */
  getCoreMlSupport(modelBase64?: string): Promise<AccelerationSupport>;
}
|
|
473
|
+
|
|
474
|
+
// Look up the native TurboModule registered under the name 'SherpaOnnx', typed as Spec.
// NOTE(review): getEnforcing is expected to throw when the module is not registered -
// confirm against the React Native TurboModuleRegistry documentation.
export default TurboModuleRegistry.getEnforcing<Spec>('SherpaOnnx');
|