react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
package/ios/SherpaOnnx+TTS.mm
CHANGED
|
@@ -1,712 +1,1101 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
std::
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
}
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
{
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
{
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
{
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
1
|
+
/**
|
|
2
|
+
* SherpaOnnx+TTS.mm
|
|
3
|
+
*
|
|
4
|
+
* Purpose: TTS (text-to-speech) TurboModule methods: createTTS, releaseTTS, generateTTS, and event
|
|
5
|
+
* emission. Uses sherpa-onnx-tts-wrapper for native synthesis and sherpa-onnx-model-detect for model detection.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#import "SherpaOnnx.h"
|
|
9
|
+
#import <React/RCTLog.h>
|
|
10
|
+
#import <React/RCTUtils.h>
|
|
11
|
+
#import <UIKit/UIKit.h>
|
|
12
|
+
#import <AVFoundation/AVFoundation.h>
|
|
13
|
+
|
|
14
|
+
#include "sherpa-onnx-tts-wrapper.h"
|
|
15
|
+
#include "sherpa-onnx-model-detect.h"
|
|
16
|
+
#include <atomic>
|
|
17
|
+
#include <condition_variable>
|
|
18
|
+
#include <memory>
|
|
19
|
+
#include <mutex>
|
|
20
|
+
#include <sstream>
|
|
21
|
+
#include <string>
|
|
22
|
+
#include <unordered_map>
|
|
23
|
+
#include <vector>
|
|
24
|
+
#include <chrono>
|
|
25
|
+
|
|
26
|
+
/**
 * Per-instance TTS state, keyed by the JS-supplied instance id in g_tts_instances.
 *
 * Holds the native synthesis wrapper, the streaming flags, the AVFoundation playback
 * objects, and a copy of the configuration last passed to a successful initialization
 * so the engine can be re-created later with adjusted parameters.
 */
struct TtsInstanceState {
    // Native synthesis engine; null until the instance is first initialized.
    std::unique_ptr<sherpaonnx::TtsWrapper> wrapper;

    // Streaming flags. Atomic because they are read and written without further locking.
    std::atomic<bool> streamRunning{false};
    std::atomic<bool> streamCancelled{false};

    // AVFoundation playback objects (strong references; strong is the ARC default).
    AVAudioEngine *engine = nil;
    AVAudioPlayerNode *player = nil;
    AVAudioFormat *format = nil;

    // Configuration remembered from the last successful initialization.
    NSString *modelDir = nil;
    NSString *modelType = nil;
    int32_t numThreads = 2;
    BOOL debug = NO;
    NSNumber *noiseScale = nil;
    NSNumber *noiseScaleW = nil;
    NSNumber *lengthScale = nil;
    NSString *ruleFsts = nil;
    NSString *ruleFars = nil;
    NSNumber *maxNumSentences = nil;
    NSNumber *silenceScale = nil;
    NSString *provider = nil;
};

// Registry of live TTS instances, keyed by instance id. Guarded by g_tts_mutex.
static std::unordered_map<std::string, std::shared_ptr<TtsInstanceState>> g_tts_instances;
// Serializes access to g_tts_instances and to the instances it owns.
static std::mutex g_tts_mutex;
// Condition variable for stream state changes (presumably paired with g_tts_mutex — confirm at the wait sites).
static std::condition_variable g_tts_stream_cv;
|
|
50
|
+
|
|
51
|
+
/**
 * Maps a detected TTS model kind to the lowercase identifier reported to JS.
 *
 * @param kind Model kind selected by the detector.
 * @return One of @"vits", @"matcha", @"kokoro", @"kitten", @"zipvoice"; @"unknown" for any other value.
 */
static NSString *ttsModelKindToNSString(sherpaonnx::TtsModelKind kind) {
    if (kind == sherpaonnx::TtsModelKind::kVits) {
        return @"vits";
    }
    if (kind == sherpaonnx::TtsModelKind::kMatcha) {
        return @"matcha";
    }
    if (kind == sherpaonnx::TtsModelKind::kKokoro) {
        return @"kokoro";
    }
    if (kind == sherpaonnx::TtsModelKind::kKitten) {
        return @"kitten";
    }
    if (kind == sherpaonnx::TtsModelKind::kZipvoice) {
        return @"zipvoice";
    }
    return @"unknown";
}
|
|
62
|
+
|
|
63
|
+
namespace {
|
|
64
|
+
// Splits `text` on runs of whitespace into individual tokens.
//
// If the split produces nothing but `text` itself is non-empty (i.e. it consists
// solely of whitespace), the whole text is returned as a single token. An empty
// `text` yields an empty vector.
std::vector<std::string> SplitTtsTokens(const std::string &text) {
    std::vector<std::string> pieces;
    std::istringstream stream(text);
    // operator>> skips leading whitespace and stops at the next whitespace run.
    for (std::string word; stream >> word;) {
        pieces.push_back(word);
    }
    if (!pieces.empty() || text.empty()) {
        return pieces;
    }
    // Whitespace-only input: keep it verbatim as one token.
    pieces.push_back(text);
    return pieces;
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
@implementation SherpaOnnx (TTS)
|
|
79
|
+
|
|
80
|
+
/**
 * Creates (or re-initializes) the TTS engine registered under `instanceId`.
 *
 * On success the configuration is stored on the instance so it can be reused for later
 * re-initialization, and the promise resolves with
 * `{ success: YES, detectedModels: [{ type, modelDir }, ...] }`.
 * Every failure rejects with code `TTS_INIT_ERROR`.
 *
 * @param instanceId      Non-empty key identifying the instance.
 * @param modelDir        Model directory; required.
 * @param modelType       Model type string forwarded to the wrapper; required.
 * @param numThreads      Thread count (truncated to int32).
 * @param debug           Forwarded to the wrapper's debug flag.
 * @param noiseScale      Optional; nil leaves the wrapper's default.
 * @param noiseScaleW     Optional; nil leaves the wrapper's default.
 * @param lengthScale     Optional; nil leaves the wrapper's default.
 * @param ruleFsts        Optional; nil or empty is treated as "not set".
 * @param ruleFars        Optional; nil or empty is treated as "not set".
 * @param maxNumSentences Optional; nil or values below 1 are treated as "not set".
 * @param silenceScale    Optional; nil leaves the wrapper's default.
 * @param provider        Optional; nil or empty is treated as "not set".
 */
- (void)initializeTts:(NSString *)instanceId
             modelDir:(NSString *)modelDir
            modelType:(NSString *)modelType
           numThreads:(double)numThreads
                debug:(BOOL)debug
           noiseScale:(NSNumber *)noiseScale
          noiseScaleW:(NSNumber *)noiseScaleW
          lengthScale:(NSNumber *)lengthScale
             ruleFsts:(NSString *)ruleFsts
             ruleFars:(NSString *)ruleFars
      maxNumSentences:(NSNumber *)maxNumSentences
         silenceScale:(NSNumber *)silenceScale
             provider:(NSString *)provider
              resolve:(RCTPromiseResolveBlock)resolve
               reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TTS_INIT_ERROR", @"instanceId is required", nil);
        return;
    }
    // -UTF8String on nil returns NULL, and constructing std::string from NULL is
    // undefined behavior, so refuse nil paths/types up front.
    if (modelDir == nil || modelType == nil) {
        reject(@"TTS_INIT_ERROR", @"modelDir and modelType are required", nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    RCTLogInfo(@"Initializing TTS instance %@ with modelDir: %@, modelType: %@", instanceId, modelDir, modelType);

    // Normalize the "empty means not set" options once; used both for the wrapper
    // call and for what gets remembered on the instance.
    const BOOL hasRuleFsts = (ruleFsts != nil && [ruleFsts length] > 0);
    const BOOL hasRuleFars = (ruleFars != nil && [ruleFars length] > 0);
    const BOOL hasMaxNumSentences = (maxNumSentences != nil && [maxNumSentences intValue] >= 1);
    const BOOL hasProvider = (provider != nil && [provider length] > 0);

    @try {
        std::lock_guard<std::mutex> lock(g_tts_mutex);

        // Look up the instance, creating an empty one on first use.
        std::shared_ptr<TtsInstanceState> &slot = g_tts_instances[instanceIdStr];
        if (!slot) {
            slot = std::make_shared<TtsInstanceState>();
        }
        TtsInstanceState *inst = slot.get();

        // Same rule updateTtsParams enforces: never re-initialize the wrapper while a
        // stream is running.
        if (inst->streamRunning.load()) {
            reject(@"TTS_INIT_ERROR", @"Cannot initialize while streaming", nil);
            return;
        }
        if (inst->wrapper == nullptr) {
            inst->wrapper = std::make_unique<sherpaonnx::TtsWrapper>();
        }

        std::string modelDirStr = [modelDir UTF8String];
        std::string modelTypeStr = [modelType UTF8String];

        std::optional<float> noiseScaleOpt = std::nullopt;
        std::optional<float> noiseScaleWOpt = std::nullopt;
        std::optional<float> lengthScaleOpt = std::nullopt;
        std::optional<float> silenceScaleOpt = std::nullopt;
        if (noiseScale != nil) {
            noiseScaleOpt = [noiseScale floatValue];
        }
        if (noiseScaleW != nil) {
            noiseScaleWOpt = [noiseScaleW floatValue];
        }
        if (lengthScale != nil) {
            lengthScaleOpt = [lengthScale floatValue];
        }
        if (silenceScale != nil) {
            silenceScaleOpt = [silenceScale floatValue];
        }

        std::optional<std::string> ruleFstsOpt = std::nullopt;
        std::optional<std::string> ruleFarsOpt = std::nullopt;
        std::optional<int32_t> maxNumSentencesOpt = std::nullopt;
        std::optional<std::string> providerOpt = std::nullopt;
        if (hasRuleFsts) {
            ruleFstsOpt = std::string([ruleFsts UTF8String]);
        }
        if (hasRuleFars) {
            ruleFarsOpt = std::string([ruleFars UTF8String]);
        }
        if (hasMaxNumSentences) {
            maxNumSentencesOpt = static_cast<int32_t>([maxNumSentences intValue]);
        }
        if (hasProvider) {
            providerOpt = std::string([provider UTF8String]);
        }

        sherpaonnx::TtsInitializeResult result = inst->wrapper->initialize(
            modelDirStr,
            modelTypeStr,
            static_cast<int32_t>(numThreads),
            debug,
            noiseScaleOpt,
            noiseScaleWOpt,
            lengthScaleOpt,
            ruleFstsOpt,
            ruleFarsOpt,
            maxNumSentencesOpt,
            silenceScaleOpt,
            providerOpt
        );

        if (!result.success) {
            NSString *errorMsg = @"Failed to initialize TTS";
            RCTLogError(@"%@", errorMsg);
            reject(@"TTS_INIT_ERROR", errorMsg, nil);
            return;
        }

        RCTLogInfo(@"TTS initialization successful for instance %@", instanceId);

        // Remember the effective configuration for later re-initialization.
        inst->modelDir = [modelDir copy];
        inst->modelType = [modelType copy];
        inst->numThreads = static_cast<int32_t>(numThreads);
        inst->debug = debug;
        inst->noiseScale = noiseScale ? [noiseScale copy] : nil;
        inst->noiseScaleW = noiseScaleW ? [noiseScaleW copy] : nil;
        inst->lengthScale = lengthScale ? [lengthScale copy] : nil;
        inst->ruleFsts = hasRuleFsts ? [ruleFsts copy] : nil;
        inst->ruleFars = hasRuleFars ? [ruleFars copy] : nil;
        inst->maxNumSentences = hasMaxNumSentences ? [maxNumSentences copy] : nil;
        inst->silenceScale = silenceScale ? [silenceScale copy] : nil;
        inst->provider = hasProvider ? [provider copy] : nil;

        NSMutableArray *detectedModelsArray = [NSMutableArray array];
        for (const auto &model : result.detectedModels) {
            // stringWithUTF8String: returns nil for bytes that are not valid UTF-8, and a
            // nil value inside an @{} literal throws; fall back to an empty string so a
            // successful init is not reported as a failure.
            NSString *type = [NSString stringWithUTF8String:model.type.c_str()] ?: @"";
            NSString *dir = [NSString stringWithUTF8String:model.modelDir.c_str()] ?: @"";
            [detectedModelsArray addObject:@{
                @"type": type,
                @"modelDir": dir
            }];
        }

        resolve(@{
            @"success": @YES,
            @"detectedModels": detectedModelsArray
        });
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception during TTS init: %@", exception.reason];
        RCTLogError(@"%@", errorMsg);
        reject(@"TTS_INIT_ERROR", errorMsg, nil);
    }
}
|
|
208
|
+
|
|
209
|
+
/**
 * Runs TTS model detection on `modelDir` without creating an engine.
 *
 * Resolves with `{ success, error?, detectedModels: [{ type, modelDir }, ...], modelType }`,
 * where `modelType` is the identifier of the kind the detector selected. Rejects with
 * `DETECT_ERROR` when `modelDir` is nil or an Objective-C exception is raised.
 *
 * @param modelDir  Directory to inspect; required.
 * @param modelType Explicit model type; nil, empty, or @"auto" requests auto-detection.
 */
- (void)detectTtsModel:(NSString *)modelDir
             modelType:(NSString *)modelType
               resolve:(RCTPromiseResolveBlock)resolve
                reject:(RCTPromiseRejectBlock)reject
{
    // -UTF8String on nil returns NULL, and constructing std::string from NULL is
    // undefined behavior.
    if (modelDir == nil) {
        reject(@"DETECT_ERROR", @"modelDir is required", nil);
        return;
    }
    RCTLogInfo(@"Detecting TTS model in: %@", modelDir);
    @try {
        std::string modelDirStr = [modelDir UTF8String];
        const BOOL hasExplicitType =
            modelType != nil && [modelType length] > 0 && ![modelType isEqualToString:@"auto"];
        std::string modelTypeStr = hasExplicitType ? [modelType UTF8String] : "auto";
        sherpaonnx::TtsDetectResult result = sherpaonnx::DetectTtsModel(modelDirStr, modelTypeStr);

        NSMutableArray *detectedModelsArray = [NSMutableArray array];
        for (const auto &model : result.detectedModels) {
            // stringWithUTF8String: returns nil for bytes that are not valid UTF-8, and a
            // nil value inside an @{} literal throws; fall back to an empty string.
            NSString *type = [NSString stringWithUTF8String:model.type.c_str()] ?: @"";
            NSString *dir = [NSString stringWithUTF8String:model.modelDir.c_str()] ?: @"";
            [detectedModelsArray addObject:@{
                @"type": type,
                @"modelDir": dir
            }];
        }

        NSMutableDictionary *resultDict = [NSMutableDictionary dictionary];
        resultDict[@"success"] = @(result.ok);
        if (!result.error.empty()) {
            resultDict[@"error"] = [NSString stringWithUTF8String:result.error.c_str()];
        }
        resultDict[@"detectedModels"] = detectedModelsArray;
        resultDict[@"modelType"] = ttsModelKindToNSString(result.selectedKind);
        resolve(resultDict);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"TTS model detection failed: %@", exception.reason];
        RCTLogError(@"%@", errorMsg);
        reject(@"DETECT_ERROR", errorMsg, nil);
    }
}
|
|
242
|
+
|
|
243
|
+
- (void)updateTtsParams:(NSString *)instanceId
|
|
244
|
+
noiseScale:(NSNumber *)noiseScale
|
|
245
|
+
noiseScaleW:(NSNumber *)noiseScaleW
|
|
246
|
+
lengthScale:(NSNumber *)lengthScale
|
|
247
|
+
resolve:(RCTPromiseResolveBlock)resolve
|
|
248
|
+
reject:(RCTPromiseRejectBlock)reject
|
|
249
|
+
{
|
|
250
|
+
if (instanceId == nil || [instanceId length] == 0) {
|
|
251
|
+
reject(@"TTS_UPDATE_ERROR", @"instanceId is required", nil);
|
|
252
|
+
return;
|
|
253
|
+
}
|
|
254
|
+
std::string instanceIdStr = [instanceId UTF8String];
|
|
255
|
+
std::lock_guard<std::mutex> lock(g_tts_mutex);
|
|
256
|
+
auto it = g_tts_instances.find(instanceIdStr);
|
|
257
|
+
if (it == g_tts_instances.end() || it->second->wrapper == nullptr || it->second->modelDir == nil || it->second->modelType == nil) {
|
|
258
|
+
reject(@"TTS_UPDATE_ERROR", @"TTS instance not found or not initialized", nil);
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
TtsInstanceState *inst = it->second.get();
|
|
262
|
+
if (inst->streamRunning.load()) {
|
|
263
|
+
reject(@"TTS_UPDATE_ERROR", @"Cannot update params while streaming", nil);
|
|
264
|
+
return;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
NSNumber *nextNoiseScale = nil;
|
|
268
|
+
if (noiseScale == nil) {
|
|
269
|
+
nextNoiseScale = nil;
|
|
270
|
+
} else if (isnan([noiseScale doubleValue])) {
|
|
271
|
+
nextNoiseScale = inst->noiseScale;
|
|
272
|
+
} else {
|
|
273
|
+
nextNoiseScale = noiseScale;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
NSNumber *nextNoiseScaleW = nil;
|
|
277
|
+
if (noiseScaleW == nil) {
|
|
278
|
+
nextNoiseScaleW = nil;
|
|
279
|
+
} else if (isnan([noiseScaleW doubleValue])) {
|
|
280
|
+
nextNoiseScaleW = inst->noiseScaleW;
|
|
281
|
+
} else {
|
|
282
|
+
nextNoiseScaleW = noiseScaleW;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
NSNumber *nextLengthScale = nil;
|
|
286
|
+
if (lengthScale == nil) {
|
|
287
|
+
nextLengthScale = nil;
|
|
288
|
+
} else if (isnan([lengthScale doubleValue])) {
|
|
289
|
+
nextLengthScale = inst->lengthScale;
|
|
290
|
+
} else {
|
|
291
|
+
nextLengthScale = lengthScale;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
@try {
|
|
295
|
+
std::optional<float> noiseScaleOpt = std::nullopt;
|
|
296
|
+
std::optional<float> noiseScaleWOpt = std::nullopt;
|
|
297
|
+
std::optional<float> lengthScaleOpt = std::nullopt;
|
|
298
|
+
if (nextNoiseScale != nil) {
|
|
299
|
+
noiseScaleOpt = [nextNoiseScale floatValue];
|
|
300
|
+
}
|
|
301
|
+
if (nextNoiseScaleW != nil) {
|
|
302
|
+
noiseScaleWOpt = [nextNoiseScaleW floatValue];
|
|
303
|
+
}
|
|
304
|
+
if (nextLengthScale != nil) {
|
|
305
|
+
lengthScaleOpt = [nextLengthScale floatValue];
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
std::optional<std::string> ruleFstsOpt = std::nullopt;
|
|
309
|
+
std::optional<std::string> ruleFarsOpt = std::nullopt;
|
|
310
|
+
std::optional<int32_t> maxNumSentencesOpt = std::nullopt;
|
|
311
|
+
std::optional<float> silenceScaleOpt = std::nullopt;
|
|
312
|
+
if (inst->ruleFsts != nil && [inst->ruleFsts length] > 0) {
|
|
313
|
+
ruleFstsOpt = std::string([inst->ruleFsts UTF8String]);
|
|
314
|
+
}
|
|
315
|
+
if (inst->ruleFars != nil && [inst->ruleFars length] > 0) {
|
|
316
|
+
ruleFarsOpt = std::string([inst->ruleFars UTF8String]);
|
|
317
|
+
}
|
|
318
|
+
if (inst->maxNumSentences != nil && [inst->maxNumSentences intValue] >= 1) {
|
|
319
|
+
maxNumSentencesOpt = static_cast<int32_t>([inst->maxNumSentences intValue]);
|
|
320
|
+
}
|
|
321
|
+
if (inst->silenceScale != nil) {
|
|
322
|
+
silenceScaleOpt = [inst->silenceScale floatValue];
|
|
323
|
+
}
|
|
324
|
+
std::optional<std::string> providerOpt = std::nullopt;
|
|
325
|
+
if (inst->provider != nil && [inst->provider length] > 0) {
|
|
326
|
+
providerOpt = std::string([inst->provider UTF8String]);
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
sherpaonnx::TtsInitializeResult result = inst->wrapper->initialize(
|
|
330
|
+
std::string([inst->modelDir UTF8String]),
|
|
331
|
+
std::string([inst->modelType UTF8String]),
|
|
332
|
+
inst->numThreads,
|
|
333
|
+
inst->debug,
|
|
334
|
+
noiseScaleOpt,
|
|
335
|
+
noiseScaleWOpt,
|
|
336
|
+
lengthScaleOpt,
|
|
337
|
+
ruleFstsOpt,
|
|
338
|
+
ruleFarsOpt,
|
|
339
|
+
maxNumSentencesOpt,
|
|
340
|
+
silenceScaleOpt,
|
|
341
|
+
providerOpt
|
|
342
|
+
);
|
|
343
|
+
|
|
344
|
+
if (!result.success) {
|
|
345
|
+
NSString *errorMsg = @"Failed to update TTS params";
|
|
346
|
+
RCTLogError(@"%@", errorMsg);
|
|
347
|
+
reject(@"TTS_UPDATE_ERROR", errorMsg, nil);
|
|
348
|
+
return;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
inst->noiseScale = nextNoiseScale ? [nextNoiseScale copy] : nil;
|
|
352
|
+
inst->noiseScaleW = nextNoiseScaleW ? [nextNoiseScaleW copy] : nil;
|
|
353
|
+
inst->lengthScale = nextLengthScale ? [nextLengthScale copy] : nil;
|
|
354
|
+
|
|
355
|
+
NSMutableArray *detectedModelsArray = [NSMutableArray array];
|
|
356
|
+
for (const auto& model : result.detectedModels) {
|
|
357
|
+
NSDictionary *modelDict = @{
|
|
358
|
+
@"type": [NSString stringWithUTF8String:model.type.c_str()],
|
|
359
|
+
@"modelDir": [NSString stringWithUTF8String:model.modelDir.c_str()]
|
|
360
|
+
};
|
|
361
|
+
[detectedModelsArray addObject:modelDict];
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
NSDictionary *resultDict = @{
|
|
365
|
+
@"success": @YES,
|
|
366
|
+
@"detectedModels": detectedModelsArray
|
|
367
|
+
};
|
|
368
|
+
|
|
369
|
+
resolve(resultDict);
|
|
370
|
+
} @catch (NSException *exception) {
|
|
371
|
+
NSString *errorMsg = [NSString stringWithFormat:@"Exception during TTS update: %@", exception.reason];
|
|
372
|
+
RCTLogError(@"%@", errorMsg);
|
|
373
|
+
reject(@"TTS_UPDATE_ERROR", errorMsg, nil);
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/**
 * Synthesizes `text` with the TTS instance registered under `instanceId` and
 * resolves with `{ samples: [NSNumber], sampleRate: NSNumber }`.
 *
 * Recognized `options` keys: `sid` (default 0) and `speed` (default 1.0).
 * Option values that cannot answer -doubleValue (for example NSNull) are
 * ignored and the default is used.
 *
 * Rejects with TTS_GENERATE_ERROR when `instanceId` or `text` is missing, when
 * generation returns no samples or a zero sample rate, or when an NSException
 * is raised. Rejects with TTS_NOT_INITIALIZED when the instance is unknown or
 * its wrapper is not initialized. g_tts_mutex is held for the whole call.
 */
- (void)generateTts:(NSString *)instanceId
               text:(NSString *)text
            options:(NSDictionary *)options
            resolve:(RCTPromiseResolveBlock)resolve
             reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TTS_GENERATE_ERROR", @"instanceId is required", nil);
        return;
    }

    // -UTF8String returns NULL for a nil receiver; constructing std::string
    // from NULL is undefined behavior, so reject before converting.
    const char *textUtf8 = [text UTF8String];
    if (textUtf8 == NULL) {
        reject(@"TTS_GENERATE_ERROR", @"text is required", nil);
        return;
    }

    double sid = 0;
    double speed = 1.0;
    if (options != nil) {
        // Guard with respondsToSelector: so an NSNull value does not raise an
        // unrecognized-selector exception outside the @try below.
        id sidValue = options[@"sid"];
        if ([sidValue respondsToSelector:@selector(doubleValue)]) sid = [sidValue doubleValue];
        id speedValue = options[@"speed"];
        if ([speedValue respondsToSelector:@selector(doubleValue)]) speed = [speedValue doubleValue];
    }

    std::string instanceIdStr = [instanceId UTF8String];
    std::lock_guard<std::mutex> lock(g_tts_mutex);
    auto it = g_tts_instances.find(instanceIdStr);
    if (it == g_tts_instances.end() || it->second->wrapper == nullptr || !it->second->wrapper->isInitialized()) {
        reject(@"TTS_NOT_INITIALIZED", @"TTS not initialized. Call initializeTts() first.", nil);
        return;
    }
    sherpaonnx::TtsWrapper *wrapper = it->second->wrapper.get();

    @try {
        std::string textStr = textUtf8;

        auto result = wrapper->generate(
            textStr,
            static_cast<int32_t>(sid),
            static_cast<float>(speed)
        );

        if (result.samples.empty() || result.sampleRate == 0) {
            NSString *errorMsg = @"Failed to generate speech or result is empty";
            RCTLogError(@"%@", errorMsg);
            reject(@"TTS_GENERATE_ERROR", errorMsg, nil);
            return;
        }

        // Box every float sample so the buffer can cross the bridge.
        NSMutableArray *samplesArray = [NSMutableArray arrayWithCapacity:result.samples.size()];
        for (float sample : result.samples) {
            [samplesArray addObject:@(sample)];
        }

        NSDictionary *resultDict = @{
            @"samples": samplesArray,
            @"sampleRate": @(result.sampleRate)
        };

        RCTLogInfo(@"TTS: Generated %lu samples at %d Hz",
                   (unsigned long)result.samples.size(), result.sampleRate);

        resolve(resultDict);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception during TTS generation: %@", exception.reason];
        RCTLogError(@"%@", errorMsg);
        reject(@"TTS_GENERATE_ERROR", errorMsg, nil);
    }
}
|
|
437
|
+
|
|
438
|
+
/**
 * Synthesizes `text` like generateTts and additionally resolves estimated
 * subtitle timings: `{ samples, sampleRate, subtitles, estimated: YES }`.
 *
 * Timings are not produced by the model. The text is split with
 * SplitTtsTokens and the total audio duration (samples / sampleRate) is
 * divided evenly across the tokens.
 *
 * Recognized `options` keys: `sid` (default 0) and `speed` (default 1.0).
 * Option values that cannot answer -doubleValue (for example NSNull) are
 * ignored and the default is used.
 *
 * Rejects with TTS_GENERATE_ERROR when `instanceId` or `text` is missing, when
 * generation returns no samples or a zero sample rate, or when an NSException
 * is raised. Rejects with TTS_NOT_INITIALIZED when the instance is unknown or
 * its wrapper is not initialized. g_tts_mutex is held for the whole call.
 */
- (void)generateTtsWithTimestamps:(NSString *)instanceId
                             text:(NSString *)text
                          options:(NSDictionary *)options
                          resolve:(RCTPromiseResolveBlock)resolve
                           reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TTS_GENERATE_ERROR", @"instanceId is required", nil);
        return;
    }

    // -UTF8String returns NULL for a nil receiver; constructing std::string
    // from NULL is undefined behavior, so reject before converting.
    const char *textUtf8 = [text UTF8String];
    if (textUtf8 == NULL) {
        reject(@"TTS_GENERATE_ERROR", @"text is required", nil);
        return;
    }

    double sid = 0;
    double speed = 1.0;
    if (options != nil) {
        // Guard with respondsToSelector: so an NSNull value does not raise an
        // unrecognized-selector exception outside the @try below.
        id sidValue = options[@"sid"];
        if ([sidValue respondsToSelector:@selector(doubleValue)]) sid = [sidValue doubleValue];
        id speedValue = options[@"speed"];
        if ([speedValue respondsToSelector:@selector(doubleValue)]) speed = [speedValue doubleValue];
    }

    std::string instanceIdStr = [instanceId UTF8String];
    std::lock_guard<std::mutex> lock(g_tts_mutex);
    auto it = g_tts_instances.find(instanceIdStr);
    if (it == g_tts_instances.end() || it->second->wrapper == nullptr || !it->second->wrapper->isInitialized()) {
        reject(@"TTS_NOT_INITIALIZED", @"TTS not initialized. Call initializeTts() first.", nil);
        return;
    }
    sherpaonnx::TtsWrapper *wrapper = it->second->wrapper.get();

    @try {
        std::string textStr = textUtf8;

        auto result = wrapper->generate(
            textStr,
            static_cast<int32_t>(sid),
            static_cast<float>(speed)
        );

        if (result.samples.empty() || result.sampleRate == 0) {
            NSString *errorMsg = @"Failed to generate speech or result is empty";
            RCTLogError(@"%@", errorMsg);
            reject(@"TTS_GENERATE_ERROR", errorMsg, nil);
            return;
        }

        NSMutableArray *samplesArray = [NSMutableArray arrayWithCapacity:result.samples.size()];
        for (float sample : result.samples) {
            [samplesArray addObject:@(sample)];
        }

        std::vector<std::string> tokens = SplitTtsTokens(textStr);
        NSMutableArray *subtitlesArray = [NSMutableArray array];
        if (!tokens.empty()) {
            // Evenly distribute the audio duration over the tokens; sampleRate
            // is known to be non-zero here because of the check above.
            double totalSeconds = static_cast<double>(result.samples.size()) /
                                  static_cast<double>(result.sampleRate);
            double perToken = totalSeconds / static_cast<double>(tokens.size());

            for (size_t i = 0; i < tokens.size(); ++i) {
                double start = perToken * static_cast<double>(i);
                double end = perToken * static_cast<double>(i + 1);
                NSDictionary *item = @{
                    @"text": [NSString stringWithUTF8String:tokens[i].c_str()],
                    @"start": @(start),
                    @"end": @(end)
                };
                [subtitlesArray addObject:item];
            }
        }

        NSDictionary *resultDict = @{
            @"samples": samplesArray,
            @"sampleRate": @(result.sampleRate),
            @"subtitles": subtitlesArray,
            @"estimated": @YES
        };

        resolve(resultDict);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception during TTS generation: %@", exception.reason];
        RCTLogError(@"%@", errorMsg);
        reject(@"TTS_GENERATE_ERROR", errorMsg, nil);
    }
}
|
|
516
|
+
|
|
517
|
+
/**
 * Starts streaming synthesis of `text` on a global background queue and
 * resolves immediately with nil. Results are delivered as events:
 *   - "ttsStreamChunk": { instanceId, samples, sampleRate, progress, isFinal: NO }
 *   - "ttsStreamError": { instanceId, message }
 *   - "ttsStreamEnd":   { instanceId, cancelled }
 *
 * Recognized `options` keys: `sid` (default 0) and `speed` (default 1.0).
 * Option values that cannot answer -doubleValue (for example NSNull) are
 * ignored and the default is used.
 *
 * Rejects with TTS_STREAM_ERROR when `instanceId` or `text` is missing or a
 * stream is already running for the instance, and with TTS_NOT_INITIALIZED
 * when the instance is unknown or its wrapper is not initialized.
 */
- (void)generateTtsStream:(NSString *)instanceId
                     text:(NSString *)text
                  options:(NSDictionary *)options
                  resolve:(RCTPromiseResolveBlock)resolve
                   reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TTS_STREAM_ERROR", @"instanceId is required", nil);
        return;
    }

    // Convert the text BEFORE claiming the stream slot: -UTF8String returns
    // NULL for nil, std::string(NULL) is undefined behavior, and failing after
    // streamRunning is set would leave the instance marked as streaming.
    const char *textUtf8 = [text UTF8String];
    if (textUtf8 == NULL) {
        reject(@"TTS_STREAM_ERROR", @"text is required", nil);
        return;
    }
    std::string textStr = textUtf8;

    double sid = 0;
    double speed = 1.0;
    if (options != nil) {
        id sidValue = options[@"sid"];
        if ([sidValue respondsToSelector:@selector(doubleValue)]) sid = [sidValue doubleValue];
        id speedValue = options[@"speed"];
        if ([speedValue respondsToSelector:@selector(doubleValue)]) speed = [speedValue doubleValue];
    }

    std::string instanceIdStr = [instanceId UTF8String];
    std::shared_ptr<TtsInstanceState> instRef;
    int32_t sampleRate = 0;
    {
        std::lock_guard<std::mutex> lock(g_tts_mutex);
        auto it = g_tts_instances.find(instanceIdStr);
        if (it == g_tts_instances.end() || it->second->wrapper == nullptr || !it->second->wrapper->isInitialized()) {
            reject(@"TTS_NOT_INITIALIZED", @"TTS not initialized. Call initializeTts() first.", nil);
            return;
        }
        instRef = it->second;  // shared_ptr copy keeps TtsInstanceState alive during streaming
        if (instRef->streamRunning.load()) {
            reject(@"TTS_STREAM_ERROR", @"TTS streaming already in progress", nil);
            return;
        }
        // Read the sample rate while the wrapper is known to be valid under the lock.
        sampleRate = instRef->wrapper->getSampleRate();
        instRef->streamCancelled.store(false);
        instRef->streamRunning.store(true);
    }

    NSString *instanceIdCopy = [instanceId copy];

    __weak SherpaOnnx *weakSelf = self;
    dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
        bool success = false;
        // Set when the @catch below already emitted ttsStreamError, so the
        // generic failure event is not sent a second time for the same stream.
        bool errorReported = false;
        @try {
            success = instRef->wrapper->generateStream(
                textStr,
                static_cast<int32_t>(sid),
                static_cast<float>(speed),
                [weakSelf, sampleRate, instanceIdCopy, instRef](const float *samples, int32_t numSamples, float progress) -> int32_t {
                    // Returning 0 asks the generator to stop.
                    if (instRef->streamCancelled.load()) {
                        return 0;
                    }

                    NSMutableArray *samplesArray = [NSMutableArray arrayWithCapacity:numSamples];
                    for (int32_t i = 0; i < numSamples; i++) {
                        [samplesArray addObject:@(samples[i])];
                    }

                    NSDictionary *payload = @{
                        @"instanceId": instanceIdCopy,
                        @"samples": samplesArray,
                        @"sampleRate": @(sampleRate),
                        @"progress": @(progress),
                        @"isFinal": @NO
                    };

                    dispatch_async(dispatch_get_main_queue(), ^{
                        if (weakSelf) {
                            [weakSelf sendEventWithName:@"ttsStreamChunk" body:payload];
                        }
                    });

                    return instRef->streamCancelled.load() ? 0 : 1;
                }
            );
        } @catch (NSException *exception) {
            errorReported = true;
            NSString *errorMsg = [NSString stringWithFormat:@"TTS streaming failed: %@", exception.reason];
            dispatch_async(dispatch_get_main_queue(), ^{
                if (weakSelf) {
                    [weakSelf sendEventWithName:@"ttsStreamError" body:@{ @"instanceId": instanceIdCopy, @"message": errorMsg }];
                }
            });
        }

        bool cancelled = instRef->streamCancelled.load();
        if (!success && !cancelled && !errorReported) {
            dispatch_async(dispatch_get_main_queue(), ^{
                if (weakSelf) {
                    [weakSelf sendEventWithName:@"ttsStreamError" body:@{ @"instanceId": instanceIdCopy, @"message": @"TTS streaming generation failed" }];
                }
            });
        }

        dispatch_async(dispatch_get_main_queue(), ^{
            if (weakSelf) {
                [weakSelf sendEventWithName:@"ttsStreamEnd" body:@{ @"instanceId": instanceIdCopy, @"cancelled": @(cancelled) }];
            }
        });

        // Release the stream slot, then wake anything waiting on g_tts_stream_cv.
        instRef->streamRunning.store(false);
        {
            std::lock_guard<std::mutex> lock(g_tts_mutex);
            g_tts_stream_cv.notify_all();
        }
    });

    resolve(nil);
}
|
|
623
|
+
|
|
624
|
+
/**
 * Requests cancellation of the stream belonging to `instanceId` by setting the
 * instance's streamCancelled flag. Always resolves with nil, including when
 * the id is missing or no such instance is registered; never rejects.
 */
- (void)cancelTtsStream:(NSString *)instanceId
                resolve:(RCTPromiseResolveBlock)resolve
                 reject:(RCTPromiseRejectBlock)reject
{
    // Messaging nil returns 0, so this covers both nil and empty ids.
    if (instanceId.length == 0) {
        resolve(nil);
        return;
    }

    std::string key = [instanceId UTF8String];
    std::lock_guard<std::mutex> guard(g_tts_mutex);
    auto entry = g_tts_instances.find(key);
    bool known = (entry != g_tts_instances.end());
    if (known) {
        entry->second->streamCancelled.store(true);
    }
    resolve(nil);
}
|
|
640
|
+
|
|
641
|
+
/**
 * Creates and starts an AVAudioEngine + AVAudioPlayerNode for `instanceId` on
 * the main queue, replacing any player the instance already had.
 *
 * @param sampleRate Sample rate for the player's standard format; must be > 0.
 * @param channels   Must be exactly 1 (mono).
 *
 * Resolves with nil once the engine is running and the player node is playing.
 * Rejects with TTS_PCM_ERROR when the id is missing, the instance is unknown,
 * the arguments are invalid, the format cannot be created, the engine fails to
 * start, or an NSException is raised.
 *
 * The new engine/player/format are stored on the instance only after the
 * engine started, so a failed start never leaves a half-built player behind.
 */
- (void)startTtsPcmPlayer:(NSString *)instanceId
               sampleRate:(double)sampleRate
                 channels:(double)channels
                  resolve:(RCTPromiseResolveBlock)resolve
                   reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TTS_PCM_ERROR", @"instanceId is required", nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    dispatch_async(dispatch_get_main_queue(), ^{
        @try {
            NSString *errorMsg = nil;
            NSError *startError = nil;

            // Phase 1: validate and tear down any existing player.
            {
                std::lock_guard<std::mutex> lock(g_tts_mutex);
                auto it = g_tts_instances.find(instanceIdStr);
                if (it == g_tts_instances.end()) {
                    errorMsg = @"TTS instance not found";
                } else if (channels != 1.0) {
                    errorMsg = @"PCM playback supports mono only";
                } else if (!(sampleRate > 0.0)) {
                    // Written as a negated comparison so NaN is rejected too.
                    errorMsg = @"sampleRate must be a positive number";
                } else {
                    TtsInstanceState *inst = it->second.get();
                    if (inst->player != nil) [inst->player stop];
                    if (inst->engine != nil) {
                        [inst->engine stop];
                        [inst->engine reset];
                    }
                    inst->player = nil;
                    inst->engine = nil;
                    inst->format = nil;
                }
            }

            if (errorMsg == nil) {
                // Phase 2: configure the shared audio session (mutex not held).
                // Failures are logged rather than treated as fatal; a broken
                // session surfaces below when the engine fails to start.
                AVAudioSession *session = [AVAudioSession sharedInstance];
                NSError *sessionError = nil;
                if (![session setCategory:AVAudioSessionCategoryPlayback error:&sessionError]) {
                    RCTLogWarn(@"TTS PCM: failed to set audio session category: %@", sessionError.localizedDescription);
                }
                sessionError = nil;
                if (![session setActive:YES error:&sessionError]) {
                    RCTLogWarn(@"TTS PCM: failed to activate audio session: %@", sessionError.localizedDescription);
                }

                // Phase 3: build the new player; the instance may have been
                // removed while the lock was released, so look it up again.
                std::lock_guard<std::mutex> lock(g_tts_mutex);
                auto it = g_tts_instances.find(instanceIdStr);
                if (it == g_tts_instances.end()) {
                    errorMsg = @"TTS instance not found";
                } else {
                    TtsInstanceState *inst = it->second.get();
                    AVAudioEngine *engine = [[AVAudioEngine alloc] init];
                    AVAudioPlayerNode *player = [[AVAudioPlayerNode alloc] init];
                    AVAudioFormat *format = [[AVAudioFormat alloc] initStandardFormatWithSampleRate:sampleRate channels:1];

                    if (format == nil) {
                        errorMsg = @"Failed to create audio format";
                    } else {
                        [engine attachNode:player];
                        [engine connect:player to:engine.mainMixerNode format:format];

                        if (![engine startAndReturnError:&startError]) {
                            errorMsg = [NSString stringWithFormat:@"Failed to start audio engine: %@", startError.localizedDescription];
                        } else {
                            [player play];
                            // Publish only a fully working player.
                            inst->engine = engine;
                            inst->player = player;
                            inst->format = format;
                        }
                    }
                }
            }

            if (errorMsg != nil) {
                // startError is nil unless the engine start itself failed.
                reject(@"TTS_PCM_ERROR", errorMsg, startError);
            } else {
                resolve(nil);
            }
        } @catch (NSException *exception) {
            NSString *errorMsg = [NSString stringWithFormat:@"Failed to start PCM player: %@", exception.reason];
            reject(@"TTS_PCM_ERROR", errorMsg, nil);
        }
    });
}
|
|
721
|
+
|
|
722
|
+
/**
 * Schedules one chunk of float PCM samples on the instance's player node.
 *
 * Resolves with nil once the buffer is scheduled. An empty chunk resolves
 * immediately without scheduling anything. Rejects with TTS_PCM_ERROR when the
 * id is missing, the PCM player has not been started for the instance, the
 * buffer cannot be allocated, or an NSException is raised.
 * g_tts_mutex is held for the whole call.
 */
- (void)writeTtsPcmChunk:(NSString *)instanceId
                 samples:(NSArray<NSNumber *> *)samples
                 resolve:(RCTPromiseResolveBlock)resolve
                  reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TTS_PCM_ERROR", @"instanceId is required", nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    std::lock_guard<std::mutex> lock(g_tts_mutex);
    auto it = g_tts_instances.find(instanceIdStr);
    if (it == g_tts_instances.end() || it->second->engine == nil || it->second->player == nil || it->second->format == nil) {
        reject(@"TTS_PCM_ERROR", @"PCM player not initialized", nil);
        return;
    }
    TtsInstanceState *inst = it->second.get();

    // Nothing to play; avoid asking for a zero-capacity buffer.
    NSUInteger sampleCount = [samples count];
    if (sampleCount == 0) {
        resolve(nil);
        return;
    }

    @try {
        AVAudioFrameCount frameCount = (AVAudioFrameCount)sampleCount;
        AVAudioPCMBuffer *buffer = [[AVAudioPCMBuffer alloc] initWithPCMFormat:inst->format frameCapacity:frameCount];

        // floatChannelData is NULL when the buffer is nil or not float PCM;
        // indexing it would dereference NULL, which @catch cannot intercept.
        float *const *channels = buffer.floatChannelData;
        if (buffer == nil || channels == NULL) {
            reject(@"TTS_PCM_ERROR", @"Failed to allocate PCM buffer", nil);
            return;
        }
        buffer.frameLength = frameCount;

        float *channelData = channels[0];
        for (NSUInteger i = 0; i < sampleCount; i++) {
            channelData[i] = [samples[i] floatValue];
        }

        [inst->player scheduleBuffer:buffer completionHandler:nil];
        resolve(nil);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Failed to write PCM chunk: %@", exception.reason];
        reject(@"TTS_PCM_ERROR", errorMsg, nil);
    }
}
|
|
756
|
+
|
|
757
|
+
/**
 * Stops and discards the PCM player of `instanceId` on the main queue.
 *
 * Resolves with nil whether or not the id is present or the instance exists.
 * Rejects with TTS_PCM_ERROR only if an NSException is raised while stopping.
 */
- (void)stopTtsPcmPlayer:(NSString *)instanceId
                 resolve:(RCTPromiseResolveBlock)resolve
                  reject:(RCTPromiseRejectBlock)reject
{
    // Messaging nil returns 0, so this covers both nil and empty ids.
    if (instanceId.length == 0) {
        resolve(nil);
        return;
    }

    std::string key = [instanceId UTF8String];
    dispatch_async(dispatch_get_main_queue(), ^{
        @try {
            std::lock_guard<std::mutex> guard(g_tts_mutex);
            auto entry = g_tts_instances.find(key);
            if (entry != g_tts_instances.end()) {
                TtsInstanceState *state = entry->second.get();

                // Stop the node first, then the engine that hosts it.
                if (state->player != nil) {
                    [state->player stop];
                }
                if (state->engine != nil) {
                    [state->engine stop];
                    [state->engine reset];
                }

                // Drop the references so the player reads as "not started".
                state->player = nil;
                state->engine = nil;
                state->format = nil;
            }
            resolve(nil);
        } @catch (NSException *exception) {
            NSString *errorMsg = [NSString stringWithFormat:@"Failed to stop PCM player: %@", exception.reason];
            reject(@"TTS_PCM_ERROR", errorMsg, nil);
        }
    });
}
|
|
790
|
+
|
|
791
|
+
/**
 * Resolves with the sample rate reported by the instance's TTS wrapper.
 *
 * Rejects with TTS_ERROR when the id is missing and with TTS_NOT_INITIALIZED
 * when the instance is unknown or its wrapper is not initialized.
 */
- (void)getTtsSampleRate:(NSString *)instanceId
                 resolve:(RCTPromiseResolveBlock)resolve
                  reject:(RCTPromiseRejectBlock)reject
{
    // Messaging nil returns 0, so this covers both nil and empty ids.
    if (instanceId.length == 0) {
        reject(@"TTS_ERROR", @"instanceId is required", nil);
        return;
    }

    std::string key = [instanceId UTF8String];
    std::lock_guard<std::mutex> guard(g_tts_mutex);
    auto entry = g_tts_instances.find(key);

    // Short-circuit evaluation keeps each dereference behind its own guard.
    bool ready = entry != g_tts_instances.end()
        && entry->second->wrapper != nullptr
        && entry->second->wrapper->isInitialized();
    if (!ready) {
        reject(@"TTS_NOT_INITIALIZED", @"TTS not initialized. Call initializeTts() first.", nil);
        return;
    }

    int32_t rate = entry->second->wrapper->getSampleRate();
    resolve(@(rate));
}
|
|
809
|
+
|
|
810
|
+
/**
 * Resolves with the speaker count reported by the instance's TTS wrapper.
 *
 * Rejects with TTS_ERROR when the id is missing and with TTS_NOT_INITIALIZED
 * when the instance is unknown or its wrapper is not initialized.
 */
- (void)getTtsNumSpeakers:(NSString *)instanceId
                  resolve:(RCTPromiseResolveBlock)resolve
                   reject:(RCTPromiseRejectBlock)reject
{
    // Messaging nil returns 0, so this covers both nil and empty ids.
    if (instanceId.length == 0) {
        reject(@"TTS_ERROR", @"instanceId is required", nil);
        return;
    }

    std::string key = [instanceId UTF8String];
    std::lock_guard<std::mutex> guard(g_tts_mutex);
    auto entry = g_tts_instances.find(key);

    // Short-circuit evaluation keeps each dereference behind its own guard.
    bool ready = entry != g_tts_instances.end()
        && entry->second->wrapper != nullptr
        && entry->second->wrapper->isInitialized();
    if (!ready) {
        reject(@"TTS_NOT_INITIALIZED", @"TTS not initialized. Call initializeTts() first.", nil);
        return;
    }

    int32_t speakerCount = entry->second->wrapper->getNumSpeakers();
    resolve(@(speakerCount));
}
|
|
828
|
+
|
|
829
|
+
/**
 * Tears down the TTS instance registered under `instanceId`.
 *
 * Step 1 (main queue): stop and drop the PCM player and flag any running
 * stream as cancelled.
 * Step 2 (global queue): wait up to 5 seconds on g_tts_stream_cv for the
 * stream to finish, release the wrapper, clear the cached configuration and
 * remove the instance from g_tts_instances.
 *
 * Resolves with nil in every path, including a missing id or unknown
 * instance. The TTS_CLEANUP_ERROR rejection only covers an NSException raised
 * while scheduling the work, because the dispatched blocks run after the
 * @try scope has been left.
 */
- (void)unloadTts:(NSString *)instanceId
          resolve:(RCTPromiseResolveBlock)resolve
           reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        resolve(nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    RCTPromiseResolveBlock resolveCopy = resolve;
    RCTPromiseRejectBlock rejectCopy = reject;
    NSString *instanceIdCopy = [instanceId copy];
    @try {
        dispatch_async(dispatch_get_main_queue(), ^{
            {
                std::lock_guard<std::mutex> lock(g_tts_mutex);
                auto it = g_tts_instances.find(instanceIdStr);
                if (it == g_tts_instances.end()) {
                    resolveCopy(nil);
                    return;
                }
                TtsInstanceState *inst = it->second.get();
                if (inst->player != nil) [inst->player stop];
                if (inst->engine != nil) {
                    [inst->engine stop];
                    [inst->engine reset];
                }
                inst->player = nil;
                inst->engine = nil;
                inst->format = nil;
                inst->streamCancelled.store(true);
            }
            dispatch_async(dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0), ^{
                {
                    std::unique_lock<std::mutex> lock(g_tts_mutex);
                    auto it = g_tts_instances.find(instanceIdStr);
                    if (it == g_tts_instances.end()) {
                        dispatch_async(dispatch_get_main_queue(), ^{ resolveCopy(nil); });
                        return;
                    }

                    // wait_for releases g_tts_mutex while blocked, so another
                    // call may erase this entry in the meantime. Hold a
                    // shared_ptr so the state outlives the wait, and do not
                    // reuse the iterator afterwards.
                    std::shared_ptr<TtsInstanceState> state = it->second;
                    bool done = g_tts_stream_cv.wait_for(
                        lock,
                        std::chrono::seconds(5),
                        [&state] { return !state->streamRunning.load(); }
                    );
                    if (!done) {
                        RCTLogWarn(@"TTS unload: stream did not stop within 5s, releasing anyway");
                    }
                    if (state->wrapper != nullptr) {
                        state->wrapper->release();
                        state->wrapper.reset();
                    }
                    state->modelDir = nil;
                    state->modelType = nil;
                    state->provider = nil;
                    state->noiseScale = nil;
                    state->noiseScaleW = nil;
                    state->lengthScale = nil;
                    state->ruleFsts = nil;
                    state->ruleFars = nil;
                    state->maxNumSentences = nil;
                    state->silenceScale = nil;

                    // Look the entry up again and erase it only if it is still
                    // the instance this call tore down.
                    auto current = g_tts_instances.find(instanceIdStr);
                    if (current != g_tts_instances.end() && current->second == state) {
                        g_tts_instances.erase(current);
                    }
                }
                RCTLogInfo(@"TTS instance %@ released", instanceIdCopy);
                dispatch_async(dispatch_get_main_queue(), ^{ resolveCopy(nil); });
            });
        });
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception during TTS cleanup: %@", exception.reason];
        RCTLogError(@"%@", errorMsg);
        rejectCopy(@"TTS_CLEANUP_ERROR", errorMsg, nil);
    }
}
|
|
905
|
+
|
|
906
|
+
/**
 * Writes `samples` as a WAV file at `filePath` through
 * sherpaonnx::TtsWrapper::saveToWavFile and resolves with `filePath`.
 *
 * Rejects with TTS_SAVE_ERROR when `filePath` is missing, when the writer
 * reports failure, or when an NSException is raised.
 */
- (void)saveTtsAudioToFile:(NSArray<NSNumber *> *)samples
                sampleRate:(double)sampleRate
                  filePath:(NSString *)filePath
                   resolve:(RCTPromiseResolveBlock)resolve
                    reject:(RCTPromiseRejectBlock)reject
{
    @try {
        // -UTF8String returns NULL for a nil receiver; constructing
        // std::string from NULL is undefined behavior, so reject up front.
        const char *pathUtf8 = [filePath UTF8String];
        if (pathUtf8 == NULL) {
            reject(@"TTS_SAVE_ERROR", @"filePath is required", nil);
            return;
        }
        std::string filePathStr = pathUtf8;

        std::vector<float> samplesVec;
        samplesVec.reserve([samples count]);
        for (NSNumber *num in samples) {
            samplesVec.push_back([num floatValue]);
        }

        bool success = sherpaonnx::TtsWrapper::saveToWavFile(
            samplesVec,
            static_cast<int32_t>(sampleRate),
            filePathStr
        );

        if (success) {
            resolve(filePath);
        } else {
            reject(@"TTS_SAVE_ERROR", @"Failed to save audio to file", nil);
        }
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception saving TTS audio: %@", exception.reason];
        reject(@"TTS_SAVE_ERROR", errorMsg, nil);
    }
}
|
|
937
|
+
|
|
938
|
+
/**
 * Writes `samples` as a WAV file named `filename` inside `directoryUri` and
 * resolves with the resulting file path.
 *
 * `directoryUri` may be a plain path or a file:// URL. content:// URIs are
 * rejected. Rejects with TTS_SAVE_ERROR when the directory cannot be resolved
 * to a path, `filename` is missing, the writer reports failure, or an
 * NSException is raised.
 */
- (void)saveTtsAudioToContentUri:(NSArray<NSNumber *> *)samples
                      sampleRate:(double)sampleRate
                    directoryUri:(NSString *)directoryUri
                        filename:(NSString *)filename
                         resolve:(RCTPromiseResolveBlock)resolve
                          reject:(RCTPromiseRejectBlock)reject
{
    @try {
        if ([directoryUri hasPrefix:@"content://"]) {
            reject(@"TTS_SAVE_ERROR", @"Content URIs are not supported on iOS", nil);
            return;
        }

        // +URLWithString: returns nil for strings it cannot parse, which makes
        // dirPath nil; a nil directoryUri has the same effect.
        NSString *dirPath = [directoryUri hasPrefix:@"file://"]
            ? [[NSURL URLWithString:directoryUri] path]
            : directoryUri;
        if (dirPath == nil) {
            reject(@"TTS_SAVE_ERROR", @"Invalid directory URI", nil);
            return;
        }
        if (filename == nil) {
            reject(@"TTS_SAVE_ERROR", @"filename is required", nil);
            return;
        }

        NSString *filePath = [dirPath stringByAppendingPathComponent:filename];
        // Guard the C string: std::string(NULL) is undefined behavior.
        const char *pathUtf8 = [filePath UTF8String];
        if (pathUtf8 == NULL) {
            reject(@"TTS_SAVE_ERROR", @"Invalid file path", nil);
            return;
        }
        std::string filePathStr = pathUtf8;

        std::vector<float> samplesVec;
        samplesVec.reserve([samples count]);
        for (NSNumber *num in samples) {
            samplesVec.push_back([num floatValue]);
        }

        bool success = sherpaonnx::TtsWrapper::saveToWavFile(
            samplesVec,
            static_cast<int32_t>(sampleRate),
            filePathStr
        );
        if (success) {
            resolve(filePath);
        } else {
            reject(@"TTS_SAVE_ERROR", @"Failed to save audio to file", nil);
        }
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception saving TTS audio: %@", exception.reason];
        reject(@"TTS_SAVE_ERROR", errorMsg, nil);
    }
}
|
|
975
|
+
|
|
976
|
+
/**
 * Copies the file at `fileUri` (plain path or file:// URL) to
 * <Caches>/sherpa_tts/<filename>, replacing any existing file there, and
 * resolves with the destination path.
 *
 * content:// URIs are rejected. Rejects with TTS_SAVE_ERROR when the source
 * does not exist, the cache directory cannot be created, the copy fails, or
 * an NSException is raised.
 */
- (void)copyTtsContentUriToCache:(NSString *)fileUri
                        filename:(NSString *)filename
                         resolve:(RCTPromiseResolveBlock)resolve
                          reject:(RCTPromiseRejectBlock)reject
{
    @try {
        if ([fileUri hasPrefix:@"content://"]) {
            reject(@"TTS_SAVE_ERROR", @"Content URIs are not supported on iOS", nil);
            return;
        }
        NSString *srcPath = [fileUri hasPrefix:@"file://"]
            ? [[NSURL URLWithString:fileUri] path]
            : fileUri;
        NSFileManager *fm = [NSFileManager defaultManager];
        if (![fm fileExistsAtPath:srcPath]) {
            reject(@"TTS_SAVE_ERROR", @"Source file does not exist", nil);
            return;
        }

        NSArray *caches = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
        NSString *cacheDir = caches.firstObject;
        NSString *destPath = [[cacheDir stringByAppendingPathComponent:@"sherpa_tts"] stringByAppendingPathComponent:filename];

        // Cocoa convention: the BOOL return value signals failure; the NSError
        // out-parameter is only meaningful once the call has returned NO.
        NSError *dirError = nil;
        BOOL dirReady = [fm createDirectoryAtPath:[destPath stringByDeletingLastPathComponent]
                      withIntermediateDirectories:YES
                                       attributes:nil
                                            error:&dirError];
        if (!dirReady) {
            NSString *message = dirError ? dirError.localizedDescription : @"Failed to create cache directory";
            reject(@"TTS_SAVE_ERROR", message, dirError);
            return;
        }

        // copyItemAtPath: does not overwrite, so clear any previous copy first.
        if ([fm fileExistsAtPath:destPath]) {
            [fm removeItemAtPath:destPath error:nil];
        }

        NSError *copyError = nil;
        if (![fm copyItemAtPath:srcPath toPath:destPath error:&copyError]) {
            reject(@"TTS_SAVE_ERROR", copyError ? copyError.localizedDescription : @"Copy failed", copyError);
            return;
        }
        resolve(destPath);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception copying file: %@", exception.reason];
        reject(@"TTS_SAVE_ERROR", errorMsg, nil);
    }
}
|
|
1017
|
+
|
|
1018
|
+
/**
 * Writes `text` as a UTF-8 file named `filename` inside `directoryUri` (plain
 * path or file:// URL) and resolves with the resulting file path.
 *
 * `mimeType` is accepted but not used by this implementation. content:// URIs
 * are rejected. Rejects with TTS_SAVE_ERROR when the directory URL is invalid,
 * the write fails, or an NSException is raised.
 */
- (void)saveTtsTextToContentUri:(NSString *)text
                   directoryUri:(NSString *)directoryUri
                       filename:(NSString *)filename
                       mimeType:(NSString *)mimeType
                        resolve:(RCTPromiseResolveBlock)resolve
                         reject:(RCTPromiseRejectBlock)reject
{
    @try {
        if ([directoryUri hasPrefix:@"content://"]) {
            reject(@"TTS_SAVE_ERROR", @"Content URIs are not supported on iOS", nil);
            return;
        }

        NSURL *directoryUrl = nil;
        if ([directoryUri hasPrefix:@"file://"]) {
            directoryUrl = [NSURL URLWithString:directoryUri];
        } else {
            directoryUrl = [NSURL fileURLWithPath:directoryUri];
        }

        if (!directoryUrl) {
            reject(@"TTS_SAVE_ERROR", @"Invalid directory URL", nil);
            return;
        }

        NSString *directoryPath = [directoryUrl path];
        NSString *filePath = [directoryPath stringByAppendingPathComponent:filename];

        // Cocoa convention: the BOOL return value signals failure; the NSError
        // out-parameter is only meaningful once the call has returned NO.
        NSError *writeError = nil;
        BOOL success = [text writeToFile:filePath
                              atomically:YES
                                encoding:NSUTF8StringEncoding
                                   error:&writeError];

        if (!success) {
            reject(@"TTS_SAVE_ERROR", @"Failed to save text to file", writeError);
            return;
        }

        resolve(filePath);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception saving text file: %@", exception.reason];
        reject(@"TTS_SAVE_ERROR", errorMsg, nil);
    }
}
|
|
1063
|
+
|
|
1064
|
+
/**
 * Presents the system share sheet for the file at `fileUri` from the
 * currently presented view controller and resolves with nil once presentation
 * has been requested.
 *
 * `fileUri` may be a plain path or a file:// / content:// URL string.
 * `mimeType` is accepted but not used by this implementation. Rejects with
 * TTS_SHARE_ERROR when the URL cannot be built, no view controller is
 * available, or an NSException is raised while preparing the URL.
 */
- (void)shareTtsAudio:(NSString *)fileUri
             mimeType:(NSString *)mimeType
              resolve:(RCTPromiseResolveBlock)resolve
               reject:(RCTPromiseRejectBlock)reject
{
    @try {
        NSURL *url = nil;
        if ([fileUri hasPrefix:@"file://"] || [fileUri hasPrefix:@"content://"]) {
            url = [NSURL URLWithString:fileUri];
        } else {
            url = [NSURL fileURLWithPath:fileUri];
        }

        if (!url) {
            reject(@"TTS_SHARE_ERROR", @"Invalid file URL", nil);
            return;
        }

        dispatch_async(dispatch_get_main_queue(), ^{
            UIViewController *controller = RCTPresentedViewController();
            if (!controller) {
                reject(@"TTS_SHARE_ERROR", @"No active view controller", nil);
                return;
            }

            UIActivityViewController *activity =
                [[UIActivityViewController alloc] initWithActivityItems:@[url]
                                                  applicationActivities:nil];

            // On iPad the share sheet is shown as a popover, and UIKit raises
            // an exception if it is presented without an anchor. Anchor it to
            // the centre of the presenting view. On devices where no popover
            // controller exists this is a no-op because it messages nil.
            UIPopoverPresentationController *popover = activity.popoverPresentationController;
            if (popover != nil) {
                CGRect bounds = controller.view.bounds;
                popover.sourceView = controller.view;
                popover.sourceRect = CGRectMake(CGRectGetMidX(bounds), CGRectGetMidY(bounds), 0, 0);
            }

            [controller presentViewController:activity animated:YES completion:nil];
            resolve(nil);
        });
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Failed to share audio: %@", exception.reason];
        reject(@"TTS_SHARE_ERROR", errorMsg, nil);
    }
}
|
|
1100
|
+
|
|
1101
|
+
@end
|