react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
@@ -1,483 +1,791 @@
1
- package com.sherpaonnx
2
-
3
- import com.facebook.react.bridge.ReactApplicationContext
4
- import com.facebook.react.bridge.Promise
5
- import com.facebook.react.bridge.ReadableArray
6
- import com.facebook.react.bridge.ReadableMap
7
- import com.facebook.react.bridge.Arguments
8
- import com.facebook.react.module.annotations.ReactModule
9
- import com.facebook.react.modules.core.DeviceEventManagerModule
10
-
11
- @ReactModule(name = SherpaOnnxModule.NAME)
12
- class SherpaOnnxModule(reactContext: ReactApplicationContext) :
13
- NativeSherpaOnnxSpec(reactContext) {
14
-
15
- init {
16
- System.loadLibrary("sherpaonnx")
17
- instance = this
18
- }
19
-
20
- private val coreHelper = SherpaOnnxCoreHelper(reactApplicationContext, NAME)
21
- private val sttHelper = SherpaOnnxSttHelper(
22
- object : SherpaOnnxSttHelper.NativeSttBridge {
23
- override fun nativeSttInitialize(
24
- modelDir: String,
25
- preferInt8: Boolean,
26
- hasPreferInt8: Boolean,
27
- modelType: String
28
- ): HashMap<String, Any>? {
29
- return Companion.nativeSttInitialize(modelDir, preferInt8, hasPreferInt8, modelType)
30
- }
31
-
32
- override fun nativeSttTranscribe(filePath: String): String {
33
- return Companion.nativeSttTranscribe(filePath)
34
- }
35
-
36
- override fun nativeSttRelease() {
37
- Companion.nativeSttRelease()
38
- }
39
- },
40
- NAME
41
- )
42
- private val ttsHelper = SherpaOnnxTtsHelper(
43
- reactApplicationContext,
44
- object : SherpaOnnxTtsHelper.NativeTtsBridge {
45
- override fun nativeTtsInitialize(
46
- modelDir: String,
47
- modelType: String,
48
- numThreads: Int,
49
- debug: Boolean,
50
- noiseScale: Double,
51
- noiseScaleW: Double,
52
- lengthScale: Double
53
- ): HashMap<String, Any>? {
54
- return Companion.nativeTtsInitialize(
55
- modelDir,
56
- modelType,
57
- numThreads,
58
- debug,
59
- noiseScale,
60
- noiseScaleW,
61
- lengthScale
62
- )
63
- }
64
-
65
- override fun nativeTtsGenerate(text: String, sid: Int, speed: Float): HashMap<String, Any>? {
66
- return Companion.nativeTtsGenerate(text, sid, speed)
67
- }
68
-
69
- override fun nativeTtsGenerateWithTimestamps(
70
- text: String,
71
- sid: Int,
72
- speed: Float
73
- ): HashMap<String, Any>? {
74
- return Companion.nativeTtsGenerateWithTimestamps(text, sid, speed)
75
- }
76
-
77
- override fun nativeTtsGenerateStream(text: String, sid: Int, speed: Float): Boolean {
78
- return Companion.nativeTtsGenerateStream(text, sid, speed)
79
- }
80
-
81
- override fun nativeTtsCancelStream() {
82
- Companion.nativeTtsCancelStream()
83
- }
84
-
85
- override fun nativeTtsGetSampleRate(): Int {
86
- return Companion.nativeTtsGetSampleRate()
87
- }
88
-
89
- override fun nativeTtsGetNumSpeakers(): Int {
90
- return Companion.nativeTtsGetNumSpeakers()
91
- }
92
-
93
- override fun nativeTtsRelease() {
94
- Companion.nativeTtsRelease()
95
- }
96
-
97
- override fun nativeTtsSaveToWavFile(samples: FloatArray, sampleRate: Int, filePath: String): Boolean {
98
- return Companion.nativeTtsSaveToWavFile(samples, sampleRate, filePath)
99
- }
100
- },
101
- ::emitTtsStreamChunk,
102
- ::emitTtsStreamError,
103
- ::emitTtsStreamEnd
104
- )
105
-
106
- override fun getName(): String {
107
- return NAME
108
- }
109
-
110
- /**
111
- * Test method to verify sherpa-onnx native library is loaded.
112
- * This is a minimal "Hello World" test for Phase 1.
113
- */
114
- override fun testSherpaInit(promise: Promise) {
115
- try {
116
- val result = nativeTestSherpaInit()
117
- promise.resolve(result)
118
- } catch (e: Exception) {
119
- promise.reject("INIT_ERROR", "Failed to test sherpa-onnx initialization", e)
120
- }
121
- }
122
-
123
- /**
124
- * Resolve model path based on configuration.
125
- * Handles asset paths, file system paths, and auto-detection.
126
- */
127
- override fun resolveModelPath(config: ReadableMap, promise: Promise) {
128
- coreHelper.resolveModelPath(config, promise)
129
- }
130
-
131
- /**
132
- * Resolve asset path - copy from assets to internal storage if needed
133
- * Preserves the directory structure from assets (e.g., test_wavs/ stays as test_wavs/)
134
- */
135
-
136
- /**
137
- * Initialize sherpa-onnx with model directory.
138
- */
139
- override fun initializeSherpaOnnx(
140
- modelDir: String,
141
- preferInt8: Boolean?,
142
- modelType: String?,
143
- promise: Promise
144
- ) {
145
- sttHelper.initializeSherpaOnnx(modelDir, preferInt8, modelType, promise)
146
- }
147
-
148
- /**
149
- * Release sherpa-onnx resources.
150
- */
151
- override fun unloadSherpaOnnx(promise: Promise) {
152
- sttHelper.unloadSherpaOnnx(promise)
153
- }
154
-
155
- // ==================== STT Methods ====================
156
-
157
- /**
158
- * Transcribe an audio file.
159
- */
160
- override fun transcribeFile(filePath: String, promise: Promise) {
161
- sttHelper.transcribeFile(filePath, promise)
162
- }
163
-
164
- // ==================== TTS Methods ====================
165
-
166
- /**
167
- * Initialize TTS with model directory.
168
- */
169
- override fun initializeTts(
170
- modelDir: String,
171
- modelType: String,
172
- numThreads: Double,
173
- debug: Boolean,
174
- noiseScale: Double?,
175
- noiseScaleW: Double?,
176
- lengthScale: Double?,
177
- promise: Promise
178
- ) {
179
- ttsHelper.initializeTts(
180
- modelDir,
181
- modelType,
182
- numThreads,
183
- debug,
184
- noiseScale,
185
- noiseScaleW,
186
- lengthScale,
187
- promise
188
- )
189
- }
190
-
191
- /**
192
- * Update TTS params by re-initializing with stored config.
193
- */
194
- override fun updateTtsParams(
195
- noiseScale: Double?,
196
- noiseScaleW: Double?,
197
- lengthScale: Double?,
198
- promise: Promise
199
- ) {
200
- ttsHelper.updateTtsParams(noiseScale, noiseScaleW, lengthScale, promise)
201
- }
202
-
203
- /**
204
- * Generate speech from text.
205
- */
206
- override fun generateTts(
207
- text: String,
208
- sid: Double,
209
- speed: Double,
210
- promise: Promise
211
- ) {
212
- ttsHelper.generateTts(text, sid, speed, promise)
213
- }
214
-
215
- /**
216
- * Generate speech with subtitle/timestamp metadata.
217
- */
218
- override fun generateTtsWithTimestamps(
219
- text: String,
220
- sid: Double,
221
- speed: Double,
222
- promise: Promise
223
- ) {
224
- ttsHelper.generateTtsWithTimestamps(text, sid, speed, promise)
225
- }
226
-
227
- /**
228
- * Generate speech in streaming mode (emits chunk events).
229
- */
230
- override fun generateTtsStream(
231
- text: String,
232
- sid: Double,
233
- speed: Double,
234
- promise: Promise
235
- ) {
236
- ttsHelper.generateTtsStream(text, sid, speed, promise)
237
- }
238
-
239
- /**
240
- * Cancel ongoing streaming TTS.
241
- */
242
- override fun cancelTtsStream(promise: Promise) {
243
- ttsHelper.cancelTtsStream(promise)
244
- }
245
-
246
- /**
247
- * Start PCM playback for streaming TTS.
248
- */
249
- override fun startTtsPcmPlayer(sampleRate: Double, channels: Double, promise: Promise) {
250
- ttsHelper.startTtsPcmPlayer(sampleRate, channels, promise)
251
- }
252
-
253
- /**
254
- * Write PCM samples to the streaming TTS player.
255
- */
256
- override fun writeTtsPcmChunk(samples: ReadableArray, promise: Promise) {
257
- ttsHelper.writeTtsPcmChunk(samples, promise)
258
- }
259
-
260
- /**
261
- * Stop PCM playback for streaming TTS.
262
- */
263
- override fun stopTtsPcmPlayer(promise: Promise) {
264
- ttsHelper.stopTtsPcmPlayer(promise)
265
- }
266
-
267
- private fun emitTtsStreamChunk(
268
- samples: FloatArray,
269
- sampleRate: Int,
270
- progress: Float,
271
- isFinal: Boolean
272
- ) {
273
- val eventEmitter = reactApplicationContext
274
- .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
275
- val samplesArray = Arguments.createArray()
276
- for (sample in samples) {
277
- samplesArray.pushDouble(sample.toDouble())
278
- }
279
- val payload = Arguments.createMap()
280
- payload.putArray("samples", samplesArray)
281
- payload.putInt("sampleRate", sampleRate)
282
- payload.putDouble("progress", progress.toDouble())
283
- payload.putBoolean("isFinal", isFinal)
284
- eventEmitter.emit("ttsStreamChunk", payload)
285
- }
286
-
287
- private fun emitTtsStreamError(message: String) {
288
- val eventEmitter = reactApplicationContext
289
- .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
290
- val payload = Arguments.createMap()
291
- payload.putString("message", message)
292
- eventEmitter.emit("ttsStreamError", payload)
293
- }
294
-
295
- private fun emitTtsStreamEnd(cancelled: Boolean) {
296
- val eventEmitter = reactApplicationContext
297
- .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
298
- val payload = Arguments.createMap()
299
- payload.putBoolean("cancelled", cancelled)
300
- eventEmitter.emit("ttsStreamEnd", payload)
301
- }
302
-
303
- /**
304
- * Get TTS sample rate.
305
- */
306
- override fun getTtsSampleRate(promise: Promise) {
307
- ttsHelper.getTtsSampleRate(promise)
308
- }
309
-
310
- /**
311
- * Get number of speakers.
312
- */
313
- override fun getTtsNumSpeakers(promise: Promise) {
314
- ttsHelper.getTtsNumSpeakers(promise)
315
- }
316
-
317
- /**
318
- * Release TTS resources.
319
- */
320
- override fun unloadTts(promise: Promise) {
321
- ttsHelper.unloadTts(promise)
322
- }
323
-
324
- /**
325
- * Save TTS audio samples to a WAV file.
326
- */
327
- override fun saveTtsAudioToFile(
328
- samples: ReadableArray,
329
- sampleRate: Double,
330
- filePath: String,
331
- promise: Promise
332
- ) {
333
- ttsHelper.saveTtsAudioToFile(samples, sampleRate, filePath, promise)
334
- }
335
-
336
- /**
337
- * Save TTS audio samples to a WAV file via Android SAF content URI.
338
- */
339
- override fun saveTtsAudioToContentUri(
340
- samples: ReadableArray,
341
- sampleRate: Double,
342
- directoryUri: String,
343
- filename: String,
344
- promise: Promise
345
- ) {
346
- ttsHelper.saveTtsAudioToContentUri(samples, sampleRate, directoryUri, filename, promise)
347
- }
348
-
349
- /**
350
- * Save text content to a file via Android SAF content URI.
351
- */
352
- override fun saveTtsTextToContentUri(
353
- text: String,
354
- directoryUri: String,
355
- filename: String,
356
- mimeType: String,
357
- promise: Promise
358
- ) {
359
- ttsHelper.saveTtsTextToContentUri(text, directoryUri, filename, mimeType, promise)
360
- }
361
-
362
- /**
363
- * Copy a SAF content URI to a cache file for local playback.
364
- */
365
- override fun copyTtsContentUriToCache(
366
- fileUri: String,
367
- filename: String,
368
- promise: Promise
369
- ) {
370
- ttsHelper.copyTtsContentUriToCache(fileUri, filename, promise)
371
- }
372
-
373
- /**
374
- * Share a TTS audio file (file path or content URI).
375
- */
376
- override fun shareTtsAudio(fileUri: String, mimeType: String, promise: Promise) {
377
- ttsHelper.shareTtsAudio(fileUri, mimeType, promise)
378
- }
379
-
380
- /**
381
- * List all model folders in the assets/models directory.
382
- * Scans the platform-specific model directory and returns folder names.
383
- */
384
- override fun listAssetModels(promise: Promise) {
385
- coreHelper.listAssetModels(promise)
386
- }
387
- companion object {
388
- const val NAME = "SherpaOnnx"
389
-
390
- @Volatile
391
- private var instance: SherpaOnnxModule? = null
392
-
393
- @JvmStatic
394
- fun onTtsStreamChunk(
395
- samples: FloatArray,
396
- sampleRate: Int,
397
- progress: Float,
398
- isFinal: Boolean
399
- ) {
400
- instance?.emitTtsStreamChunk(samples, sampleRate, progress, isFinal)
401
- }
402
-
403
- @JvmStatic
404
- fun onTtsStreamError(message: String) {
405
- instance?.emitTtsStreamError(message)
406
- }
407
-
408
- @JvmStatic
409
- fun onTtsStreamEnd(cancelled: Boolean) {
410
- instance?.emitTtsStreamEnd(cancelled)
411
- }
412
-
413
- // Native JNI methods
414
- @JvmStatic
415
- private external fun nativeTestSherpaInit(): String
416
-
417
- @JvmStatic
418
- private external fun nativeSttInitialize(
419
- modelDir: String,
420
- preferInt8: Boolean,
421
- hasPreferInt8: Boolean,
422
- modelType: String
423
- ): HashMap<String, Any>?
424
-
425
- @JvmStatic
426
- private external fun nativeSttTranscribe(filePath: String): String
427
-
428
- @JvmStatic
429
- private external fun nativeSttRelease()
430
-
431
- // TTS Native JNI methods
432
- @JvmStatic
433
- private external fun nativeTtsInitialize(
434
- modelDir: String,
435
- modelType: String,
436
- numThreads: Int,
437
- debug: Boolean,
438
- noiseScale: Double,
439
- noiseScaleW: Double,
440
- lengthScale: Double
441
- ): java.util.HashMap<String, Any>?
442
-
443
- @JvmStatic
444
- private external fun nativeTtsGenerate(
445
- text: String,
446
- sid: Int,
447
- speed: Float
448
- ): java.util.HashMap<String, Any>?
449
-
450
- @JvmStatic
451
- private external fun nativeTtsGenerateWithTimestamps(
452
- text: String,
453
- sid: Int,
454
- speed: Float
455
- ): java.util.HashMap<String, Any>?
456
-
457
- @JvmStatic
458
- private external fun nativeTtsGenerateStream(
459
- text: String,
460
- sid: Int,
461
- speed: Float
462
- ): Boolean
463
-
464
- @JvmStatic
465
- private external fun nativeTtsCancelStream()
466
-
467
- @JvmStatic
468
- private external fun nativeTtsGetSampleRate(): Int
469
-
470
- @JvmStatic
471
- private external fun nativeTtsGetNumSpeakers(): Int
472
-
473
- @JvmStatic
474
- private external fun nativeTtsRelease()
475
-
476
- @JvmStatic
477
- private external fun nativeTtsSaveToWavFile(
478
- samples: FloatArray,
479
- sampleRate: Int,
480
- filePath: String
481
- ): Boolean
482
- }
483
- }
1
+ package com.sherpaonnx
2
+
3
+ import com.facebook.react.bridge.ReactApplicationContext
4
+ import com.facebook.react.bridge.Promise
5
+ import com.facebook.react.bridge.ReadableArray
6
+ import com.facebook.react.bridge.ReadableMap
7
+ import com.facebook.react.bridge.Arguments
8
+ import com.facebook.react.module.annotations.ReactModule
9
+ import com.facebook.react.modules.core.DeviceEventManagerModule
10
+ import com.facebook.fbreact.specs.NativeSherpaOnnxSpec
11
+
12
+ @ReactModule(name = SherpaOnnxModule.NAME)
13
+ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
14
+ NativeSherpaOnnxSpec(reactContext) {
15
+
16
init {
    // Native libraries are loaded in dependency order:
    //   1. onnxruntime       - optional here; loaded first so libsherpa-onnx-jni.so can resolve
    //                          OrtGetApiBase. When the app adds com.xdcobra.sherpa:onnxruntime
    //                          and uses pickFirst, this loads the AAR's version.
    //   2. sherpa-onnx-jni   - mandatory (from AAR; required for the Kotlin API:
    //                          OfflineRecognizer, OfflineTts, etc.).
    //   3. sherpa-onnx-c-api - optional (from AAR; only needed at runtime for Zipvoice TTS).
    //   4. sherpaonnx        - this package's own library (Archive, FFmpeg, model detection,
    //                          Zipvoice JNI wrapper).

    // Loads [library]; a link failure is logged as a warning under [tag] and otherwise ignored.
    fun loadOptional(library: String, tag: String, describe: (UnsatisfiedLinkError) -> String) {
        try {
            System.loadLibrary(library)
        } catch (linkError: UnsatisfiedLinkError) {
            android.util.Log.w(tag, describe(linkError))
        }
    }

    loadOptional("onnxruntime", NAME) { linkError ->
        "onnxruntime not loaded (will use SDK copy if present): ${linkError.message}"
    }

    // Mandatory: surface the failure as a RuntimeException that keeps the original cause.
    try {
        System.loadLibrary("sherpa-onnx-jni")
    } catch (linkError: UnsatisfiedLinkError) {
        throw RuntimeException(
            "Failed to load sherpa-onnx-jni (from sherpa-onnx AAR): ${linkError.message}",
            linkError
        )
    }

    // Non-fatal: if the .so is missing, Zipvoice init is expected to fail with a clear error later.
    loadOptional("sherpa-onnx-c-api", "SherpaOnnx") { linkError ->
        "sherpa-onnx-c-api not available — Zipvoice TTS will not work: ${linkError.message}"
    }

    // Not guarded: a link failure for this library propagates to the caller unchanged.
    System.loadLibrary("sherpaonnx")

    // NOTE(review): `instance` is assigned before the property initializers declared after
    // this block have run (Kotlin initializes in textual order).
    instance = this
}
41
+
42
/** Asset helper, constructed with the React application context and this module's [NAME]. */
private val assetHelper = SherpaOnnxAssetHelper(reactApplicationContext, NAME)

/**
 * STT helper. The lambda forwards its five arguments, in order, to
 * `Companion.nativeDetectSttModel`.
 */
private val sttHelper = SherpaOnnxSttHelper(
    reactApplicationContext,
    { dir, int8Preferred, int8PreferenceGiven, type, debugEnabled ->
        Companion.nativeDetectSttModel(dir, int8Preferred, int8PreferenceGiven, type, debugEnabled)
    },
    NAME
)

/**
 * TTS helper. The first lambda forwards to `Companion.nativeDetectTtsModel`; the remaining
 * three forward stream chunk / error / end callbacks to this module's `emitTtsStream*`
 * methods, passing the instance id through unchanged.
 */
private val ttsHelper = SherpaOnnxTtsHelper(
    reactApplicationContext,
    { dir, type -> Companion.nativeDetectTtsModel(dir, type) },
    { id, pcm, rate, fraction, last -> emitTtsStreamChunk(id, pcm, rate, fraction, last) },
    { id, errorText -> emitTtsStreamError(id, errorText) },
    { id, wasCancelled -> emitTtsStreamEnd(id, wasCancelled) }
)

/** Archive helper; takes no constructor arguments. */
private val archiveHelper = SherpaOnnxArchiveHelper()
58
+
59
/** Returns the module name constant, [NAME]. */
override fun getName(): String = NAME
62
+
63
/**
 * Teardown hook: runs the superclass implementation first, then shuts down the TTS helper.
 */
override fun onCatalystInstanceDestroy() {
    super.onCatalystInstanceDestroy()
    // Only the TTS helper is shut down here; no other helper is touched by this method.
    ttsHelper.shutdown()
}
67
+
68
/**
 * Test method to verify that the sherpa-onnx native library is loaded
 * (a minimal "Hello World" check).
 *
 * Resolves [promise] with the value returned by `nativeTestSherpaInit()`. On failure the
 * error is logged and [promise] is rejected with code `INIT_ERROR`.
 *
 * Link failures are handled as well as ordinary exceptions: [UnsatisfiedLinkError] is an
 * [Error], not an [Exception], so catching only `Exception` would let it escape and leave
 * the promise unsettled — exactly the situation this method exists to report.
 * (Presumably `nativeTestSherpaInit` is a JNI `external` function; its declaration is
 * outside this block.)
 */
override fun testSherpaInit(promise: Promise) {
    // Shared failure path so both catch clauses log and reject identically.
    fun fail(cause: Throwable) {
        android.util.Log.e(NAME, "INIT_ERROR: Failed to test sherpa-onnx initialization", cause)
        promise.reject("INIT_ERROR", "Failed to test sherpa-onnx initialization", cause)
    }

    try {
        val result = nativeTestSherpaInit()
        promise.resolve(result)
    } catch (e: Exception) {
        fail(e)
    } catch (e: UnsatisfiedLinkError) {
        fail(e)
    }
}
81
+
82
/**
 * Asset path of the embedded QNN test model (ORT testdata: qnn_multi_ctx_embed).
 */
private val qnnTestModelAsset: String = "testModels/qnn_multi_ctx_embed.onnx"
84
+
85
/**
 * QNN support (AccelerationSupport). Resolves [promise] with a map of three booleans:
 * - `providerCompiled`: an available ONNX Runtime provider has "QNN" in its name
 *   (case-insensitive).
 * - `hasAccelerator`: result of the `nativeCanInitQnnHtp()` probe; `false` if the probe throws.
 * - `canInit`: the provider is compiled, model bytes are available, and `canReallyUseQnn`
 *   succeeds on them.
 *
 * @param modelBase64 optional Base64-encoded model for the `canInit` test. When null or empty,
 *   the embedded test model at `qnnTestModelAsset` is loaded from assets instead. Input that
 *   cannot be decoded results in `canInit = false`.
 * @param promise rejected with `QNN_SUPPORT_ERROR` if an [Exception] escapes the checks.
 */
override fun getQnnSupport(modelBase64: String?, promise: Promise) {
    try {
        val providers = ai.onnxruntime.OrtEnvironment.getAvailableProviders()
        val providerCompiled = providers.any { it.name.contains("QNN", ignoreCase = true) }

        // Best-effort probe. Catch Throwable rather than Exception: a native call that cannot
        // be linked throws UnsatisfiedLinkError (an Error), which should also mean "false"
        // instead of escaping this method. This matches the Throwable catch in canReallyUseQnn.
        val hasAccelerator = try {
            nativeCanInitQnnHtp()
        } catch (_: Throwable) {
            false
        }

        // Only used in the log line below.
        val modelSource = if (!modelBase64.isNullOrEmpty()) "user-provided modelBase64" else "embedded test model"
        val modelBytes = when {
            !modelBase64.isNullOrEmpty() -> try {
                android.util.Base64.decode(modelBase64, android.util.Base64.DEFAULT)
            } catch (_: Exception) {
                // Malformed Base64: treat as "no model", which makes canInit false.
                null
            }
            else -> loadTestModelFromAssets(qnnTestModelAsset)
        }
        val canInit = providerCompiled && modelBytes != null && canReallyUseQnn(modelBytes)

        val map = Arguments.createMap()
        map.putBoolean("providerCompiled", providerCompiled)
        map.putBoolean("hasAccelerator", hasAccelerator)
        map.putBoolean("canInit", canInit)
        android.util.Log.i(NAME, "QNN support: providerCompiled=$providerCompiled hasAccelerator=$hasAccelerator canInit=$canInit (canInit test: $modelSource)")
        promise.resolve(map)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "getQnnSupport failed", e)
        promise.reject("QNN_SUPPORT_ERROR", "Failed to get QNN support: ${e.message}", e)
    }
}
113
+
114
/**
 * Session test for QNN: returns `true` only when [modelBytes] is non-empty and an ONNX Runtime
 * session can be created from it with QNN added to the session options (empty option map).
 * Any [Throwable] raised along the way yields `false`. The session options and the session are
 * closed before returning.
 */
private fun canReallyUseQnn(modelBytes: ByteArray): Boolean =
    modelBytes.isNotEmpty() && runCatching {
        ai.onnxruntime.OrtSession.SessionOptions().use { sessionOptions ->
            sessionOptions.addQnn(emptyMap())
            // The session is only a probe: create it and release it straight away.
            ai.onnxruntime.OrtEnvironment.getEnvironment()
                .createSession(modelBytes, sessionOptions)
                .close()
        }
    }.isSuccess
126
+
127
/**
 * Resolves [promise] with an array holding the `name` of every provider returned by
 * `OrtEnvironment.getAvailableProviders()`. If an [Exception] is thrown it is logged and
 * [promise] is rejected with code `PROVIDERS_ERROR`.
 */
override fun getAvailableProviders(promise: Promise) {
    try {
        val providerNames = ai.onnxruntime.OrtEnvironment.getAvailableProviders().map { it.name }
        val payload = Arguments.createArray()
        providerNames.forEach { providerName -> payload.pushString(providerName) }
        promise.resolve(payload)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "getAvailableProviders failed", e)
        promise.reject("PROVIDERS_ERROR", "Failed to get available providers: ${e.message}", e)
    }
}
140
+
141
/**
 * Asset path of the embedded NNAPI test model (ORT testdata: nnapi_internal_uint8_support).
 */
private val nnapiTestModelAsset: String = "testModels/nnapi_internal_uint8_support.onnx"
143
+
144
/**
 * NNAPI support (AccelerationSupport). Resolves [promise] with a map of three booleans:
 * - `providerCompiled`: an available ONNX Runtime provider has "NNAPI" in its name
 *   (case-insensitive).
 * - `hasAccelerator`: result of `nativeHasNnapiAccelerator(SDK_INT)`; `false` if the probe throws.
 * - `canInit`: the provider is compiled, model bytes are available, and `canReallyUseNnapi`
 *   succeeds on them.
 *
 * @param modelBase64 optional Base64-encoded model for the `canInit` test. When null or empty,
 *   the embedded test model at `nnapiTestModelAsset` is loaded from assets instead. Input that
 *   cannot be decoded results in `canInit = false`.
 * @param promise rejected with `NNAPI_SUPPORT_ERROR` if an [Exception] escapes the checks.
 */
override fun getNnapiSupport(modelBase64: String?, promise: Promise) {
    try {
        val providers = ai.onnxruntime.OrtEnvironment.getAvailableProviders()
        val providerCompiled = providers.any { it.name.contains("NNAPI", ignoreCase = true) }

        // Best-effort probe. Catch Throwable rather than Exception: a native call that cannot
        // be linked throws UnsatisfiedLinkError (an Error), which should also mean "false"
        // instead of escaping this method. This matches the Throwable catch in canReallyUseNnapi.
        val hasAccelerator = try {
            nativeHasNnapiAccelerator(android.os.Build.VERSION.SDK_INT)
        } catch (_: Throwable) {
            false
        }

        // Only used in the log line below.
        val modelSource = if (!modelBase64.isNullOrEmpty()) "user-provided modelBase64" else "embedded test model"
        val modelBytes = when {
            !modelBase64.isNullOrEmpty() -> try {
                android.util.Base64.decode(modelBase64, android.util.Base64.DEFAULT)
            } catch (_: Exception) {
                // Malformed Base64: treat as "no model", which makes canInit false.
                null
            }
            else -> loadTestModelFromAssets(nnapiTestModelAsset)
        }
        val canInit = providerCompiled && modelBytes != null && canReallyUseNnapi(modelBytes)

        val map = Arguments.createMap()
        map.putBoolean("providerCompiled", providerCompiled)
        map.putBoolean("hasAccelerator", hasAccelerator)
        map.putBoolean("canInit", canInit)
        android.util.Log.i(NAME, "NNAPI support: providerCompiled=$providerCompiled hasAccelerator=$hasAccelerator canInit=$canInit (canInit test: $modelSource)")
        promise.resolve(map)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "getNnapiSupport failed", e)
        promise.reject("NNAPI_SUPPORT_ERROR", "Failed to get NNAPI support: ${e.message}", e)
    }
}
172
+
173
/**
 * Probes whether ONNX Runtime can create a session with the NNAPI execution provider enabled.
 *
 * @param modelBytes serialized ONNX model used for the probe; an empty array yields `false`.
 * @return `true` if a session was created (it is closed immediately), `false` on any failure.
 */
private fun canReallyUseNnapi(modelBytes: ByteArray): Boolean {
    if (modelBytes.isEmpty()) return false
    return try {
        ai.onnxruntime.OrtSession.SessionOptions().use { opts ->
            opts.addNnapi()
            ai.onnxruntime.OrtEnvironment.getEnvironment().createSession(modelBytes, opts).use { }
        }
        true
    } catch (t: Throwable) {
        // Best-effort probe: any failure still means "not usable", but the cause is logged
        // instead of being discarded so a false canInit can be diagnosed from logcat.
        android.util.Log.d(NAME, "NNAPI session probe failed: ${t.message}", t)
        false
    }
}
185
+
186
/** Location inside the module assets of the bundled XNNPACK probe model (ORT testdata: add_mul_add). */
private val xnnpackTestModelAsset: String = "testModels/add_mul_add.onnx"

/**
 * Reports XNNPACK acceleration support as a map of three booleans: `providerCompiled`,
 * `hasAccelerator` (mirrors `providerCompiled`, since XNNPACK is CPU-optimized) and `canInit`
 * (result of a test session).
 *
 * When [modelBase64] is null or empty the embedded asset model is used for the `canInit` test.
 * Resolves [promise] with the map, or rejects with `XNNPACK_SUPPORT_ERROR`.
 */
override fun getXnnpackSupport(modelBase64: String?, promise: Promise) {
    try {
        val providerCompiled = ai.onnxruntime.OrtEnvironment.getAvailableProviders()
            .any { it.name.contains("XNNPACK", ignoreCase = true) }
        val modelSource = if (modelBase64.isNullOrEmpty()) "embedded test model" else "user-provided modelBase64"
        val modelBytes: ByteArray? = if (!modelBase64.isNullOrEmpty()) {
            try {
                android.util.Base64.decode(modelBase64, android.util.Base64.DEFAULT)
            } catch (_: Exception) {
                null
            }
        } else {
            loadTestModelFromAssets(xnnpackTestModelAsset)
        }
        val canInit = providerCompiled && modelBytes != null && canReallyUseXnnpack(modelBytes)
        // XNNPACK: CPU-optimized, so "accelerator" availability equals provider availability.
        val hasAccelerator = providerCompiled
        val support = Arguments.createMap().apply {
            putBoolean("providerCompiled", providerCompiled)
            putBoolean("hasAccelerator", hasAccelerator)
            putBoolean("canInit", canInit)
        }
        android.util.Log.i(NAME, "XNNPACK support: providerCompiled=$providerCompiled hasAccelerator=$hasAccelerator canInit=$canInit (canInit test: $modelSource)")
        promise.resolve(support)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "getXnnpackSupport failed", e)
        promise.reject("XNNPACK_SUPPORT_ERROR", "Failed to get XNNPACK support: ${e.message}", e)
    }
}
217
+
218
/**
 * Reads an embedded ONNX test model from the module assets.
 *
 * @param assetPath path of the asset to open.
 * @return the asset's bytes, or `null` (after logging a warning) if it cannot be read.
 */
private fun loadTestModelFromAssets(assetPath: String): ByteArray? {
    try {
        val stream = reactApplicationContext.assets.open(assetPath)
        return stream.use { input -> input.readBytes() }
    } catch (e: Exception) {
        android.util.Log.w(NAME, "Could not load test model from assets: $assetPath", e)
        return null
    }
}
229
+
230
/**
 * Probes whether ONNX Runtime can create a session with the XNNPACK execution provider enabled.
 *
 * @param modelBytes serialized ONNX model used for the probe; an empty array yields `false`.
 * @return `true` if a session was created (it is closed immediately), `false` on any failure.
 */
private fun canReallyUseXnnpack(modelBytes: ByteArray): Boolean {
    if (modelBytes.isEmpty()) return false
    return try {
        ai.onnxruntime.OrtSession.SessionOptions().use { opts ->
            opts.addXnnpack(emptyMap())
            ai.onnxruntime.OrtEnvironment.getEnvironment().createSession(modelBytes, opts).use { }
        }
        true
    } catch (t: Throwable) {
        // Best-effort probe: any failure still means "not usable", but the cause is logged
        // instead of being discarded so a false canInit can be diagnosed from logcat.
        android.util.Log.d(NAME, "XNNPACK session probe failed: ${t.message}", t)
        false
    }
}
242
+
243
/**
 * Core ML support query. On Android every flag (`providerCompiled`, `hasAccelerator`,
 * `canInit`) is reported as `false` because Core ML is iOS-only; [modelBase64] is not used.
 */
override fun getCoreMlSupport(modelBase64: String?, promise: Promise) {
    try {
        val unsupported = Arguments.createMap().apply {
            putBoolean("providerCompiled", false)
            putBoolean("hasAccelerator", false)
            putBoolean("canInit", false)
        }
        promise.resolve(unsupported)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "getCoreMlSupport failed", e)
        promise.reject("COREML_SUPPORT_ERROR", "Failed to get Core ML support: ${e.message}", e)
    }
}
258
+
259
/**
 * Resolves a model path from [config] (asset paths, file system paths, auto-detection).
 * The work, including settling [promise], is delegated to `assetHelper`.
 */
override fun resolveModelPath(config: ReadableMap, promise: Promise) {
    assetHelper.resolveModelPath(
        config,
        promise
    )
}
266
+
267
/**
 * Extracts the archive at [sourcePath] into [targetPath] via `archiveHelper`, forwarding every
 * progress callback to JS through [emitExtractProgress].
 */
override fun extractTarBz2(sourcePath: String, targetPath: String, force: Boolean, promise: Promise) {
    archiveHelper.extractTarBz2(sourcePath, targetPath, force, promise) { bytesDone, bytesTotal, pct ->
        emitExtractProgress(sourcePath, bytesDone, bytesTotal, pct)
    }
}
272
+
273
/** Asks `archiveHelper` to cancel extraction, then resolves [promise] with `null`. */
override fun cancelExtractTarBz2(promise: Promise) {
    archiveHelper.cancelExtractTarBz2()
    promise.resolve(null)
}
277
+
278
/** Delegates SHA-256 computation for [filePath] to `archiveHelper`, which settles [promise]. */
override fun computeFileSha256(filePath: String, promise: Promise) {
    archiveHelper.computeFileSha256(
        filePath,
        promise
    )
}
281
+
282
/**
 * Emits an `extractTarBz2Progress` device event carrying `sourcePath`, `bytes`, `totalBytes`
 * and `percent`. The two byte counts are sent as doubles.
 */
private fun emitExtractProgress(sourcePath: String, bytes: Long, totalBytes: Long, percent: Double) {
    val emitter = reactApplicationContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
    val event = Arguments.createMap().apply {
        putString("sourcePath", sourcePath)
        putDouble("bytes", bytes.toDouble())
        putDouble("totalBytes", totalBytes.toDouble())
        putDouble("percent", percent)
    }
    emitter.emit("extractTarBz2Progress", event)
}
292
+
293
+ /**
294
+ * Resolve asset path - copy from assets to internal storage if needed
295
+ * Preserves the directory structure from assets (e.g., test_wavs/ stays as test_wavs/)
296
+ */
297
+
298
/**
 * Detects the STT model type and structure in [modelDir] without initializing a recognizer.
 *
 * Resolves with a map containing `success`, `detectedModels` (entries with `type` and
 * `modelDir`), optionally `modelType`, and, when detection was unsuccessful, a non-blank `error`.
 * Rejects with `DETECT_ERROR` if the native call returns null or throws.
 *
 * @param preferInt8 optional preference; whether it was supplied at all is passed to native code too.
 * @param modelType explicit model type, or null for `"auto"`.
 */
override fun detectSttModel(
    modelDir: String,
    preferInt8: Boolean?,
    modelType: String?,
    promise: Promise
) {
    try {
        val result = Companion.nativeDetectSttModel(
            modelDir = modelDir,
            preferInt8 = preferInt8 ?: false,
            hasPreferInt8 = preferInt8 != null,
            modelType = modelType ?: "auto",
            debug = false
        )
        if (result == null) {
            android.util.Log.e(NAME, "DETECT_ERROR: STT model detection returned null")
            promise.reject("DETECT_ERROR", "STT model detection returned null")
            return
        }
        val success = result["success"] as? Boolean ?: false
        val detectedModels = result["detectedModels"] as? ArrayList<*>
            ?: arrayListOf<HashMap<String, String>>()
        val modelTypeStr = result["modelType"] as? String

        val resultMap = Arguments.createMap()
        resultMap.putBoolean("success", success)

        // Entries that are not HashMaps are skipped; missing fields become empty strings.
        val modelsArray = Arguments.createArray()
        detectedModels.filterIsInstance<HashMap<*, *>>().forEach { modelMap ->
            val entry = Arguments.createMap()
            entry.putString("type", modelMap["type"] as? String ?: "")
            entry.putString("modelDir", modelMap["modelDir"] as? String ?: "")
            modelsArray.pushMap(entry)
        }
        resultMap.putArray("detectedModels", modelsArray)

        modelTypeStr?.let { resultMap.putString("modelType", it) }
        if (!success) {
            (result["error"] as? String)
                ?.takeUnless { it.isBlank() }
                ?.let { resultMap.putString("error", it) }
        }
        promise.resolve(resultMap)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "DETECT_ERROR: STT model detection failed: ${e.message}", e)
        promise.reject("DETECT_ERROR", "STT model detection failed: ${e.message}", e)
    }
}
353
+
354
/**
 * Initializes Speech-to-Text (STT) for [instanceId] from the model in [modelDir].
 * All arguments are forwarded unchanged to `sttHelper.initializeStt`, which settles [promise].
 */
override fun initializeStt(
    instanceId: String,
    modelDir: String,
    preferInt8: Boolean?,
    modelType: String?,
    debug: Boolean?,
    hotwordsFile: String?,
    hotwordsScore: Double?,
    numThreads: Double?,
    provider: String?,
    ruleFsts: String?,
    ruleFars: String?,
    dither: Double?,
    modelOptions: ReadableMap?,
    modelingUnit: String?,
    bpeVocab: String?,
    promise: Promise
) {
    sttHelper.initializeStt(
        instanceId,
        modelDir,
        preferInt8,
        modelType,
        debug,
        hotwordsFile,
        hotwordsScore,
        numThreads,
        provider,
        ruleFsts,
        ruleFars,
        dither,
        modelOptions,
        modelingUnit,
        bpeVocab,
        promise
    )
}
377
+
378
/** Releases the STT resources of [instanceId]; delegated to `sttHelper`, which settles [promise]. */
override fun unloadStt(instanceId: String, promise: Promise) {
    sttHelper.unloadStt(
        instanceId,
        promise
    )
}
384
+
385
+ // ==================== STT Methods ====================
386
+
387
/**
 * Transcribes the audio file at [filePath] using the STT instance [instanceId].
 * Delegated to `sttHelper`; the full result (text, tokens, timestamps, lang, emotion, event,
 * durations) is delivered through [promise].
 */
override fun transcribeFile(instanceId: String, filePath: String, promise: Promise) {
    sttHelper.transcribeFile(
        instanceId,
        filePath,
        promise
    )
}
393
+
394
/**
 * Transcribes float PCM [samples] with the STT instance [instanceId].
 * [sampleRate] arrives as a double and is truncated to an Int before being handed to `sttHelper`.
 */
override fun transcribeSamples(instanceId: String, samples: ReadableArray, sampleRate: Double, promise: Promise) {
    val sampleRateHz = sampleRate.toInt()
    sttHelper.transcribeSamples(instanceId, samples, sampleRateHz, promise)
}
400
+
401
/** Applies [options] to the recognizer of [instanceId] at runtime; delegated to `sttHelper`. */
override fun setSttConfig(instanceId: String, options: ReadableMap, promise: Promise) {
    sttHelper.setSttConfig(
        instanceId,
        options,
        promise
    )
}
407
+
408
/**
 * Converts an audio file to [format] through the native converter (FFmpeg prebuilts).
 *
 * Sample-rate handling: null maps to 0; negative values are rejected; for MP3 only
 * 0, 32000, 44100 and 48000 are accepted; for other formats the value is clamped to 0..48000.
 *
 * Resolves with `null` on success. Rejects with `CONVERT_ERROR` for invalid arguments or a
 * non-empty native error string, and with `CONVERT_EXCEPTION` if an exception is thrown.
 */
override fun convertAudioToFormat(inputPath: String, outputPath: String, format: String, outputSampleRateHz: Double?, promise: Promise) {
    try {
        val requestedRate = outputSampleRateHz?.toInt() ?: 0
        if (requestedRate < 0) {
            android.util.Log.e(NAME, "CONVERT_ERROR: Invalid outputSampleRateHz: must be >= 0")
            promise.reject("CONVERT_ERROR", "Invalid outputSampleRateHz: must be >= 0")
            return
        }

        // MP3 keeps the requested value (validated below); everything else is clamped.
        val isMp3 = format.equals("mp3", ignoreCase = true)
        val rate = if (isMp3) requestedRate else requestedRate.coerceIn(0, 48000)
        if (isMp3 && rate !in setOf(0, 32000, 44100, 48000)) {
            android.util.Log.e(NAME, "CONVERT_ERROR: MP3 output sample rate invalid: $rate")
            promise.reject("CONVERT_ERROR", "MP3 output sample rate must be one of 32000, 44100, 48000, or 0 (default). Received: $rate")
            return
        }

        // The native side reports failure through a non-empty message.
        val err = Companion.nativeConvertAudioToFormat(inputPath, outputPath, format, rate)
        if (err.isNotEmpty()) {
            android.util.Log.e(NAME, "CONVERT_ERROR: $err")
            promise.reject("CONVERT_ERROR", err)
            return
        }
        promise.resolve(null)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio: ${e.message}", e)
        promise.reject("CONVERT_EXCEPTION", "Failed to convert audio: ${e.message}", e)
    }
}
446
+
447
/**
 * Converts an audio file to WAV 16 kHz mono 16-bit PCM through the native converter
 * (FFmpeg prebuilts).
 *
 * Resolves with `null` on success. Rejects with `CONVERT_ERROR` when the native call returns a
 * non-empty error string, and with `CONVERT_EXCEPTION` if an exception is thrown.
 */
override fun convertAudioToWav16k(inputPath: String, outputPath: String, promise: Promise) {
    try {
        val err = Companion.nativeConvertAudioToWav16k(inputPath, outputPath)
        if (err.isNotEmpty()) {
            android.util.Log.e(NAME, "CONVERT_ERROR: $err")
            promise.reject("CONVERT_ERROR", err)
            return
        }
        promise.resolve(null)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio to WAV16k: ${e.message}", e)
        promise.reject("CONVERT_EXCEPTION", "Failed to convert audio to WAV16k: ${e.message}", e)
    }
}
465
+
466
+ // ==================== TTS Methods ====================
467
+
468
/**
 * Initializes TTS for [instanceId] from the model in [modelDir].
 * All arguments are forwarded unchanged to `ttsHelper.initializeTts`, which settles [promise].
 */
override fun initializeTts(
    instanceId: String,
    modelDir: String,
    modelType: String,
    numThreads: Double,
    debug: Boolean,
    noiseScale: Double?,
    noiseScaleW: Double?,
    lengthScale: Double?,
    ruleFsts: String?,
    ruleFars: String?,
    maxNumSentences: Double?,
    silenceScale: Double?,
    provider: String?,
    promise: Promise
) {
    ttsHelper.initializeTts(
        instanceId, modelDir, modelType, numThreads, debug,
        noiseScale, noiseScaleW, lengthScale,
        ruleFsts, ruleFars,
        maxNumSentences, silenceScale, provider,
        promise
    )
}
504
+
505
/**
 * Detects the TTS model type and structure in [modelDir] without initializing the engine.
 *
 * Resolves with a map containing `success`, `detectedModels` (entries with `type` and
 * `modelDir`), optionally `modelType`, and, when detection was unsuccessful, a non-blank `error`.
 * Rejects with `DETECT_ERROR` if the native call returns null or throws.
 *
 * @param modelType explicit model type, or null for `"auto"`.
 */
override fun detectTtsModel(modelDir: String, modelType: String?, promise: Promise) {
    try {
        val result = Companion.nativeDetectTtsModel(modelDir, modelType ?: "auto")
        if (result == null) {
            android.util.Log.e(NAME, "DETECT_ERROR: TTS model detection returned null")
            promise.reject("DETECT_ERROR", "TTS model detection returned null")
            return
        }
        val success = result["success"] as? Boolean ?: false
        val detectedModels = result["detectedModels"] as? ArrayList<*>
            ?: arrayListOf<HashMap<String, String>>()
        val modelTypeStr = result["modelType"] as? String

        val resultMap = Arguments.createMap()
        resultMap.putBoolean("success", success)

        // Entries that are not HashMaps are skipped; missing fields become empty strings.
        val modelsArray = Arguments.createArray()
        for (candidate in detectedModels) {
            val modelMap = candidate as? HashMap<*, *> ?: continue
            val entry = Arguments.createMap()
            entry.putString("type", modelMap["type"] as? String ?: "")
            entry.putString("modelDir", modelMap["modelDir"] as? String ?: "")
            modelsArray.pushMap(entry)
        }
        resultMap.putArray("detectedModels", modelsArray)

        if (modelTypeStr != null) resultMap.putString("modelType", modelTypeStr)
        if (!success) {
            val error = result["error"] as? String
            if (error != null && error.isNotBlank()) {
                resultMap.putString("error", error)
            }
        }
        promise.resolve(resultMap)
    } catch (e: Exception) {
        android.util.Log.e(NAME, "DETECT_ERROR: TTS model detection failed: ${e.message}", e)
        promise.reject("DETECT_ERROR", "TTS model detection failed: ${e.message}", e)
    }
}
549
+
550
/**
 * Updates the TTS parameters of [instanceId]; delegated to `ttsHelper.updateTtsParams`
 * (described in the original documentation as re-initializing with the stored config).
 */
override fun updateTtsParams(
    instanceId: String,
    noiseScale: Double?,
    noiseScaleW: Double?,
    lengthScale: Double?,
    promise: Promise
) {
    ttsHelper.updateTtsParams(
        instanceId,
        noiseScale,
        noiseScaleW,
        lengthScale,
        promise
    )
}
562
+
563
/** Generates speech for [text] with the TTS instance [instanceId]; delegated to `ttsHelper`. */
override fun generateTts(instanceId: String, text: String, options: ReadableMap?, promise: Promise) {
    ttsHelper.generateTts(
        instanceId,
        text,
        options,
        promise
    )
}
569
+
570
/** Generates speech for [text] together with subtitle/timestamp metadata; delegated to `ttsHelper`. */
override fun generateTtsWithTimestamps(instanceId: String, text: String, options: ReadableMap?, promise: Promise) {
    ttsHelper.generateTtsWithTimestamps(
        instanceId,
        text,
        options,
        promise
    )
}
576
+
577
/** Generates speech for [text] in streaming mode (chunk events are emitted); delegated to `ttsHelper`. */
override fun generateTtsStream(instanceId: String, text: String, options: ReadableMap?, promise: Promise) {
    ttsHelper.generateTtsStream(
        instanceId,
        text,
        options,
        promise
    )
}
583
+
584
/** Cancels the ongoing streaming TTS of [instanceId]; delegated to `ttsHelper`. */
override fun cancelTtsStream(instanceId: String, promise: Promise) {
    ttsHelper.cancelTtsStream(
        instanceId,
        promise
    )
}
590
+
591
/** Starts PCM playback for streaming TTS with the given [sampleRate] and [channels]; delegated to `ttsHelper`. */
override fun startTtsPcmPlayer(instanceId: String, sampleRate: Double, channels: Double, promise: Promise) {
    ttsHelper.startTtsPcmPlayer(
        instanceId,
        sampleRate,
        channels,
        promise
    )
}
597
+
598
/** Writes PCM [samples] to the streaming TTS player of [instanceId]; delegated to `ttsHelper`. */
override fun writeTtsPcmChunk(instanceId: String, samples: ReadableArray, promise: Promise) {
    ttsHelper.writeTtsPcmChunk(
        instanceId,
        samples,
        promise
    )
}
604
+
605
/** Stops PCM playback for the streaming TTS of [instanceId]; delegated to `ttsHelper`. */
override fun stopTtsPcmPlayer(instanceId: String, promise: Promise) {
    ttsHelper.stopTtsPcmPlayer(
        instanceId,
        promise
    )
}
611
+
612
/**
 * Emits a `ttsStreamChunk` device event with `instanceId`, `samples` (each float widened to a
 * double), `sampleRate`, `progress` and `isFinal`.
 */
private fun emitTtsStreamChunk(
    instanceId: String,
    samples: FloatArray,
    sampleRate: Int,
    progress: Float,
    isFinal: Boolean
) {
    val emitter = reactApplicationContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
    val pcm = Arguments.createArray()
    samples.forEach { value -> pcm.pushDouble(value.toDouble()) }
    val event = Arguments.createMap().apply {
        putString("instanceId", instanceId)
        putArray("samples", pcm)
        putInt("sampleRate", sampleRate)
        putDouble("progress", progress.toDouble())
        putBoolean("isFinal", isFinal)
    }
    emitter.emit("ttsStreamChunk", event)
}
633
+
634
/** Emits a `ttsStreamError` device event carrying `instanceId` and `message`. */
private fun emitTtsStreamError(instanceId: String, message: String) {
    val emitter = reactApplicationContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
    val event = Arguments.createMap().apply {
        putString("instanceId", instanceId)
        putString("message", message)
    }
    emitter.emit("ttsStreamError", event)
}
642
+
643
/** Emits a `ttsStreamEnd` device event carrying `instanceId` and the `cancelled` flag. */
private fun emitTtsStreamEnd(instanceId: String, cancelled: Boolean) {
    val emitter = reactApplicationContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
    val event = Arguments.createMap().apply {
        putString("instanceId", instanceId)
        putBoolean("cancelled", cancelled)
    }
    emitter.emit("ttsStreamEnd", event)
}
651
+
652
/** Queries the TTS sample rate of [instanceId]; delegated to `ttsHelper`, which settles [promise]. */
override fun getTtsSampleRate(instanceId: String, promise: Promise) {
    ttsHelper.getTtsSampleRate(
        instanceId,
        promise
    )
}
658
+
659
/** Queries the number of speakers of the TTS instance [instanceId]; delegated to `ttsHelper`. */
override fun getTtsNumSpeakers(instanceId: String, promise: Promise) {
    ttsHelper.getTtsNumSpeakers(
        instanceId,
        promise
    )
}
665
+
666
/** Releases the TTS resources of [instanceId]; delegated to `ttsHelper`, which settles [promise]. */
override fun unloadTts(instanceId: String, promise: Promise) {
    ttsHelper.unloadTts(
        instanceId,
        promise
    )
}
672
+
673
/** Saves TTS audio [samples] as a WAV file at [filePath]; delegated to `ttsHelper`. */
override fun saveTtsAudioToFile(
    samples: ReadableArray,
    sampleRate: Double,
    filePath: String,
    promise: Promise
) {
    ttsHelper.saveTtsAudioToFile(
        samples,
        sampleRate,
        filePath,
        promise
    )
}
684
+
685
/**
 * Saves TTS audio [samples] as a WAV file named [filename] under the Android SAF content URI
 * [directoryUri]; delegated to `ttsHelper`.
 */
override fun saveTtsAudioToContentUri(
    samples: ReadableArray,
    sampleRate: Double,
    directoryUri: String,
    filename: String,
    promise: Promise
) {
    ttsHelper.saveTtsAudioToContentUri(
        samples,
        sampleRate,
        directoryUri,
        filename,
        promise
    )
}
697
+
698
/**
 * Saves [text] to a file named [filename] under the Android SAF content URI [directoryUri],
 * using [mimeType]; delegated to `ttsHelper`.
 */
override fun saveTtsTextToContentUri(
    text: String,
    directoryUri: String,
    filename: String,
    mimeType: String,
    promise: Promise
) {
    ttsHelper.saveTtsTextToContentUri(
        text,
        directoryUri,
        filename,
        mimeType,
        promise
    )
}
710
+
711
/** Copies the SAF content URI [fileUri] to a cache file named [filename] for local playback; delegated to `ttsHelper`. */
override fun copyTtsContentUriToCache(
    fileUri: String,
    filename: String,
    promise: Promise
) {
    ttsHelper.copyTtsContentUriToCache(
        fileUri,
        filename,
        promise
    )
}
721
+
722
/** Shares the TTS audio at [fileUri] (file path or content URI) with [mimeType]; delegated to `ttsHelper`. */
override fun shareTtsAudio(fileUri: String, mimeType: String, promise: Promise) {
    ttsHelper.shareTtsAudio(
        fileUri,
        mimeType,
        promise
    )
}
728
+
729
/**
 * Lists the model folders found in the assets/models directory.
 * Delegated to `assetHelper`, which settles [promise].
 */
override fun listAssetModels(promise: Promise) {
    assetHelper.listAssetModels(
        promise
    )
}
736
+
737
/** Lists the model folders under the filesystem [path], optionally [recursive]; delegated to `assetHelper`. */
override fun listModelsAtPath(path: String, recursive: Boolean, promise: Promise) {
    assetHelper.listModelsAtPath(
        path,
        recursive,
        promise
    )
}
743
+
744
/** Looks up the path of the asset pack [packName]; delegated to `assetHelper`, which settles [promise]. */
override fun getAssetPackPath(packName: String, promise: Promise) {
    assetHelper.getAssetPackPath(
        packName,
        promise
    )
}
747
+
748
companion object {
    /** Module name; also used as the log tag throughout this class. */
    const val NAME = "SherpaOnnx"

    @Volatile
    private var instance: SherpaOnnxModule? = null

    // ---- Native JNI methods ----

    @JvmStatic
    private external fun nativeTestSherpaInit(): String

    /** Returns true if the QNN HTP backend can be initialized (QnnBackend_create + free). */
    @JvmStatic
    private external fun nativeCanInitQnnHtp(): Boolean

    /** Returns true if the device has an NNAPI accelerator (GPU/DSP). Android API 29+. */
    @JvmStatic
    private external fun nativeHasNnapiAccelerator(sdkInt: Int): Boolean

    /**
     * STT model detection.
     * Returns a HashMap with success, error, detectedModels, modelType and paths
     * (for Kotlin API config), or null.
     */
    @JvmStatic
    private external fun nativeDetectSttModel(
        modelDir: String,
        preferInt8: Boolean,
        hasPreferInt8: Boolean,
        modelType: String,
        debug: Boolean
    ): HashMap<String, Any>?

    /**
     * TTS model detection.
     * Returns a HashMap with success, error, detectedModels, modelType and paths
     * (for Kotlin API config), or null.
     */
    @JvmStatic
    private external fun nativeDetectTtsModel(
        modelDir: String,
        modelType: String
    ): HashMap<String, Any>?

    /**
     * Converts an arbitrary audio file to the requested format (e.g. "mp3", "flac", "wav").
     * outputSampleRateHz: for MP3 use 32000/44100/48000, 0 = default 44100. Ignored for WAV/FLAC.
     * Returns an empty string on success, or an error message otherwise.
     * Requires FFmpeg prebuilts when called on Android.
     */
    @JvmStatic
    private external fun nativeConvertAudioToFormat(
        inputPath: String,
        outputPath: String,
        format: String,
        outputSampleRateHz: Int
    ): String

    /**
     * Converts any supported audio file to WAV 16 kHz mono 16-bit PCM.
     * Returns an empty string on success, an error message otherwise. Requires FFmpeg prebuilts.
     */
    @JvmStatic
    private external fun nativeConvertAudioToWav16k(
        inputPath: String,
        outputPath: String
    ): String
}
791
+ }