react-native-sherpa-onnx 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/LICENSE +1 -0
  2. package/README.md +89 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +344 -152
  15. package/android/prebuilt-versions.gradle +1 -1
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +2 -2
  21. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
  24. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +40 -10
  25. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +99 -0
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
  28. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  29. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  30. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  31. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  32. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  33. package/ios/SherpaOnnx+TTS.mm +178 -20
  34. package/ios/SherpaOnnx.mm +54 -0
  35. package/ios/SherpaOnnxAudioConvert.h +10 -0
  36. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  37. package/ios/archive/sherpa-onnx-archive-helper.h +3 -0
  38. package/ios/archive/sherpa-onnx-archive-helper.mm +39 -6
  39. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
  40. package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
  41. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  42. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  43. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  44. package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
  45. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  46. package/lib/module/audio/index.js +8 -0
  47. package/lib/module/audio/index.js.map +1 -1
  48. package/lib/module/download/ModelDownloadManager.js +10 -929
  49. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  50. package/lib/module/download/activeModelOperations.js +26 -0
  51. package/lib/module/download/activeModelOperations.js.map +1 -0
  52. package/lib/module/download/background-downloader.d.js +2 -0
  53. package/lib/module/download/background-downloader.d.js.map +1 -0
  54. package/lib/module/download/bulkPurge.js +72 -0
  55. package/lib/module/download/bulkPurge.js.map +1 -0
  56. package/lib/module/download/checksumPrompt.js +19 -0
  57. package/lib/module/download/checksumPrompt.js.map +1 -0
  58. package/lib/module/download/constants.js +7 -0
  59. package/lib/module/download/constants.js.map +1 -0
  60. package/lib/module/download/downloadEvents.js +35 -0
  61. package/lib/module/download/downloadEvents.js.map +1 -0
  62. package/lib/module/download/downloadTask.js +385 -0
  63. package/lib/module/download/downloadTask.js.map +1 -0
  64. package/lib/module/download/ensureModel.js +89 -0
  65. package/lib/module/download/ensureModel.js.map +1 -0
  66. package/lib/module/download/index.js +4 -4
  67. package/lib/module/download/index.js.map +1 -1
  68. package/lib/module/download/localModels.js +151 -0
  69. package/lib/module/download/localModels.js.map +1 -0
  70. package/lib/module/download/modelExtraction.js +174 -0
  71. package/lib/module/download/modelExtraction.js.map +1 -0
  72. package/lib/module/download/paths.js +98 -0
  73. package/lib/module/download/paths.js.map +1 -0
  74. package/lib/module/download/postDownloadProcessing.js +206 -0
  75. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  76. package/lib/module/download/protectedModelKeys.js +31 -0
  77. package/lib/module/download/protectedModelKeys.js.map +1 -0
  78. package/lib/module/download/registry.js +267 -0
  79. package/lib/module/download/registry.js.map +1 -0
  80. package/lib/module/download/retry.js +59 -0
  81. package/lib/module/download/retry.js.map +1 -0
  82. package/lib/module/download/types.js +17 -0
  83. package/lib/module/download/types.js.map +1 -0
  84. package/lib/module/download/validation.js +101 -5
  85. package/lib/module/download/validation.js.map +1 -1
  86. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  87. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  88. package/lib/module/{download → extraction}/extractTarZst.js +3 -1
  89. package/lib/module/extraction/extractTarZst.js.map +1 -0
  90. package/lib/module/extraction/index.js +3 -4
  91. package/lib/module/extraction/index.js.map +1 -1
  92. package/lib/module/index.js +1 -1
  93. package/lib/module/index.js.map +1 -1
  94. package/lib/module/licenses.js +63 -0
  95. package/lib/module/licenses.js.map +1 -0
  96. package/lib/module/stt/index.js +16 -2
  97. package/lib/module/stt/index.js.map +1 -1
  98. package/lib/module/stt/streaming.js +2 -0
  99. package/lib/module/stt/streaming.js.map +1 -1
  100. package/lib/module/stt/streamingTypes.js.map +1 -1
  101. package/lib/module/stt/types.js.map +1 -1
  102. package/lib/module/tts/index.js +20 -2
  103. package/lib/module/tts/index.js.map +1 -1
  104. package/lib/module/tts/streaming.js +4 -0
  105. package/lib/module/tts/streaming.js.map +1 -1
  106. package/lib/module/tts/types.js.map +1 -1
  107. package/lib/module/utils.js +16 -1
  108. package/lib/module/utils.js.map +1 -1
  109. package/lib/typescript/src/NativeSherpaOnnx.d.ts +33 -5
  110. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  111. package/lib/typescript/src/audio/index.d.ts +10 -0
  112. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  113. package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
  114. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  115. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  116. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  117. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  118. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  119. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  120. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  121. package/lib/typescript/src/download/constants.d.ts +5 -0
  122. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  123. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  124. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  125. package/lib/typescript/src/download/downloadTask.d.ts +20 -0
  126. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  127. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  128. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  129. package/lib/typescript/src/download/index.d.ts +7 -7
  130. package/lib/typescript/src/download/index.d.ts.map +1 -1
  131. package/lib/typescript/src/download/localModels.d.ts +15 -0
  132. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  133. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  134. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  135. package/lib/typescript/src/download/paths.d.ts +28 -0
  136. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  137. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  138. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  139. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  140. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  141. package/lib/typescript/src/download/registry.d.ts +14 -0
  142. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  143. package/lib/typescript/src/download/retry.d.ts +15 -0
  144. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  145. package/lib/typescript/src/download/types.d.ts +96 -0
  146. package/lib/typescript/src/download/types.d.ts.map +1 -0
  147. package/lib/typescript/src/download/validation.d.ts +19 -0
  148. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  149. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  150. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  151. package/lib/typescript/src/index.d.ts +1 -0
  152. package/lib/typescript/src/index.d.ts.map +1 -1
  153. package/lib/typescript/src/licenses.d.ts +10 -0
  154. package/lib/typescript/src/licenses.d.ts.map +1 -0
  155. package/lib/typescript/src/stt/index.d.ts +4 -1
  156. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  157. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  158. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  159. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  160. package/lib/typescript/src/stt/types.d.ts +3 -1
  161. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  162. package/lib/typescript/src/tts/index.d.ts +3 -1
  163. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  164. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  165. package/lib/typescript/src/tts/types.d.ts +6 -5
  166. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  167. package/lib/typescript/src/utils.d.ts +5 -0
  168. package/lib/typescript/src/utils.d.ts.map +1 -1
  169. package/package.json +6 -1
  170. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  171. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  172. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  173. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  174. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  175. package/scripts/ci/update_model_license_csv.sh +765 -0
  176. package/scripts/setup-ios-framework.sh +14 -11
  177. package/scripts/update_commercial_use.js +73 -0
  178. package/src/NativeSherpaOnnx.ts +36 -5
  179. package/src/audio/index.ts +20 -0
  180. package/src/download/ModelDownloadManager.ts +55 -1343
  181. package/src/download/activeModelOperations.ts +38 -0
  182. package/src/download/background-downloader.d.ts +43 -0
  183. package/src/download/bulkPurge.ts +102 -0
  184. package/src/download/checksumPrompt.ts +25 -0
  185. package/src/download/constants.ts +5 -0
  186. package/src/download/downloadEvents.ts +55 -0
  187. package/src/download/downloadTask.ts +497 -0
  188. package/src/download/ensureModel.ts +124 -0
  189. package/src/download/index.ts +19 -4
  190. package/src/download/localModels.ts +234 -0
  191. package/src/download/modelExtraction.ts +244 -0
  192. package/src/download/paths.ts +134 -0
  193. package/src/download/postDownloadProcessing.ts +292 -0
  194. package/src/download/protectedModelKeys.ts +30 -0
  195. package/src/download/registry.ts +404 -0
  196. package/src/download/retry.ts +76 -0
  197. package/src/download/types.ts +120 -0
  198. package/src/download/validation.ts +114 -8
  199. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  200. package/src/{download → extraction}/extractTarZst.ts +3 -1
  201. package/src/extraction/index.ts +3 -7
  202. package/src/index.tsx +1 -0
  203. package/src/licenses.ts +100 -0
  204. package/src/stt/index.ts +20 -2
  205. package/src/stt/streaming.ts +3 -0
  206. package/src/stt/streamingTypes.ts +5 -0
  207. package/src/stt/types.ts +3 -1
  208. package/src/tts/index.ts +30 -2
  209. package/src/tts/streaming.ts +10 -0
  210. package/src/tts/types.ts +6 -5
  211. package/src/utils.ts +22 -1
  212. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  213. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  214. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  215. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  216. package/lib/module/download/extractTarBz2.js.map +0 -1
  217. package/lib/module/download/extractTarZst.js.map +0 -1
  218. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  219. package/lib/typescript/src/download/extractTarZst.d.ts.map +0 -1
  220. package/scripts/check-qnn-support.sh +0 -78
  221. package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
  222. package/lib/typescript/src/{download → extraction}/extractTarZst.d.ts +0 -0
@@ -8,6 +8,7 @@ import com.facebook.react.bridge.ReadableMap
8
8
  import com.facebook.react.bridge.Arguments
9
9
  import com.facebook.react.module.annotations.ReactModule
10
10
  import com.facebook.react.modules.core.DeviceEventManagerModule
11
+ import com.k2fsa.sherpa.onnx.WaveReader
11
12
 
12
13
  @ReactModule(name = SherpaOnnxModule.NAME)
13
14
  class SherpaOnnxModule(reactContext: ReactApplicationContext) :
@@ -314,6 +315,11 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
314
315
  promise.resolve(null)
315
316
  }
316
317
 
318
+ override fun cancelExtractBySourcePath(sourcePath: String, promise: Promise) {
319
+ archiveHelper.cancelExtractBySourcePath(sourcePath)
320
+ promise.resolve(null)
321
+ }
322
+
317
323
  override fun computeFileSha256(filePath: String, promise: Promise) {
318
324
  archiveHelper.computeFileSha256(filePath, promise)
319
325
  }
@@ -452,6 +458,7 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
452
458
  val provider = if (options.hasKey("provider")) options.getString("provider") else null
453
459
  val ruleFsts = if (options.hasKey("ruleFsts")) options.getString("ruleFsts") else null
454
460
  val ruleFars = if (options.hasKey("ruleFars")) options.getString("ruleFars") else null
461
+ val dither = if (options.hasKey("dither")) options.getDouble("dither") else null
455
462
  val blankPenalty = if (options.hasKey("blankPenalty")) options.getDouble("blankPenalty") else null
456
463
  val debug = if (options.hasKey("debug")) options.getBoolean("debug") else null
457
464
  val rule1MustContainNonSilence = if (options.hasKey("rule1MustContainNonSilence")) options.getBoolean("rule1MustContainNonSilence") else null
@@ -476,6 +483,7 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
476
483
  provider,
477
484
  ruleFsts,
478
485
  ruleFars,
486
+ dither,
479
487
  blankPenalty,
480
488
  debug,
481
489
  rule1MustContainNonSilence,
@@ -712,6 +720,72 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
712
720
  }
713
721
  }
714
722
 
723
+ /**
724
+ * Decode audio to mono float samples (approx. [-1, 1]) and effective sample rate.
725
+ * Same path/URI handling as [convertAudioToFormat]. WAV may use [WaveReader] when no resample is requested.
726
+ */
727
+ override fun decodeAudioFileToFloatSamples(inputPath: String, targetSampleRateHz: Double?, promise: Promise) {
728
+ var tmpFile: java.io.File? = null
729
+ try {
730
+ val targetHz = (targetSampleRateHz ?: 0.0).toInt()
731
+ if (targetHz < 0) {
732
+ promise.reject("DECODE_ERROR", "targetSampleRateHz must be >= 0")
733
+ return
734
+ }
735
+ val (pathToUse, tmp) = resolveInputForConvert(inputPath)
736
+ tmpFile = tmp
737
+
738
+ if (pathToUse.endsWith(".wav", ignoreCase = true)) {
739
+ try {
740
+ val wave = WaveReader.readWave(pathToUse)
741
+ val s = wave.samples
742
+ if (s != null && s.isNotEmpty() && wave.sampleRate > 0 && (targetHz == 0 || targetHz == wave.sampleRate)) {
743
+ val map = Arguments.createMap()
744
+ val arr = Arguments.createArray()
745
+ for (i in s.indices) {
746
+ arr.pushDouble(s[i].toDouble())
747
+ }
748
+ map.putArray("samples", arr)
749
+ map.putInt("sampleRate", wave.sampleRate)
750
+ promise.resolve(map)
751
+ return
752
+ }
753
+ } catch (_: Throwable) {
754
+ // Fall through to FFmpeg/native path (e.g. odd WAV or resample requested).
755
+ }
756
+ }
757
+
758
+ val result = Companion.nativeDecodeAudioFileToFloatSamples(pathToUse, targetHz)
759
+ if (result.size == 1 && result[0] is String) {
760
+ promise.reject("DECODE_ERROR", result[0] as String)
761
+ return
762
+ }
763
+ if (result.size != 2 || result[0] !is FloatArray) {
764
+ promise.reject("DECODE_ERROR", "Unexpected native decode result")
765
+ return
766
+ }
767
+ val floats = result[0] as FloatArray
768
+ val rateObj = result.getOrNull(1) as? Number ?: run {
769
+ promise.reject("DECODE_ERROR", "Unexpected sample rate in native decode result")
770
+ return
771
+ }
772
+ val sr = rateObj.toInt()
773
+ val map = Arguments.createMap()
774
+ val arr = Arguments.createArray()
775
+ for (i in floats.indices) {
776
+ arr.pushDouble(floats[i].toDouble())
777
+ }
778
+ map.putArray("samples", arr)
779
+ map.putInt("sampleRate", sr)
780
+ promise.resolve(map)
781
+ } catch (e: Exception) {
782
+ android.util.Log.e(NAME, "DECODE_EXCEPTION: ${e.message}", e)
783
+ promise.reject("DECODE_EXCEPTION", e.message ?: "Failed to decode audio", e)
784
+ } finally {
785
+ tmpFile?.delete()
786
+ }
787
+ }
788
+
715
789
  // ==================== TTS Methods ====================
716
790
 
717
791
  /**
@@ -1057,6 +1131,25 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
1057
1131
  }
1058
1132
  }
1059
1133
 
1134
+ override fun readAssetFileAsUtf8(assetPath: String, promise: Promise) {
1135
+ // Validate assetPath to prevent path traversal: reject paths containing
1136
+ // "..", starting with "/" or "\", or containing backslashes.
1137
+ if (assetPath.contains("..") ||
1138
+ assetPath.startsWith("/") ||
1139
+ assetPath.startsWith("\\") ||
1140
+ assetPath.contains("\\")) {
1141
+ promise.reject("ASSET_READ_ERROR", "Invalid asset path: $assetPath")
1142
+ return
1143
+ }
1144
+ try {
1145
+ val content = reactApplicationContext.assets.open(assetPath).bufferedReader().use { it.readText() }
1146
+ promise.resolve(content)
1147
+ } catch (e: Exception) {
1148
+ android.util.Log.e(NAME, "Failed to read asset $assetPath: ${e.message}", e)
1149
+ promise.reject("ASSET_READ_ERROR", "Failed to read asset $assetPath: ${e.message}", e)
1150
+ }
1151
+ }
1152
+
1060
1153
  companion object {
1061
1154
  const val NAME = "SherpaOnnx"
1062
1155
 
@@ -1099,5 +1192,11 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
1099
1192
  /** Convert any supported audio file to WAV 16 kHz mono 16-bit PCM. Returns empty string on success, error message otherwise. Requires FFmpeg prebuilts. */
1100
1193
  @JvmStatic
1101
1194
  private external fun nativeConvertAudioToWav16k(inputPath: String, outputPath: String): String
1195
+
1196
+ /**
1197
+ * On success: [FloatArray samples, Integer sampleRate]. On error: [String message].
1198
+ */
1199
+ @JvmStatic
1200
+ private external fun nativeDecodeAudioFileToFloatSamples(inputPath: String, targetSampleRateHz: Int): Array<Any>
1102
1201
  }
1103
1202
  }
@@ -132,6 +132,7 @@ internal class SherpaOnnxOnlineSttHelper(
132
132
  provider: String?,
133
133
  ruleFsts: String?,
134
134
  ruleFars: String?,
135
+ dither: Float?,
135
136
  blankPenalty: Float?,
136
137
  debug: Boolean?,
137
138
  rule1MustContainNonSilence: Boolean?,
@@ -233,7 +234,7 @@ internal class SherpaOnnxOnlineSttHelper(
233
234
  }
234
235
 
235
236
  return OnlineRecognizerConfig(
236
- featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80, dither = 0f),
237
+ featConfig = FeatureConfig(sampleRate = 16000, featureDim = 80, dither = dither ?: 0f),
237
238
  modelConfig = modelConfig,
238
239
  endpointConfig = endpointConfig,
239
240
  enableEndpoint = enableEndpoint,
@@ -260,6 +261,7 @@ internal class SherpaOnnxOnlineSttHelper(
260
261
  provider: String?,
261
262
  ruleFsts: String?,
262
263
  ruleFars: String?,
264
+ dither: Double?,
263
265
  blankPenalty: Double?,
264
266
  debug: Boolean?,
265
267
  rule1MustContainNonSilence: Boolean?,
@@ -286,6 +288,7 @@ internal class SherpaOnnxOnlineSttHelper(
286
288
  provider = provider,
287
289
  ruleFsts = ruleFsts,
288
290
  ruleFars = ruleFars,
291
+ dither = dither?.toFloat(),
289
292
  blankPenalty = blankPenalty?.toFloat(),
290
293
  debug = debug,
291
294
  rule1MustContainNonSilence = rule1MustContainNonSilence,
@@ -30,6 +30,7 @@ import com.k2fsa.sherpa.onnx.OfflineTtsVitsModelConfig
30
30
  import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
31
31
  import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
32
32
  import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
33
+ import com.k2fsa.sherpa.onnx.OfflineTtsZipVoiceModelConfig
33
34
  import java.io.File
34
35
  import java.io.FileInputStream
35
36
  import java.io.FileOutputStream
@@ -64,8 +65,7 @@ internal class SherpaOnnxTtsHelper(
64
65
 
65
66
  private data class TtsEngineInstance(
66
67
  @Volatile var tts: OfflineTts? = null,
67
- @Volatile var zipvoiceTts: ZipvoiceTtsWrapper? = null,
68
- var ttsInitState: TtsInitState? = null,
68
+ @Volatile var ttsInitState: TtsInitState? = null,
69
69
  val ttsStreamRunning: AtomicBoolean = AtomicBoolean(false),
70
70
  val ttsStreamCancelled: AtomicBoolean = AtomicBoolean(false),
71
71
  var ttsStreamThread: Thread? = null,
@@ -73,15 +73,13 @@ internal class SherpaOnnxTtsHelper(
73
73
  ) {
74
74
  private val lock = Any()
75
75
 
76
- fun hasEngine(): Boolean = synchronized(lock) { tts != null || zipvoiceTts != null }
77
- val isZipvoice: Boolean get() = synchronized(lock) { zipvoiceTts != null }
76
+ fun hasEngine(): Boolean = synchronized(lock) { tts != null }
77
+ val isZipvoice: Boolean get() = ttsInitState?.modelType == "zipvoice"
78
78
  val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
79
79
  fun releaseEngines() {
80
80
  synchronized(lock) {
81
81
  tts?.release()
82
82
  tts = null
83
- zipvoiceTts?.release()
84
- zipvoiceTts = null
85
83
  ttsInitState = null
86
84
  }
87
85
  }
@@ -186,6 +184,13 @@ internal class SherpaOnnxTtsHelper(
186
184
  rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
187
185
  return@init
188
186
  }
187
+ val lexiconPath = path(paths, "lexicon")
188
+ if (lexiconPath.isBlank()) {
189
+ val msg = "Zipvoice requires lexicon.txt (or lexicon-<lang>.txt) in the model directory. The sherpa-onnx engine aborts if it is missing. Copy lexicon from the official k2-fsa sherpa-onnx Zipvoice model package or hr-files release next to tokens.txt."
190
+ Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: $msg")
191
+ rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
192
+ return@init
193
+ }
189
194
  val am = context.applicationContext.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
190
195
  if (am != null) {
191
196
  val memInfo = ActivityManager.MemoryInfo()
@@ -206,34 +211,26 @@ internal class SherpaOnnxTtsHelper(
206
211
  Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfoBefore.availMem / (1024 * 1024)} MB (before load)")
207
212
  }
208
213
  val zipvoiceNumThreads = 1
209
- val wrapper = ZipvoiceTtsWrapper.create(
210
- tokens = path(paths, "tokens"),
211
- encoder = path(paths, "encoder"),
212
- decoder = path(paths, "decoder"),
213
- vocoder = vocoderPath,
214
- dataDir = path(paths, "dataDir"),
215
- lexicon = path(paths, "lexicon"),
216
- numThreads = zipvoiceNumThreads,
217
- debug = debug,
218
- ruleFsts = ruleFsts?.takeIf { it.isNotBlank() } ?: "",
219
- ruleFars = ruleFars?.takeIf { it.isNotBlank() } ?: "",
220
- maxNumSentences = maxNumSentences?.toInt()?.coerceAtLeast(1) ?: 1,
221
- silenceScale = silenceScale?.toFloat()?.coerceIn(0f, 10f) ?: 0.2f,
222
- provider = provider?.takeIf { it.isNotBlank() } ?: "cpu"
214
+ val config = buildTtsConfig(
215
+ paths, "zipvoice", zipvoiceNumThreads, debug,
216
+ noiseScale, noiseScaleW, lengthScale,
217
+ ruleFsts, ruleFars, maxNumSentences?.toInt(), silenceScale,
218
+ provider
223
219
  )
224
220
  if (am != null) {
225
221
  val memInfo = ActivityManager.MemoryInfo()
226
222
  am.getMemoryInfo(memInfo)
227
223
  Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfo.availMem / (1024 * 1024)} MB (after load)")
228
224
  }
229
- if (wrapper == null) {
230
- Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice TTS engine via C-API. Check logcat for details.")
231
- rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine via C-API. Check logcat for details.")
225
+ try {
226
+ inst.tts = OfflineTts(config = config)
227
+ } catch (e: Exception) {
228
+ Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice OfflineTts: ${e.message}", e)
229
+ rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine: ${e.message}", e)
232
230
  return@init
233
231
  }
234
- inst.zipvoiceTts = wrapper
235
- sampleRate = wrapper.sampleRate()
236
- numSpeakers = wrapper.numSpeakers()
232
+ sampleRate = inst.tts!!.sampleRate()
233
+ numSpeakers = inst.tts!!.numSpeakers()
237
234
  } else {
238
235
  val config = buildTtsConfig(
239
236
  paths, modelTypeStr, numThreads.toInt(), debug,
@@ -246,8 +243,6 @@ internal class SherpaOnnxTtsHelper(
246
243
  numSpeakers = inst.tts!!.numSpeakers()
247
244
  }
248
245
 
249
- Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=${if (inst.isZipvoice) "zipvoice-c-api" else "kotlin-api"}, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
250
-
251
246
  val modelsArray = Arguments.createArray()
252
247
  detectedModels?.forEach { modelObj ->
253
248
  if (modelObj is HashMap<*, *>) {
@@ -273,6 +268,8 @@ internal class SherpaOnnxTtsHelper(
273
268
  provider?.takeIf { it.isNotBlank() }
274
269
  )
275
270
 
271
+ Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=kotlin-api modelType=$modelTypeStr, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
272
+
276
273
  val resultMap = Arguments.createMap()
277
274
  resultMap.putBoolean("success", true)
278
275
  resultMap.putArray("detectedModels", modelsArray)
@@ -309,18 +306,6 @@ internal class SherpaOnnxTtsHelper(
309
306
  return
310
307
  }
311
308
 
312
- if (inst.isZipvoice) {
313
- initializeTts(
314
- instanceId,
315
- state.modelDir, state.modelType, state.numThreads.toDouble(), state.debug,
316
- noiseScale, noiseScaleW, lengthScale,
317
- state.ruleFsts, state.ruleFars, state.maxNumSentences?.toDouble(), state.silenceScale,
318
- state.provider,
319
- promise
320
- )
321
- return
322
- }
323
-
324
309
  val nextNoiseScale = when {
325
310
  noiseScale == null -> null
326
311
  noiseScale.isNaN() -> state.noiseScale
@@ -401,26 +386,35 @@ internal class SherpaOnnxTtsHelper(
401
386
  val sid = getSid(options)
402
387
  val speed = getSpeed(options)
403
388
  val audio = when {
404
- hasReferenceOptions(options) && inst.isZipvoice -> {
405
- val refAudio = options?.getArray("referenceAudio")
406
- ?: run {
407
- Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: referenceAudio required for Zipvoice voice cloning")
408
- promise.reject("TTS_GENERATE_ERROR", "referenceAudio required for Zipvoice voice cloning")
389
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
390
+ if (inst.isZipvoice) {
391
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
392
+ if (promptText.isEmpty()) {
393
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
394
+ promise.reject(
395
+ "TTS_GENERATE_ERROR",
396
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
397
+ )
409
398
  return
410
399
  }
411
- val promptSr = if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
412
- val promptText = options.getString("referenceText").orEmpty()
413
- val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
414
- val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
415
- inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
416
- }
417
- hasReferenceOptions(options) && inst.tts != null -> {
400
+ }
418
401
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
419
402
  inst.tts!!.generateWithConfig(text, config)
420
403
  }
421
- inst.isPocket && !hasReferenceOptions(options) -> {
404
+ hasReferenceAudio(options) -> {
405
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
406
+ promise.reject(
407
+ "TTS_GENERATE_ERROR",
408
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
409
+ )
410
+ return
411
+ }
412
+ inst.isPocket -> {
422
413
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
423
- promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
414
+ promise.reject(
415
+ "TTS_GENERATE_ERROR",
416
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
417
+ )
424
418
  return
425
419
  }
426
420
  else -> dispatchGenerate(inst, text, sid, speed)
@@ -459,26 +453,35 @@ internal class SherpaOnnxTtsHelper(
459
453
  val sid = getSid(options)
460
454
  val speed = getSpeed(options)
461
455
  val audio = when {
462
- hasReferenceOptions(options) && inst.isZipvoice -> {
463
- val refAudio = options?.getArray("referenceAudio")
464
- ?: run {
465
- Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: referenceAudio required for Zipvoice voice cloning")
466
- promise.reject("TTS_GENERATE_ERROR", "referenceAudio required for Zipvoice voice cloning")
456
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
457
+ if (inst.isZipvoice) {
458
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
459
+ if (promptText.isEmpty()) {
460
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
461
+ promise.reject(
462
+ "TTS_GENERATE_ERROR",
463
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
464
+ )
467
465
  return
468
466
  }
469
- val promptSr = if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
470
- val promptText = options.getString("referenceText").orEmpty()
471
- val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
472
- val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
473
- inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
474
- }
475
- hasReferenceOptions(options) && inst.tts != null -> {
467
+ }
476
468
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
477
469
  inst.tts!!.generateWithConfig(text, config)
478
470
  }
479
- inst.isPocket && !hasReferenceOptions(options) -> {
471
+ hasReferenceAudio(options) -> {
472
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
473
+ promise.reject(
474
+ "TTS_GENERATE_ERROR",
475
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
476
+ )
477
+ return
478
+ }
479
+ inst.isPocket -> {
480
480
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
481
- promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
481
+ promise.reject(
482
+ "TTS_GENERATE_ERROR",
483
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
484
+ )
482
485
  return
483
486
  }
484
487
  else -> dispatchGenerate(inst, text, sid, speed)
@@ -529,16 +532,27 @@ internal class SherpaOnnxTtsHelper(
529
532
  promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
530
533
  return
531
534
  }
532
- if (inst.isPocket && !hasReferenceOptions(options)) {
535
+ if (inst.isPocket && !hasReferenceAudio(options)) {
533
536
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
534
- promise.reject("TTS_STREAM_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
537
+ promise.reject(
538
+ "TTS_STREAM_ERROR",
539
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
540
+ )
535
541
  return
536
542
  }
537
- if (hasReferenceOptions(options) && inst.isZipvoice) {
543
+ if (hasReferenceAudio(options) && inst.isZipvoice) {
538
544
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
539
545
  promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
540
546
  return
541
547
  }
548
+ if (hasReferenceAudio(options) && !inst.isPocket) {
549
+ Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Reference audio streaming is only supported for Pocket TTS")
550
+ promise.reject(
551
+ "TTS_STREAM_ERROR",
552
+ "Reference audio streaming is only supported for Pocket TTS."
553
+ )
554
+ return
555
+ }
542
556
  val sid = getSid(options)
543
557
  val speed = getSpeed(options)
544
558
  inst.ttsStreamCancelled.set(false)
@@ -547,7 +561,7 @@ internal class SherpaOnnxTtsHelper(
547
561
  try {
548
562
  val sampleRate = dispatchSampleRate(inst)
549
563
  when {
550
- hasReferenceOptions(options) && inst.tts != null -> {
564
+ hasReferenceAudio(options) && inst.isPocket -> {
551
565
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
552
566
  inst.tts!!.generateWithConfigAndCallback(text, config) { chunk ->
553
567
  if (inst.ttsStreamCancelled.get()) return@generateWithConfigAndCallback 0
@@ -555,13 +569,6 @@ internal class SherpaOnnxTtsHelper(
555
569
  chunk.size
556
570
  }
557
571
  }
558
- inst.zipvoiceTts != null -> {
559
- inst.zipvoiceTts!!.generateWithCallback(text, sid, speed) { chunk ->
560
- if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
561
- emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
562
- chunk.size
563
- }
564
- }
565
572
  else -> {
566
573
  inst.tts!!.generateWithCallback(text, sid, speed) { chunk ->
567
574
  if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
@@ -885,14 +892,21 @@ internal class SherpaOnnxTtsHelper(
885
892
 
886
893
  // -- Dual-engine dispatch helpers --
887
894
 
888
- /** True if options contain reference-audio fields for voice cloning. */
889
- private fun hasReferenceOptions(options: ReadableMap?): Boolean {
895
+ /**
896
+ * True when voice-cloning reference audio is present and valid for native use:
897
+ * non-empty [referenceAudio] array and [referenceSampleRate] > 0.
898
+ * [referenceText] alone does not enable cloning (matches sherpa-onnx behavior).
899
+ */
900
+ private fun hasReferenceAudio(options: ReadableMap?): Boolean {
890
901
  if (options == null) return false
891
- val refAudio = options.getArray("referenceAudio")
892
- val refText = options.getString("referenceText")
893
- return (refAudio != null && refAudio.size() > 0) || !refText.isNullOrEmpty()
902
+ val refAudio = options.getArray("referenceAudio") ?: return false
903
+ if (refAudio.size() == 0) return false
904
+ return readReferenceSampleRate(options) > 0
894
905
  }
895
906
 
907
+ private fun readReferenceSampleRate(options: ReadableMap): Int =
908
+ if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
909
+
896
910
  /** Parse sid and speed from options with defaults. */
897
911
  private fun getSid(options: ReadableMap?): Int =
898
912
  if (options != null && options.hasKey("sid")) options.getDouble("sid").toInt() else 0
@@ -936,18 +950,14 @@ internal class SherpaOnnxTtsHelper(
936
950
 
937
951
  /** Dispatch generate to whichever engine is active on the instance. Returns null if none loaded. */
938
952
  private fun dispatchGenerate(inst: TtsEngineInstance, text: String, sid: Int, speed: Float): GeneratedAudio? {
939
- inst.zipvoiceTts?.let { return it.generate(text, sid, speed) }
940
- inst.tts?.let { return it.generate(text, sid, speed) }
941
- return null
953
+ return inst.tts?.generate(text, sid, speed)
942
954
  }
943
955
 
944
956
  private fun dispatchSampleRate(inst: TtsEngineInstance): Int {
945
- inst.zipvoiceTts?.let { return it.sampleRate() }
946
957
  return inst.tts?.sampleRate() ?: 0
947
958
  }
948
959
 
949
960
  private fun dispatchNumSpeakers(inst: TtsEngineInstance): Int {
950
- inst.zipvoiceTts?.let { return it.numSpeakers() }
951
961
  return inst.tts?.numSpeakers() ?: 0
952
962
  }
953
963
 
@@ -1039,14 +1049,19 @@ internal class SherpaOnnxTtsHelper(
1039
1049
  debug = debug,
1040
1050
  provider = prov
1041
1051
  )
1042
- "zipvoice" -> {
1043
- // Zipvoice is handled by ZipvoiceTtsWrapper (C-API), not OfflineTts (Kotlin API).
1044
- // This branch should not be reached because initializeTts/updateTtsParams handle
1045
- // the "zipvoice" case before calling buildTtsConfig.
1046
- throw IllegalStateException(
1047
- "buildTtsConfig should not be called for zipvoice models. Use ZipvoiceTtsWrapper instead."
1048
- )
1049
- }
1052
+ "zipvoice" -> OfflineTtsModelConfig(
1053
+ zipvoice = OfflineTtsZipVoiceModelConfig(
1054
+ tokens = path(paths, "tokens"),
1055
+ encoder = path(paths, "encoder"),
1056
+ decoder = path(paths, "decoder"),
1057
+ vocoder = path(paths, "vocoder"),
1058
+ dataDir = path(paths, "dataDir"),
1059
+ lexicon = path(paths, "lexicon")
1060
+ ),
1061
+ numThreads = numThreads,
1062
+ debug = debug,
1063
+ provider = prov
1064
+ )
1050
1065
  else -> {
1051
1066
  if (path(paths, "acousticModel").isNotEmpty()) {
1052
1067
  OfflineTtsModelConfig(