react-native-sherpa-onnx 0.3.5 → 0.3.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (232)
  1. package/LICENSE +1 -0
  2. package/README.md +90 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +345 -153
  15. package/android/prebuilt-versions.gradle +2 -2
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +306 -6
  21. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +33 -4
  22. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +266 -7
  23. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +137 -7
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +51 -6
  28. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +159 -0
  29. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  30. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
  31. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  32. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  33. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  34. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  35. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  36. package/ios/SherpaOnnx+TTS.mm +178 -20
  37. package/ios/SherpaOnnx.mm +108 -1
  38. package/ios/SherpaOnnxAudioConvert.h +10 -0
  39. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  40. package/ios/archive/sherpa-onnx-archive-helper.h +10 -0
  41. package/ios/archive/sherpa-onnx-archive-helper.mm +56 -5
  42. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
  43. package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
  44. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  45. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  46. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  47. package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
  48. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  49. package/lib/module/audio/index.js +8 -0
  50. package/lib/module/audio/index.js.map +1 -1
  51. package/lib/module/download/ModelDownloadManager.js +10 -929
  52. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  53. package/lib/module/download/activeModelOperations.js +26 -0
  54. package/lib/module/download/activeModelOperations.js.map +1 -0
  55. package/lib/module/download/background-downloader.d.js +2 -0
  56. package/lib/module/download/background-downloader.d.js.map +1 -0
  57. package/lib/module/download/bulkPurge.js +72 -0
  58. package/lib/module/download/bulkPurge.js.map +1 -0
  59. package/lib/module/download/checksumPrompt.js +19 -0
  60. package/lib/module/download/checksumPrompt.js.map +1 -0
  61. package/lib/module/download/constants.js +7 -0
  62. package/lib/module/download/constants.js.map +1 -0
  63. package/lib/module/download/downloadEvents.js +35 -0
  64. package/lib/module/download/downloadEvents.js.map +1 -0
  65. package/lib/module/download/downloadTask.js +385 -0
  66. package/lib/module/download/downloadTask.js.map +1 -0
  67. package/lib/module/download/ensureModel.js +89 -0
  68. package/lib/module/download/ensureModel.js.map +1 -0
  69. package/lib/module/download/index.js +4 -3
  70. package/lib/module/download/index.js.map +1 -1
  71. package/lib/module/download/localModels.js +151 -0
  72. package/lib/module/download/localModels.js.map +1 -0
  73. package/lib/module/download/modelExtraction.js +174 -0
  74. package/lib/module/download/modelExtraction.js.map +1 -0
  75. package/lib/module/download/paths.js +98 -0
  76. package/lib/module/download/paths.js.map +1 -0
  77. package/lib/module/download/postDownloadProcessing.js +206 -0
  78. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  79. package/lib/module/download/protectedModelKeys.js +31 -0
  80. package/lib/module/download/protectedModelKeys.js.map +1 -0
  81. package/lib/module/download/registry.js +267 -0
  82. package/lib/module/download/registry.js.map +1 -0
  83. package/lib/module/download/retry.js +59 -0
  84. package/lib/module/download/retry.js.map +1 -0
  85. package/lib/module/download/types.js +17 -0
  86. package/lib/module/download/types.js.map +1 -0
  87. package/lib/module/download/validation.js +101 -5
  88. package/lib/module/download/validation.js.map +1 -1
  89. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  90. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  91. package/lib/module/extraction/extractTarZst.js +54 -0
  92. package/lib/module/extraction/extractTarZst.js.map +1 -0
  93. package/lib/module/extraction/index.js +190 -0
  94. package/lib/module/extraction/index.js.map +1 -0
  95. package/lib/module/extraction/types.js +2 -0
  96. package/lib/module/extraction/types.js.map +1 -0
  97. package/lib/module/index.js +2 -1
  98. package/lib/module/index.js.map +1 -1
  99. package/lib/module/licenses.js +63 -0
  100. package/lib/module/licenses.js.map +1 -0
  101. package/lib/module/stt/index.js +16 -2
  102. package/lib/module/stt/index.js.map +1 -1
  103. package/lib/module/stt/streaming.js +2 -0
  104. package/lib/module/stt/streaming.js.map +1 -1
  105. package/lib/module/stt/streamingTypes.js.map +1 -1
  106. package/lib/module/stt/types.js.map +1 -1
  107. package/lib/module/tts/index.js +20 -2
  108. package/lib/module/tts/index.js.map +1 -1
  109. package/lib/module/tts/streaming.js +4 -0
  110. package/lib/module/tts/streaming.js.map +1 -1
  111. package/lib/module/tts/types.js.map +1 -1
  112. package/lib/module/utils.js +16 -1
  113. package/lib/module/utils.js.map +1 -1
  114. package/lib/typescript/src/NativeSherpaOnnx.d.ts +72 -5
  115. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  116. package/lib/typescript/src/audio/index.d.ts +10 -0
  117. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  118. package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
  119. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  120. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  121. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  122. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  123. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  124. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  125. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  126. package/lib/typescript/src/download/constants.d.ts +5 -0
  127. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  128. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  129. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  130. package/lib/typescript/src/download/downloadTask.d.ts +20 -0
  131. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  132. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  133. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  134. package/lib/typescript/src/download/index.d.ts +7 -5
  135. package/lib/typescript/src/download/index.d.ts.map +1 -1
  136. package/lib/typescript/src/download/localModels.d.ts +15 -0
  137. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  138. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  139. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  140. package/lib/typescript/src/download/paths.d.ts +28 -0
  141. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  142. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  143. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  144. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  145. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  146. package/lib/typescript/src/download/registry.d.ts +14 -0
  147. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  148. package/lib/typescript/src/download/retry.d.ts +15 -0
  149. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  150. package/lib/typescript/src/download/types.d.ts +96 -0
  151. package/lib/typescript/src/download/types.d.ts.map +1 -0
  152. package/lib/typescript/src/download/validation.d.ts +19 -0
  153. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  154. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  155. package/lib/typescript/src/extraction/extractTarZst.d.ts +14 -0
  156. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  157. package/lib/typescript/src/extraction/index.d.ts +50 -0
  158. package/lib/typescript/src/extraction/index.d.ts.map +1 -0
  159. package/lib/typescript/src/extraction/types.d.ts +60 -0
  160. package/lib/typescript/src/extraction/types.d.ts.map +1 -0
  161. package/lib/typescript/src/index.d.ts +1 -0
  162. package/lib/typescript/src/index.d.ts.map +1 -1
  163. package/lib/typescript/src/licenses.d.ts +10 -0
  164. package/lib/typescript/src/licenses.d.ts.map +1 -0
  165. package/lib/typescript/src/stt/index.d.ts +4 -1
  166. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  167. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  168. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  169. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  170. package/lib/typescript/src/stt/types.d.ts +3 -1
  171. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  172. package/lib/typescript/src/tts/index.d.ts +3 -1
  173. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  174. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  175. package/lib/typescript/src/tts/types.d.ts +6 -5
  176. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  177. package/lib/typescript/src/utils.d.ts +5 -0
  178. package/lib/typescript/src/utils.d.ts.map +1 -1
  179. package/package.json +11 -1
  180. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  181. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  182. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  183. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  184. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  185. package/scripts/ci/update_model_license_csv.sh +765 -0
  186. package/scripts/setup-ios-framework.sh +14 -11
  187. package/scripts/update_commercial_use.js +73 -0
  188. package/src/NativeSherpaOnnx.ts +92 -5
  189. package/src/audio/index.ts +20 -0
  190. package/src/download/ModelDownloadManager.ts +55 -1343
  191. package/src/download/activeModelOperations.ts +38 -0
  192. package/src/download/background-downloader.d.ts +43 -0
  193. package/src/download/bulkPurge.ts +102 -0
  194. package/src/download/checksumPrompt.ts +25 -0
  195. package/src/download/constants.ts +5 -0
  196. package/src/download/downloadEvents.ts +55 -0
  197. package/src/download/downloadTask.ts +497 -0
  198. package/src/download/ensureModel.ts +124 -0
  199. package/src/download/index.ts +19 -2
  200. package/src/download/localModels.ts +234 -0
  201. package/src/download/modelExtraction.ts +244 -0
  202. package/src/download/paths.ts +134 -0
  203. package/src/download/postDownloadProcessing.ts +292 -0
  204. package/src/download/protectedModelKeys.ts +30 -0
  205. package/src/download/registry.ts +404 -0
  206. package/src/download/retry.ts +76 -0
  207. package/src/download/types.ts +120 -0
  208. package/src/download/validation.ts +114 -8
  209. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  210. package/src/extraction/extractTarZst.ts +79 -0
  211. package/src/extraction/index.ts +269 -0
  212. package/src/extraction/types.ts +63 -0
  213. package/src/index.tsx +2 -0
  214. package/src/licenses.ts +100 -0
  215. package/src/stt/index.ts +20 -2
  216. package/src/stt/streaming.ts +3 -0
  217. package/src/stt/streamingTypes.ts +5 -0
  218. package/src/stt/types.ts +3 -1
  219. package/src/tts/index.ts +30 -2
  220. package/src/tts/streaming.ts +10 -0
  221. package/src/tts/types.ts +6 -5
  222. package/src/utils.ts +22 -1
  223. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
  224. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
  225. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  226. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  227. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  228. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  229. package/lib/module/download/extractTarBz2.js.map +0 -1
  230. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  231. package/scripts/check-qnn-support.sh +0 -78
  232. package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
@@ -30,6 +30,7 @@ import com.k2fsa.sherpa.onnx.OfflineTtsVitsModelConfig
30
30
  import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
31
31
  import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
32
32
  import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
33
+ import com.k2fsa.sherpa.onnx.OfflineTtsZipVoiceModelConfig
33
34
  import java.io.File
34
35
  import java.io.FileInputStream
35
36
  import java.io.FileOutputStream
@@ -64,8 +65,7 @@ internal class SherpaOnnxTtsHelper(
64
65
 
65
66
  private data class TtsEngineInstance(
66
67
  @Volatile var tts: OfflineTts? = null,
67
- @Volatile var zipvoiceTts: ZipvoiceTtsWrapper? = null,
68
- var ttsInitState: TtsInitState? = null,
68
+ @Volatile var ttsInitState: TtsInitState? = null,
69
69
  val ttsStreamRunning: AtomicBoolean = AtomicBoolean(false),
70
70
  val ttsStreamCancelled: AtomicBoolean = AtomicBoolean(false),
71
71
  var ttsStreamThread: Thread? = null,
@@ -73,15 +73,13 @@ internal class SherpaOnnxTtsHelper(
73
73
  ) {
74
74
  private val lock = Any()
75
75
 
76
- fun hasEngine(): Boolean = synchronized(lock) { tts != null || zipvoiceTts != null }
77
- val isZipvoice: Boolean get() = synchronized(lock) { zipvoiceTts != null }
76
+ fun hasEngine(): Boolean = synchronized(lock) { tts != null }
77
+ val isZipvoice: Boolean get() = ttsInitState?.modelType == "zipvoice"
78
78
  val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
79
79
  fun releaseEngines() {
80
80
  synchronized(lock) {
81
81
  tts?.release()
82
82
  tts = null
83
- zipvoiceTts?.release()
84
- zipvoiceTts = null
85
83
  ttsInitState = null
86
84
  }
87
85
  }
@@ -186,6 +184,13 @@ internal class SherpaOnnxTtsHelper(
186
184
  rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
187
185
  return@init
188
186
  }
187
+ val lexiconPath = path(paths, "lexicon")
188
+ if (lexiconPath.isBlank()) {
189
+ val msg = "Zipvoice requires lexicon.txt (or lexicon-<lang>.txt) in the model directory. The sherpa-onnx engine aborts if it is missing. Copy lexicon from the official k2-fsa sherpa-onnx Zipvoice model package or hr-files release next to tokens.txt."
190
+ Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: $msg")
191
+ rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
192
+ return@init
193
+ }
189
194
  val am = context.applicationContext.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
190
195
  if (am != null) {
191
196
  val memInfo = ActivityManager.MemoryInfo()
@@ -206,34 +211,26 @@ internal class SherpaOnnxTtsHelper(
206
211
  Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfoBefore.availMem / (1024 * 1024)} MB (before load)")
207
212
  }
208
213
  val zipvoiceNumThreads = 1
209
- val wrapper = ZipvoiceTtsWrapper.create(
210
- tokens = path(paths, "tokens"),
211
- encoder = path(paths, "encoder"),
212
- decoder = path(paths, "decoder"),
213
- vocoder = vocoderPath,
214
- dataDir = path(paths, "dataDir"),
215
- lexicon = path(paths, "lexicon"),
216
- numThreads = zipvoiceNumThreads,
217
- debug = debug,
218
- ruleFsts = ruleFsts?.takeIf { it.isNotBlank() } ?: "",
219
- ruleFars = ruleFars?.takeIf { it.isNotBlank() } ?: "",
220
- maxNumSentences = maxNumSentences?.toInt()?.coerceAtLeast(1) ?: 1,
221
- silenceScale = silenceScale?.toFloat()?.coerceIn(0f, 10f) ?: 0.2f,
222
- provider = provider?.takeIf { it.isNotBlank() } ?: "cpu"
214
+ val config = buildTtsConfig(
215
+ paths, "zipvoice", zipvoiceNumThreads, debug,
216
+ noiseScale, noiseScaleW, lengthScale,
217
+ ruleFsts, ruleFars, maxNumSentences?.toInt(), silenceScale,
218
+ provider
223
219
  )
224
220
  if (am != null) {
225
221
  val memInfo = ActivityManager.MemoryInfo()
226
222
  am.getMemoryInfo(memInfo)
227
223
  Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfo.availMem / (1024 * 1024)} MB (after load)")
228
224
  }
229
- if (wrapper == null) {
230
- Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice TTS engine via C-API. Check logcat for details.")
231
- rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine via C-API. Check logcat for details.")
225
+ try {
226
+ inst.tts = OfflineTts(config = config)
227
+ } catch (e: Exception) {
228
+ Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice OfflineTts: ${e.message}", e)
229
+ rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine: ${e.message}", e)
232
230
  return@init
233
231
  }
234
- inst.zipvoiceTts = wrapper
235
- sampleRate = wrapper.sampleRate()
236
- numSpeakers = wrapper.numSpeakers()
232
+ sampleRate = inst.tts!!.sampleRate()
233
+ numSpeakers = inst.tts!!.numSpeakers()
237
234
  } else {
238
235
  val config = buildTtsConfig(
239
236
  paths, modelTypeStr, numThreads.toInt(), debug,
@@ -246,8 +243,6 @@ internal class SherpaOnnxTtsHelper(
246
243
  numSpeakers = inst.tts!!.numSpeakers()
247
244
  }
248
245
 
249
- Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=${if (inst.isZipvoice) "zipvoice-c-api" else "kotlin-api"}, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
250
-
251
246
  val modelsArray = Arguments.createArray()
252
247
  detectedModels?.forEach { modelObj ->
253
248
  if (modelObj is HashMap<*, *>) {
@@ -273,6 +268,8 @@ internal class SherpaOnnxTtsHelper(
273
268
  provider?.takeIf { it.isNotBlank() }
274
269
  )
275
270
 
271
+ Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=kotlin-api modelType=$modelTypeStr, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
272
+
276
273
  val resultMap = Arguments.createMap()
277
274
  resultMap.putBoolean("success", true)
278
275
  resultMap.putArray("detectedModels", modelsArray)
@@ -309,18 +306,6 @@ internal class SherpaOnnxTtsHelper(
309
306
  return
310
307
  }
311
308
 
312
- if (inst.isZipvoice) {
313
- initializeTts(
314
- instanceId,
315
- state.modelDir, state.modelType, state.numThreads.toDouble(), state.debug,
316
- noiseScale, noiseScaleW, lengthScale,
317
- state.ruleFsts, state.ruleFars, state.maxNumSentences?.toDouble(), state.silenceScale,
318
- state.provider,
319
- promise
320
- )
321
- return
322
- }
323
-
324
309
  val nextNoiseScale = when {
325
310
  noiseScale == null -> null
326
311
  noiseScale.isNaN() -> state.noiseScale
@@ -401,26 +386,35 @@ internal class SherpaOnnxTtsHelper(
401
386
  val sid = getSid(options)
402
387
  val speed = getSpeed(options)
403
388
  val audio = when {
404
- hasReferenceOptions(options) && inst.isZipvoice -> {
405
- val refAudio = options?.getArray("referenceAudio")
406
- ?: run {
407
- Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: referenceAudio required for Zipvoice voice cloning")
408
- promise.reject("TTS_GENERATE_ERROR", "referenceAudio required for Zipvoice voice cloning")
389
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
390
+ if (inst.isZipvoice) {
391
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
392
+ if (promptText.isEmpty()) {
393
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
394
+ promise.reject(
395
+ "TTS_GENERATE_ERROR",
396
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
397
+ )
409
398
  return
410
399
  }
411
- val promptSr = if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
412
- val promptText = options.getString("referenceText").orEmpty()
413
- val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
414
- val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
415
- inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
416
- }
417
- hasReferenceOptions(options) && inst.tts != null -> {
400
+ }
418
401
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
419
402
  inst.tts!!.generateWithConfig(text, config)
420
403
  }
421
- inst.isPocket && !hasReferenceOptions(options) -> {
404
+ hasReferenceAudio(options) -> {
405
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
406
+ promise.reject(
407
+ "TTS_GENERATE_ERROR",
408
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
409
+ )
410
+ return
411
+ }
412
+ inst.isPocket -> {
422
413
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
423
- promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
414
+ promise.reject(
415
+ "TTS_GENERATE_ERROR",
416
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
417
+ )
424
418
  return
425
419
  }
426
420
  else -> dispatchGenerate(inst, text, sid, speed)
@@ -459,26 +453,35 @@ internal class SherpaOnnxTtsHelper(
459
453
  val sid = getSid(options)
460
454
  val speed = getSpeed(options)
461
455
  val audio = when {
462
- hasReferenceOptions(options) && inst.isZipvoice -> {
463
- val refAudio = options?.getArray("referenceAudio")
464
- ?: run {
465
- Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: referenceAudio required for Zipvoice voice cloning")
466
- promise.reject("TTS_GENERATE_ERROR", "referenceAudio required for Zipvoice voice cloning")
456
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
457
+ if (inst.isZipvoice) {
458
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
459
+ if (promptText.isEmpty()) {
460
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
461
+ promise.reject(
462
+ "TTS_GENERATE_ERROR",
463
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
464
+ )
467
465
  return
468
466
  }
469
- val promptSr = if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
470
- val promptText = options.getString("referenceText").orEmpty()
471
- val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
472
- val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
473
- inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
474
- }
475
- hasReferenceOptions(options) && inst.tts != null -> {
467
+ }
476
468
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
477
469
  inst.tts!!.generateWithConfig(text, config)
478
470
  }
479
- inst.isPocket && !hasReferenceOptions(options) -> {
471
+ hasReferenceAudio(options) -> {
472
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
473
+ promise.reject(
474
+ "TTS_GENERATE_ERROR",
475
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
476
+ )
477
+ return
478
+ }
479
+ inst.isPocket -> {
480
480
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
481
- promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
481
+ promise.reject(
482
+ "TTS_GENERATE_ERROR",
483
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
484
+ )
482
485
  return
483
486
  }
484
487
  else -> dispatchGenerate(inst, text, sid, speed)
@@ -529,16 +532,27 @@ internal class SherpaOnnxTtsHelper(
529
532
  promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
530
533
  return
531
534
  }
532
- if (inst.isPocket && !hasReferenceOptions(options)) {
535
+ if (inst.isPocket && !hasReferenceAudio(options)) {
533
536
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
534
- promise.reject("TTS_STREAM_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
537
+ promise.reject(
538
+ "TTS_STREAM_ERROR",
539
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
540
+ )
535
541
  return
536
542
  }
537
- if (hasReferenceOptions(options) && inst.isZipvoice) {
543
+ if (hasReferenceAudio(options) && inst.isZipvoice) {
538
544
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
539
545
  promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
540
546
  return
541
547
  }
548
+ if (hasReferenceAudio(options) && !inst.isPocket) {
549
+ Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Reference audio streaming is only supported for Pocket TTS")
550
+ promise.reject(
551
+ "TTS_STREAM_ERROR",
552
+ "Reference audio streaming is only supported for Pocket TTS."
553
+ )
554
+ return
555
+ }
542
556
  val sid = getSid(options)
543
557
  val speed = getSpeed(options)
544
558
  inst.ttsStreamCancelled.set(false)
@@ -547,7 +561,7 @@ internal class SherpaOnnxTtsHelper(
547
561
  try {
548
562
  val sampleRate = dispatchSampleRate(inst)
549
563
  when {
550
- hasReferenceOptions(options) && inst.tts != null -> {
564
+ hasReferenceAudio(options) && inst.isPocket -> {
551
565
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
552
566
  inst.tts!!.generateWithConfigAndCallback(text, config) { chunk ->
553
567
  if (inst.ttsStreamCancelled.get()) return@generateWithConfigAndCallback 0
@@ -555,13 +569,6 @@ internal class SherpaOnnxTtsHelper(
555
569
  chunk.size
556
570
  }
557
571
  }
558
- inst.zipvoiceTts != null -> {
559
- inst.zipvoiceTts!!.generateWithCallback(text, sid, speed) { chunk ->
560
- if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
561
- emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
562
- chunk.size
563
- }
564
- }
565
572
  else -> {
566
573
  inst.tts!!.generateWithCallback(text, sid, speed) { chunk ->
567
574
  if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
@@ -885,14 +892,21 @@ internal class SherpaOnnxTtsHelper(
885
892
 
886
893
  // -- Dual-engine dispatch helpers --
887
894
 
888
- /** True if options contain reference-audio fields for voice cloning. */
889
- private fun hasReferenceOptions(options: ReadableMap?): Boolean {
895
+ /**
896
+ * True when voice-cloning reference audio is present and valid for native use:
897
+ * non-empty [referenceAudio] array and [referenceSampleRate] > 0.
898
+ * [referenceText] alone does not enable cloning (matches sherpa-onnx behavior).
899
+ */
900
+ private fun hasReferenceAudio(options: ReadableMap?): Boolean {
890
901
  if (options == null) return false
891
- val refAudio = options.getArray("referenceAudio")
892
- val refText = options.getString("referenceText")
893
- return (refAudio != null && refAudio.size() > 0) || !refText.isNullOrEmpty()
902
+ val refAudio = options.getArray("referenceAudio") ?: return false
903
+ if (refAudio.size() == 0) return false
904
+ return readReferenceSampleRate(options) > 0
894
905
  }
895
906
 
907
+ private fun readReferenceSampleRate(options: ReadableMap): Int =
908
+ if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
909
+
896
910
  /** Parse sid and speed from options with defaults. */
897
911
  private fun getSid(options: ReadableMap?): Int =
898
912
  if (options != null && options.hasKey("sid")) options.getDouble("sid").toInt() else 0
@@ -936,18 +950,14 @@ internal class SherpaOnnxTtsHelper(
936
950
 
937
951
  /** Dispatch generate to whichever engine is active on the instance. Returns null if none loaded. */
938
952
  private fun dispatchGenerate(inst: TtsEngineInstance, text: String, sid: Int, speed: Float): GeneratedAudio? {
939
- inst.zipvoiceTts?.let { return it.generate(text, sid, speed) }
940
- inst.tts?.let { return it.generate(text, sid, speed) }
941
- return null
953
+ return inst.tts?.generate(text, sid, speed)
942
954
  }
943
955
 
944
956
  private fun dispatchSampleRate(inst: TtsEngineInstance): Int {
945
- inst.zipvoiceTts?.let { return it.sampleRate() }
946
957
  return inst.tts?.sampleRate() ?: 0
947
958
  }
948
959
 
949
960
  private fun dispatchNumSpeakers(inst: TtsEngineInstance): Int {
950
- inst.zipvoiceTts?.let { return it.numSpeakers() }
951
961
  return inst.tts?.numSpeakers() ?: 0
952
962
  }
953
963
 
@@ -1039,14 +1049,19 @@ internal class SherpaOnnxTtsHelper(
1039
1049
  debug = debug,
1040
1050
  provider = prov
1041
1051
  )
1042
- "zipvoice" -> {
1043
- // Zipvoice is handled by ZipvoiceTtsWrapper (C-API), not OfflineTts (Kotlin API).
1044
- // This branch should not be reached because initializeTts/updateTtsParams handle
1045
- // the "zipvoice" case before calling buildTtsConfig.
1046
- throw IllegalStateException(
1047
- "buildTtsConfig should not be called for zipvoice models. Use ZipvoiceTtsWrapper instead."
1048
- )
1049
- }
1052
+ "zipvoice" -> OfflineTtsModelConfig(
1053
+ zipvoice = OfflineTtsZipVoiceModelConfig(
1054
+ tokens = path(paths, "tokens"),
1055
+ encoder = path(paths, "encoder"),
1056
+ decoder = path(paths, "decoder"),
1057
+ vocoder = path(paths, "vocoder"),
1058
+ dataDir = path(paths, "dataDir"),
1059
+ lexicon = path(paths, "lexicon")
1060
+ ),
1061
+ numThreads = numThreads,
1062
+ debug = debug,
1063
+ provider = prov
1064
+ )
1050
1065
  else -> {
1051
1066
  if (path(paths, "acousticModel").isNotEmpty()) {
1052
1067
  OfflineTtsModelConfig(