react-native-sherpa-onnx 0.3.6 → 0.3.8

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (228)
  1. package/LICENSE +1 -0
  2. package/README.md +92 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +344 -152
  15. package/android/prebuilt-versions.gradle +1 -1
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +2 -2
  21. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +37 -6
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +9 -1
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +7 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +18 -2
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +40 -10
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +99 -0
  28. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  29. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +127 -97
  30. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  31. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  32. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  33. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  34. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  35. package/ios/SherpaOnnx+TTS.mm +179 -20
  36. package/ios/SherpaOnnx.mm +54 -0
  37. package/ios/SherpaOnnxAudioConvert.h +10 -0
  38. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  39. package/ios/archive/sherpa-onnx-archive-helper.h +3 -0
  40. package/ios/archive/sherpa-onnx-archive-helper.mm +39 -6
  41. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +49 -6
  42. package/ios/model_detect/sherpa-onnx-model-detect.h +9 -1
  43. package/ios/model_detect/sherpa-onnx-validate-tts.mm +18 -2
  44. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  45. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  46. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  47. package/ios/tts/sherpa-onnx-tts-wrapper.mm +158 -3
  48. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  49. package/lib/module/audio/index.js +8 -0
  50. package/lib/module/audio/index.js.map +1 -1
  51. package/lib/module/download/ModelDownloadManager.js +10 -929
  52. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  53. package/lib/module/download/activeModelOperations.js +26 -0
  54. package/lib/module/download/activeModelOperations.js.map +1 -0
  55. package/lib/module/download/background-downloader-types.js +2 -0
  56. package/lib/module/download/background-downloader-types.js.map +1 -0
  57. package/lib/module/download/bulkPurge.js +72 -0
  58. package/lib/module/download/bulkPurge.js.map +1 -0
  59. package/lib/module/download/checksumPrompt.js +19 -0
  60. package/lib/module/download/checksumPrompt.js.map +1 -0
  61. package/lib/module/download/constants.js +7 -0
  62. package/lib/module/download/constants.js.map +1 -0
  63. package/lib/module/download/downloadEvents.js +35 -0
  64. package/lib/module/download/downloadEvents.js.map +1 -0
  65. package/lib/module/download/downloadTask.js +438 -0
  66. package/lib/module/download/downloadTask.js.map +1 -0
  67. package/lib/module/download/ensureModel.js +89 -0
  68. package/lib/module/download/ensureModel.js.map +1 -0
  69. package/lib/module/download/index.js +4 -4
  70. package/lib/module/download/index.js.map +1 -1
  71. package/lib/module/download/localModels.js +151 -0
  72. package/lib/module/download/localModels.js.map +1 -0
  73. package/lib/module/download/modelExtraction.js +174 -0
  74. package/lib/module/download/modelExtraction.js.map +1 -0
  75. package/lib/module/download/paths.js +98 -0
  76. package/lib/module/download/paths.js.map +1 -0
  77. package/lib/module/download/postDownloadProcessing.js +206 -0
  78. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  79. package/lib/module/download/protectedModelKeys.js +31 -0
  80. package/lib/module/download/protectedModelKeys.js.map +1 -0
  81. package/lib/module/download/registry.js +268 -0
  82. package/lib/module/download/registry.js.map +1 -0
  83. package/lib/module/download/retry.js +59 -0
  84. package/lib/module/download/retry.js.map +1 -0
  85. package/lib/module/download/types.js +17 -0
  86. package/lib/module/download/types.js.map +1 -0
  87. package/lib/module/download/validation.js +101 -5
  88. package/lib/module/download/validation.js.map +1 -1
  89. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  90. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  91. package/lib/module/{download → extraction}/extractTarZst.js +3 -1
  92. package/lib/module/extraction/extractTarZst.js.map +1 -0
  93. package/lib/module/extraction/index.js +3 -4
  94. package/lib/module/extraction/index.js.map +1 -1
  95. package/lib/module/index.js +1 -1
  96. package/lib/module/index.js.map +1 -1
  97. package/lib/module/licenses.js +63 -0
  98. package/lib/module/licenses.js.map +1 -0
  99. package/lib/module/stt/index.js +16 -2
  100. package/lib/module/stt/index.js.map +1 -1
  101. package/lib/module/stt/streaming.js +2 -0
  102. package/lib/module/stt/streaming.js.map +1 -1
  103. package/lib/module/stt/streamingTypes.js.map +1 -1
  104. package/lib/module/stt/types.js.map +1 -1
  105. package/lib/module/tts/index.js +21 -3
  106. package/lib/module/tts/index.js.map +1 -1
  107. package/lib/module/tts/streaming.js +5 -1
  108. package/lib/module/tts/streaming.js.map +1 -1
  109. package/lib/module/tts/types.js +4 -1
  110. package/lib/module/tts/types.js.map +1 -1
  111. package/lib/module/utils.js +16 -1
  112. package/lib/module/utils.js.map +1 -1
  113. package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -6
  114. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  115. package/lib/typescript/src/audio/index.d.ts +10 -0
  116. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  117. package/lib/typescript/src/download/ModelDownloadManager.d.ts +11 -108
  118. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  119. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  120. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  121. package/lib/typescript/src/download/background-downloader-types.d.ts +64 -0
  122. package/lib/typescript/src/download/background-downloader-types.d.ts.map +1 -0
  123. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  124. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  125. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  126. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  127. package/lib/typescript/src/download/constants.d.ts +5 -0
  128. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  129. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  130. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  131. package/lib/typescript/src/download/downloadTask.d.ts +30 -0
  132. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  133. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  134. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  135. package/lib/typescript/src/download/index.d.ts +7 -7
  136. package/lib/typescript/src/download/index.d.ts.map +1 -1
  137. package/lib/typescript/src/download/localModels.d.ts +15 -0
  138. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  139. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  140. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  141. package/lib/typescript/src/download/paths.d.ts +28 -0
  142. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  143. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  144. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  145. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  146. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  147. package/lib/typescript/src/download/registry.d.ts +14 -0
  148. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  149. package/lib/typescript/src/download/retry.d.ts +15 -0
  150. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  151. package/lib/typescript/src/download/types.d.ts +96 -0
  152. package/lib/typescript/src/download/types.d.ts.map +1 -0
  153. package/lib/typescript/src/download/validation.d.ts +19 -0
  154. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  155. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  156. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  157. package/lib/typescript/src/index.d.ts +1 -0
  158. package/lib/typescript/src/index.d.ts.map +1 -1
  159. package/lib/typescript/src/licenses.d.ts +10 -0
  160. package/lib/typescript/src/licenses.d.ts.map +1 -0
  161. package/lib/typescript/src/stt/index.d.ts +4 -1
  162. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  163. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  164. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  165. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  166. package/lib/typescript/src/stt/types.d.ts +3 -1
  167. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  168. package/lib/typescript/src/tts/index.d.ts +4 -2
  169. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  170. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  171. package/lib/typescript/src/tts/types.d.ts +12 -6
  172. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  173. package/lib/typescript/src/utils.d.ts +5 -0
  174. package/lib/typescript/src/utils.d.ts.map +1 -1
  175. package/package.json +6 -1
  176. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  177. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  178. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  179. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  180. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  181. package/scripts/ci/update_model_license_csv.sh +765 -0
  182. package/scripts/setup-ios-framework.sh +14 -11
  183. package/scripts/update_commercial_use.js +73 -0
  184. package/src/NativeSherpaOnnx.ts +37 -6
  185. package/src/audio/index.ts +20 -0
  186. package/src/download/ModelDownloadManager.ts +57 -1343
  187. package/src/download/activeModelOperations.ts +38 -0
  188. package/src/download/background-downloader-types.ts +73 -0
  189. package/src/download/bulkPurge.ts +102 -0
  190. package/src/download/checksumPrompt.ts +25 -0
  191. package/src/download/constants.ts +5 -0
  192. package/src/download/downloadEvents.ts +55 -0
  193. package/src/download/downloadTask.ts +565 -0
  194. package/src/download/ensureModel.ts +124 -0
  195. package/src/download/index.ts +21 -4
  196. package/src/download/localModels.ts +234 -0
  197. package/src/download/modelExtraction.ts +244 -0
  198. package/src/download/paths.ts +134 -0
  199. package/src/download/postDownloadProcessing.ts +292 -0
  200. package/src/download/protectedModelKeys.ts +30 -0
  201. package/src/download/registry.ts +405 -0
  202. package/src/download/retry.ts +76 -0
  203. package/src/download/types.ts +120 -0
  204. package/src/download/validation.ts +114 -8
  205. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  206. package/src/{download → extraction}/extractTarZst.ts +3 -1
  207. package/src/extraction/index.ts +3 -7
  208. package/src/index.tsx +1 -0
  209. package/src/licenses.ts +100 -0
  210. package/src/stt/index.ts +20 -2
  211. package/src/stt/streaming.ts +3 -0
  212. package/src/stt/streamingTypes.ts +5 -0
  213. package/src/stt/types.ts +3 -1
  214. package/src/tts/index.ts +33 -2
  215. package/src/tts/streaming.ts +12 -0
  216. package/src/tts/types.ts +15 -5
  217. package/src/utils.ts +22 -1
  218. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  219. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  220. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  221. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  222. package/lib/module/download/extractTarBz2.js.map +0 -1
  223. package/lib/module/download/extractTarZst.js.map +0 -1
  224. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  225. package/lib/typescript/src/download/extractTarZst.d.ts.map +0 -1
  226. package/scripts/check-qnn-support.sh +0 -78
  227. /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
  228. /package/lib/typescript/src/{download → extraction}/extractTarZst.d.ts +0 -0
@@ -30,6 +30,8 @@ import com.k2fsa.sherpa.onnx.OfflineTtsVitsModelConfig
30
30
  import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
31
31
  import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
32
32
  import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
33
+ import com.k2fsa.sherpa.onnx.OfflineTtsZipVoiceModelConfig
34
+ import com.k2fsa.sherpa.onnx.OfflineTtsSupertonicModelConfig
33
35
  import java.io.File
34
36
  import java.io.FileInputStream
35
37
  import java.io.FileOutputStream
@@ -64,8 +66,7 @@ internal class SherpaOnnxTtsHelper(
64
66
 
65
67
  private data class TtsEngineInstance(
66
68
  @Volatile var tts: OfflineTts? = null,
67
- @Volatile var zipvoiceTts: ZipvoiceTtsWrapper? = null,
68
- var ttsInitState: TtsInitState? = null,
69
+ @Volatile var ttsInitState: TtsInitState? = null,
69
70
  val ttsStreamRunning: AtomicBoolean = AtomicBoolean(false),
70
71
  val ttsStreamCancelled: AtomicBoolean = AtomicBoolean(false),
71
72
  var ttsStreamThread: Thread? = null,
@@ -73,15 +74,13 @@ internal class SherpaOnnxTtsHelper(
73
74
  ) {
74
75
  private val lock = Any()
75
76
 
76
- fun hasEngine(): Boolean = synchronized(lock) { tts != null || zipvoiceTts != null }
77
- val isZipvoice: Boolean get() = synchronized(lock) { zipvoiceTts != null }
77
+ fun hasEngine(): Boolean = synchronized(lock) { tts != null }
78
+ val isZipvoice: Boolean get() = ttsInitState?.modelType == "zipvoice"
78
79
  val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
79
80
  fun releaseEngines() {
80
81
  synchronized(lock) {
81
82
  tts?.release()
82
83
  tts = null
83
- zipvoiceTts?.release()
84
- zipvoiceTts = null
85
84
  ttsInitState = null
86
85
  }
87
86
  }
@@ -186,6 +185,13 @@ internal class SherpaOnnxTtsHelper(
186
185
  rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
187
186
  return@init
188
187
  }
188
+ val lexiconPath = path(paths, "lexicon")
189
+ if (lexiconPath.isBlank()) {
190
+ val msg = "Zipvoice requires lexicon.txt (or lexicon-<lang>.txt) in the model directory. The sherpa-onnx engine aborts if it is missing. Copy lexicon from the official k2-fsa sherpa-onnx Zipvoice model package or hr-files release next to tokens.txt."
191
+ Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: $msg")
192
+ rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
193
+ return@init
194
+ }
189
195
  val am = context.applicationContext.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
190
196
  if (am != null) {
191
197
  val memInfo = ActivityManager.MemoryInfo()
@@ -206,34 +212,26 @@ internal class SherpaOnnxTtsHelper(
206
212
  Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfoBefore.availMem / (1024 * 1024)} MB (before load)")
207
213
  }
208
214
  val zipvoiceNumThreads = 1
209
- val wrapper = ZipvoiceTtsWrapper.create(
210
- tokens = path(paths, "tokens"),
211
- encoder = path(paths, "encoder"),
212
- decoder = path(paths, "decoder"),
213
- vocoder = vocoderPath,
214
- dataDir = path(paths, "dataDir"),
215
- lexicon = path(paths, "lexicon"),
216
- numThreads = zipvoiceNumThreads,
217
- debug = debug,
218
- ruleFsts = ruleFsts?.takeIf { it.isNotBlank() } ?: "",
219
- ruleFars = ruleFars?.takeIf { it.isNotBlank() } ?: "",
220
- maxNumSentences = maxNumSentences?.toInt()?.coerceAtLeast(1) ?: 1,
221
- silenceScale = silenceScale?.toFloat()?.coerceIn(0f, 10f) ?: 0.2f,
222
- provider = provider?.takeIf { it.isNotBlank() } ?: "cpu"
215
+ val config = buildTtsConfig(
216
+ paths, "zipvoice", zipvoiceNumThreads, debug,
217
+ noiseScale, noiseScaleW, lengthScale,
218
+ ruleFsts, ruleFars, maxNumSentences?.toInt(), silenceScale,
219
+ provider
223
220
  )
224
221
  if (am != null) {
225
222
  val memInfo = ActivityManager.MemoryInfo()
226
223
  am.getMemoryInfo(memInfo)
227
224
  Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfo.availMem / (1024 * 1024)} MB (after load)")
228
225
  }
229
- if (wrapper == null) {
230
- Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice TTS engine via C-API. Check logcat for details.")
231
- rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine via C-API. Check logcat for details.")
226
+ try {
227
+ inst.tts = OfflineTts(config = config)
228
+ } catch (e: Exception) {
229
+ Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice OfflineTts: ${e.message}", e)
230
+ rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine: ${e.message}", e)
232
231
  return@init
233
232
  }
234
- inst.zipvoiceTts = wrapper
235
- sampleRate = wrapper.sampleRate()
236
- numSpeakers = wrapper.numSpeakers()
233
+ sampleRate = inst.tts!!.sampleRate()
234
+ numSpeakers = inst.tts!!.numSpeakers()
237
235
  } else {
238
236
  val config = buildTtsConfig(
239
237
  paths, modelTypeStr, numThreads.toInt(), debug,
@@ -246,8 +244,6 @@ internal class SherpaOnnxTtsHelper(
246
244
  numSpeakers = inst.tts!!.numSpeakers()
247
245
  }
248
246
 
249
- Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=${if (inst.isZipvoice) "zipvoice-c-api" else "kotlin-api"}, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
250
-
251
247
  val modelsArray = Arguments.createArray()
252
248
  detectedModels?.forEach { modelObj ->
253
249
  if (modelObj is HashMap<*, *>) {
@@ -273,6 +269,8 @@ internal class SherpaOnnxTtsHelper(
273
269
  provider?.takeIf { it.isNotBlank() }
274
270
  )
275
271
 
272
+ Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=kotlin-api modelType=$modelTypeStr, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
273
+
276
274
  val resultMap = Arguments.createMap()
277
275
  resultMap.putBoolean("success", true)
278
276
  resultMap.putArray("detectedModels", modelsArray)
@@ -309,18 +307,6 @@ internal class SherpaOnnxTtsHelper(
309
307
  return
310
308
  }
311
309
 
312
- if (inst.isZipvoice) {
313
- initializeTts(
314
- instanceId,
315
- state.modelDir, state.modelType, state.numThreads.toDouble(), state.debug,
316
- noiseScale, noiseScaleW, lengthScale,
317
- state.ruleFsts, state.ruleFars, state.maxNumSentences?.toDouble(), state.silenceScale,
318
- state.provider,
319
- promise
320
- )
321
- return
322
- }
323
-
324
310
  val nextNoiseScale = when {
325
311
  noiseScale == null -> null
326
312
  noiseScale.isNaN() -> state.noiseScale
@@ -401,26 +387,35 @@ internal class SherpaOnnxTtsHelper(
401
387
  val sid = getSid(options)
402
388
  val speed = getSpeed(options)
403
389
  val audio = when {
404
- hasReferenceOptions(options) && inst.isZipvoice -> {
405
- val refAudio = options?.getArray("referenceAudio")
406
- ?: run {
407
- Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: referenceAudio required for Zipvoice voice cloning")
408
- promise.reject("TTS_GENERATE_ERROR", "referenceAudio required for Zipvoice voice cloning")
390
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
391
+ if (inst.isZipvoice) {
392
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
393
+ if (promptText.isEmpty()) {
394
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
395
+ promise.reject(
396
+ "TTS_GENERATE_ERROR",
397
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
398
+ )
409
399
  return
410
400
  }
411
- val promptSr = if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
412
- val promptText = options.getString("referenceText").orEmpty()
413
- val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
414
- val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
415
- inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
416
- }
417
- hasReferenceOptions(options) && inst.tts != null -> {
401
+ }
418
402
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
419
403
  inst.tts!!.generateWithConfig(text, config)
420
404
  }
421
- inst.isPocket && !hasReferenceOptions(options) -> {
405
+ hasReferenceAudio(options) -> {
406
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
407
+ promise.reject(
408
+ "TTS_GENERATE_ERROR",
409
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
410
+ )
411
+ return
412
+ }
413
+ inst.isPocket -> {
422
414
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
423
- promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
415
+ promise.reject(
416
+ "TTS_GENERATE_ERROR",
417
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
418
+ )
424
419
  return
425
420
  }
426
421
  else -> dispatchGenerate(inst, text, sid, speed)
@@ -459,26 +454,35 @@ internal class SherpaOnnxTtsHelper(
459
454
  val sid = getSid(options)
460
455
  val speed = getSpeed(options)
461
456
  val audio = when {
462
- hasReferenceOptions(options) && inst.isZipvoice -> {
463
- val refAudio = options?.getArray("referenceAudio")
464
- ?: run {
465
- Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: referenceAudio required for Zipvoice voice cloning")
466
- promise.reject("TTS_GENERATE_ERROR", "referenceAudio required for Zipvoice voice cloning")
457
+ hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
458
+ if (inst.isZipvoice) {
459
+ val promptText = options!!.getString("referenceText")?.trim().orEmpty()
460
+ if (promptText.isEmpty()) {
461
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
462
+ promise.reject(
463
+ "TTS_GENERATE_ERROR",
464
+ "Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
465
+ )
467
466
  return
468
467
  }
469
- val promptSr = if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
470
- val promptText = options.getString("referenceText").orEmpty()
471
- val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
472
- val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
473
- inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
474
- }
475
- hasReferenceOptions(options) && inst.tts != null -> {
468
+ }
476
469
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
477
470
  inst.tts!!.generateWithConfig(text, config)
478
471
  }
479
- inst.isPocket && !hasReferenceOptions(options) -> {
472
+ hasReferenceAudio(options) -> {
473
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
474
+ promise.reject(
475
+ "TTS_GENERATE_ERROR",
476
+ "Reference audio is only supported for Zipvoice and Pocket TTS."
477
+ )
478
+ return
479
+ }
480
+ inst.isPocket -> {
480
481
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
481
- promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
482
+ promise.reject(
483
+ "TTS_GENERATE_ERROR",
484
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
485
+ )
482
486
  return
483
487
  }
484
488
  else -> dispatchGenerate(inst, text, sid, speed)
@@ -529,16 +533,27 @@ internal class SherpaOnnxTtsHelper(
529
533
  promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
530
534
  return
531
535
  }
532
- if (inst.isPocket && !hasReferenceOptions(options)) {
536
+ if (inst.isPocket && !hasReferenceAudio(options)) {
533
537
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
534
- promise.reject("TTS_STREAM_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
538
+ promise.reject(
539
+ "TTS_STREAM_ERROR",
540
+ "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
541
+ )
535
542
  return
536
543
  }
537
- if (hasReferenceOptions(options) && inst.isZipvoice) {
544
+ if (hasReferenceAudio(options) && inst.isZipvoice) {
538
545
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
539
546
  promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
540
547
  return
541
548
  }
549
+ if (hasReferenceAudio(options) && !inst.isPocket) {
550
+ Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Reference audio streaming is only supported for Pocket TTS")
551
+ promise.reject(
552
+ "TTS_STREAM_ERROR",
553
+ "Reference audio streaming is only supported for Pocket TTS."
554
+ )
555
+ return
556
+ }
542
557
  val sid = getSid(options)
543
558
  val speed = getSpeed(options)
544
559
  inst.ttsStreamCancelled.set(false)
@@ -547,7 +562,7 @@ internal class SherpaOnnxTtsHelper(
547
562
  try {
548
563
  val sampleRate = dispatchSampleRate(inst)
549
564
  when {
550
- hasReferenceOptions(options) && inst.tts != null -> {
565
+ hasReferenceAudio(options) && inst.isPocket -> {
551
566
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
552
567
  inst.tts!!.generateWithConfigAndCallback(text, config) { chunk ->
553
568
  if (inst.ttsStreamCancelled.get()) return@generateWithConfigAndCallback 0
@@ -555,13 +570,6 @@ internal class SherpaOnnxTtsHelper(
555
570
  chunk.size
556
571
  }
557
572
  }
558
- inst.zipvoiceTts != null -> {
559
- inst.zipvoiceTts!!.generateWithCallback(text, sid, speed) { chunk ->
560
- if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
561
- emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
562
- chunk.size
563
- }
564
- }
565
573
  else -> {
566
574
  inst.tts!!.generateWithCallback(text, sid, speed) { chunk ->
567
575
  if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
@@ -885,14 +893,21 @@ internal class SherpaOnnxTtsHelper(
885
893
 
886
894
  // -- Dual-engine dispatch helpers --
887
895
 
888
- /** True if options contain reference-audio fields for voice cloning. */
889
- private fun hasReferenceOptions(options: ReadableMap?): Boolean {
896
+ /**
897
+ * True when voice-cloning reference audio is present and valid for native use:
898
+ * non-empty [referenceAudio] array and [referenceSampleRate] > 0.
899
+ * [referenceText] alone does not enable cloning (matches sherpa-onnx behavior).
900
+ */
901
+ private fun hasReferenceAudio(options: ReadableMap?): Boolean {
890
902
  if (options == null) return false
891
- val refAudio = options.getArray("referenceAudio")
892
- val refText = options.getString("referenceText")
893
- return (refAudio != null && refAudio.size() > 0) || !refText.isNullOrEmpty()
903
+ val refAudio = options.getArray("referenceAudio") ?: return false
904
+ if (refAudio.size() == 0) return false
905
+ return readReferenceSampleRate(options) > 0
894
906
  }
895
907
 
908
+ private fun readReferenceSampleRate(options: ReadableMap): Int =
909
+ if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
910
+
896
911
  /** Parse sid and speed from options with defaults. */
897
912
  private fun getSid(options: ReadableMap?): Int =
898
913
  if (options != null && options.hasKey("sid")) options.getDouble("sid").toInt() else 0
@@ -936,18 +951,14 @@ internal class SherpaOnnxTtsHelper(
936
951
 
937
952
  /** Dispatch generate to whichever engine is active on the instance. Returns null if none loaded. */
938
953
  private fun dispatchGenerate(inst: TtsEngineInstance, text: String, sid: Int, speed: Float): GeneratedAudio? {
939
- inst.zipvoiceTts?.let { return it.generate(text, sid, speed) }
940
- inst.tts?.let { return it.generate(text, sid, speed) }
941
- return null
954
+ return inst.tts?.generate(text, sid, speed)
942
955
  }
943
956
 
944
957
  private fun dispatchSampleRate(inst: TtsEngineInstance): Int {
945
- inst.zipvoiceTts?.let { return it.sampleRate() }
946
958
  return inst.tts?.sampleRate() ?: 0
947
959
  }
948
960
 
949
961
  private fun dispatchNumSpeakers(inst: TtsEngineInstance): Int {
950
- inst.zipvoiceTts?.let { return it.numSpeakers() }
951
962
  return inst.tts?.numSpeakers() ?: 0
952
963
  }
953
964
 
@@ -1039,14 +1050,33 @@ internal class SherpaOnnxTtsHelper(
1039
1050
  debug = debug,
1040
1051
  provider = prov
1041
1052
  )
1042
- "zipvoice" -> {
1043
- // Zipvoice is handled by ZipvoiceTtsWrapper (C-API), not OfflineTts (Kotlin API).
1044
- // This branch should not be reached because initializeTts/updateTtsParams handle
1045
- // the "zipvoice" case before calling buildTtsConfig.
1046
- throw IllegalStateException(
1047
- "buildTtsConfig should not be called for zipvoice models. Use ZipvoiceTtsWrapper instead."
1048
- )
1049
- }
1053
+ "zipvoice" -> OfflineTtsModelConfig(
1054
+ zipvoice = OfflineTtsZipVoiceModelConfig(
1055
+ tokens = path(paths, "tokens"),
1056
+ encoder = path(paths, "encoder"),
1057
+ decoder = path(paths, "decoder"),
1058
+ vocoder = path(paths, "vocoder"),
1059
+ dataDir = path(paths, "dataDir"),
1060
+ lexicon = path(paths, "lexicon")
1061
+ ),
1062
+ numThreads = numThreads,
1063
+ debug = debug,
1064
+ provider = prov
1065
+ )
1066
+ "supertonic" -> OfflineTtsModelConfig(
1067
+ supertonic = OfflineTtsSupertonicModelConfig(
1068
+ durationPredictor = path(paths, "durationPredictor"),
1069
+ textEncoder = path(paths, "textEncoder"),
1070
+ vectorEstimator = path(paths, "vectorEstimator"),
1071
+ vocoder = path(paths, "vocoder"),
1072
+ ttsJson = path(paths, "ttsJson"),
1073
+ unicodeIndexer = path(paths, "unicodeIndexer"),
1074
+ voiceStyle = path(paths, "voiceStyle")
1075
+ ),
1076
+ numThreads = numThreads,
1077
+ debug = debug,
1078
+ provider = prov
1079
+ )
1050
1080
  else -> {
1051
1081
  if (path(paths, "acousticModel").isNotEmpty()) {
1052
1082
  OfflineTtsModelConfig(