react-native-sherpa-onnx 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +92 -21
- package/SherpaOnnx.podspec +3 -0
- package/THIRD_PARTY_LICENSES/README.md +62 -0
- package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
- package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
- package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
- package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
- package/THIRD_PARTY_LICENSES/opus.txt +44 -0
- package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
- package/THIRD_PARTY_LICENSES/shine.txt +482 -0
- package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
- package/android/build.gradle +7 -3
- package/android/prebuilt-download.gradle +344 -152
- package/android/prebuilt-versions.gradle +1 -1
- package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
- package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
- package/android/src/main/cpp/CMakeLists.txt +28 -10
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +2 -2
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +37 -6
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +9 -1
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +7 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +18 -2
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +40 -10
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +99 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +127 -97
- package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
- package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
- package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
- package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
- package/ios/SherpaOnnx+TTS.mm +179 -20
- package/ios/SherpaOnnx.mm +54 -0
- package/ios/SherpaOnnxAudioConvert.h +10 -0
- package/ios/SherpaOnnxAudioConvert.mm +257 -1
- package/ios/archive/sherpa-onnx-archive-helper.h +3 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +39 -6
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +49 -6
- package/ios/model_detect/sherpa-onnx-model-detect.h +9 -1
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +18 -2
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.mm +158 -3
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +8 -0
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +10 -929
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/download/activeModelOperations.js +26 -0
- package/lib/module/download/activeModelOperations.js.map +1 -0
- package/lib/module/download/background-downloader-types.js +2 -0
- package/lib/module/download/background-downloader-types.js.map +1 -0
- package/lib/module/download/bulkPurge.js +72 -0
- package/lib/module/download/bulkPurge.js.map +1 -0
- package/lib/module/download/checksumPrompt.js +19 -0
- package/lib/module/download/checksumPrompt.js.map +1 -0
- package/lib/module/download/constants.js +7 -0
- package/lib/module/download/constants.js.map +1 -0
- package/lib/module/download/downloadEvents.js +35 -0
- package/lib/module/download/downloadEvents.js.map +1 -0
- package/lib/module/download/downloadTask.js +438 -0
- package/lib/module/download/downloadTask.js.map +1 -0
- package/lib/module/download/ensureModel.js +89 -0
- package/lib/module/download/ensureModel.js.map +1 -0
- package/lib/module/download/index.js +4 -4
- package/lib/module/download/index.js.map +1 -1
- package/lib/module/download/localModels.js +151 -0
- package/lib/module/download/localModels.js.map +1 -0
- package/lib/module/download/modelExtraction.js +174 -0
- package/lib/module/download/modelExtraction.js.map +1 -0
- package/lib/module/download/paths.js +98 -0
- package/lib/module/download/paths.js.map +1 -0
- package/lib/module/download/postDownloadProcessing.js +206 -0
- package/lib/module/download/postDownloadProcessing.js.map +1 -0
- package/lib/module/download/protectedModelKeys.js +31 -0
- package/lib/module/download/protectedModelKeys.js.map +1 -0
- package/lib/module/download/registry.js +268 -0
- package/lib/module/download/registry.js.map +1 -0
- package/lib/module/download/retry.js +59 -0
- package/lib/module/download/retry.js.map +1 -0
- package/lib/module/download/types.js +17 -0
- package/lib/module/download/types.js.map +1 -0
- package/lib/module/download/validation.js +101 -5
- package/lib/module/download/validation.js.map +1 -1
- package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
- package/lib/module/extraction/extractTarBz2.js.map +1 -0
- package/lib/module/{download → extraction}/extractTarZst.js +3 -1
- package/lib/module/extraction/extractTarZst.js.map +1 -0
- package/lib/module/extraction/index.js +3 -4
- package/lib/module/extraction/index.js.map +1 -1
- package/lib/module/index.js +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/licenses.js +63 -0
- package/lib/module/licenses.js.map +1 -0
- package/lib/module/stt/index.js +16 -2
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +2 -0
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/stt/streamingTypes.js.map +1 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +21 -3
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +5 -1
- package/lib/module/tts/streaming.js.map +1 -1
- package/lib/module/tts/types.js +4 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/utils.js +16 -1
- package/lib/module/utils.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -6
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +10 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +11 -108
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
- package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
- package/lib/typescript/src/download/background-downloader-types.d.ts +64 -0
- package/lib/typescript/src/download/background-downloader-types.d.ts.map +1 -0
- package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
- package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
- package/lib/typescript/src/download/constants.d.ts +5 -0
- package/lib/typescript/src/download/constants.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
- package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadTask.d.ts +30 -0
- package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
- package/lib/typescript/src/download/ensureModel.d.ts +26 -0
- package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -7
- package/lib/typescript/src/download/index.d.ts.map +1 -1
- package/lib/typescript/src/download/localModels.d.ts +15 -0
- package/lib/typescript/src/download/localModels.d.ts.map +1 -0
- package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
- package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
- package/lib/typescript/src/download/paths.d.ts +28 -0
- package/lib/typescript/src/download/paths.d.ts.map +1 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
- package/lib/typescript/src/download/registry.d.ts +14 -0
- package/lib/typescript/src/download/registry.d.ts.map +1 -0
- package/lib/typescript/src/download/retry.d.ts +15 -0
- package/lib/typescript/src/download/retry.d.ts.map +1 -0
- package/lib/typescript/src/download/types.d.ts +96 -0
- package/lib/typescript/src/download/types.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +19 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +1 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/licenses.d.ts +10 -0
- package/lib/typescript/src/licenses.d.ts.map +1 -0
- package/lib/typescript/src/stt/index.d.ts +4 -1
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +3 -1
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +4 -2
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +12 -6
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +5 -0
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
- package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
- package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
- package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
- package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
- package/scripts/ci/update_model_license_csv.sh +765 -0
- package/scripts/setup-ios-framework.sh +14 -11
- package/scripts/update_commercial_use.js +73 -0
- package/src/NativeSherpaOnnx.ts +37 -6
- package/src/audio/index.ts +20 -0
- package/src/download/ModelDownloadManager.ts +57 -1343
- package/src/download/activeModelOperations.ts +38 -0
- package/src/download/background-downloader-types.ts +73 -0
- package/src/download/bulkPurge.ts +102 -0
- package/src/download/checksumPrompt.ts +25 -0
- package/src/download/constants.ts +5 -0
- package/src/download/downloadEvents.ts +55 -0
- package/src/download/downloadTask.ts +565 -0
- package/src/download/ensureModel.ts +124 -0
- package/src/download/index.ts +21 -4
- package/src/download/localModels.ts +234 -0
- package/src/download/modelExtraction.ts +244 -0
- package/src/download/paths.ts +134 -0
- package/src/download/postDownloadProcessing.ts +292 -0
- package/src/download/protectedModelKeys.ts +30 -0
- package/src/download/registry.ts +405 -0
- package/src/download/retry.ts +76 -0
- package/src/download/types.ts +120 -0
- package/src/download/validation.ts +114 -8
- package/src/{download → extraction}/extractTarBz2.ts +3 -1
- package/src/{download → extraction}/extractTarZst.ts +3 -1
- package/src/extraction/index.ts +3 -7
- package/src/index.tsx +1 -0
- package/src/licenses.ts +100 -0
- package/src/stt/index.ts +20 -2
- package/src/stt/streaming.ts +3 -0
- package/src/stt/streamingTypes.ts +5 -0
- package/src/stt/types.ts +3 -1
- package/src/tts/index.ts +33 -2
- package/src/tts/streaming.ts +12 -0
- package/src/tts/types.ts +15 -5
- package/src/utils.ts +22 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
- package/lib/module/download/extractTarBz2.js.map +0 -1
- package/lib/module/download/extractTarZst.js.map +0 -1
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
- package/lib/typescript/src/download/extractTarZst.d.ts.map +0 -1
- package/scripts/check-qnn-support.sh +0 -78
- /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
- /package/lib/typescript/src/{download → extraction}/extractTarZst.d.ts +0 -0
|
@@ -30,6 +30,8 @@ import com.k2fsa.sherpa.onnx.OfflineTtsVitsModelConfig
|
|
|
30
30
|
import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
|
|
31
31
|
import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
|
|
32
32
|
import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
|
|
33
|
+
import com.k2fsa.sherpa.onnx.OfflineTtsZipVoiceModelConfig
|
|
34
|
+
import com.k2fsa.sherpa.onnx.OfflineTtsSupertonicModelConfig
|
|
33
35
|
import java.io.File
|
|
34
36
|
import java.io.FileInputStream
|
|
35
37
|
import java.io.FileOutputStream
|
|
@@ -64,8 +66,7 @@ internal class SherpaOnnxTtsHelper(
|
|
|
64
66
|
|
|
65
67
|
private data class TtsEngineInstance(
|
|
66
68
|
@Volatile var tts: OfflineTts? = null,
|
|
67
|
-
@Volatile var
|
|
68
|
-
var ttsInitState: TtsInitState? = null,
|
|
69
|
+
@Volatile var ttsInitState: TtsInitState? = null,
|
|
69
70
|
val ttsStreamRunning: AtomicBoolean = AtomicBoolean(false),
|
|
70
71
|
val ttsStreamCancelled: AtomicBoolean = AtomicBoolean(false),
|
|
71
72
|
var ttsStreamThread: Thread? = null,
|
|
@@ -73,15 +74,13 @@ internal class SherpaOnnxTtsHelper(
|
|
|
73
74
|
) {
|
|
74
75
|
private val lock = Any()
|
|
75
76
|
|
|
76
|
-
fun hasEngine(): Boolean = synchronized(lock) { tts != null
|
|
77
|
-
val isZipvoice: Boolean get() =
|
|
77
|
+
fun hasEngine(): Boolean = synchronized(lock) { tts != null }
|
|
78
|
+
val isZipvoice: Boolean get() = ttsInitState?.modelType == "zipvoice"
|
|
78
79
|
val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
|
|
79
80
|
fun releaseEngines() {
|
|
80
81
|
synchronized(lock) {
|
|
81
82
|
tts?.release()
|
|
82
83
|
tts = null
|
|
83
|
-
zipvoiceTts?.release()
|
|
84
|
-
zipvoiceTts = null
|
|
85
84
|
ttsInitState = null
|
|
86
85
|
}
|
|
87
86
|
}
|
|
@@ -186,6 +185,13 @@ internal class SherpaOnnxTtsHelper(
|
|
|
186
185
|
rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
|
|
187
186
|
return@init
|
|
188
187
|
}
|
|
188
|
+
val lexiconPath = path(paths, "lexicon")
|
|
189
|
+
if (lexiconPath.isBlank()) {
|
|
190
|
+
val msg = "Zipvoice requires lexicon.txt (or lexicon-<lang>.txt) in the model directory. The sherpa-onnx engine aborts if it is missing. Copy lexicon from the official k2-fsa sherpa-onnx Zipvoice model package or hr-files release next to tokens.txt."
|
|
191
|
+
Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: $msg")
|
|
192
|
+
rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
|
|
193
|
+
return@init
|
|
194
|
+
}
|
|
189
195
|
val am = context.applicationContext.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
|
|
190
196
|
if (am != null) {
|
|
191
197
|
val memInfo = ActivityManager.MemoryInfo()
|
|
@@ -206,34 +212,26 @@ internal class SherpaOnnxTtsHelper(
|
|
|
206
212
|
Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfoBefore.availMem / (1024 * 1024)} MB (before load)")
|
|
207
213
|
}
|
|
208
214
|
val zipvoiceNumThreads = 1
|
|
209
|
-
val
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
dataDir = path(paths, "dataDir"),
|
|
215
|
-
lexicon = path(paths, "lexicon"),
|
|
216
|
-
numThreads = zipvoiceNumThreads,
|
|
217
|
-
debug = debug,
|
|
218
|
-
ruleFsts = ruleFsts?.takeIf { it.isNotBlank() } ?: "",
|
|
219
|
-
ruleFars = ruleFars?.takeIf { it.isNotBlank() } ?: "",
|
|
220
|
-
maxNumSentences = maxNumSentences?.toInt()?.coerceAtLeast(1) ?: 1,
|
|
221
|
-
silenceScale = silenceScale?.toFloat()?.coerceIn(0f, 10f) ?: 0.2f,
|
|
222
|
-
provider = provider?.takeIf { it.isNotBlank() } ?: "cpu"
|
|
215
|
+
val config = buildTtsConfig(
|
|
216
|
+
paths, "zipvoice", zipvoiceNumThreads, debug,
|
|
217
|
+
noiseScale, noiseScaleW, lengthScale,
|
|
218
|
+
ruleFsts, ruleFars, maxNumSentences?.toInt(), silenceScale,
|
|
219
|
+
provider
|
|
223
220
|
)
|
|
224
221
|
if (am != null) {
|
|
225
222
|
val memInfo = ActivityManager.MemoryInfo()
|
|
226
223
|
am.getMemoryInfo(memInfo)
|
|
227
224
|
Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfo.availMem / (1024 * 1024)} MB (after load)")
|
|
228
225
|
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
226
|
+
try {
|
|
227
|
+
inst.tts = OfflineTts(config = config)
|
|
228
|
+
} catch (e: Exception) {
|
|
229
|
+
Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice OfflineTts: ${e.message}", e)
|
|
230
|
+
rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine: ${e.message}", e)
|
|
232
231
|
return@init
|
|
233
232
|
}
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
numSpeakers = wrapper.numSpeakers()
|
|
233
|
+
sampleRate = inst.tts!!.sampleRate()
|
|
234
|
+
numSpeakers = inst.tts!!.numSpeakers()
|
|
237
235
|
} else {
|
|
238
236
|
val config = buildTtsConfig(
|
|
239
237
|
paths, modelTypeStr, numThreads.toInt(), debug,
|
|
@@ -246,8 +244,6 @@ internal class SherpaOnnxTtsHelper(
|
|
|
246
244
|
numSpeakers = inst.tts!!.numSpeakers()
|
|
247
245
|
}
|
|
248
246
|
|
|
249
|
-
Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=${if (inst.isZipvoice) "zipvoice-c-api" else "kotlin-api"}, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
|
|
250
|
-
|
|
251
247
|
val modelsArray = Arguments.createArray()
|
|
252
248
|
detectedModels?.forEach { modelObj ->
|
|
253
249
|
if (modelObj is HashMap<*, *>) {
|
|
@@ -273,6 +269,8 @@ internal class SherpaOnnxTtsHelper(
|
|
|
273
269
|
provider?.takeIf { it.isNotBlank() }
|
|
274
270
|
)
|
|
275
271
|
|
|
272
|
+
Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=kotlin-api modelType=$modelTypeStr, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
|
|
273
|
+
|
|
276
274
|
val resultMap = Arguments.createMap()
|
|
277
275
|
resultMap.putBoolean("success", true)
|
|
278
276
|
resultMap.putArray("detectedModels", modelsArray)
|
|
@@ -309,18 +307,6 @@ internal class SherpaOnnxTtsHelper(
|
|
|
309
307
|
return
|
|
310
308
|
}
|
|
311
309
|
|
|
312
|
-
if (inst.isZipvoice) {
|
|
313
|
-
initializeTts(
|
|
314
|
-
instanceId,
|
|
315
|
-
state.modelDir, state.modelType, state.numThreads.toDouble(), state.debug,
|
|
316
|
-
noiseScale, noiseScaleW, lengthScale,
|
|
317
|
-
state.ruleFsts, state.ruleFars, state.maxNumSentences?.toDouble(), state.silenceScale,
|
|
318
|
-
state.provider,
|
|
319
|
-
promise
|
|
320
|
-
)
|
|
321
|
-
return
|
|
322
|
-
}
|
|
323
|
-
|
|
324
310
|
val nextNoiseScale = when {
|
|
325
311
|
noiseScale == null -> null
|
|
326
312
|
noiseScale.isNaN() -> state.noiseScale
|
|
@@ -401,26 +387,35 @@ internal class SherpaOnnxTtsHelper(
|
|
|
401
387
|
val sid = getSid(options)
|
|
402
388
|
val speed = getSpeed(options)
|
|
403
389
|
val audio = when {
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
390
|
+
hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
|
|
391
|
+
if (inst.isZipvoice) {
|
|
392
|
+
val promptText = options!!.getString("referenceText")?.trim().orEmpty()
|
|
393
|
+
if (promptText.isEmpty()) {
|
|
394
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
|
|
395
|
+
promise.reject(
|
|
396
|
+
"TTS_GENERATE_ERROR",
|
|
397
|
+
"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
|
|
398
|
+
)
|
|
409
399
|
return
|
|
410
400
|
}
|
|
411
|
-
|
|
412
|
-
val promptText = options.getString("referenceText").orEmpty()
|
|
413
|
-
val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
|
|
414
|
-
val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
|
|
415
|
-
inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
|
|
416
|
-
}
|
|
417
|
-
hasReferenceOptions(options) && inst.tts != null -> {
|
|
401
|
+
}
|
|
418
402
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
419
403
|
inst.tts!!.generateWithConfig(text, config)
|
|
420
404
|
}
|
|
421
|
-
|
|
405
|
+
hasReferenceAudio(options) -> {
|
|
406
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
|
|
407
|
+
promise.reject(
|
|
408
|
+
"TTS_GENERATE_ERROR",
|
|
409
|
+
"Reference audio is only supported for Zipvoice and Pocket TTS."
|
|
410
|
+
)
|
|
411
|
+
return
|
|
412
|
+
}
|
|
413
|
+
inst.isPocket -> {
|
|
422
414
|
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
423
|
-
promise.reject(
|
|
415
|
+
promise.reject(
|
|
416
|
+
"TTS_GENERATE_ERROR",
|
|
417
|
+
"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
|
|
418
|
+
)
|
|
424
419
|
return
|
|
425
420
|
}
|
|
426
421
|
else -> dispatchGenerate(inst, text, sid, speed)
|
|
@@ -459,26 +454,35 @@ internal class SherpaOnnxTtsHelper(
|
|
|
459
454
|
val sid = getSid(options)
|
|
460
455
|
val speed = getSpeed(options)
|
|
461
456
|
val audio = when {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
457
|
+
hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
|
|
458
|
+
if (inst.isZipvoice) {
|
|
459
|
+
val promptText = options!!.getString("referenceText")?.trim().orEmpty()
|
|
460
|
+
if (promptText.isEmpty()) {
|
|
461
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
|
|
462
|
+
promise.reject(
|
|
463
|
+
"TTS_GENERATE_ERROR",
|
|
464
|
+
"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
|
|
465
|
+
)
|
|
467
466
|
return
|
|
468
467
|
}
|
|
469
|
-
|
|
470
|
-
val promptText = options.getString("referenceText").orEmpty()
|
|
471
|
-
val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
|
|
472
|
-
val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
|
|
473
|
-
inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
|
|
474
|
-
}
|
|
475
|
-
hasReferenceOptions(options) && inst.tts != null -> {
|
|
468
|
+
}
|
|
476
469
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
477
470
|
inst.tts!!.generateWithConfig(text, config)
|
|
478
471
|
}
|
|
479
|
-
|
|
472
|
+
hasReferenceAudio(options) -> {
|
|
473
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
|
|
474
|
+
promise.reject(
|
|
475
|
+
"TTS_GENERATE_ERROR",
|
|
476
|
+
"Reference audio is only supported for Zipvoice and Pocket TTS."
|
|
477
|
+
)
|
|
478
|
+
return
|
|
479
|
+
}
|
|
480
|
+
inst.isPocket -> {
|
|
480
481
|
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
481
|
-
promise.reject(
|
|
482
|
+
promise.reject(
|
|
483
|
+
"TTS_GENERATE_ERROR",
|
|
484
|
+
"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
|
|
485
|
+
)
|
|
482
486
|
return
|
|
483
487
|
}
|
|
484
488
|
else -> dispatchGenerate(inst, text, sid, speed)
|
|
@@ -529,16 +533,27 @@ internal class SherpaOnnxTtsHelper(
|
|
|
529
533
|
promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
|
|
530
534
|
return
|
|
531
535
|
}
|
|
532
|
-
if (inst.isPocket && !
|
|
536
|
+
if (inst.isPocket && !hasReferenceAudio(options)) {
|
|
533
537
|
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
534
|
-
promise.reject(
|
|
538
|
+
promise.reject(
|
|
539
|
+
"TTS_STREAM_ERROR",
|
|
540
|
+
"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
|
|
541
|
+
)
|
|
535
542
|
return
|
|
536
543
|
}
|
|
537
|
-
if (
|
|
544
|
+
if (hasReferenceAudio(options) && inst.isZipvoice) {
|
|
538
545
|
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
|
|
539
546
|
promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
|
|
540
547
|
return
|
|
541
548
|
}
|
|
549
|
+
if (hasReferenceAudio(options) && !inst.isPocket) {
|
|
550
|
+
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Reference audio streaming is only supported for Pocket TTS")
|
|
551
|
+
promise.reject(
|
|
552
|
+
"TTS_STREAM_ERROR",
|
|
553
|
+
"Reference audio streaming is only supported for Pocket TTS."
|
|
554
|
+
)
|
|
555
|
+
return
|
|
556
|
+
}
|
|
542
557
|
val sid = getSid(options)
|
|
543
558
|
val speed = getSpeed(options)
|
|
544
559
|
inst.ttsStreamCancelled.set(false)
|
|
@@ -547,7 +562,7 @@ internal class SherpaOnnxTtsHelper(
|
|
|
547
562
|
try {
|
|
548
563
|
val sampleRate = dispatchSampleRate(inst)
|
|
549
564
|
when {
|
|
550
|
-
|
|
565
|
+
hasReferenceAudio(options) && inst.isPocket -> {
|
|
551
566
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
552
567
|
inst.tts!!.generateWithConfigAndCallback(text, config) { chunk ->
|
|
553
568
|
if (inst.ttsStreamCancelled.get()) return@generateWithConfigAndCallback 0
|
|
@@ -555,13 +570,6 @@ internal class SherpaOnnxTtsHelper(
|
|
|
555
570
|
chunk.size
|
|
556
571
|
}
|
|
557
572
|
}
|
|
558
|
-
inst.zipvoiceTts != null -> {
|
|
559
|
-
inst.zipvoiceTts!!.generateWithCallback(text, sid, speed) { chunk ->
|
|
560
|
-
if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
|
|
561
|
-
emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
|
|
562
|
-
chunk.size
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
573
|
else -> {
|
|
566
574
|
inst.tts!!.generateWithCallback(text, sid, speed) { chunk ->
|
|
567
575
|
if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
|
|
@@ -885,14 +893,21 @@ internal class SherpaOnnxTtsHelper(
|
|
|
885
893
|
|
|
886
894
|
// -- Dual-engine dispatch helpers --
|
|
887
895
|
|
|
888
|
-
/**
|
|
889
|
-
|
|
896
|
+
/**
|
|
897
|
+
* True when voice-cloning reference audio is present and valid for native use:
|
|
898
|
+
* non-empty [referenceAudio] array and [referenceSampleRate] > 0.
|
|
899
|
+
* [referenceText] alone does not enable cloning (matches sherpa-onnx behavior).
|
|
900
|
+
*/
|
|
901
|
+
private fun hasReferenceAudio(options: ReadableMap?): Boolean {
|
|
890
902
|
if (options == null) return false
|
|
891
|
-
val refAudio = options.getArray("referenceAudio")
|
|
892
|
-
|
|
893
|
-
return (
|
|
903
|
+
val refAudio = options.getArray("referenceAudio") ?: return false
|
|
904
|
+
if (refAudio.size() == 0) return false
|
|
905
|
+
return readReferenceSampleRate(options) > 0
|
|
894
906
|
}
|
|
895
907
|
|
|
908
|
+
private fun readReferenceSampleRate(options: ReadableMap): Int =
|
|
909
|
+
if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
|
|
910
|
+
|
|
896
911
|
/** Parse sid and speed from options with defaults. */
|
|
897
912
|
private fun getSid(options: ReadableMap?): Int =
|
|
898
913
|
if (options != null && options.hasKey("sid")) options.getDouble("sid").toInt() else 0
|
|
@@ -936,18 +951,14 @@ internal class SherpaOnnxTtsHelper(
|
|
|
936
951
|
|
|
937
952
|
/** Dispatch generate to whichever engine is active on the instance. Returns null if none loaded. */
|
|
938
953
|
private fun dispatchGenerate(inst: TtsEngineInstance, text: String, sid: Int, speed: Float): GeneratedAudio? {
|
|
939
|
-
inst.
|
|
940
|
-
inst.tts?.let { return it.generate(text, sid, speed) }
|
|
941
|
-
return null
|
|
954
|
+
return inst.tts?.generate(text, sid, speed)
|
|
942
955
|
}
|
|
943
956
|
|
|
944
957
|
private fun dispatchSampleRate(inst: TtsEngineInstance): Int {
|
|
945
|
-
inst.zipvoiceTts?.let { return it.sampleRate() }
|
|
946
958
|
return inst.tts?.sampleRate() ?: 0
|
|
947
959
|
}
|
|
948
960
|
|
|
949
961
|
private fun dispatchNumSpeakers(inst: TtsEngineInstance): Int {
|
|
950
|
-
inst.zipvoiceTts?.let { return it.numSpeakers() }
|
|
951
962
|
return inst.tts?.numSpeakers() ?: 0
|
|
952
963
|
}
|
|
953
964
|
|
|
@@ -1039,14 +1050,33 @@ internal class SherpaOnnxTtsHelper(
|
|
|
1039
1050
|
debug = debug,
|
|
1040
1051
|
provider = prov
|
|
1041
1052
|
)
|
|
1042
|
-
"zipvoice" ->
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1053
|
+
"zipvoice" -> OfflineTtsModelConfig(
|
|
1054
|
+
zipvoice = OfflineTtsZipVoiceModelConfig(
|
|
1055
|
+
tokens = path(paths, "tokens"),
|
|
1056
|
+
encoder = path(paths, "encoder"),
|
|
1057
|
+
decoder = path(paths, "decoder"),
|
|
1058
|
+
vocoder = path(paths, "vocoder"),
|
|
1059
|
+
dataDir = path(paths, "dataDir"),
|
|
1060
|
+
lexicon = path(paths, "lexicon")
|
|
1061
|
+
),
|
|
1062
|
+
numThreads = numThreads,
|
|
1063
|
+
debug = debug,
|
|
1064
|
+
provider = prov
|
|
1065
|
+
)
|
|
1066
|
+
"supertonic" -> OfflineTtsModelConfig(
|
|
1067
|
+
supertonic = OfflineTtsSupertonicModelConfig(
|
|
1068
|
+
durationPredictor = path(paths, "durationPredictor"),
|
|
1069
|
+
textEncoder = path(paths, "textEncoder"),
|
|
1070
|
+
vectorEstimator = path(paths, "vectorEstimator"),
|
|
1071
|
+
vocoder = path(paths, "vocoder"),
|
|
1072
|
+
ttsJson = path(paths, "ttsJson"),
|
|
1073
|
+
unicodeIndexer = path(paths, "unicodeIndexer"),
|
|
1074
|
+
voiceStyle = path(paths, "voiceStyle")
|
|
1075
|
+
),
|
|
1076
|
+
numThreads = numThreads,
|
|
1077
|
+
debug = debug,
|
|
1078
|
+
provider = prov
|
|
1079
|
+
)
|
|
1050
1080
|
else -> {
|
|
1051
1081
|
if (path(paths, "acousticModel").isNotEmpty()) {
|
|
1052
1082
|
OfflineTtsModelConfig(
|