react-native-sherpa-onnx 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +90 -21
- package/SherpaOnnx.podspec +3 -0
- package/THIRD_PARTY_LICENSES/README.md +62 -0
- package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
- package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
- package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
- package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
- package/THIRD_PARTY_LICENSES/opus.txt +44 -0
- package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
- package/THIRD_PARTY_LICENSES/shine.txt +482 -0
- package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
- package/android/build.gradle +7 -3
- package/android/prebuilt-download.gradle +345 -153
- package/android/prebuilt-versions.gradle +2 -2
- package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
- package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
- package/android/src/main/cpp/CMakeLists.txt +28 -10
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +306 -6
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +33 -4
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +266 -7
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +137 -7
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +51 -6
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +159 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
- package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
- package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
- package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
- package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
- package/ios/SherpaOnnx+TTS.mm +178 -20
- package/ios/SherpaOnnx.mm +108 -1
- package/ios/SherpaOnnxAudioConvert.h +10 -0
- package/ios/SherpaOnnxAudioConvert.mm +257 -1
- package/ios/archive/sherpa-onnx-archive-helper.h +10 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +56 -5
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +8 -0
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +10 -929
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/download/activeModelOperations.js +26 -0
- package/lib/module/download/activeModelOperations.js.map +1 -0
- package/lib/module/download/background-downloader.d.js +2 -0
- package/lib/module/download/background-downloader.d.js.map +1 -0
- package/lib/module/download/bulkPurge.js +72 -0
- package/lib/module/download/bulkPurge.js.map +1 -0
- package/lib/module/download/checksumPrompt.js +19 -0
- package/lib/module/download/checksumPrompt.js.map +1 -0
- package/lib/module/download/constants.js +7 -0
- package/lib/module/download/constants.js.map +1 -0
- package/lib/module/download/downloadEvents.js +35 -0
- package/lib/module/download/downloadEvents.js.map +1 -0
- package/lib/module/download/downloadTask.js +385 -0
- package/lib/module/download/downloadTask.js.map +1 -0
- package/lib/module/download/ensureModel.js +89 -0
- package/lib/module/download/ensureModel.js.map +1 -0
- package/lib/module/download/index.js +4 -3
- package/lib/module/download/index.js.map +1 -1
- package/lib/module/download/localModels.js +151 -0
- package/lib/module/download/localModels.js.map +1 -0
- package/lib/module/download/modelExtraction.js +174 -0
- package/lib/module/download/modelExtraction.js.map +1 -0
- package/lib/module/download/paths.js +98 -0
- package/lib/module/download/paths.js.map +1 -0
- package/lib/module/download/postDownloadProcessing.js +206 -0
- package/lib/module/download/postDownloadProcessing.js.map +1 -0
- package/lib/module/download/protectedModelKeys.js +31 -0
- package/lib/module/download/protectedModelKeys.js.map +1 -0
- package/lib/module/download/registry.js +267 -0
- package/lib/module/download/registry.js.map +1 -0
- package/lib/module/download/retry.js +59 -0
- package/lib/module/download/retry.js.map +1 -0
- package/lib/module/download/types.js +17 -0
- package/lib/module/download/types.js.map +1 -0
- package/lib/module/download/validation.js +101 -5
- package/lib/module/download/validation.js.map +1 -1
- package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
- package/lib/module/extraction/extractTarBz2.js.map +1 -0
- package/lib/module/extraction/extractTarZst.js +54 -0
- package/lib/module/extraction/extractTarZst.js.map +1 -0
- package/lib/module/extraction/index.js +190 -0
- package/lib/module/extraction/index.js.map +1 -0
- package/lib/module/extraction/types.js +2 -0
- package/lib/module/extraction/types.js.map +1 -0
- package/lib/module/index.js +2 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/licenses.js +63 -0
- package/lib/module/licenses.js.map +1 -0
- package/lib/module/stt/index.js +16 -2
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +2 -0
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/stt/streamingTypes.js.map +1 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +20 -2
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +4 -0
- package/lib/module/tts/streaming.js.map +1 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/utils.js +16 -1
- package/lib/module/utils.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +72 -5
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +10 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
- package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
- package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
- package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
- package/lib/typescript/src/download/constants.d.ts +5 -0
- package/lib/typescript/src/download/constants.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
- package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadTask.d.ts +20 -0
- package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
- package/lib/typescript/src/download/ensureModel.d.ts +26 -0
- package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -5
- package/lib/typescript/src/download/index.d.ts.map +1 -1
- package/lib/typescript/src/download/localModels.d.ts +15 -0
- package/lib/typescript/src/download/localModels.d.ts.map +1 -0
- package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
- package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
- package/lib/typescript/src/download/paths.d.ts +28 -0
- package/lib/typescript/src/download/paths.d.ts.map +1 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
- package/lib/typescript/src/download/registry.d.ts +14 -0
- package/lib/typescript/src/download/registry.d.ts.map +1 -0
- package/lib/typescript/src/download/retry.d.ts +15 -0
- package/lib/typescript/src/download/retry.d.ts.map +1 -0
- package/lib/typescript/src/download/types.d.ts +96 -0
- package/lib/typescript/src/download/types.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +19 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/extraction/extractTarZst.d.ts +14 -0
- package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
- package/lib/typescript/src/extraction/index.d.ts +50 -0
- package/lib/typescript/src/extraction/index.d.ts.map +1 -0
- package/lib/typescript/src/extraction/types.d.ts +60 -0
- package/lib/typescript/src/extraction/types.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +1 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/licenses.d.ts +10 -0
- package/lib/typescript/src/licenses.d.ts.map +1 -0
- package/lib/typescript/src/stt/index.d.ts +4 -1
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +3 -1
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +3 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +6 -5
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +5 -0
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/package.json +11 -1
- package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
- package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
- package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
- package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
- package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
- package/scripts/ci/update_model_license_csv.sh +765 -0
- package/scripts/setup-ios-framework.sh +14 -11
- package/scripts/update_commercial_use.js +73 -0
- package/src/NativeSherpaOnnx.ts +92 -5
- package/src/audio/index.ts +20 -0
- package/src/download/ModelDownloadManager.ts +55 -1343
- package/src/download/activeModelOperations.ts +38 -0
- package/src/download/background-downloader.d.ts +43 -0
- package/src/download/bulkPurge.ts +102 -0
- package/src/download/checksumPrompt.ts +25 -0
- package/src/download/constants.ts +5 -0
- package/src/download/downloadEvents.ts +55 -0
- package/src/download/downloadTask.ts +497 -0
- package/src/download/ensureModel.ts +124 -0
- package/src/download/index.ts +19 -2
- package/src/download/localModels.ts +234 -0
- package/src/download/modelExtraction.ts +244 -0
- package/src/download/paths.ts +134 -0
- package/src/download/postDownloadProcessing.ts +292 -0
- package/src/download/protectedModelKeys.ts +30 -0
- package/src/download/registry.ts +404 -0
- package/src/download/retry.ts +76 -0
- package/src/download/types.ts +120 -0
- package/src/download/validation.ts +114 -8
- package/src/{download → extraction}/extractTarBz2.ts +3 -1
- package/src/extraction/extractTarZst.ts +79 -0
- package/src/extraction/index.ts +269 -0
- package/src/extraction/types.ts +63 -0
- package/src/index.tsx +2 -0
- package/src/licenses.ts +100 -0
- package/src/stt/index.ts +20 -2
- package/src/stt/streaming.ts +3 -0
- package/src/stt/streamingTypes.ts +5 -0
- package/src/stt/types.ts +3 -1
- package/src/tts/index.ts +30 -2
- package/src/tts/streaming.ts +10 -0
- package/src/tts/types.ts +6 -5
- package/src/utils.ts +22 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
- package/lib/module/download/extractTarBz2.js.map +0 -1
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
- package/scripts/check-qnn-support.sh +0 -78
- /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
|
@@ -30,6 +30,7 @@ import com.k2fsa.sherpa.onnx.OfflineTtsVitsModelConfig
|
|
|
30
30
|
import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
|
|
31
31
|
import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
|
|
32
32
|
import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
|
|
33
|
+
import com.k2fsa.sherpa.onnx.OfflineTtsZipVoiceModelConfig
|
|
33
34
|
import java.io.File
|
|
34
35
|
import java.io.FileInputStream
|
|
35
36
|
import java.io.FileOutputStream
|
|
@@ -64,8 +65,7 @@ internal class SherpaOnnxTtsHelper(
|
|
|
64
65
|
|
|
65
66
|
private data class TtsEngineInstance(
|
|
66
67
|
@Volatile var tts: OfflineTts? = null,
|
|
67
|
-
@Volatile var
|
|
68
|
-
var ttsInitState: TtsInitState? = null,
|
|
68
|
+
@Volatile var ttsInitState: TtsInitState? = null,
|
|
69
69
|
val ttsStreamRunning: AtomicBoolean = AtomicBoolean(false),
|
|
70
70
|
val ttsStreamCancelled: AtomicBoolean = AtomicBoolean(false),
|
|
71
71
|
var ttsStreamThread: Thread? = null,
|
|
@@ -73,15 +73,13 @@ internal class SherpaOnnxTtsHelper(
|
|
|
73
73
|
) {
|
|
74
74
|
private val lock = Any()
|
|
75
75
|
|
|
76
|
-
fun hasEngine(): Boolean = synchronized(lock) { tts != null
|
|
77
|
-
val isZipvoice: Boolean get() =
|
|
76
|
+
fun hasEngine(): Boolean = synchronized(lock) { tts != null }
|
|
77
|
+
val isZipvoice: Boolean get() = ttsInitState?.modelType == "zipvoice"
|
|
78
78
|
val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
|
|
79
79
|
fun releaseEngines() {
|
|
80
80
|
synchronized(lock) {
|
|
81
81
|
tts?.release()
|
|
82
82
|
tts = null
|
|
83
|
-
zipvoiceTts?.release()
|
|
84
|
-
zipvoiceTts = null
|
|
85
83
|
ttsInitState = null
|
|
86
84
|
}
|
|
87
85
|
}
|
|
@@ -186,6 +184,13 @@ internal class SherpaOnnxTtsHelper(
|
|
|
186
184
|
rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
|
|
187
185
|
return@init
|
|
188
186
|
}
|
|
187
|
+
val lexiconPath = path(paths, "lexicon")
|
|
188
|
+
if (lexiconPath.isBlank()) {
|
|
189
|
+
val msg = "Zipvoice requires lexicon.txt (or lexicon-<lang>.txt) in the model directory. The sherpa-onnx engine aborts if it is missing. Copy lexicon from the official k2-fsa sherpa-onnx Zipvoice model package or hr-files release next to tokens.txt."
|
|
190
|
+
Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: $msg")
|
|
191
|
+
rejectOnUiThread(promise, "TTS_INIT_ERROR", msg)
|
|
192
|
+
return@init
|
|
193
|
+
}
|
|
189
194
|
val am = context.applicationContext.getSystemService(Context.ACTIVITY_SERVICE) as? ActivityManager
|
|
190
195
|
if (am != null) {
|
|
191
196
|
val memInfo = ActivityManager.MemoryInfo()
|
|
@@ -206,34 +211,26 @@ internal class SherpaOnnxTtsHelper(
|
|
|
206
211
|
Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfoBefore.availMem / (1024 * 1024)} MB (before load)")
|
|
207
212
|
}
|
|
208
213
|
val zipvoiceNumThreads = 1
|
|
209
|
-
val
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
dataDir = path(paths, "dataDir"),
|
|
215
|
-
lexicon = path(paths, "lexicon"),
|
|
216
|
-
numThreads = zipvoiceNumThreads,
|
|
217
|
-
debug = debug,
|
|
218
|
-
ruleFsts = ruleFsts?.takeIf { it.isNotBlank() } ?: "",
|
|
219
|
-
ruleFars = ruleFars?.takeIf { it.isNotBlank() } ?: "",
|
|
220
|
-
maxNumSentences = maxNumSentences?.toInt()?.coerceAtLeast(1) ?: 1,
|
|
221
|
-
silenceScale = silenceScale?.toFloat()?.coerceIn(0f, 10f) ?: 0.2f,
|
|
222
|
-
provider = provider?.takeIf { it.isNotBlank() } ?: "cpu"
|
|
214
|
+
val config = buildTtsConfig(
|
|
215
|
+
paths, "zipvoice", zipvoiceNumThreads, debug,
|
|
216
|
+
noiseScale, noiseScaleW, lengthScale,
|
|
217
|
+
ruleFsts, ruleFars, maxNumSentences?.toInt(), silenceScale,
|
|
218
|
+
provider
|
|
223
219
|
)
|
|
224
220
|
if (am != null) {
|
|
225
221
|
val memInfo = ActivityManager.MemoryInfo()
|
|
226
222
|
am.getMemoryInfo(memInfo)
|
|
227
223
|
Log.i("SherpaOnnxTts", "Zipvoice init: availMem=${memInfo.availMem / (1024 * 1024)} MB (after load)")
|
|
228
224
|
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
225
|
+
try {
|
|
226
|
+
inst.tts = OfflineTts(config = config)
|
|
227
|
+
} catch (e: Exception) {
|
|
228
|
+
Log.e("SherpaOnnxTts", "TTS_INIT_ERROR: Failed to create Zipvoice OfflineTts: ${e.message}", e)
|
|
229
|
+
rejectOnUiThread(promise, "TTS_INIT_ERROR", "Failed to create Zipvoice TTS engine: ${e.message}", e)
|
|
232
230
|
return@init
|
|
233
231
|
}
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
numSpeakers = wrapper.numSpeakers()
|
|
232
|
+
sampleRate = inst.tts!!.sampleRate()
|
|
233
|
+
numSpeakers = inst.tts!!.numSpeakers()
|
|
237
234
|
} else {
|
|
238
235
|
val config = buildTtsConfig(
|
|
239
236
|
paths, modelTypeStr, numThreads.toInt(), debug,
|
|
@@ -246,8 +243,6 @@ internal class SherpaOnnxTtsHelper(
|
|
|
246
243
|
numSpeakers = inst.tts!!.numSpeakers()
|
|
247
244
|
}
|
|
248
245
|
|
|
249
|
-
Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=${if (inst.isZipvoice) "zipvoice-c-api" else "kotlin-api"}, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
|
|
250
|
-
|
|
251
246
|
val modelsArray = Arguments.createArray()
|
|
252
247
|
detectedModels?.forEach { modelObj ->
|
|
253
248
|
if (modelObj is HashMap<*, *>) {
|
|
@@ -273,6 +268,8 @@ internal class SherpaOnnxTtsHelper(
|
|
|
273
268
|
provider?.takeIf { it.isNotBlank() }
|
|
274
269
|
)
|
|
275
270
|
|
|
271
|
+
Log.i("SherpaOnnxTts", "initializeTts: instanceId=$instanceId, engine=kotlin-api modelType=$modelTypeStr, sampleRate=$sampleRate, numSpeakers=$numSpeakers")
|
|
272
|
+
|
|
276
273
|
val resultMap = Arguments.createMap()
|
|
277
274
|
resultMap.putBoolean("success", true)
|
|
278
275
|
resultMap.putArray("detectedModels", modelsArray)
|
|
@@ -309,18 +306,6 @@ internal class SherpaOnnxTtsHelper(
|
|
|
309
306
|
return
|
|
310
307
|
}
|
|
311
308
|
|
|
312
|
-
if (inst.isZipvoice) {
|
|
313
|
-
initializeTts(
|
|
314
|
-
instanceId,
|
|
315
|
-
state.modelDir, state.modelType, state.numThreads.toDouble(), state.debug,
|
|
316
|
-
noiseScale, noiseScaleW, lengthScale,
|
|
317
|
-
state.ruleFsts, state.ruleFars, state.maxNumSentences?.toDouble(), state.silenceScale,
|
|
318
|
-
state.provider,
|
|
319
|
-
promise
|
|
320
|
-
)
|
|
321
|
-
return
|
|
322
|
-
}
|
|
323
|
-
|
|
324
309
|
val nextNoiseScale = when {
|
|
325
310
|
noiseScale == null -> null
|
|
326
311
|
noiseScale.isNaN() -> state.noiseScale
|
|
@@ -401,26 +386,35 @@ internal class SherpaOnnxTtsHelper(
|
|
|
401
386
|
val sid = getSid(options)
|
|
402
387
|
val speed = getSpeed(options)
|
|
403
388
|
val audio = when {
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
389
|
+
hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
|
|
390
|
+
if (inst.isZipvoice) {
|
|
391
|
+
val promptText = options!!.getString("referenceText")?.trim().orEmpty()
|
|
392
|
+
if (promptText.isEmpty()) {
|
|
393
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
|
|
394
|
+
promise.reject(
|
|
395
|
+
"TTS_GENERATE_ERROR",
|
|
396
|
+
"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
|
|
397
|
+
)
|
|
409
398
|
return
|
|
410
399
|
}
|
|
411
|
-
|
|
412
|
-
val promptText = options.getString("referenceText").orEmpty()
|
|
413
|
-
val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
|
|
414
|
-
val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
|
|
415
|
-
inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
|
|
416
|
-
}
|
|
417
|
-
hasReferenceOptions(options) && inst.tts != null -> {
|
|
400
|
+
}
|
|
418
401
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
419
402
|
inst.tts!!.generateWithConfig(text, config)
|
|
420
403
|
}
|
|
421
|
-
|
|
404
|
+
hasReferenceAudio(options) -> {
|
|
405
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
|
|
406
|
+
promise.reject(
|
|
407
|
+
"TTS_GENERATE_ERROR",
|
|
408
|
+
"Reference audio is only supported for Zipvoice and Pocket TTS."
|
|
409
|
+
)
|
|
410
|
+
return
|
|
411
|
+
}
|
|
412
|
+
inst.isPocket -> {
|
|
422
413
|
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
423
|
-
promise.reject(
|
|
414
|
+
promise.reject(
|
|
415
|
+
"TTS_GENERATE_ERROR",
|
|
416
|
+
"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
|
|
417
|
+
)
|
|
424
418
|
return
|
|
425
419
|
}
|
|
426
420
|
else -> dispatchGenerate(inst, text, sid, speed)
|
|
@@ -459,26 +453,35 @@ internal class SherpaOnnxTtsHelper(
|
|
|
459
453
|
val sid = getSid(options)
|
|
460
454
|
val speed = getSpeed(options)
|
|
461
455
|
val audio = when {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
456
|
+
hasReferenceAudio(options) && (inst.isZipvoice || inst.isPocket) -> {
|
|
457
|
+
if (inst.isZipvoice) {
|
|
458
|
+
val promptText = options!!.getString("referenceText")?.trim().orEmpty()
|
|
459
|
+
if (promptText.isEmpty()) {
|
|
460
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Zipvoice voice cloning requires non-empty referenceText")
|
|
461
|
+
promise.reject(
|
|
462
|
+
"TTS_GENERATE_ERROR",
|
|
463
|
+
"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio)."
|
|
464
|
+
)
|
|
467
465
|
return
|
|
468
466
|
}
|
|
469
|
-
|
|
470
|
-
val promptText = options.getString("referenceText").orEmpty()
|
|
471
|
-
val numSteps = if (options.hasKey("numSteps")) options.getDouble("numSteps").toInt() else 20
|
|
472
|
-
val samples = FloatArray(refAudio.size()) { i -> refAudio.getDouble(i).toFloat() }
|
|
473
|
-
inst.zipvoiceTts!!.generateWithZipvoice(text, promptText, samples, promptSr, speed, numSteps)
|
|
474
|
-
}
|
|
475
|
-
hasReferenceOptions(options) && inst.tts != null -> {
|
|
467
|
+
}
|
|
476
468
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
477
469
|
inst.tts!!.generateWithConfig(text, config)
|
|
478
470
|
}
|
|
479
|
-
|
|
471
|
+
hasReferenceAudio(options) -> {
|
|
472
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Reference audio is not supported for this TTS model type")
|
|
473
|
+
promise.reject(
|
|
474
|
+
"TTS_GENERATE_ERROR",
|
|
475
|
+
"Reference audio is only supported for Zipvoice and Pocket TTS."
|
|
476
|
+
)
|
|
477
|
+
return
|
|
478
|
+
}
|
|
479
|
+
inst.isPocket -> {
|
|
480
480
|
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
481
|
-
promise.reject(
|
|
481
|
+
promise.reject(
|
|
482
|
+
"TTS_GENERATE_ERROR",
|
|
483
|
+
"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
|
|
484
|
+
)
|
|
482
485
|
return
|
|
483
486
|
}
|
|
484
487
|
else -> dispatchGenerate(inst, text, sid, speed)
|
|
@@ -529,16 +532,27 @@ internal class SherpaOnnxTtsHelper(
|
|
|
529
532
|
promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
|
|
530
533
|
return
|
|
531
534
|
}
|
|
532
|
-
if (inst.isPocket && !
|
|
535
|
+
if (inst.isPocket && !hasReferenceAudio(options)) {
|
|
533
536
|
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
534
|
-
promise.reject(
|
|
537
|
+
promise.reject(
|
|
538
|
+
"TTS_STREAM_ERROR",
|
|
539
|
+
"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options."
|
|
540
|
+
)
|
|
535
541
|
return
|
|
536
542
|
}
|
|
537
|
-
if (
|
|
543
|
+
if (hasReferenceAudio(options) && inst.isZipvoice) {
|
|
538
544
|
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
|
|
539
545
|
promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
|
|
540
546
|
return
|
|
541
547
|
}
|
|
548
|
+
if (hasReferenceAudio(options) && !inst.isPocket) {
|
|
549
|
+
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Reference audio streaming is only supported for Pocket TTS")
|
|
550
|
+
promise.reject(
|
|
551
|
+
"TTS_STREAM_ERROR",
|
|
552
|
+
"Reference audio streaming is only supported for Pocket TTS."
|
|
553
|
+
)
|
|
554
|
+
return
|
|
555
|
+
}
|
|
542
556
|
val sid = getSid(options)
|
|
543
557
|
val speed = getSpeed(options)
|
|
544
558
|
inst.ttsStreamCancelled.set(false)
|
|
@@ -547,7 +561,7 @@ internal class SherpaOnnxTtsHelper(
|
|
|
547
561
|
try {
|
|
548
562
|
val sampleRate = dispatchSampleRate(inst)
|
|
549
563
|
when {
|
|
550
|
-
|
|
564
|
+
hasReferenceAudio(options) && inst.isPocket -> {
|
|
551
565
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
552
566
|
inst.tts!!.generateWithConfigAndCallback(text, config) { chunk ->
|
|
553
567
|
if (inst.ttsStreamCancelled.get()) return@generateWithConfigAndCallback 0
|
|
@@ -555,13 +569,6 @@ internal class SherpaOnnxTtsHelper(
|
|
|
555
569
|
chunk.size
|
|
556
570
|
}
|
|
557
571
|
}
|
|
558
|
-
inst.zipvoiceTts != null -> {
|
|
559
|
-
inst.zipvoiceTts!!.generateWithCallback(text, sid, speed) { chunk ->
|
|
560
|
-
if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
|
|
561
|
-
emitChunk(instanceId, requestId, chunk, sampleRate, 0f, false)
|
|
562
|
-
chunk.size
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
572
|
else -> {
|
|
566
573
|
inst.tts!!.generateWithCallback(text, sid, speed) { chunk ->
|
|
567
574
|
if (inst.ttsStreamCancelled.get()) return@generateWithCallback 0
|
|
@@ -885,14 +892,21 @@ internal class SherpaOnnxTtsHelper(
|
|
|
885
892
|
|
|
886
893
|
// -- Dual-engine dispatch helpers --
|
|
887
894
|
|
|
888
|
-
/**
|
|
889
|
-
|
|
895
|
+
/**
|
|
896
|
+
* True when voice-cloning reference audio is present and valid for native use:
|
|
897
|
+
* non-empty [referenceAudio] array and [referenceSampleRate] > 0.
|
|
898
|
+
* [referenceText] alone does not enable cloning (matches sherpa-onnx behavior).
|
|
899
|
+
*/
|
|
900
|
+
private fun hasReferenceAudio(options: ReadableMap?): Boolean {
|
|
890
901
|
if (options == null) return false
|
|
891
|
-
val refAudio = options.getArray("referenceAudio")
|
|
892
|
-
|
|
893
|
-
return (
|
|
902
|
+
val refAudio = options.getArray("referenceAudio") ?: return false
|
|
903
|
+
if (refAudio.size() == 0) return false
|
|
904
|
+
return readReferenceSampleRate(options) > 0
|
|
894
905
|
}
|
|
895
906
|
|
|
907
|
+
private fun readReferenceSampleRate(options: ReadableMap): Int =
|
|
908
|
+
if (options.hasKey("referenceSampleRate")) options.getDouble("referenceSampleRate").toInt() else 0
|
|
909
|
+
|
|
896
910
|
/** Parse sid and speed from options with defaults. */
|
|
897
911
|
private fun getSid(options: ReadableMap?): Int =
|
|
898
912
|
if (options != null && options.hasKey("sid")) options.getDouble("sid").toInt() else 0
|
|
@@ -936,18 +950,14 @@ internal class SherpaOnnxTtsHelper(
|
|
|
936
950
|
|
|
937
951
|
/** Dispatch generate to whichever engine is active on the instance. Returns null if none loaded. */
|
|
938
952
|
private fun dispatchGenerate(inst: TtsEngineInstance, text: String, sid: Int, speed: Float): GeneratedAudio? {
|
|
939
|
-
inst.
|
|
940
|
-
inst.tts?.let { return it.generate(text, sid, speed) }
|
|
941
|
-
return null
|
|
953
|
+
return inst.tts?.generate(text, sid, speed)
|
|
942
954
|
}
|
|
943
955
|
|
|
944
956
|
private fun dispatchSampleRate(inst: TtsEngineInstance): Int {
|
|
945
|
-
inst.zipvoiceTts?.let { return it.sampleRate() }
|
|
946
957
|
return inst.tts?.sampleRate() ?: 0
|
|
947
958
|
}
|
|
948
959
|
|
|
949
960
|
private fun dispatchNumSpeakers(inst: TtsEngineInstance): Int {
|
|
950
|
-
inst.zipvoiceTts?.let { return it.numSpeakers() }
|
|
951
961
|
return inst.tts?.numSpeakers() ?: 0
|
|
952
962
|
}
|
|
953
963
|
|
|
@@ -1039,14 +1049,19 @@ internal class SherpaOnnxTtsHelper(
|
|
|
1039
1049
|
debug = debug,
|
|
1040
1050
|
provider = prov
|
|
1041
1051
|
)
|
|
1042
|
-
"zipvoice" ->
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1052
|
+
"zipvoice" -> OfflineTtsModelConfig(
|
|
1053
|
+
zipvoice = OfflineTtsZipVoiceModelConfig(
|
|
1054
|
+
tokens = path(paths, "tokens"),
|
|
1055
|
+
encoder = path(paths, "encoder"),
|
|
1056
|
+
decoder = path(paths, "decoder"),
|
|
1057
|
+
vocoder = path(paths, "vocoder"),
|
|
1058
|
+
dataDir = path(paths, "dataDir"),
|
|
1059
|
+
lexicon = path(paths, "lexicon")
|
|
1060
|
+
),
|
|
1061
|
+
numThreads = numThreads,
|
|
1062
|
+
debug = debug,
|
|
1063
|
+
provider = prov
|
|
1064
|
+
)
|
|
1050
1065
|
else -> {
|
|
1051
1066
|
if (path(paths, "acousticModel").isNotEmpty()) {
|
|
1052
1067
|
OfflineTtsModelConfig(
|