react-native-sherpa-onnx 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -77
- package/SherpaOnnx.podspec +79 -45
- package/android/build.gradle +8 -2
- package/android/prebuilt-download.gradle +70 -16
- package/android/prebuilt-versions.gradle +14 -6
- package/android/src/main/cpp/CMakeLists.txt +2 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
- package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
- package/ios/SherpaOnnx+STT.mm +2 -0
- package/ios/SherpaOnnx+TTS.mm +17 -0
- package/ios/SherpaOnnx.mm +27 -3
- package/ios/SherpaOnnxAudioConvert.h +28 -0
- package/ios/SherpaOnnxAudioConvert.mm +698 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
- package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +55 -1
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +14 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/index.js +10 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/stt/streaming.js +6 -3
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/tts/index.js +13 -1
- package/lib/module/tts/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +20 -1
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +10 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +12 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/check-model-csvs.sh +72 -0
- package/scripts/setup-ios-framework.sh +272 -191
- package/src/NativeSherpaOnnx.ts +37 -3
- package/src/audio/index.ts +84 -1
- package/src/download/ModelDownloadManager.ts +19 -0
- package/src/index.tsx +15 -0
- package/src/stt/streaming.ts +10 -5
- package/src/stt/streamingTypes.ts +1 -1
- package/src/tts/index.ts +25 -1
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/ios/scripts/patch-libarchive-includes.sh +0 -61
- package/ios/scripts/setup-ios-libarchive.sh +0 -98
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
package com.sherpaonnx
|
|
2
2
|
|
|
3
|
+
import android.net.Uri
|
|
3
4
|
import com.facebook.react.bridge.ReactApplicationContext
|
|
4
5
|
import com.facebook.react.bridge.Promise
|
|
5
6
|
import com.facebook.react.bridge.ReadableArray
|
|
@@ -55,6 +56,7 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
55
56
|
{ instanceId, requestId, cancelled -> emitTtsStreamEnd(instanceId, requestId, cancelled) }
|
|
56
57
|
)
|
|
57
58
|
private val archiveHelper = SherpaOnnxArchiveHelper()
|
|
59
|
+
private var pcmCapture: SherpaOnnxPcmCapture? = null
|
|
58
60
|
|
|
59
61
|
override fun getName(): String {
|
|
60
62
|
return NAME
|
|
@@ -62,6 +64,8 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
62
64
|
|
|
63
65
|
override fun onCatalystInstanceDestroy() {
|
|
64
66
|
super.onCatalystInstanceDestroy()
|
|
67
|
+
pcmCapture?.stop()
|
|
68
|
+
pcmCapture = null
|
|
65
69
|
onlineSttHelper.shutdown()
|
|
66
70
|
ttsHelper.shutdown()
|
|
67
71
|
}
|
|
@@ -139,6 +143,29 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
139
143
|
}
|
|
140
144
|
}
|
|
141
145
|
|
|
146
|
+
/**
 * Resolves [promise] with a map containing `soc` (the trimmed `Build.SOC_MODEL` value, or null)
 * and `isSupported` (true when `soc` matches `SM8` followed by three digits, case-insensitive).
 *
 * `SOC_MODEL` is read reflectively and only when the device SDK level is 31 or higher.
 * Any failure is logged and reported as `{ soc: null, isSupported: false }`; the promise is
 * never rejected.
 */
override fun getDeviceQnnSoc(promise: Promise) {
    try {
        val socModel: String? =
            if (android.os.Build.VERSION.SDK_INT >= 31) {
                val rawValue = Class.forName("android.os.Build")
                    .getDeclaredField("SOC_MODEL")
                    .get(null) as? String
                rawValue?.trim()?.takeIf { it.isNotEmpty() }
            } else {
                null
            }
        // The pattern is only built when there is a SoC string to test.
        val supported = socModel?.matches(Regex("^SM8\\d{3}$", RegexOption.IGNORE_CASE)) ?: false
        promise.resolve(
            Arguments.createMap().apply {
                putString("soc", socModel)
                putBoolean("isSupported", supported)
            }
        )
    } catch (e: Exception) {
        android.util.Log.w(NAME, "getDeviceQnnSoc: ${e.message}")
        promise.resolve(
            Arguments.createMap().apply {
                putNull("soc")
                putBoolean("isSupported", false)
            }
        )
    }
}
|
|
168
|
+
|
|
142
169
|
/** Asset path for embedded NNAPI test model (ORT testdata: nnapi_internal_uint8_support). */
|
|
143
170
|
private val nnapiTestModelAsset = "testModels/nnapi_internal_uint8_support.onnx"
|
|
144
171
|
|
|
@@ -319,12 +346,14 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
319
346
|
return
|
|
320
347
|
}
|
|
321
348
|
val success = result["success"] as? Boolean ?: false
|
|
349
|
+
val isHardwareSpecificUnsupported = result["isHardwareSpecificUnsupported"] as? Boolean ?: false
|
|
322
350
|
val detectedModels = result["detectedModels"] as? ArrayList<*>
|
|
323
351
|
?: arrayListOf<HashMap<String, String>>()
|
|
324
352
|
val modelTypeStr = result["modelType"] as? String
|
|
325
353
|
|
|
326
354
|
val resultMap = Arguments.createMap()
|
|
327
355
|
resultMap.putBoolean("success", success)
|
|
356
|
+
resultMap.putBoolean("isHardwareSpecificUnsupported", isHardwareSpecificUnsupported)
|
|
328
357
|
val modelsArray = Arguments.createArray()
|
|
329
358
|
for (model in detectedModels) {
|
|
330
359
|
val modelMap = model as? HashMap<*, *>
|
|
@@ -484,6 +513,71 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
484
513
|
onlineSttHelper.processSttAudioChunk(streamId, samples, sampleRate.toInt(), promise)
|
|
485
514
|
}
|
|
486
515
|
|
|
516
|
+
/**
 * Starts microphone PCM capture and forwards the captured audio to JS.
 *
 * Any capture that is already active is stopped first. Chunks are delivered through
 * [emitPcmLiveStreamData]. An error reported while the capture is still starting rejects
 * [promise] with `PCM_LIVE_STREAM_ERROR`; an error reported after a successful start is
 * delivered through [emitPcmLiveStreamError].
 *
 * Recognised keys in [options]:
 * - `sampleRate`: target sample rate in Hz; missing, null or non-positive values fall back to 16000.
 * - `channelCount`: clamped to 1..2, defaults to 1.
 * - `bufferSizeFrames`: defaults to 0.
 */
override fun startPcmLiveStream(options: ReadableMap, promise: Promise) {
    try {
        // Only one capture may be active at a time.
        pcmCapture?.stop()
        pcmCapture = null

        val requestedRate =
            if (options.hasKey("sampleRate") && !options.isNull("sampleRate")) {
                options.getDouble("sampleRate").toInt()
            } else {
                0
            }
        val sampleRate = requestedRate.takeIf { it > 0 } ?: 16000
        val channelCount =
            if (options.hasKey("channelCount")) options.getDouble("channelCount").toInt().coerceIn(1, 2) else 1
        val bufferSizeFrames =
            if (options.hasKey("bufferSizeFrames")) options.getDouble("bufferSizeFrames").toInt() else 0

        // onError may be invoked from the capture thread. The lock makes the hand-off atomic:
        // an error is either recorded before start-up is declared complete (and rejects the
        // promise) or arrives afterwards (and is emitted as an event) - it cannot be lost.
        val startLock = Any()
        var startError: String? = null
        var started = false
        val capture = SherpaOnnxPcmCapture(
            targetSampleRate = sampleRate,
            channelCount = channelCount,
            bufferSizeFrames = bufferSizeFrames,
            onChunk = { base64Pcm, sr -> emitPcmLiveStreamData(base64Pcm, sr) },
            onError = { msg ->
                val alreadyStarted = synchronized(startLock) {
                    if (!started) startError = msg
                    started
                }
                if (alreadyStarted) emitPcmLiveStreamError(msg)
            },
            logTag = NAME
        )
        pcmCapture = capture
        capture.start()
        val err = synchronized(startLock) {
            started = true
            startError
        }
        if (err != null) {
            // Do not keep a reference to a capture that failed to start.
            capture.stop()
            pcmCapture = null
            promise.reject("PCM_LIVE_STREAM_ERROR", err)
        } else {
            promise.resolve(null)
        }
    } catch (e: Exception) {
        android.util.Log.e(NAME, "startPcmLiveStream failed", e)
        promise.reject("PCM_LIVE_STREAM_ERROR", e.message ?: "Failed to start PCM capture", e)
    }
}
|
|
553
|
+
|
|
554
|
+
/**
 * Stops the active PCM capture, if any, drops the reference to it and resolves [promise]
 * with null. A failure while stopping rejects [promise] with `PCM_LIVE_STREAM_ERROR`.
 */
override fun stopPcmLiveStream(promise: Promise) {
    val failure: Exception? = try {
        pcmCapture?.stop()
        pcmCapture = null
        promise.resolve(null)
        null
    } catch (e: Exception) {
        e
    }
    if (failure != null) {
        promise.reject("PCM_LIVE_STREAM_ERROR", failure.message ?: "Failed to stop PCM capture", failure)
    }
}
|
|
563
|
+
|
|
564
|
+
/**
 * Emits a `pcmLiveStreamData` device event whose payload carries the Base64-encoded PCM
 * chunk under `base64Pcm` and its sample rate under `sampleRate`.
 */
private fun emitPcmLiveStreamData(base64Pcm: String, sampleRate: Int) {
    reactApplicationContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
        .emit(
            "pcmLiveStreamData",
            Arguments.createMap().apply {
                putString("base64Pcm", base64Pcm)
                putInt("sampleRate", sampleRate)
            }
        )
}
|
|
572
|
+
|
|
573
|
+
/**
 * Emits a `pcmLiveStreamError` device event whose payload carries [message] under `message`.
 */
private fun emitPcmLiveStreamError(message: String) {
    reactApplicationContext
        .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
        .emit(
            "pcmLiveStreamError",
            Arguments.createMap().apply { putString("message", message) }
        )
}
|
|
580
|
+
|
|
487
581
|
// ==================== STT Methods ====================
|
|
488
582
|
|
|
489
583
|
/**
|
|
@@ -507,17 +601,34 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
507
601
|
sttHelper.setSttConfig(instanceId, options, promise)
|
|
508
602
|
}
|
|
509
603
|
|
|
604
|
+
/**
 * Makes [inputPath] readable as a plain file path.
 *
 * A path that does not start with `content://` is returned unchanged with a null temp file.
 * A `content://` URI is copied through `ContentResolver.openInputStream` into a new file in
 * the cache directory; the file extension is derived from the URI's MIME type and falls back
 * to `tmp`.
 *
 * @return the path to read from and, for content URIs, the temp file the caller must delete
 *   (typically in a `finally` block).
 * @throws IllegalStateException if the content URI cannot be opened for reading.
 */
private fun resolveInputForConvert(inputPath: String): Pair<String, java.io.File?> {
    if (!inputPath.startsWith("content://")) return Pair(inputPath, null)
    val uri = Uri.parse(inputPath)
    val resolver = reactApplicationContext.contentResolver
    val ext = android.webkit.MimeTypeMap.getSingleton()
        .getExtensionFromMimeType(resolver.getType(uri)) ?: "tmp"
    val tmp = java.io.File(reactApplicationContext.cacheDir, "convert_${System.nanoTime()}.$ext")
    try {
        resolver.openInputStream(uri)?.use { input ->
            tmp.outputStream().use { output -> input.copyTo(output) }
        } ?: throw IllegalStateException("Content URI not readable: $inputPath")
    } catch (e: Exception) {
        // The caller only learns about the temp file from the return value, so a partially
        // written copy has to be cleaned up here before the failure propagates.
        tmp.delete()
        throw e
    }
    return Pair(tmp.absolutePath, tmp)
}
|
|
620
|
+
|
|
510
621
|
/**
|
|
511
622
|
* Convert any supported audio file to a requested format using native FFmpeg prebuilts.
|
|
512
|
-
*
|
|
513
|
-
*
|
|
623
|
+
* Accepts file paths and content:// URIs. Content URIs are transparently copied to a
|
|
624
|
+
* temp file first (via ContentResolver), converted, then the temp file is deleted.
|
|
514
625
|
*/
|
|
515
626
|
override fun convertAudioToFormat(inputPath: String, outputPath: String, format: String, outputSampleRateHz: Double?, promise: Promise) {
|
|
627
|
+
var tmpFile: java.io.File? = null
|
|
516
628
|
try {
|
|
517
629
|
var rate = outputSampleRateHz?.toInt() ?: 0
|
|
518
630
|
|
|
519
631
|
if (rate < 0) {
|
|
520
|
-
android.util.Log.e(NAME, "CONVERT_ERROR: Invalid outputSampleRateHz: must be >= 0")
|
|
521
632
|
promise.reject("CONVERT_ERROR", "Invalid outputSampleRateHz: must be >= 0")
|
|
522
633
|
return
|
|
523
634
|
}
|
|
@@ -525,43 +636,57 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
525
636
|
if (format.equals("mp3", ignoreCase = true)) {
|
|
526
637
|
val allowed = setOf(0, 32000, 44100, 48000)
|
|
527
638
|
if (!allowed.contains(rate)) {
|
|
528
|
-
|
|
529
|
-
|
|
639
|
+
promise.reject("CONVERT_ERROR", "MP3 output sample rate must be one of 32000, 44100, 48000, or 0 (default). Received: $rate")
|
|
640
|
+
return
|
|
641
|
+
}
|
|
642
|
+
} else if (format.equals("opus", ignoreCase = true) || format.equals("oggm", ignoreCase = true) || format.equals("webm", ignoreCase = true) || format.equals("mkv", ignoreCase = true) || format.equals("ogg", ignoreCase = true)) {
|
|
643
|
+
val allowed = setOf(0, 8000, 12000, 16000, 24000, 48000)
|
|
644
|
+
if (!allowed.contains(rate)) {
|
|
645
|
+
promise.reject("CONVERT_ERROR", "Opus output sample rate must be 8000, 12000, 16000, 24000, 48000, or 0 (default). Received: $rate")
|
|
530
646
|
return
|
|
531
647
|
}
|
|
532
648
|
} else {
|
|
533
649
|
rate = rate.coerceIn(0, 48000)
|
|
534
650
|
}
|
|
535
651
|
|
|
536
|
-
val
|
|
652
|
+
val (pathToUse, tmp) = resolveInputForConvert(inputPath)
|
|
653
|
+
tmpFile = tmp
|
|
654
|
+
val err = Companion.nativeConvertAudioToFormat(pathToUse, outputPath, format, rate)
|
|
537
655
|
if (err.isEmpty()) {
|
|
538
656
|
promise.resolve(null)
|
|
539
657
|
} else {
|
|
540
|
-
android.util.Log.e(NAME, "CONVERT_ERROR: $err")
|
|
658
|
+
android.util.Log.e(NAME, "CONVERT_ERROR: $err (inputPath=$inputPath)")
|
|
541
659
|
promise.reject("CONVERT_ERROR", err)
|
|
542
660
|
}
|
|
543
661
|
} catch (e: Exception) {
|
|
544
662
|
android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio: ${e.message}", e)
|
|
545
663
|
promise.reject("CONVERT_EXCEPTION", "Failed to convert audio: ${e.message}", e)
|
|
664
|
+
} finally {
|
|
665
|
+
tmpFile?.delete()
|
|
546
666
|
}
|
|
547
667
|
}
|
|
548
668
|
|
|
549
669
|
/**
|
|
550
670
|
* Convert any supported audio file to WAV 16 kHz mono 16-bit PCM using native FFmpeg prebuilts.
|
|
551
|
-
*
|
|
671
|
+
* Accepts file paths and content:// URIs. Content URIs are copied to a temp file first.
|
|
552
672
|
*/
|
|
553
673
|
override fun convertAudioToWav16k(inputPath: String, outputPath: String, promise: Promise) {
|
|
674
|
+
var tmpFile: java.io.File? = null
|
|
554
675
|
try {
|
|
555
|
-
val
|
|
676
|
+
val (pathToUse, tmp) = resolveInputForConvert(inputPath)
|
|
677
|
+
tmpFile = tmp
|
|
678
|
+
val err = Companion.nativeConvertAudioToWav16k(pathToUse, outputPath)
|
|
556
679
|
if (err.isEmpty()) {
|
|
557
680
|
promise.resolve(null)
|
|
558
681
|
} else {
|
|
559
|
-
|
|
560
|
-
|
|
682
|
+
android.util.Log.e(NAME, "CONVERT_ERROR: $err")
|
|
683
|
+
promise.reject("CONVERT_ERROR", err)
|
|
561
684
|
}
|
|
562
685
|
} catch (e: Exception) {
|
|
563
686
|
android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio to WAV16k: ${e.message}", e)
|
|
564
687
|
promise.reject("CONVERT_EXCEPTION", "Failed to convert audio to WAV16k: ${e.message}", e)
|
|
688
|
+
} finally {
|
|
689
|
+
tmpFile?.delete()
|
|
565
690
|
}
|
|
566
691
|
}
|
|
567
692
|
|
|
@@ -642,6 +767,14 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
642
767
|
resultMap.putString("error", error)
|
|
643
768
|
}
|
|
644
769
|
}
|
|
770
|
+
val lexiconLanguageCandidates = result["lexiconLanguageCandidates"] as? ArrayList<*>
|
|
771
|
+
if (!lexiconLanguageCandidates.isNullOrEmpty()) {
|
|
772
|
+
val candidatesArray = Arguments.createArray()
|
|
773
|
+
for (c in lexiconLanguageCandidates) {
|
|
774
|
+
(c as? String)?.let { candidatesArray.pushString(it) }
|
|
775
|
+
}
|
|
776
|
+
resultMap.putArray("lexiconLanguageCandidates", candidatesArray)
|
|
777
|
+
}
|
|
645
778
|
promise.resolve(resultMap)
|
|
646
779
|
} catch (e: Exception) {
|
|
647
780
|
android.util.Log.e(NAME, "DETECT_ERROR: TTS model detection failed: ${e.message}", e)
|
|
@@ -801,6 +934,19 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
|
|
|
801
934
|
ttsHelper.saveTtsAudioToContentUri(samples, sampleRate, directoryUri, filename, promise)
|
|
802
935
|
}
|
|
803
936
|
|
|
937
|
+
/**
 * Copies the local file at [filePath] into a document named [filename] under the SAF
 * directory [directoryUri] (format-agnostic). The work is delegated to the TTS helper,
 * which is also responsible for settling [promise].
 */
override fun copyFileToContentUri(
    filePath: String,
    directoryUri: String,
    filename: String,
    mimeType: String,
    promise: Promise
) {
    val helper = ttsHelper
    helper.copyFileToContentUri(filePath, directoryUri, filename, mimeType, promise)
}
|
|
949
|
+
|
|
804
950
|
/**
|
|
805
951
|
* Save text content to a file via Android SAF content URI.
|
|
806
952
|
*/
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
package com.sherpaonnx
|
|
2
|
+
|
|
3
|
+
import android.media.AudioFormat
|
|
4
|
+
import android.media.AudioRecord
|
|
5
|
+
import android.media.MediaRecorder
|
|
6
|
+
import android.util.Base64
|
|
7
|
+
import android.util.Log
|
|
8
|
+
import java.nio.ByteBuffer
|
|
9
|
+
import java.nio.ByteOrder
|
|
10
|
+
import kotlin.concurrent.thread
|
|
11
|
+
import kotlin.math.round
|
|
12
|
+
|
|
13
|
+
/**
 * Native PCM capture from the microphone with optional resampling to a target sample rate.
 *
 * Capture uses the first rate in [CAPTURE_RATES] for which `AudioRecord.getMinBufferSize`
 * succeeds, then resamples every chunk to [targetSampleRate] so the app always receives PCM
 * at the requested rate (e.g. 16000 for STT). Audio is always recorded as 16-bit mono;
 * [channelCount] is accepted but not applied by this implementation.
 *
 * @param targetSampleRate sample rate (Hz) of the PCM handed to [onChunk].
 * @param channelCount requested channel count (currently unused; capture is mono).
 * @param bufferSizeFrames requested buffer size in frames; values <= 0 select a 0.1 s default.
 * @param onChunk receives each chunk as Base64-encoded little-endian Int16 PCM plus its sample rate.
 * @param onError receives a human-readable message when capture cannot start or fails.
 * @param logTag tag used for logcat output.
 */
class SherpaOnnxPcmCapture(
    private val targetSampleRate: Int,
    private val channelCount: Int,
    private val bufferSizeFrames: Int,
    private val onChunk: (base64Pcm: String, sampleRate: Int) -> Unit,
    private val onError: (message: String) -> Unit,
    private val logTag: String = "SherpaOnnxPcmCapture"
) {
    private var audioRecord: AudioRecord? = null

    @Volatile
    private var running = false
    private var captureThread: Thread? = null

    companion object {
        /** Supported capture sample rates to try in order (device-dependent). */
        private val CAPTURE_RATES = intArrayOf(16000, 44100, 48000)

        /**
         * Resample Int16 PCM from capture rate to target rate using linear interpolation.
         *
         * Returns [input] itself when both rates are equal, otherwise a new [ShortArray]
         * holding the samples at [toRate].
         */
        private fun resampleInt16(
            input: ShortArray,
            fromRate: Int,
            toRate: Int
        ): ShortArray {
            if (fromRate == toRate) return input
            val ratio = fromRate.toDouble() / toRate
            val outLength = round(input.size / ratio).toInt().coerceAtLeast(0)
            val result = ShortArray(outLength)
            for (i in 0 until outLength) {
                val srcIdx = i * ratio
                val idx0 = srcIdx.toInt().coerceIn(0, input.size - 1)
                // Clamp the right-hand neighbour so the last output sample stays in bounds.
                val idx1 = (idx0 + 1).coerceAtMost(input.size - 1)
                val frac = (srcIdx - idx0).toFloat()
                val v0 = input[idx0].toInt()
                val v1 = input[idx1].toInt()
                result[i] = (v0 + (v1 - v0) * frac).toInt().toShort()
            }
            return result
        }
    }

    /**
     * Start capture. Uses a supported hardware rate and resamples to [targetSampleRate] before emitting.
     *
     * Does nothing (apart from a warning) when capture is already running. Missing
     * RECORD_AUDIO permission and AudioRecord initialisation failures are reported through
     * [onError] before this method returns; failures on the capture thread are reported
     * through [onError] from that thread.
     */
    fun start() {
        if (running) {
            Log.w(logTag, "start: already running")
            return
        }
        val bufferSizeBytes = if (bufferSizeFrames > 0) {
            bufferSizeFrames * 2 // 2 bytes per sample (16-bit mono)
        } else {
            (0.1 * targetSampleRate).toInt() * 2 // 0.1 s default (16-bit mono)
        }
        // Pick the first rate the device reports a valid minimum buffer size for.
        val captureRate = CAPTURE_RATES.firstOrNull { rate ->
            val size = AudioRecord.getMinBufferSize(rate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
            size != AudioRecord.ERROR && size != AudioRecord.ERROR_BAD_VALUE
        } ?: 44100
        val minBuf = AudioRecord.getMinBufferSize(captureRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
        val bufSize = minBuf.coerceAtLeast(bufferSizeBytes)
        val record = try {
            AudioRecord(
                MediaRecorder.AudioSource.VOICE_RECOGNITION,
                captureRate,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                bufSize
            )
        } catch (e: SecurityException) {
            Log.e(logTag, "start: RECORD_AUDIO permission not granted", e)
            onError("RECORD_AUDIO permission not granted")
            return
        }
        if (record.state != AudioRecord.STATE_INITIALIZED) {
            Log.e(logTag, "start: AudioRecord not initialized")
            onError("AudioRecord failed to initialize")
            record.release()
            return
        }
        audioRecord = record
        running = true
        captureThread = thread(name = "SherpaOnnxPcmCapture") {
            val shortBuf = ShortArray(bufSize / 2)
            try {
                record.startRecording()
                while (running && record.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
                    val read = record.read(shortBuf, 0, shortBuf.size)
                    if (read < 0) {
                        // Negative values are AudioRecord error codes. Retrying would spin in a
                        // tight loop, so report the failure (unless we are being stopped) and exit.
                        if (running) {
                            Log.e(logTag, "AudioRecord.read failed with code $read")
                            onError("AudioRecord read failed (code $read)")
                        }
                        break
                    }
                    if (read == 0) continue
                    val chunk = shortBuf.copyOf(read)
                    val toEmit = if (captureRate != targetSampleRate) {
                        resampleInt16(chunk, captureRate, targetSampleRate)
                    } else {
                        chunk
                    }
                    // Serialise as little-endian Int16 before Base64-encoding.
                    val byteBuf = ByteBuffer.allocate(toEmit.size * 2).order(ByteOrder.LITTLE_ENDIAN)
                    for (s in toEmit) byteBuf.putShort(s)
                    val base64 = Base64.encodeToString(byteBuf.array(), Base64.NO_WRAP)
                    onChunk(base64, targetSampleRate)
                }
            } catch (e: Exception) {
                // Exceptions raised while stop() is tearing the recorder down are expected.
                if (running) {
                    Log.e(logTag, "Capture thread error", e)
                    onError(e.message ?: "Capture error")
                }
            } finally {
                try {
                    record.stop()
                } catch (_: Exception) { }
                record.release()
                audioRecord = null
            }
        }
    }

    /** Stop capture and release resources. Waits up to 2 seconds for the capture thread to exit. */
    fun stop() {
        running = false
        // Actively stop AudioRecord to unblock any pending read()
        val record = audioRecord
        if (record != null) {
            try {
                record.stop()
            } catch (_: Exception) {
                // Ignore; the capture thread's finally block also handles stop/release safely
            }
        }
        captureThread?.join(2000)
        captureThread = null
        audioRecord = null
    }
}
|
|
@@ -2,6 +2,7 @@ package com.sherpaonnx
|
|
|
2
2
|
|
|
3
3
|
import android.content.Context
|
|
4
4
|
import android.net.Uri
|
|
5
|
+
import android.os.HandlerThread
|
|
5
6
|
import android.util.Log
|
|
6
7
|
import com.facebook.react.bridge.Arguments
|
|
7
8
|
import com.facebook.react.bridge.Promise
|
|
@@ -51,6 +52,9 @@ internal class SherpaOnnxSttHelper(
|
|
|
51
52
|
|
|
52
53
|
private val instances = ConcurrentHashMap<String, SttEngineInstance>()
|
|
53
54
|
|
|
55
|
+
private val initThread = HandlerThread("stt-init").also { it.start() }
|
|
56
|
+
private val initHandler = android.os.Handler(initThread.looper)
|
|
57
|
+
|
|
54
58
|
private fun getInstance(instanceId: String): SttEngineInstance? = instances[instanceId]
|
|
55
59
|
|
|
56
60
|
/** Hotwords are supported for transducer and NeMo transducer models (sherpa-onnx; NeMo: https://github.com/k2-fsa/sherpa-onnx/pull/3077). */
|
|
@@ -277,26 +281,33 @@ internal class SherpaOnnxSttHelper(
|
|
|
277
281
|
)
|
|
278
282
|
inst.lastRecognizerConfig = config
|
|
279
283
|
inst.currentSttModelType = modelTypeStr
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
284
|
+
// Defer recognizer creation to the dedicated background thread so release() of the previous
|
|
285
|
+
// recognizer can complete off the UI thread (avoids "destroyed mutex" / SIGSEGV when switching models).
|
|
286
|
+
initHandler.post {
|
|
287
|
+
try {
|
|
288
|
+
inst.recognizer = OfflineRecognizer(config = config)
|
|
289
|
+
val resultMap = Arguments.createMap()
|
|
290
|
+
resultMap.putBoolean("success", true)
|
|
291
|
+
resultMap.putString("modelType", modelTypeStr)
|
|
292
|
+
resultMap.putString("decodingMethod", config.decodingMethod)
|
|
293
|
+
val detectedModelsArray = Arguments.createArray()
|
|
294
|
+
for (model in detectedModels) {
|
|
295
|
+
val modelMap = model as? HashMap<*, *>
|
|
296
|
+
if (modelMap != null) {
|
|
297
|
+
val modelResultMap = Arguments.createMap()
|
|
298
|
+
modelResultMap.putString("type", modelMap["type"] as? String ?: "")
|
|
299
|
+
modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
|
|
300
|
+
detectedModelsArray.pushMap(modelResultMap)
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
resultMap.putArray("detectedModels", detectedModelsArray)
|
|
304
|
+
promise.resolve(resultMap)
|
|
305
|
+
} catch (e: Exception) {
|
|
306
|
+
val errorMsg = "Exception creating recognizer: ${e.message ?: e.javaClass.simpleName}"
|
|
307
|
+
Log.e(logTag, errorMsg, e)
|
|
308
|
+
promise.reject("INIT_ERROR", errorMsg, e)
|
|
296
309
|
}
|
|
297
310
|
}
|
|
298
|
-
resultMap.putArray("detectedModels", detectedModelsArray)
|
|
299
|
-
promise.resolve(resultMap)
|
|
300
311
|
} catch (e: Exception) {
|
|
301
312
|
val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
|
|
302
313
|
Log.e(logTag, errorMsg, e)
|
|
@@ -305,6 +316,7 @@ internal class SherpaOnnxSttHelper(
|
|
|
305
316
|
}
|
|
306
317
|
|
|
307
318
|
fun transcribeFile(instanceId: String, filePath: String, promise: Promise) {
|
|
319
|
+
var tempPath: String? = null
|
|
308
320
|
try {
|
|
309
321
|
val inst = getInstance(instanceId) ?: run {
|
|
310
322
|
promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
|
|
@@ -315,16 +327,46 @@ internal class SherpaOnnxSttHelper(
|
|
|
315
327
|
promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
|
|
316
328
|
return
|
|
317
329
|
}
|
|
318
|
-
val
|
|
330
|
+
val pathToRead = if (filePath.startsWith("content://")) {
|
|
331
|
+
tempPath = resolveContentUriToFile(filePath, "stt_transcribe")
|
|
332
|
+
tempPath
|
|
333
|
+
} else {
|
|
334
|
+
filePath
|
|
335
|
+
}
|
|
336
|
+
if (pathToRead == null || pathToRead.isBlank()) {
|
|
337
|
+
promise.reject("TRANSCRIBE_ERROR", "Could not resolve audio file path")
|
|
338
|
+
return
|
|
339
|
+
}
|
|
340
|
+
val f = File(pathToRead)
|
|
341
|
+
if (!f.exists() || f.length() == 0L) {
|
|
342
|
+
promise.reject("TRANSCRIBE_ERROR", "Audio file does not exist or is empty: $pathToRead (size=${f.length()})")
|
|
343
|
+
return
|
|
344
|
+
}
|
|
345
|
+
val wave = WaveReader.readWave(pathToRead)
|
|
346
|
+
val samples = wave.samples
|
|
347
|
+
if (samples == null || samples.isEmpty()) {
|
|
348
|
+
promise.reject("TRANSCRIBE_ERROR", "Could not read audio samples (file=${f.length()} bytes). The file must be WAV format (use convertAudioToWav16k for MP3/FLAC).")
|
|
349
|
+
return
|
|
350
|
+
}
|
|
319
351
|
val stream: OfflineStream = rec.createStream()
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
352
|
+
try {
|
|
353
|
+
stream.acceptWaveform(samples, wave.sampleRate)
|
|
354
|
+
rec.decode(stream)
|
|
355
|
+
val result = rec.getResult(stream)
|
|
356
|
+
promise.resolve(resultToWritableMap(result))
|
|
357
|
+
} finally {
|
|
358
|
+
stream.release()
|
|
359
|
+
}
|
|
324
360
|
} catch (e: Exception) {
|
|
325
361
|
val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe file"
|
|
326
362
|
Log.e(logTag, "transcribeFile error: $message", e)
|
|
327
363
|
promise.reject("TRANSCRIBE_ERROR", message, e)
|
|
364
|
+
} finally {
|
|
365
|
+
tempPath?.let { path ->
|
|
366
|
+
try {
|
|
367
|
+
File(path).takeIf { it.exists() }?.delete()
|
|
368
|
+
} catch (_: Exception) { }
|
|
369
|
+
}
|
|
328
370
|
}
|
|
329
371
|
}
|
|
330
372
|
|
|
@@ -588,7 +630,16 @@ internal class SherpaOnnxSttHelper(
|
|
|
588
630
|
preprocessor = path(paths, "moonshinePreprocessor"),
|
|
589
631
|
encoder = path(paths, "moonshineEncoder"),
|
|
590
632
|
uncachedDecoder = path(paths, "moonshineUncachedDecoder"),
|
|
591
|
-
cachedDecoder = path(paths, "moonshineCachedDecoder")
|
|
633
|
+
cachedDecoder = path(paths, "moonshineCachedDecoder"),
|
|
634
|
+
mergedDecoder = ""
|
|
635
|
+
),
|
|
636
|
+
tokens = path(paths, "tokens"),
|
|
637
|
+
modelType = "moonshine"
|
|
638
|
+
)
|
|
639
|
+
"moonshine_v2" -> OfflineModelConfig(
|
|
640
|
+
moonshine = OfflineMoonshineModelConfig(
|
|
641
|
+
encoder = path(paths, "moonshineEncoder"),
|
|
642
|
+
mergedDecoder = path(paths, "moonshineMergedDecoder")
|
|
592
643
|
),
|
|
593
644
|
tokens = path(paths, "tokens"),
|
|
594
645
|
modelType = "moonshine"
|
|
@@ -31,6 +31,7 @@ import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
|
|
|
31
31
|
import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
|
|
32
32
|
import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
|
|
33
33
|
import java.io.File
|
|
34
|
+
import java.io.FileInputStream
|
|
34
35
|
import java.io.FileOutputStream
|
|
35
36
|
import java.io.InputStream
|
|
36
37
|
import java.io.OutputStream
|
|
@@ -74,6 +75,7 @@ internal class SherpaOnnxTtsHelper(
|
|
|
74
75
|
|
|
75
76
|
fun hasEngine(): Boolean = synchronized(lock) { tts != null || zipvoiceTts != null }
|
|
76
77
|
val isZipvoice: Boolean get() = synchronized(lock) { zipvoiceTts != null }
|
|
78
|
+
val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
|
|
77
79
|
fun releaseEngines() {
|
|
78
80
|
synchronized(lock) {
|
|
79
81
|
tts?.release()
|
|
@@ -258,7 +260,7 @@ internal class SherpaOnnxTtsHelper(
|
|
|
258
260
|
|
|
259
261
|
inst.ttsInitState = TtsInitState(
|
|
260
262
|
modelDir,
|
|
261
|
-
|
|
263
|
+
modelTypeStr, // detected model type (e.g. "pocket"), not the requested "auto"
|
|
262
264
|
numThreads.toInt(),
|
|
263
265
|
debug,
|
|
264
266
|
noiseScale?.takeUnless { it.isNaN() },
|
|
@@ -416,6 +418,11 @@ internal class SherpaOnnxTtsHelper(
|
|
|
416
418
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
417
419
|
inst.tts!!.generateWithConfig(text, config)
|
|
418
420
|
}
|
|
421
|
+
inst.isPocket && !hasReferenceOptions(options) -> {
|
|
422
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
423
|
+
promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
|
|
424
|
+
return
|
|
425
|
+
}
|
|
419
426
|
else -> dispatchGenerate(inst, text, sid, speed)
|
|
420
427
|
?: run {
|
|
421
428
|
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
|
|
@@ -469,6 +476,11 @@ internal class SherpaOnnxTtsHelper(
|
|
|
469
476
|
val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
|
|
470
477
|
inst.tts!!.generateWithConfig(text, config)
|
|
471
478
|
}
|
|
479
|
+
inst.isPocket && !hasReferenceOptions(options) -> {
|
|
480
|
+
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
481
|
+
promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
|
|
482
|
+
return
|
|
483
|
+
}
|
|
472
484
|
else -> dispatchGenerate(inst, text, sid, speed)
|
|
473
485
|
?: run {
|
|
474
486
|
Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
|
|
@@ -517,6 +529,11 @@ internal class SherpaOnnxTtsHelper(
|
|
|
517
529
|
promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
|
|
518
530
|
return
|
|
519
531
|
}
|
|
532
|
+
if (inst.isPocket && !hasReferenceOptions(options)) {
|
|
533
|
+
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
|
|
534
|
+
promise.reject("TTS_STREAM_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
|
|
535
|
+
return
|
|
536
|
+
}
|
|
520
537
|
if (hasReferenceOptions(options) && inst.isZipvoice) {
|
|
521
538
|
Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
|
|
522
539
|
promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
|
|
@@ -782,6 +799,40 @@ internal class SherpaOnnxTtsHelper(
|
|
|
782
799
|
}
|
|
783
800
|
}
|
|
784
801
|
|
|
802
|
+
/**
|
|
803
|
+
* Copy a local file into a document under a SAF directory URI.
|
|
804
|
+
* Format-agnostic: any file (e.g. WAV, MP3, FLAC) can be written.
|
|
805
|
+
* Resolves with the created content URI string.
|
|
806
|
+
*/
|
|
807
|
+
fun copyFileToContentUri(
|
|
808
|
+
filePath: String,
|
|
809
|
+
directoryUri: String,
|
|
810
|
+
filename: String,
|
|
811
|
+
mimeType: String,
|
|
812
|
+
promise: Promise
|
|
813
|
+
) {
|
|
814
|
+
try {
|
|
815
|
+
val file = File(filePath)
|
|
816
|
+
if (!file.isFile || !file.canRead()) {
|
|
817
|
+
promise.reject("TTS_SAVE_ERROR", "File not found or not readable: $filePath")
|
|
818
|
+
return
|
|
819
|
+
}
|
|
820
|
+
val resolver = context.contentResolver
|
|
821
|
+
val dirUri = Uri.parse(directoryUri)
|
|
822
|
+
val fileUri = createDocumentInDirectory(resolver, dirUri, filename, mimeType)
|
|
823
|
+
FileInputStream(file).use { inputStream ->
|
|
824
|
+
resolver.openOutputStream(fileUri, "w")?.use { outputStream ->
|
|
825
|
+
inputStream.copyTo(outputStream)
|
|
826
|
+
outputStream.flush()
|
|
827
|
+
} ?: throw IllegalStateException("Failed to open output stream for URI: $fileUri")
|
|
828
|
+
}
|
|
829
|
+
promise.resolve(fileUri.toString())
|
|
830
|
+
} catch (e: Exception) {
|
|
831
|
+
Log.e("SherpaOnnxTts", "TTS_SAVE_ERROR: Failed to copy file to content URI", e)
|
|
832
|
+
promise.reject("TTS_SAVE_ERROR", "Failed to copy file to content URI", e)
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
|
|
785
836
|
fun copyTtsContentUriToCache(fileUri: String, filename: String, promise: Promise) {
|
|
786
837
|
try {
|
|
787
838
|
val resolver = context.contentResolver
|