react-native-sherpa-onnx 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/README.md +84 -77
  2. package/SherpaOnnx.podspec +79 -45
  3. package/android/build.gradle +8 -2
  4. package/android/prebuilt-download.gradle +70 -16
  5. package/android/prebuilt-versions.gradle +14 -6
  6. package/android/src/main/cpp/CMakeLists.txt +2 -0
  7. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
  8. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
  9. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
  10. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
  11. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
  12. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
  13. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
  14. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
  15. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
  16. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
  17. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
  18. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
  19. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
  21. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
  22. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
  23. package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
  24. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
  25. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
  26. package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
  27. package/ios/SherpaOnnx+STT.mm +2 -0
  28. package/ios/SherpaOnnx+TTS.mm +17 -0
  29. package/ios/SherpaOnnx.mm +27 -3
  30. package/ios/SherpaOnnxAudioConvert.h +28 -0
  31. package/ios/SherpaOnnxAudioConvert.mm +698 -0
  32. package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
  33. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
  34. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
  35. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
  36. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
  37. package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
  38. package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
  39. package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
  40. package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
  41. package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
  42. package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
  43. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  44. package/lib/module/audio/index.js +55 -1
  45. package/lib/module/audio/index.js.map +1 -1
  46. package/lib/module/download/ModelDownloadManager.js +14 -0
  47. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  48. package/lib/module/index.js +10 -0
  49. package/lib/module/index.js.map +1 -1
  50. package/lib/module/stt/streaming.js +6 -3
  51. package/lib/module/stt/streaming.js.map +1 -1
  52. package/lib/module/tts/index.js +13 -1
  53. package/lib/module/tts/index.js.map +1 -1
  54. package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
  55. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  56. package/lib/typescript/src/audio/index.d.ts +20 -1
  57. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  58. package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
  59. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  60. package/lib/typescript/src/index.d.ts +10 -0
  61. package/lib/typescript/src/index.d.ts.map +1 -1
  62. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  63. package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
  64. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  65. package/lib/typescript/src/tts/index.d.ts +12 -1
  66. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  67. package/package.json +6 -1
  68. package/scripts/check-model-csvs.sh +72 -0
  69. package/scripts/setup-ios-framework.sh +272 -191
  70. package/src/NativeSherpaOnnx.ts +37 -3
  71. package/src/audio/index.ts +84 -1
  72. package/src/download/ModelDownloadManager.ts +19 -0
  73. package/src/index.tsx +15 -0
  74. package/src/stt/streaming.ts +10 -5
  75. package/src/stt/streamingTypes.ts +1 -1
  76. package/src/tts/index.ts +25 -1
  77. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
  78. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
  79. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
  80. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  81. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  82. package/ios/scripts/patch-libarchive-includes.sh +0 -61
  83. package/ios/scripts/setup-ios-libarchive.sh +0 -98
@@ -1,5 +1,6 @@
1
1
  package com.sherpaonnx
2
2
 
3
+ import android.net.Uri
3
4
  import com.facebook.react.bridge.ReactApplicationContext
4
5
  import com.facebook.react.bridge.Promise
5
6
  import com.facebook.react.bridge.ReadableArray
@@ -55,6 +56,7 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
55
56
  { instanceId, requestId, cancelled -> emitTtsStreamEnd(instanceId, requestId, cancelled) }
56
57
  )
57
58
  private val archiveHelper = SherpaOnnxArchiveHelper()
59
+ private var pcmCapture: SherpaOnnxPcmCapture? = null
58
60
 
59
61
  override fun getName(): String {
60
62
  return NAME
@@ -62,6 +64,8 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
62
64
 
63
65
  override fun onCatalystInstanceDestroy() {
64
66
  super.onCatalystInstanceDestroy()
67
+ pcmCapture?.stop()
68
+ pcmCapture = null
65
69
  onlineSttHelper.shutdown()
66
70
  ttsHelper.shutdown()
67
71
  }
@@ -139,6 +143,29 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
139
143
  }
140
144
  }
141
145
 
146
/**
 * Reports the device SoC model and whether it matches the QNN-capable pattern.
 *
 * Resolves with `{ soc: String?, isSupported: Boolean }`. `soc` is read from
 * `android.os.Build.SOC_MODEL` via reflection on API 31+ and is null on older releases or when
 * the value is blank. `isSupported` is true only when `soc` matches `SM8` followed by three
 * digits (case-insensitive). Never rejects: any exception is logged and reported as
 * `{ soc: null, isSupported: false }`.
 */
override fun getDeviceQnnSoc(promise: Promise) {
  try {
    val socModel: String? =
      if (android.os.Build.VERSION.SDK_INT < 31) {
        null
      } else {
        // Looked up reflectively (presumably so the module still builds against older SDKs).
        val rawValue = Class.forName("android.os.Build")
          .getDeclaredField("SOC_MODEL")
          .get(null) as? String
        rawValue?.trim()?.takeIf { it.isNotEmpty() }
      }
    val qnnCapable = socModel != null &&
      socModel.matches(Regex("^SM8\\d{3}$", RegexOption.IGNORE_CASE))

    val response = Arguments.createMap()
    response.putString("soc", socModel)
    response.putBoolean("isSupported", qnnCapable)
    promise.resolve(response)
  } catch (e: Exception) {
    android.util.Log.w(NAME, "getDeviceQnnSoc: ${e.message}")
    val fallback = Arguments.createMap()
    fallback.putNull("soc")
    fallback.putBoolean("isSupported", false)
    promise.resolve(fallback)
  }
}
168
+
142
169
  /** Asset path for embedded NNAPI test model (ORT testdata: nnapi_internal_uint8_support). */
143
170
  private val nnapiTestModelAsset = "testModels/nnapi_internal_uint8_support.onnx"
144
171
 
@@ -319,12 +346,14 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
319
346
  return
320
347
  }
321
348
  val success = result["success"] as? Boolean ?: false
349
+ val isHardwareSpecificUnsupported = result["isHardwareSpecificUnsupported"] as? Boolean ?: false
322
350
  val detectedModels = result["detectedModels"] as? ArrayList<*>
323
351
  ?: arrayListOf<HashMap<String, String>>()
324
352
  val modelTypeStr = result["modelType"] as? String
325
353
 
326
354
  val resultMap = Arguments.createMap()
327
355
  resultMap.putBoolean("success", success)
356
+ resultMap.putBoolean("isHardwareSpecificUnsupported", isHardwareSpecificUnsupported)
328
357
  val modelsArray = Arguments.createArray()
329
358
  for (model in detectedModels) {
330
359
  val modelMap = model as? HashMap<*, *>
@@ -484,6 +513,71 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
484
513
  onlineSttHelper.processSttAudioChunk(streamId, samples, sampleRate.toInt(), promise)
485
514
  }
486
515
 
516
/**
 * Starts microphone PCM capture. Chunks are delivered to JS as "pcmLiveStreamData" events and
 * errors raised after startup as "pcmLiveStreamError" events.
 *
 * Recognised options (all numeric): `sampleRate` (default 16000 when missing, null or <= 0),
 * `channelCount` (clamped to 1..2, default 1) and `bufferSizeFrames` (default 0, i.e. let the
 * capture pick its own buffer size).
 *
 * Resolves with null once the capture has been started. Rejects with PCM_LIVE_STREAM_ERROR when
 * the capture reports an error during startup or an exception is thrown; in both cases no
 * capture is left registered on the module.
 */
override fun startPcmLiveStream(options: ReadableMap, promise: Promise) {
  try {
    // Only one capture at a time: tear down whatever is still running.
    pcmCapture?.stop()
    pcmCapture = null

    // Missing and explicit-null keys both fall back to the default instead of throwing.
    fun intOption(key: String): Int? =
      if (options.hasKey(key) && !options.isNull(key)) options.getDouble(key).toInt() else null

    val sampleRate = intOption("sampleRate")?.takeIf { it > 0 } ?: 16000
    val channelCount = intOption("channelCount")?.coerceIn(1, 2) ?: 1
    val bufferSizeFrames = intOption("bufferSizeFrames") ?: 0

    // onError may fire on the capture thread, so the "still starting?" decision and the
    // hand-over of a startup error are made under one lock to avoid losing an early error.
    val startLock = Any()
    var startError: String? = null
    var started = false

    val capture = SherpaOnnxPcmCapture(
      targetSampleRate = sampleRate,
      channelCount = channelCount,
      bufferSizeFrames = bufferSizeFrames,
      onChunk = { base64Pcm, sr -> emitPcmLiveStreamData(base64Pcm, sr) },
      onError = { msg ->
        val deliverAsEvent = synchronized(startLock) {
          if (started) {
            true
          } else {
            startError = msg
            false
          }
        }
        if (deliverAsEvent) emitPcmLiveStreamError(msg)
      },
      logTag = NAME
    )
    pcmCapture = capture
    capture.start()

    val err = synchronized(startLock) {
      started = true
      startError
    }
    if (err != null) {
      // Startup failed: do not keep a dead capture around.
      pcmCapture = null
      promise.reject("PCM_LIVE_STREAM_ERROR", err)
    } else {
      promise.resolve(null)
    }
  } catch (e: Exception) {
    android.util.Log.e(NAME, "startPcmLiveStream failed", e)
    // Best-effort cleanup so a later start begins from a clean state.
    try {
      pcmCapture?.stop()
    } catch (_: Exception) { }
    pcmCapture = null
    promise.reject("PCM_LIVE_STREAM_ERROR", e.message ?: "Failed to start PCM capture", e)
  }
}
553
+
554
/**
 * Stops the current microphone capture, if there is one, and resolves with null.
 * Rejects with PCM_LIVE_STREAM_ERROR when stopping throws.
 */
override fun stopPcmLiveStream(promise: Promise) {
  try {
    val active = pcmCapture
    if (active != null) {
      active.stop()
    }
    pcmCapture = null
    promise.resolve(null)
  } catch (failure: Exception) {
    val reason = failure.message ?: "Failed to stop PCM capture"
    promise.reject("PCM_LIVE_STREAM_ERROR", reason, failure)
  }
}
563
+
564
/**
 * Sends one captured chunk to JS as a "pcmLiveStreamData" device event carrying the
 * Base64-encoded PCM (`base64Pcm`) and its sample rate (`sampleRate`).
 */
private fun emitPcmLiveStreamData(base64Pcm: String, sampleRate: Int) {
  val emitter = reactApplicationContext
    .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
  val event = Arguments.createMap().apply {
    putString("base64Pcm", base64Pcm)
    putInt("sampleRate", sampleRate)
  }
  emitter.emit("pcmLiveStreamData", event)
}
572
+
573
/**
 * Reports a capture failure to JS as a "pcmLiveStreamError" device event whose payload has a
 * single `message` field.
 */
private fun emitPcmLiveStreamError(message: String) {
  val emitter = reactApplicationContext
    .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
  val event = Arguments.createMap().apply { putString("message", message) }
  emitter.emit("pcmLiveStreamError", event)
}
580
+
487
581
  // ==================== STT Methods ====================
488
582
 
489
583
  /**
@@ -507,17 +601,34 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
507
601
  sttHelper.setSttConfig(instanceId, options, promise)
508
602
  }
509
603
 
604
/**
 * Makes [inputPath] readable by the native converter.
 *
 * Plain paths are returned unchanged with a null temp file. A content:// URI is copied through
 * ContentResolver.openInputStream into a uniquely named file in the cache directory; the file
 * extension is derived from the URI's MIME type and falls back to "tmp".
 *
 * @return the path to hand to the converter, plus the temp file the caller must delete in a
 *   finally block (null when no copy was made).
 * @throws IllegalStateException when the content URI cannot be opened. Any failure while
 *   copying is rethrown after the partially written temp file has been removed, because the
 *   caller never receives a handle to it in that case.
 */
private fun resolveInputForConvert(inputPath: String): Pair<String, java.io.File?> {
  if (!inputPath.startsWith("content://")) return Pair(inputPath, null)

  val uri = Uri.parse(inputPath)
  val resolver = reactApplicationContext.contentResolver
  val ext = android.webkit.MimeTypeMap.getSingleton()
    .getExtensionFromMimeType(resolver.getType(uri)) ?: "tmp"
  val tmp = java.io.File(reactApplicationContext.cacheDir, "convert_${System.nanoTime()}.$ext")

  try {
    val source = resolver.openInputStream(uri)
      ?: throw IllegalStateException("Content URI not readable: $inputPath")
    source.use { input ->
      tmp.outputStream().use { output -> input.copyTo(output) }
    }
  } catch (e: Exception) {
    // Do not leak a truncated copy in the cache directory.
    tmp.delete()
    throw e
  }
  return Pair(tmp.absolutePath, tmp)
}
620
+
510
621
  /**
511
622
  * Convert any supported audio file to a requested format using native FFmpeg prebuilts.
512
- * For MP3, outputSampleRateHz can be 32000, 44100, or 48000; null/0 = 44100. WAV output is always 16 kHz mono.
513
- * Resolves with null on success, rejects with an error message on failure.
623
+ * Accepts file paths and content:// URIs. Content URIs are transparently copied to a
624
+ * temp file first (via ContentResolver), converted, then the temp file is deleted.
514
625
  */
515
626
  override fun convertAudioToFormat(inputPath: String, outputPath: String, format: String, outputSampleRateHz: Double?, promise: Promise) {
627
+ var tmpFile: java.io.File? = null
516
628
  try {
517
629
  var rate = outputSampleRateHz?.toInt() ?: 0
518
630
 
519
631
  if (rate < 0) {
520
- android.util.Log.e(NAME, "CONVERT_ERROR: Invalid outputSampleRateHz: must be >= 0")
521
632
  promise.reject("CONVERT_ERROR", "Invalid outputSampleRateHz: must be >= 0")
522
633
  return
523
634
  }
@@ -525,43 +636,57 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
525
636
  if (format.equals("mp3", ignoreCase = true)) {
526
637
  val allowed = setOf(0, 32000, 44100, 48000)
527
638
  if (!allowed.contains(rate)) {
528
- android.util.Log.e(NAME, "CONVERT_ERROR: MP3 output sample rate invalid: $rate")
529
- promise.reject("CONVERT_ERROR", "MP3 output sample rate must be one of 32000, 44100, 48000, or 0 (default). Received: $rate")
639
+ promise.reject("CONVERT_ERROR", "MP3 output sample rate must be one of 32000, 44100, 48000, or 0 (default). Received: $rate")
640
+ return
641
+ }
642
+ } else if (format.equals("opus", ignoreCase = true) || format.equals("oggm", ignoreCase = true) || format.equals("webm", ignoreCase = true) || format.equals("mkv", ignoreCase = true) || format.equals("ogg", ignoreCase = true)) {
643
+ val allowed = setOf(0, 8000, 12000, 16000, 24000, 48000)
644
+ if (!allowed.contains(rate)) {
645
+ promise.reject("CONVERT_ERROR", "Opus output sample rate must be 8000, 12000, 16000, 24000, 48000, or 0 (default). Received: $rate")
530
646
  return
531
647
  }
532
648
  } else {
533
649
  rate = rate.coerceIn(0, 48000)
534
650
  }
535
651
 
536
- val err = Companion.nativeConvertAudioToFormat(inputPath, outputPath, format, rate)
652
+ val (pathToUse, tmp) = resolveInputForConvert(inputPath)
653
+ tmpFile = tmp
654
+ val err = Companion.nativeConvertAudioToFormat(pathToUse, outputPath, format, rate)
537
655
  if (err.isEmpty()) {
538
656
  promise.resolve(null)
539
657
  } else {
540
- android.util.Log.e(NAME, "CONVERT_ERROR: $err")
658
+ android.util.Log.e(NAME, "CONVERT_ERROR: $err (inputPath=$inputPath)")
541
659
  promise.reject("CONVERT_ERROR", err)
542
660
  }
543
661
  } catch (e: Exception) {
544
662
  android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio: ${e.message}", e)
545
663
  promise.reject("CONVERT_EXCEPTION", "Failed to convert audio: ${e.message}", e)
664
+ } finally {
665
+ tmpFile?.delete()
546
666
  }
547
667
  }
548
668
 
549
669
  /**
550
670
  * Convert any supported audio file to WAV 16 kHz mono 16-bit PCM using native FFmpeg prebuilts.
551
- * Resolves with null on success, rejects with an error message on failure.
671
+ * Accepts file paths and content:// URIs. Content URIs are copied to a temp file first.
552
672
  */
553
673
  override fun convertAudioToWav16k(inputPath: String, outputPath: String, promise: Promise) {
674
+ var tmpFile: java.io.File? = null
554
675
  try {
555
- val err = Companion.nativeConvertAudioToWav16k(inputPath, outputPath)
676
+ val (pathToUse, tmp) = resolveInputForConvert(inputPath)
677
+ tmpFile = tmp
678
+ val err = Companion.nativeConvertAudioToWav16k(pathToUse, outputPath)
556
679
  if (err.isEmpty()) {
557
680
  promise.resolve(null)
558
681
  } else {
559
- android.util.Log.e(NAME, "CONVERT_ERROR: $err")
560
- promise.reject("CONVERT_ERROR", err)
682
+ android.util.Log.e(NAME, "CONVERT_ERROR: $err")
683
+ promise.reject("CONVERT_ERROR", err)
561
684
  }
562
685
  } catch (e: Exception) {
563
686
  android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio to WAV16k: ${e.message}", e)
564
687
  promise.reject("CONVERT_EXCEPTION", "Failed to convert audio to WAV16k: ${e.message}", e)
688
+ } finally {
689
+ tmpFile?.delete()
565
690
  }
566
691
  }
567
692
 
@@ -642,6 +767,14 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
642
767
  resultMap.putString("error", error)
643
768
  }
644
769
  }
770
+ val lexiconLanguageCandidates = result["lexiconLanguageCandidates"] as? ArrayList<*>
771
+ if (!lexiconLanguageCandidates.isNullOrEmpty()) {
772
+ val candidatesArray = Arguments.createArray()
773
+ for (c in lexiconLanguageCandidates) {
774
+ (c as? String)?.let { candidatesArray.pushString(it) }
775
+ }
776
+ resultMap.putArray("lexiconLanguageCandidates", candidatesArray)
777
+ }
645
778
  promise.resolve(resultMap)
646
779
  } catch (e: Exception) {
647
780
  android.util.Log.e(NAME, "DETECT_ERROR: TTS model detection failed: ${e.message}", e)
@@ -801,6 +934,19 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
801
934
  ttsHelper.saveTtsAudioToContentUri(samples, sampleRate, directoryUri, filename, promise)
802
935
  }
803
936
 
937
/**
 * Copy a local file into a document under a SAF directory URI (format-agnostic).
 * Pure delegation: validation, the copy itself and promise settlement happen in the TTS helper.
 */
override fun copyFileToContentUri(
  filePath: String,
  directoryUri: String,
  filename: String,
  mimeType: String,
  promise: Promise
) {
  ttsHelper.copyFileToContentUri(
    filePath = filePath,
    directoryUri = directoryUri,
    filename = filename,
    mimeType = mimeType,
    promise = promise
  )
}
949
+
804
950
  /**
805
951
  * Save text content to a file via Android SAF content URI.
806
952
  */
@@ -0,0 +1,150 @@
1
+ package com.sherpaonnx
2
+
3
+ import android.media.AudioFormat
4
+ import android.media.AudioRecord
5
+ import android.media.MediaRecorder
6
+ import android.util.Base64
7
+ import android.util.Log
8
+ import java.nio.ByteBuffer
9
+ import java.nio.ByteOrder
10
+ import kotlin.concurrent.thread
11
+ import kotlin.math.round
12
+
13
/**
 * Native PCM capture from the microphone with optional resampling to a target sample rate.
 *
 * Opens a mono, 16-bit [AudioRecord] at the first sample rate for which the device reports a
 * valid minimum buffer size. The requested [targetSampleRate] is tried first so no resampling is
 * needed when the hardware supports it, then the fallbacks in [CAPTURE_RATES]. When the capture
 * rate differs from the target, every chunk is linearly resampled before it is emitted, so the
 * app always receives PCM at [targetSampleRate] (e.g. 16000 for STT).
 *
 * Each chunk is resampled on its own; no interpolation state is carried across chunk boundaries.
 *
 * @param targetSampleRate sample rate in Hz of the PCM passed to [onChunk]
 * @param channelCount stored but not used by the capture path, which always records mono
 * @param bufferSizeFrames requested buffer size in frames; values <= 0 select a 0.1 s default
 * @param onChunk receives little-endian Int16 PCM as Base64 plus its sample rate; invoked on the
 *   capture thread
 * @param onError receives a human-readable message; invoked on the caller's thread for failures
 *   inside [start] and on the capture thread for failures while recording
 * @param logTag tag used for log output
 */
class SherpaOnnxPcmCapture(
    private val targetSampleRate: Int,
    private val channelCount: Int,
    private val bufferSizeFrames: Int,
    private val onChunk: (base64Pcm: String, sampleRate: Int) -> Unit,
    private val onError: (message: String) -> Unit,
    private val logTag: String = "SherpaOnnxPcmCapture"
) {
    // Written by both the caller's thread and the capture thread.
    @Volatile
    private var audioRecord: AudioRecord? = null

    @Volatile
    private var running = false
    private var captureThread: Thread? = null

    /**
     * Start capture. Uses a supported hardware rate and resamples to [targetSampleRate] before
     * emitting. Does nothing (besides logging) when a capture is already running. Startup
     * failures are reported through [onError] rather than thrown, except for exceptions other
     * than [SecurityException] raised by the [AudioRecord] constructor.
     */
    fun start() {
        if (running) {
            Log.w(logTag, "start: already running")
            return
        }
        val bufferSizeBytes = if (bufferSizeFrames > 0) {
            bufferSizeFrames * 2 // 2 bytes per sample (16-bit mono)
        } else {
            (0.1 * targetSampleRate).toInt() * 2 // 0.1 s default (16-bit mono)
        }

        // Prefer capturing at the requested rate; otherwise fall back to the common rates.
        val candidateRates = (listOf(targetSampleRate) + CAPTURE_RATES.toList())
            .filter { it > 0 }
            .distinct()
        val captureRate = candidateRates.firstOrNull { rate ->
            val size = AudioRecord.getMinBufferSize(rate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
            size != AudioRecord.ERROR && size != AudioRecord.ERROR_BAD_VALUE
        } ?: 44100

        val minBuf = AudioRecord.getMinBufferSize(captureRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
        val bufSize = minBuf.coerceAtLeast(bufferSizeBytes)
        val record = try {
            AudioRecord(
                MediaRecorder.AudioSource.VOICE_RECOGNITION,
                captureRate,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                bufSize
            )
        } catch (e: SecurityException) {
            Log.e(logTag, "start: RECORD_AUDIO permission not granted", e)
            onError("RECORD_AUDIO permission not granted")
            return
        }
        if (record.state != AudioRecord.STATE_INITIALIZED) {
            Log.e(logTag, "start: AudioRecord not initialized")
            onError("AudioRecord failed to initialize")
            record.release()
            return
        }

        audioRecord = record
        running = true
        captureThread = thread(name = "SherpaOnnxPcmCapture") {
            val shortBuf = ShortArray(bufSize / 2)
            try {
                record.startRecording()
                while (running && record.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
                    val read = record.read(shortBuf, 0, shortBuf.size)
                    if (read < 0) {
                        // Negative values are AudioRecord error codes. Retrying would spin the
                        // CPU forever on a persistent failure, so surface it via the catch below.
                        throw IllegalStateException("AudioRecord.read failed with error code $read")
                    }
                    if (read == 0) continue

                    val chunk = shortBuf.copyOf(read)
                    val toEmit = if (captureRate != targetSampleRate) {
                        resampleInt16(chunk, captureRate, targetSampleRate)
                    } else {
                        chunk
                    }
                    val byteBuf = ByteBuffer.allocate(toEmit.size * 2).order(ByteOrder.LITTLE_ENDIAN)
                    byteBuf.asShortBuffer().put(toEmit)
                    val base64 = Base64.encodeToString(byteBuf.array(), Base64.NO_WRAP)
                    onChunk(base64, targetSampleRate)
                }
            } catch (e: Exception) {
                // Errors provoked by stop() (running already false) are expected and not reported.
                if (running) {
                    Log.e(logTag, "Capture thread error", e)
                    onError(e.message ?: "Capture error")
                }
            } finally {
                try {
                    record.stop()
                } catch (_: Exception) { }
                record.release()
                // Reset state only if this thread still owns the session, so a thread that
                // outlives stop()'s join timeout cannot clobber a newer session.
                if (audioRecord === record) {
                    audioRecord = null
                    running = false
                }
            }
        }
    }

    /** Stop capture and release resources. Waits up to two seconds for the capture thread. */
    fun stop() {
        running = false
        // Actively stop AudioRecord to unblock any pending read()
        try {
            audioRecord?.stop()
        } catch (_: Exception) {
            // Ignore; the capture thread's finally block also handles stop/release safely
        }
        captureThread?.join(2000)
        captureThread = null
        audioRecord = null
    }

    companion object {
        /** Fallback capture sample rates, tried in order after the target rate (device-dependent). */
        private val CAPTURE_RATES = intArrayOf(16000, 44100, 48000)

        /**
         * Resample Int16 PCM from [fromRate] to [toRate] using linear interpolation.
         *
         * @return [input] itself when the rates are equal, otherwise a new ShortArray holding
         *   `round(input.size * toRate / fromRate)` samples at the target rate.
         */
        private fun resampleInt16(
            input: ShortArray,
            fromRate: Int,
            toRate: Int
        ): ShortArray {
            if (fromRate == toRate) return input
            val ratio = fromRate.toDouble() / toRate
            val outLength = round(input.size / ratio).toInt().coerceAtLeast(0)
            val lastIndex = input.size - 1
            val result = ShortArray(outLength)
            for (i in 0 until outLength) {
                val srcIdx = i * ratio
                val idx0 = srcIdx.toInt().coerceIn(0, lastIndex)
                val idx1 = (idx0 + 1).coerceAtMost(lastIndex)
                val frac = (srcIdx - idx0).toFloat()
                val v0 = input[idx0].toInt()
                val v1 = input[idx1].toInt()
                result[i] = (v0 + (v1 - v0) * frac).toInt().toShort()
            }
            return result
        }
    }
}
@@ -2,6 +2,7 @@ package com.sherpaonnx
2
2
 
3
3
  import android.content.Context
4
4
  import android.net.Uri
5
+ import android.os.HandlerThread
5
6
  import android.util.Log
6
7
  import com.facebook.react.bridge.Arguments
7
8
  import com.facebook.react.bridge.Promise
@@ -51,6 +52,9 @@ internal class SherpaOnnxSttHelper(
51
52
 
52
53
  private val instances = ConcurrentHashMap<String, SttEngineInstance>()
53
54
 
55
+ private val initThread = HandlerThread("stt-init").also { it.start() }
56
+ private val initHandler = android.os.Handler(initThread.looper)
57
+
54
58
  private fun getInstance(instanceId: String): SttEngineInstance? = instances[instanceId]
55
59
 
56
60
  /** Hotwords are supported for transducer and NeMo transducer models (sherpa-onnx; NeMo: https://github.com/k2-fsa/sherpa-onnx/pull/3077). */
@@ -277,26 +281,33 @@ internal class SherpaOnnxSttHelper(
277
281
  )
278
282
  inst.lastRecognizerConfig = config
279
283
  inst.currentSttModelType = modelTypeStr
280
- inst.recognizer = OfflineRecognizer(config = config)
281
-
282
-
283
-
284
- val resultMap = Arguments.createMap()
285
- resultMap.putBoolean("success", true)
286
- resultMap.putString("modelType", modelTypeStr)
287
- resultMap.putString("decodingMethod", config.decodingMethod)
288
- val detectedModelsArray = Arguments.createArray()
289
- for (model in detectedModels) {
290
- val modelMap = model as? HashMap<*, *>
291
- if (modelMap != null) {
292
- val modelResultMap = Arguments.createMap()
293
- modelResultMap.putString("type", modelMap["type"] as? String ?: "")
294
- modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
295
- detectedModelsArray.pushMap(modelResultMap)
284
+ // Defer recognizer creation to the dedicated background thread so release() of the previous
285
+ // recognizer can complete off the UI thread (avoids "destroyed mutex" / SIGSEGV when switching models).
286
+ initHandler.post {
287
+ try {
288
+ inst.recognizer = OfflineRecognizer(config = config)
289
+ val resultMap = Arguments.createMap()
290
+ resultMap.putBoolean("success", true)
291
+ resultMap.putString("modelType", modelTypeStr)
292
+ resultMap.putString("decodingMethod", config.decodingMethod)
293
+ val detectedModelsArray = Arguments.createArray()
294
+ for (model in detectedModels) {
295
+ val modelMap = model as? HashMap<*, *>
296
+ if (modelMap != null) {
297
+ val modelResultMap = Arguments.createMap()
298
+ modelResultMap.putString("type", modelMap["type"] as? String ?: "")
299
+ modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
300
+ detectedModelsArray.pushMap(modelResultMap)
301
+ }
302
+ }
303
+ resultMap.putArray("detectedModels", detectedModelsArray)
304
+ promise.resolve(resultMap)
305
+ } catch (e: Exception) {
306
+ val errorMsg = "Exception creating recognizer: ${e.message ?: e.javaClass.simpleName}"
307
+ Log.e(logTag, errorMsg, e)
308
+ promise.reject("INIT_ERROR", errorMsg, e)
296
309
  }
297
310
  }
298
- resultMap.putArray("detectedModels", detectedModelsArray)
299
- promise.resolve(resultMap)
300
311
  } catch (e: Exception) {
301
312
  val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
302
313
  Log.e(logTag, errorMsg, e)
@@ -305,6 +316,7 @@ internal class SherpaOnnxSttHelper(
305
316
  }
306
317
 
307
318
  fun transcribeFile(instanceId: String, filePath: String, promise: Promise) {
319
+ var tempPath: String? = null
308
320
  try {
309
321
  val inst = getInstance(instanceId) ?: run {
310
322
  promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
@@ -315,16 +327,46 @@ internal class SherpaOnnxSttHelper(
315
327
  promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
316
328
  return
317
329
  }
318
- val wave = WaveReader.readWave(filePath)
330
+ val pathToRead = if (filePath.startsWith("content://")) {
331
+ tempPath = resolveContentUriToFile(filePath, "stt_transcribe")
332
+ tempPath
333
+ } else {
334
+ filePath
335
+ }
336
+ if (pathToRead == null || pathToRead.isBlank()) {
337
+ promise.reject("TRANSCRIBE_ERROR", "Could not resolve audio file path")
338
+ return
339
+ }
340
+ val f = File(pathToRead)
341
+ if (!f.exists() || f.length() == 0L) {
342
+ promise.reject("TRANSCRIBE_ERROR", "Audio file does not exist or is empty: $pathToRead (size=${f.length()})")
343
+ return
344
+ }
345
+ val wave = WaveReader.readWave(pathToRead)
346
+ val samples = wave.samples
347
+ if (samples == null || samples.isEmpty()) {
348
+ promise.reject("TRANSCRIBE_ERROR", "Could not read audio samples (file=${f.length()} bytes). The file must be WAV format (use convertAudioToWav16k for MP3/FLAC).")
349
+ return
350
+ }
319
351
  val stream: OfflineStream = rec.createStream()
320
- stream.acceptWaveform(wave.samples, wave.sampleRate)
321
- rec.decode(stream)
322
- val result = rec.getResult(stream)
323
- promise.resolve(resultToWritableMap(result))
352
+ try {
353
+ stream.acceptWaveform(samples, wave.sampleRate)
354
+ rec.decode(stream)
355
+ val result = rec.getResult(stream)
356
+ promise.resolve(resultToWritableMap(result))
357
+ } finally {
358
+ stream.release()
359
+ }
324
360
  } catch (e: Exception) {
325
361
  val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe file"
326
362
  Log.e(logTag, "transcribeFile error: $message", e)
327
363
  promise.reject("TRANSCRIBE_ERROR", message, e)
364
+ } finally {
365
+ tempPath?.let { path ->
366
+ try {
367
+ File(path).takeIf { it.exists() }?.delete()
368
+ } catch (_: Exception) { }
369
+ }
328
370
  }
329
371
  }
330
372
 
@@ -588,7 +630,16 @@ internal class SherpaOnnxSttHelper(
588
630
  preprocessor = path(paths, "moonshinePreprocessor"),
589
631
  encoder = path(paths, "moonshineEncoder"),
590
632
  uncachedDecoder = path(paths, "moonshineUncachedDecoder"),
591
- cachedDecoder = path(paths, "moonshineCachedDecoder")
633
+ cachedDecoder = path(paths, "moonshineCachedDecoder"),
634
+ mergedDecoder = ""
635
+ ),
636
+ tokens = path(paths, "tokens"),
637
+ modelType = "moonshine"
638
+ )
639
+ "moonshine_v2" -> OfflineModelConfig(
640
+ moonshine = OfflineMoonshineModelConfig(
641
+ encoder = path(paths, "moonshineEncoder"),
642
+ mergedDecoder = path(paths, "moonshineMergedDecoder")
592
643
  ),
593
644
  tokens = path(paths, "tokens"),
594
645
  modelType = "moonshine"
@@ -31,6 +31,7 @@ import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
31
31
  import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
32
32
  import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
33
33
  import java.io.File
34
+ import java.io.FileInputStream
34
35
  import java.io.FileOutputStream
35
36
  import java.io.InputStream
36
37
  import java.io.OutputStream
@@ -74,6 +75,7 @@ internal class SherpaOnnxTtsHelper(
74
75
 
75
76
  fun hasEngine(): Boolean = synchronized(lock) { tts != null || zipvoiceTts != null }
76
77
  val isZipvoice: Boolean get() = synchronized(lock) { zipvoiceTts != null }
78
+ val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
77
79
  fun releaseEngines() {
78
80
  synchronized(lock) {
79
81
  tts?.release()
@@ -258,7 +260,7 @@ internal class SherpaOnnxTtsHelper(
258
260
 
259
261
  inst.ttsInitState = TtsInitState(
260
262
  modelDir,
261
- modelType,
263
+ modelTypeStr, // detected model type (e.g. "pocket"), not the requested "auto"
262
264
  numThreads.toInt(),
263
265
  debug,
264
266
  noiseScale?.takeUnless { it.isNaN() },
@@ -416,6 +418,11 @@ internal class SherpaOnnxTtsHelper(
416
418
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
417
419
  inst.tts!!.generateWithConfig(text, config)
418
420
  }
421
+ inst.isPocket && !hasReferenceOptions(options) -> {
422
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
423
+ promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
424
+ return
425
+ }
419
426
  else -> dispatchGenerate(inst, text, sid, speed)
420
427
  ?: run {
421
428
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
@@ -469,6 +476,11 @@ internal class SherpaOnnxTtsHelper(
469
476
  val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
470
477
  inst.tts!!.generateWithConfig(text, config)
471
478
  }
479
+ inst.isPocket && !hasReferenceOptions(options) -> {
480
+ Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
481
+ promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
482
+ return
483
+ }
472
484
  else -> dispatchGenerate(inst, text, sid, speed)
473
485
  ?: run {
474
486
  Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
@@ -517,6 +529,11 @@ internal class SherpaOnnxTtsHelper(
517
529
  promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
518
530
  return
519
531
  }
532
+ if (inst.isPocket && !hasReferenceOptions(options)) {
533
+ Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
534
+ promise.reject("TTS_STREAM_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
535
+ return
536
+ }
520
537
  if (hasReferenceOptions(options) && inst.isZipvoice) {
521
538
  Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
522
539
  promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
@@ -782,6 +799,40 @@ internal class SherpaOnnxTtsHelper(
782
799
  }
783
800
  }
784
801
 
802
/**
 * Copy a local file into a new document under a SAF directory URI.
 * Format-agnostic: any file (e.g. WAV, MP3, FLAC) can be written.
 *
 * Resolves with the created content URI string. Rejects with TTS_SAVE_ERROR when the source is
 * missing or unreadable, or when creating or writing the document fails; in the latter case the
 * partially written document is removed on a best-effort basis so no truncated file is left in
 * the user's directory.
 */
fun copyFileToContentUri(
    filePath: String,
    directoryUri: String,
    filename: String,
    mimeType: String,
    promise: Promise
) {
    // Set while a created document may still be incomplete; cleared once the copy succeeded.
    var incompleteUri: Uri? = null
    try {
        val file = File(filePath)
        if (!file.isFile || !file.canRead()) {
            promise.reject("TTS_SAVE_ERROR", "File not found or not readable: $filePath")
            return
        }
        val resolver = context.contentResolver
        val dirUri = Uri.parse(directoryUri)
        val fileUri = createDocumentInDirectory(resolver, dirUri, filename, mimeType)
        incompleteUri = fileUri
        FileInputStream(file).use { inputStream ->
            resolver.openOutputStream(fileUri, "w")?.use { outputStream ->
                inputStream.copyTo(outputStream)
                outputStream.flush()
            } ?: throw IllegalStateException("Failed to open output stream for URI: $fileUri")
        }
        incompleteUri = null
        promise.resolve(fileUri.toString())
    } catch (e: Exception) {
        Log.e("SherpaOnnxTts", "TTS_SAVE_ERROR: Failed to copy file to content URI", e)
        // NOTE(review): assumes createDocumentInDirectory returns a document URI that
        // DocumentsContract.deleteDocument accepts - confirm. Failures here are only logged.
        incompleteUri?.let { partial ->
            try {
                android.provider.DocumentsContract.deleteDocument(context.contentResolver, partial)
            } catch (cleanupError: Exception) {
                Log.w("SherpaOnnxTts", "Could not remove incomplete document: $partial", cleanupError)
            }
        }
        promise.reject("TTS_SAVE_ERROR", "Failed to copy file to content URI", e)
    }
}
835
+
785
836
  fun copyTtsContentUriToCache(fileUri: String, filename: String, promise: Promise) {
786
837
  try {
787
838
  val resolver = context.contentResolver