npm - react-native-sherpa-onnx - Versions diffs - 0.3.2 → 0.3.4 - Mend

react-native-sherpa-onnx 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/README.md +84 -77
package/SherpaOnnx.podspec +79 -45
package/android/build.gradle +8 -2
package/android/prebuilt-download.gradle +70 -16
package/android/prebuilt-versions.gradle +14 -6
package/android/src/main/cpp/CMakeLists.txt +2 -0
package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
package/ios/SherpaOnnx+STT.mm +2 -0
package/ios/SherpaOnnx+TTS.mm +17 -0
package/ios/SherpaOnnx.mm +27 -3
package/ios/SherpaOnnxAudioConvert.h +28 -0
package/ios/SherpaOnnxAudioConvert.mm +698 -0
package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
package/lib/module/NativeSherpaOnnx.js.map +1 -1
package/lib/module/audio/index.js +55 -1
package/lib/module/audio/index.js.map +1 -1
package/lib/module/download/ModelDownloadManager.js +14 -0
package/lib/module/download/ModelDownloadManager.js.map +1 -1
package/lib/module/index.js +10 -0
package/lib/module/index.js.map +1 -1
package/lib/module/stt/streaming.js +6 -3
package/lib/module/stt/streaming.js.map +1 -1
package/lib/module/tts/index.js +13 -1
package/lib/module/tts/index.js.map +1 -1
package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
package/lib/typescript/src/audio/index.d.ts +20 -1
package/lib/typescript/src/audio/index.d.ts.map +1 -1
package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
package/lib/typescript/src/index.d.ts +10 -0
package/lib/typescript/src/index.d.ts.map +1 -1
package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
package/lib/typescript/src/tts/index.d.ts +12 -1
package/lib/typescript/src/tts/index.d.ts.map +1 -1
package/package.json +6 -1
package/scripts/check-model-csvs.sh +72 -0
package/scripts/setup-ios-framework.sh +272 -191
package/src/NativeSherpaOnnx.ts +37 -3
package/src/audio/index.ts +84 -1
package/src/download/ModelDownloadManager.ts +19 -0
package/src/index.tsx +15 -0
package/src/stt/streaming.ts +10 -5
package/src/stt/streamingTypes.ts +1 -1
package/src/tts/index.ts +25 -1
package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
package/ios/scripts/patch-libarchive-includes.sh +0 -61
package/ios/scripts/setup-ios-libarchive.sh +0 -98

package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt CHANGED Viewed

@@ -1,5 +1,6 @@
 package com.sherpaonnx
+import android.net.Uri
 import com.facebook.react.bridge.ReactApplicationContext
 import com.facebook.react.bridge.Promise
 import com.facebook.react.bridge.ReadableArray
@@ -55,6 +56,7 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
     { instanceId, requestId, cancelled -> emitTtsStreamEnd(instanceId, requestId, cancelled) }
   )
   private val archiveHelper = SherpaOnnxArchiveHelper()
+  private var pcmCapture: SherpaOnnxPcmCapture? = null
   override fun getName(): String {
     return NAME
@@ -62,6 +64,8 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
   override fun onCatalystInstanceDestroy() {
     super.onCatalystInstanceDestroy()
+    pcmCapture?.stop()
+    pcmCapture = null
     onlineSttHelper.shutdown()
     ttsHelper.shutdown()
   }
@@ -139,6 +143,29 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
     }
   }
+  /**
+   * Report the device SoC model string and whether it matches the pattern this module
+   * treats as QNN-supported.
+   * Resolves with a map `{ soc, isSupported }`. `soc` is the trimmed value of the
+   * `android.os.Build.SOC_MODEL` field, read reflectively on SDK 31+ (null on older SDKs or
+   * when the value is blank). `isSupported` is true when `soc` is `SM8` followed by exactly
+   * three digits (case-insensitive).
+   * The promise is never rejected: any Exception is logged and resolved as
+   * `{ soc: null, isSupported: false }`.
+   */
+  override fun getDeviceQnnSoc(promise: Promise) {
+    try {
+      val socModel: String? = if (android.os.Build.VERSION.SDK_INT >= 31) {
+        val rawValue = Class.forName("android.os.Build").getDeclaredField("SOC_MODEL").get(null) as? String
+        rawValue?.trim()?.takeIf { it.isNotEmpty() }
+      } else {
+        null
+      }
+      val supported = socModel?.matches(Regex("^SM8\\d{3}$", RegexOption.IGNORE_CASE)) ?: false
+      val response = Arguments.createMap().apply {
+        putString("soc", socModel)
+        putBoolean("isSupported", supported)
+      }
+      promise.resolve(response)
+    } catch (e: Exception) {
+      android.util.Log.w(NAME, "getDeviceQnnSoc: ${e.message}")
+      val fallback = Arguments.createMap().apply {
+        putNull("soc")
+        putBoolean("isSupported", false)
+      }
+      promise.resolve(fallback)
+    }
+  }
   /** Asset path for embedded NNAPI test model (ORT testdata: nnapi_internal_uint8_support). */
   private val nnapiTestModelAsset = "testModels/nnapi_internal_uint8_support.onnx"
@@ -319,12 +346,14 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
         return
       }
       val success = result["success"] as? Boolean ?: false
+      val isHardwareSpecificUnsupported = result["isHardwareSpecificUnsupported"] as? Boolean ?: false
       val detectedModels = result["detectedModels"] as? ArrayList<*>
         ?: arrayListOf<HashMap<String, String>>()
       val modelTypeStr = result["modelType"] as? String
       val resultMap = Arguments.createMap()
       resultMap.putBoolean("success", success)
+      resultMap.putBoolean("isHardwareSpecificUnsupported", isHardwareSpecificUnsupported)
       val modelsArray = Arguments.createArray()
       for (model in detectedModels) {
         val modelMap = model as? HashMap<*, *>
@@ -484,6 +513,71 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
     onlineSttHelper.processSttAudioChunk(streamId, samples, sampleRate.toInt(), promise)
   }
+  /**
+   * Start microphone PCM capture and stream chunks to JS as `pcmLiveStreamData` events.
+   * Any capture that is already active is stopped first.
+   *
+   * Options read from [options]:
+   * - `sampleRate`: target rate of emitted PCM; missing or non-positive values fall back to 16000.
+   * - `channelCount` (optional): clamped to 1..2, default 1.
+   * - `bufferSizeFrames` (optional): default 0, which lets the capture pick its own size.
+   *
+   * Resolves with null once the capture has been started. Rejects with
+   * `PCM_LIVE_STREAM_ERROR` when the capture reports an error during start-up; errors
+   * reported after start-up are delivered as `pcmLiveStreamError` events instead.
+   */
+  override fun startPcmLiveStream(options: ReadableMap, promise: Promise) {
+    try {
+      pcmCapture?.stop()
+      pcmCapture = null
+      // Guard the lookup so a missing key uses the default instead of throwing.
+      val requestedRate = if (options.hasKey("sampleRate")) options.getDouble("sampleRate").toInt() else 0
+      val sampleRate = requestedRate.takeIf { it > 0 } ?: 16000
+      val channelCount = if (options.hasKey("channelCount")) options.getDouble("channelCount").toInt().coerceIn(1, 2) else 1
+      val bufferSizeFrames = if (options.hasKey("bufferSizeFrames")) options.getDouble("bufferSizeFrames").toInt() else 0
+      // onError can fire on the capture thread while this method is still running, so the
+      // hand-over between "report via promise" and "report via event" must be atomic.
+      val stateLock = Any()
+      var startError: String? = null
+      var started = false
+      val capture = SherpaOnnxPcmCapture(
+        targetSampleRate = sampleRate,
+        channelCount = channelCount,
+        bufferSizeFrames = bufferSizeFrames,
+        onChunk = { base64Pcm, sr -> emitPcmLiveStreamData(base64Pcm, sr) },
+        onError = { msg ->
+          val emitAsEvent = synchronized(stateLock) {
+            if (started) {
+              true
+            } else {
+              startError = msg
+              false
+            }
+          }
+          // Emit outside the lock so the JS bridge call never runs while holding it.
+          if (emitAsEvent) emitPcmLiveStreamError(msg)
+        },
+        logTag = NAME
+      )
+      pcmCapture = capture
+      capture.start()
+      // Flip to "started" and read any start-up error in one step: an error is either
+      // visible here (and rejected) or arrives later and is emitted as an event.
+      val err = synchronized(stateLock) {
+        started = true
+        startError
+      }
+      if (err != null) {
+        // Do not keep a capture that failed to start.
+        capture.stop()
+        pcmCapture = null
+        promise.reject("PCM_LIVE_STREAM_ERROR", err)
+      } else {
+        promise.resolve(null)
+      }
+    } catch (e: Exception) {
+      android.util.Log.e(NAME, "startPcmLiveStream failed", e)
+      promise.reject("PCM_LIVE_STREAM_ERROR", e.message ?: "Failed to start PCM capture", e)
+    }
+  }
+  /**
+   * Stop the active PCM capture, if any, and drop the reference to it.
+   * Resolves with null; rejects with `PCM_LIVE_STREAM_ERROR` if stopping throws.
+   */
+  override fun stopPcmLiveStream(promise: Promise) {
+    try {
+      val activeCapture = pcmCapture
+      if (activeCapture != null) {
+        activeCapture.stop()
+      }
+      pcmCapture = null
+      promise.resolve(null)
+    } catch (e: Exception) {
+      val reason = e.message ?: "Failed to stop PCM capture"
+      promise.reject("PCM_LIVE_STREAM_ERROR", reason, e)
+    }
+  }
+  /**
+   * Send one captured chunk to JS as a `pcmLiveStreamData` device event with the keys
+   * `base64Pcm` and `sampleRate`.
+   */
+  private fun emitPcmLiveStreamData(base64Pcm: String, sampleRate: Int) {
+    val emitter = reactApplicationContext
+      .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
+    val event = Arguments.createMap().apply {
+      putString("base64Pcm", base64Pcm)
+      putInt("sampleRate", sampleRate)
+    }
+    emitter.emit("pcmLiveStreamData", event)
+  }
+  /**
+   * Send a capture failure to JS as a `pcmLiveStreamError` device event with the key `message`.
+   */
+  private fun emitPcmLiveStreamError(message: String) {
+    val emitter = reactApplicationContext
+      .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter::class.java)
+    val event = Arguments.createMap().apply {
+      putString("message", message)
+    }
+    emitter.emit("pcmLiveStreamError", event)
+  }
   // ==================== STT Methods ====================
   /**
@@ -507,17 +601,34 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
     sttHelper.setSttConfig(instanceId, options, promise)
   }
+  /**
+   * Resolve [inputPath] to a path the native converter can open.
+   * Plain paths are returned unchanged with a null temp file. A `content://` URI is copied
+   * through `ContentResolver.openInputStream` into a new file in the cache directory, whose
+   * extension is derived from the URI's MIME type (`tmp` when unknown).
+   *
+   * @return the path to convert, plus the temp file the caller must delete (null if none).
+   * @throws IllegalStateException if the content URI cannot be opened for reading.
+   */
+  private fun resolveInputForConvert(inputPath: String): Pair<String, java.io.File?> {
+    if (!inputPath.startsWith("content://")) return Pair(inputPath, null)
+    val uri = Uri.parse(inputPath)
+    val resolver = reactApplicationContext.contentResolver
+    val ext = android.webkit.MimeTypeMap.getSingleton()
+      .getExtensionFromMimeType(resolver.getType(uri)) ?: "tmp"
+    val tmp = java.io.File(reactApplicationContext.cacheDir, "convert_${System.nanoTime()}.$ext")
+    try {
+      resolver.openInputStream(uri)?.use { input ->
+        tmp.outputStream().use { output -> input.copyTo(output) }
+      } ?: throw IllegalStateException("Content URI not readable: $inputPath")
+    } catch (e: Exception) {
+      // The caller never receives tmp when this throws, so remove any partial copy here.
+      tmp.delete()
+      throw e
+    }
+    return Pair(tmp.absolutePath, tmp)
+  }
   /**
    * Convert any supported audio file to a requested format using native FFmpeg prebuilts.
-   * For MP3, outputSampleRateHz can be 32000, 44100, or 48000; null/0 = 44100. WAV output is always 16 kHz mono.
-   * Resolves with null on success, rejects with an error message on failure.
+   * Accepts file paths and content:// URIs. Content URIs are transparently copied to a
+   * temp file first (via ContentResolver), converted, then the temp file is deleted.
    */
   override fun convertAudioToFormat(inputPath: String, outputPath: String, format: String, outputSampleRateHz: Double?, promise: Promise) {
+    var tmpFile: java.io.File? = null
     try {
       var rate = outputSampleRateHz?.toInt() ?: 0
       if (rate < 0) {
-        android.util.Log.e(NAME, "CONVERT_ERROR: Invalid outputSampleRateHz: must be >= 0")
         promise.reject("CONVERT_ERROR", "Invalid outputSampleRateHz: must be >= 0")
         return
       }
@@ -525,43 +636,57 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
       if (format.equals("mp3", ignoreCase = true)) {
         val allowed = setOf(0, 32000, 44100, 48000)
         if (!allowed.contains(rate)) {
-            android.util.Log.e(NAME, "CONVERT_ERROR: MP3 output sample rate invalid: $rate")
-            promise.reject("CONVERT_ERROR", "MP3 output sample rate must be one of 32000, 44100, 48000, or 0 (default). Received: $rate")
+          promise.reject("CONVERT_ERROR", "MP3 output sample rate must be one of 32000, 44100, 48000, or 0 (default). Received: $rate")
+          return
+        }
+      } else if (format.equals("opus", ignoreCase = true) || format.equals("oggm", ignoreCase = true) || format.equals("webm", ignoreCase = true) || format.equals("mkv", ignoreCase = true) || format.equals("ogg", ignoreCase = true)) {
+        val allowed = setOf(0, 8000, 12000, 16000, 24000, 48000)
+        if (!allowed.contains(rate)) {
+          promise.reject("CONVERT_ERROR", "Opus output sample rate must be 8000, 12000, 16000, 24000, 48000, or 0 (default). Received: $rate")
           return
         }
       } else {
         rate = rate.coerceIn(0, 48000)
       }
-      val err = Companion.nativeConvertAudioToFormat(inputPath, outputPath, format, rate)
+      val (pathToUse, tmp) = resolveInputForConvert(inputPath)
+      tmpFile = tmp
+      val err = Companion.nativeConvertAudioToFormat(pathToUse, outputPath, format, rate)
       if (err.isEmpty()) {
         promise.resolve(null)
       } else {
-        android.util.Log.e(NAME, "CONVERT_ERROR: $err")
+        android.util.Log.e(NAME, "CONVERT_ERROR: $err (inputPath=$inputPath)")
         promise.reject("CONVERT_ERROR", err)
       }
     } catch (e: Exception) {
       android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio: ${e.message}", e)
       promise.reject("CONVERT_EXCEPTION", "Failed to convert audio: ${e.message}", e)
+    } finally {
+      tmpFile?.delete()
     }
   }
   /**
    * Convert any supported audio file to WAV 16 kHz mono 16-bit PCM using native FFmpeg prebuilts.
-   * Resolves with null on success, rejects with an error message on failure.
+   * Accepts file paths and content:// URIs. Content URIs are copied to a temp file first.
    */
   override fun convertAudioToWav16k(inputPath: String, outputPath: String, promise: Promise) {
+    var tmpFile: java.io.File? = null
     try {
-      val err = Companion.nativeConvertAudioToWav16k(inputPath, outputPath)
+      val (pathToUse, tmp) = resolveInputForConvert(inputPath)
+      tmpFile = tmp
+      val err = Companion.nativeConvertAudioToWav16k(pathToUse, outputPath)
       if (err.isEmpty()) {
         promise.resolve(null)
       } else {
-            android.util.Log.e(NAME, "CONVERT_ERROR: $err")
-            promise.reject("CONVERT_ERROR", err)
+        android.util.Log.e(NAME, "CONVERT_ERROR: $err")
+        promise.reject("CONVERT_ERROR", err)
       }
     } catch (e: Exception) {
       android.util.Log.e(NAME, "CONVERT_EXCEPTION: Failed to convert audio to WAV16k: ${e.message}", e)
       promise.reject("CONVERT_EXCEPTION", "Failed to convert audio to WAV16k: ${e.message}", e)
+    } finally {
+      tmpFile?.delete()
     }
   }
@@ -642,6 +767,14 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
           resultMap.putString("error", error)
         }
       }
+      val lexiconLanguageCandidates = result["lexiconLanguageCandidates"] as? ArrayList<*>
+      if (!lexiconLanguageCandidates.isNullOrEmpty()) {
+        val candidatesArray = Arguments.createArray()
+        for (c in lexiconLanguageCandidates) {
+          (c as? String)?.let { candidatesArray.pushString(it) }
+        }
+        resultMap.putArray("lexiconLanguageCandidates", candidatesArray)
+      }
       promise.resolve(resultMap)
     } catch (e: Exception) {
       android.util.Log.e(NAME, "DETECT_ERROR: TTS model detection failed: ${e.message}", e)
@@ -801,6 +934,19 @@ class SherpaOnnxModule(reactContext: ReactApplicationContext) :
     ttsHelper.saveTtsAudioToContentUri(samples, sampleRate, directoryUri, filename, promise)
   }
+  /**
+   * Copy a local file into a document under a SAF directory URI (format-agnostic).
+   * Thin bridge method: all arguments, including [promise], are forwarded unchanged to
+   * `ttsHelper.copyFileToContentUri`, which settles the promise.
+   *
+   * @param filePath local file to copy
+   * @param directoryUri SAF directory URI string to create the document under
+   * @param filename name for the created document
+   * @param mimeType MIME type passed along for the created document
+   * @param promise settled by the helper
+   */
+  override fun copyFileToContentUri(
+    filePath: String,
+    directoryUri: String,
+    filename: String,
+    mimeType: String,
+    promise: Promise
+  ) {
+    ttsHelper.copyFileToContentUri(filePath, directoryUri, filename, mimeType, promise)
+  }
   /**
    * Save text content to a file via Android SAF content URI.
    */

package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt ADDED Viewed

@@ -0,0 +1,150 @@
+package com.sherpaonnx
+import android.media.AudioFormat
+import android.media.AudioRecord
+import android.media.MediaRecorder
+import android.util.Base64
+import android.util.Log
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+import kotlin.concurrent.thread
+import kotlin.math.round
+/**
+ * Native PCM capture from the microphone with optional resampling to a target sample rate.
+ * Captures 16-bit mono PCM at the first rate in `CAPTURE_RATES` for which the device reports a
+ * valid minimum buffer size, then resamples every chunk to [targetSampleRate], so the app always
+ * receives PCM at the same sample rate (e.g. 16000 for STT).
+ *
+ * Chunks are delivered through [onChunk] as base64-encoded little-endian Int16 samples, on the
+ * capture thread. Failures are delivered through [onError]: synchronously from [start] for
+ * set-up failures, and on the capture thread for failures while recording.
+ *
+ * NOTE: the recorder is always opened with `CHANNEL_IN_MONO`; [channelCount] is accepted by the
+ * constructor but is not read anywhere in this class.
+ * NOTE: each chunk is resampled independently by linear interpolation with no state carried
+ * between chunks and no low-pass filter.
+ */
+class SherpaOnnxPcmCapture(
+  private val targetSampleRate: Int,
+  private val channelCount: Int,
+  private val bufferSizeFrames: Int,
+  private val onChunk: (base64Pcm: String, sampleRate: Int) -> Unit,
+  private val onError: (message: String) -> Unit,
+  private val logTag: String = "SherpaOnnxPcmCapture"
+) {
+  // Set by start(), cleared by stop() and by the capture thread when it exits.
+  @Volatile
+  private var audioRecord: AudioRecord? = null
+  @Volatile
+  private var running = false
+  private var captureThread: Thread? = null
+  companion object {
+    /** Supported capture sample rates to try in order (device-dependent). */
+    private val CAPTURE_RATES = intArrayOf(16000, 44100, 48000)
+    /**
+     * Resample Int16 PCM from capture rate to target rate using linear interpolation.
+     * Returns [input] itself when the rates are equal; otherwise a new ShortArray of Int16
+     * samples at the target rate, of length `round(input.size * toRate / fromRate)`.
+     */
+    private fun resampleInt16(
+      input: ShortArray,
+      fromRate: Int,
+      toRate: Int
+    ): ShortArray {
+      if (fromRate == toRate) return input
+      val ratio = fromRate.toDouble() / toRate
+      val outLength = round(input.size / ratio).toInt().coerceAtLeast(0)
+      val result = ShortArray(outLength)
+      for (i in 0 until outLength) {
+        val srcIdx = i * ratio
+        // Clamp both neighbours so the last output samples never read past the input.
+        val idx0 = srcIdx.toInt().coerceIn(0, input.size - 1)
+        val idx1 = (idx0 + 1).coerceAtMost(input.size - 1)
+        val frac = (srcIdx - idx0).toFloat()
+        val v0 = input[idx0].toInt()
+        val v1 = input[idx1].toInt()
+        result[i] = (v0 + (v1 - v0) * frac).toInt().toShort()
+      }
+      return result
+    }
+  }
+  /**
+   * Start capture. Uses a supported hardware rate and resamples to [targetSampleRate] before emitting.
+   * Does nothing (apart from a warning) if a capture is already running. Set-up failures
+   * (missing RECORD_AUDIO permission, recorder not initialized) are reported through [onError]
+   * and leave the capture stopped.
+   */
+  fun start() {
+    if (running) {
+      Log.w(logTag, "start: already running")
+      return
+    }
+    val bufferSizeBytes = if (bufferSizeFrames > 0) {
+      bufferSizeFrames * 2 // 2 bytes per sample (16-bit mono)
+    } else {
+      (0.1 * targetSampleRate).toInt() * 2 // 0.1 s default (16-bit mono)
+    }
+    val captureRate = CAPTURE_RATES.firstOrNull { rate ->
+      val size = AudioRecord.getMinBufferSize(rate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
+      size != AudioRecord.ERROR && size != AudioRecord.ERROR_BAD_VALUE
+    } ?: 44100
+    val minBuf = AudioRecord.getMinBufferSize(captureRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
+    val bufSize = minBuf.coerceAtLeast(bufferSizeBytes)
+    val record = try {
+      AudioRecord(
+        MediaRecorder.AudioSource.VOICE_RECOGNITION,
+        captureRate,
+        AudioFormat.CHANNEL_IN_MONO,
+        AudioFormat.ENCODING_PCM_16BIT,
+        bufSize
+      )
+    } catch (e: SecurityException) {
+      Log.e(logTag, "start: RECORD_AUDIO permission not granted", e)
+      onError("RECORD_AUDIO permission not granted")
+      return
+    }
+    if (record.state != AudioRecord.STATE_INITIALIZED) {
+      Log.e(logTag, "start: AudioRecord not initialized")
+      onError("AudioRecord failed to initialize")
+      record.release()
+      return
+    }
+    audioRecord = record
+    running = true
+    captureThread = thread(name = "SherpaOnnxPcmCapture") {
+      val shortBuf = ShortArray(bufSize / 2)
+      try {
+        record.startRecording()
+        while (running && record.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
+          val read = record.read(shortBuf, 0, shortBuf.size)
+          if (read < 0) {
+            // Negative results are AudioRecord error codes; retrying would spin forever.
+            // Stay silent when stop() was requested, since stopping can interrupt a read.
+            if (running) {
+              Log.e(logTag, "AudioRecord.read failed with code $read")
+              onError("AudioRecord read failed (code $read)")
+            }
+            break
+          }
+          if (read == 0) continue
+          val chunk = shortBuf.copyOf(read)
+          val toEmit = if (captureRate != targetSampleRate) {
+            resampleInt16(chunk, captureRate, targetSampleRate)
+          } else {
+            chunk
+          }
+          val byteBuf = ByteBuffer.allocate(toEmit.size * 2).order(ByteOrder.LITTLE_ENDIAN)
+          for (s in toEmit) byteBuf.putShort(s)
+          val base64 = Base64.encodeToString(byteBuf.array(), Base64.NO_WRAP)
+          onChunk(base64, targetSampleRate)
+        }
+      } catch (e: Exception) {
+        if (running) {
+          Log.e(logTag, "Capture thread error", e)
+          onError(e.message ?: "Capture error")
+        }
+      } finally {
+        try {
+          record.stop()
+        } catch (_: Exception) { }
+        record.release()
+        // Only reset shared state if it still belongs to this session: stop() may already
+        // have cleared it, and a later start() may have installed a new recorder.
+        if (audioRecord === record) {
+          audioRecord = null
+          // Lets start() work again after the thread ended on its own (error or state change).
+          running = false
+        }
+      }
+    }
+  }
+  /** Stop capture and release resources. Waits up to 2 seconds for the capture thread to exit. */
+  fun stop() {
+    running = false
+    // Actively stop AudioRecord to unblock any pending read()
+    val record = audioRecord
+    if (record != null) {
+      try {
+        record.stop()
+      } catch (_: Exception) {
+        // Ignore; the capture thread's finally block also handles stop/release safely
+      }
+    }
+    captureThread?.join(2000)
+    captureThread = null
+    audioRecord = null
+  }
+}

package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt CHANGED Viewed

@@ -2,6 +2,7 @@ package com.sherpaonnx
 import android.content.Context
 import android.net.Uri
+import android.os.HandlerThread
 import android.util.Log
 import com.facebook.react.bridge.Arguments
 import com.facebook.react.bridge.Promise
@@ -51,6 +52,9 @@ internal class SherpaOnnxSttHelper(
   private val instances = ConcurrentHashMap<String, SttEngineInstance>()
+  private val initThread = HandlerThread("stt-init").also { it.start() }
+  private val initHandler = android.os.Handler(initThread.looper)
   private fun getInstance(instanceId: String): SttEngineInstance? = instances[instanceId]
   /** Hotwords are supported for transducer and NeMo transducer models (sherpa-onnx; NeMo: https://github.com/k2-fsa/sherpa-onnx/pull/3077). */
@@ -277,26 +281,33 @@ internal class SherpaOnnxSttHelper(
       )
       inst.lastRecognizerConfig = config
       inst.currentSttModelType = modelTypeStr
-      inst.recognizer = OfflineRecognizer(config = config)
-      val resultMap = Arguments.createMap()
-      resultMap.putBoolean("success", true)
-      resultMap.putString("modelType", modelTypeStr)
-      resultMap.putString("decodingMethod", config.decodingMethod)
-      val detectedModelsArray = Arguments.createArray()
-      for (model in detectedModels) {
-        val modelMap = model as? HashMap<*, *>
-        if (modelMap != null) {
-          val modelResultMap = Arguments.createMap()
-          modelResultMap.putString("type", modelMap["type"] as? String ?: "")
-          modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
-          detectedModelsArray.pushMap(modelResultMap)
+      // Defer recognizer creation to the dedicated background thread so release() of the previous
+      // recognizer can complete off the UI thread (avoids "destroyed mutex" / SIGSEGV when switching models).
+      initHandler.post {
+        try {
+          inst.recognizer = OfflineRecognizer(config = config)
+          val resultMap = Arguments.createMap()
+          resultMap.putBoolean("success", true)
+          resultMap.putString("modelType", modelTypeStr)
+          resultMap.putString("decodingMethod", config.decodingMethod)
+          val detectedModelsArray = Arguments.createArray()
+          for (model in detectedModels) {
+            val modelMap = model as? HashMap<*, *>
+            if (modelMap != null) {
+              val modelResultMap = Arguments.createMap()
+              modelResultMap.putString("type", modelMap["type"] as? String ?: "")
+              modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
+              detectedModelsArray.pushMap(modelResultMap)
+            }
+          }
+          resultMap.putArray("detectedModels", detectedModelsArray)
+          promise.resolve(resultMap)
+        } catch (e: Exception) {
+          val errorMsg = "Exception creating recognizer: ${e.message ?: e.javaClass.simpleName}"
+          Log.e(logTag, errorMsg, e)
+          promise.reject("INIT_ERROR", errorMsg, e)
         }
       }
-      resultMap.putArray("detectedModels", detectedModelsArray)
-      promise.resolve(resultMap)
     } catch (e: Exception) {
       val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
       Log.e(logTag, errorMsg, e)
@@ -305,6 +316,7 @@ internal class SherpaOnnxSttHelper(
   }
   fun transcribeFile(instanceId: String, filePath: String, promise: Promise) {
+    var tempPath: String? = null
     try {
       val inst = getInstance(instanceId) ?: run {
         promise.reject("TRANSCRIBE_ERROR", "STT instance not found: $instanceId")
@@ -315,16 +327,46 @@ internal class SherpaOnnxSttHelper(
         promise.reject("TRANSCRIBE_ERROR", "STT not initialized. Call initializeStt first.")
         return
       }
-      val wave = WaveReader.readWave(filePath)
+      val pathToRead = if (filePath.startsWith("content://")) {
+        tempPath = resolveContentUriToFile(filePath, "stt_transcribe")
+        tempPath
+      } else {
+        filePath
+      }
+      if (pathToRead == null || pathToRead.isBlank()) {
+        promise.reject("TRANSCRIBE_ERROR", "Could not resolve audio file path")
+        return
+      }
+      val f = File(pathToRead)
+      if (!f.exists() || f.length() == 0L) {
+        promise.reject("TRANSCRIBE_ERROR", "Audio file does not exist or is empty: $pathToRead (size=${f.length()})")
+        return
+      }
+      val wave = WaveReader.readWave(pathToRead)
+      val samples = wave.samples
+      if (samples == null || samples.isEmpty()) {
+        promise.reject("TRANSCRIBE_ERROR", "Could not read audio samples (file=${f.length()} bytes). The file must be WAV format (use convertAudioToWav16k for MP3/FLAC).")
+        return
+      }
       val stream: OfflineStream = rec.createStream()
-      stream.acceptWaveform(wave.samples, wave.sampleRate)
-      rec.decode(stream)
-      val result = rec.getResult(stream)
-      promise.resolve(resultToWritableMap(result))
+      try {
+        stream.acceptWaveform(samples, wave.sampleRate)
+        rec.decode(stream)
+        val result = rec.getResult(stream)
+        promise.resolve(resultToWritableMap(result))
+      } finally {
+        stream.release()
+      }
     } catch (e: Exception) {
       val message = e.message?.takeIf { it.isNotBlank() } ?: "Failed to transcribe file"
       Log.e(logTag, "transcribeFile error: $message", e)
       promise.reject("TRANSCRIBE_ERROR", message, e)
+    } finally {
+      tempPath?.let { path ->
+        try {
+          File(path).takeIf { it.exists() }?.delete()
+        } catch (_: Exception) { }
+      }
     }
   }
@@ -588,7 +630,16 @@ internal class SherpaOnnxSttHelper(
           preprocessor = path(paths, "moonshinePreprocessor"),
           encoder = path(paths, "moonshineEncoder"),
           uncachedDecoder = path(paths, "moonshineUncachedDecoder"),
-          cachedDecoder = path(paths, "moonshineCachedDecoder")
+          cachedDecoder = path(paths, "moonshineCachedDecoder"),
+          mergedDecoder = ""
+        ),
+        tokens = path(paths, "tokens"),
+        modelType = "moonshine"
+      )
+      "moonshine_v2" -> OfflineModelConfig(
+        moonshine = OfflineMoonshineModelConfig(
+          encoder = path(paths, "moonshineEncoder"),
+          mergedDecoder = path(paths, "moonshineMergedDecoder")
         ),
         tokens = path(paths, "tokens"),
         modelType = "moonshine"

package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt CHANGED Viewed

@@ -31,6 +31,7 @@ import com.k2fsa.sherpa.onnx.OfflineTtsMatchaModelConfig
 import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig
 import com.k2fsa.sherpa.onnx.OfflineTtsKittenModelConfig
 import java.io.File
+import java.io.FileInputStream
 import java.io.FileOutputStream
 import java.io.InputStream
 import java.io.OutputStream
@@ -74,6 +75,7 @@ internal class SherpaOnnxTtsHelper(
     fun hasEngine(): Boolean = synchronized(lock) { tts != null || zipvoiceTts != null }
     val isZipvoice: Boolean get() = synchronized(lock) { zipvoiceTts != null }
+    val isPocket: Boolean get() = ttsInitState?.modelType == "pocket"
     fun releaseEngines() {
       synchronized(lock) {
         tts?.release()
@@ -258,7 +260,7 @@ internal class SherpaOnnxTtsHelper(
       inst.ttsInitState = TtsInitState(
         modelDir,
-        modelType,
+        modelTypeStr,  // detected model type (e.g. "pocket"), not the requested "auto"
         numThreads.toInt(),
         debug,
         noiseScale?.takeUnless { it.isNaN() },
@@ -416,6 +418,11 @@ internal class SherpaOnnxTtsHelper(
           val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
           inst.tts!!.generateWithConfig(text, config)
         }
+        inst.isPocket && !hasReferenceOptions(options) -> {
+          Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
+          promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
+          return
+        }
         else -> dispatchGenerate(inst, text, sid, speed)
           ?: run {
             Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
@@ -469,6 +476,11 @@ internal class SherpaOnnxTtsHelper(
           val config = parseGenerationConfig(options) ?: GenerationConfig(speed = speed, sid = sid)
           inst.tts!!.generateWithConfig(text, config)
         }
+        inst.isPocket && !hasReferenceOptions(options) -> {
+          Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: Pocket TTS requires reference audio for voice cloning")
+          promise.reject("TTS_GENERATE_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
+          return
+        }
         else -> dispatchGenerate(inst, text, sid, speed)
           ?: run {
             Log.e("SherpaOnnxTts", "TTS_GENERATE_ERROR: TTS not initialized")
@@ -517,6 +529,11 @@ internal class SherpaOnnxTtsHelper(
       promise.reject("TTS_STREAM_ERROR", "TTS not initialized")
       return
     }
+    if (inst.isPocket && !hasReferenceOptions(options)) {
+      Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Pocket TTS requires reference audio for voice cloning")
+      promise.reject("TTS_STREAM_ERROR", "Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate in options.")
+      return
+    }
     if (hasReferenceOptions(options) && inst.isZipvoice) {
       Log.e("SherpaOnnxTts", "TTS_STREAM_ERROR: Streaming with reference audio not supported for Zipvoice")
       promise.reject("TTS_STREAM_ERROR", "Streaming with reference audio not supported for Zipvoice")
@@ -782,6 +799,40 @@ internal class SherpaOnnxTtsHelper(
     }
   }
+  /**
+   * Write the contents of a local file into a newly created document inside a SAF directory.
+   * The copy is byte-for-byte, so any format (e.g. WAV, MP3, FLAC) works.
+   * Resolves with the created document's content URI as a string. Rejects with
+   * `TTS_SAVE_ERROR` when the source is not a readable file or when creating or writing
+   * the document fails.
+   */
+  fun copyFileToContentUri(
+    filePath: String,
+    directoryUri: String,
+    filename: String,
+    mimeType: String,
+    promise: Promise
+  ) {
+    try {
+      val source = File(filePath)
+      val readable = source.isFile && source.canRead()
+      if (!readable) {
+        promise.reject("TTS_SAVE_ERROR", "File not found or not readable: $filePath")
+        return
+      }
+      val contentResolver = context.contentResolver
+      val targetUri = createDocumentInDirectory(contentResolver, Uri.parse(directoryUri), filename, mimeType)
+      FileInputStream(source).use { src ->
+        val sink = contentResolver.openOutputStream(targetUri, "w")
+          ?: throw IllegalStateException("Failed to open output stream for URI: $targetUri")
+        sink.use { dst ->
+          src.copyTo(dst)
+          dst.flush()
+        }
+      }
+      promise.resolve(targetUri.toString())
+    } catch (e: Exception) {
+      Log.e("SherpaOnnxTts", "TTS_SAVE_ERROR: Failed to copy file to content URI", e)
+      promise.reject("TTS_SAVE_ERROR", "Failed to copy file to content URI", e)
+    }
+  }
   fun copyTtsContentUriToCache(fileUri: String, filename: String, promise: Promise) {
     try {
       val resolver = context.contentResolver