npm - react-native-sherpa-onnx - Versions diffs - 0.3.0 → 0.3.2 - Mend

react-native-sherpa-onnx 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)

package/README.md +21 -7
package/SherpaOnnx.podspec +1 -1
package/android/build.gradle +35 -26
package/android/prebuilt-download.gradle +27 -14
package/android/src/main/cpp/CMakeLists.txt +51 -17
package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +14 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +16 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +3 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +19 -2
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +2 -1
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +1 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +114 -8
package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +535 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +10 -10
package/ios/SherpaOnnx+OnlineSTT.mm +365 -0
package/ios/SherpaOnnx+TTS.mm +35 -9
package/ios/SherpaOnnx.mm +6 -0
package/ios/model_detect/sherpa-onnx-model-detect-helper.h +3 -0
package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +16 -0
package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +19 -2
package/ios/model_detect/sherpa-onnx-model-detect.h +2 -1
package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +85 -0
package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +270 -0
package/lib/module/NativeSherpaOnnx.js.map +1 -1
package/lib/module/index.js +2 -2
package/lib/module/stt/index.js +4 -0
package/lib/module/stt/index.js.map +1 -1
package/lib/module/stt/streaming.js +257 -0
package/lib/module/stt/streaming.js.map +1 -0
package/lib/module/stt/streamingTypes.js +38 -0
package/lib/module/stt/streamingTypes.js.map +1 -0
package/lib/module/tts/index.js +4 -43
package/lib/module/tts/index.js.map +1 -1
package/lib/module/tts/streaming.js +220 -0
package/lib/module/tts/streaming.js.map +1 -0
package/lib/module/tts/streamingTypes.js +4 -0
package/lib/module/tts/streamingTypes.js.map +1 -0
package/lib/module/tts/types.js +8 -1
package/lib/module/tts/types.js.map +1 -1
package/lib/typescript/src/NativeSherpaOnnx.d.ts +66 -1
package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
package/lib/typescript/src/stt/index.d.ts +3 -0
package/lib/typescript/src/stt/index.d.ts.map +1 -1
package/lib/typescript/src/stt/streaming.d.ts +42 -0
package/lib/typescript/src/stt/streaming.d.ts.map +1 -0
package/lib/typescript/src/stt/streamingTypes.d.ts +122 -0
package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -0
package/lib/typescript/src/tts/index.d.ts +3 -1
package/lib/typescript/src/tts/index.d.ts.map +1 -1
package/lib/typescript/src/tts/streaming.d.ts +24 -0
package/lib/typescript/src/tts/streaming.d.ts.map +1 -0
package/lib/typescript/src/tts/streamingTypes.d.ts +27 -0
package/lib/typescript/src/tts/streamingTypes.d.ts.map +1 -0
package/lib/typescript/src/tts/types.d.ts +19 -6
package/lib/typescript/src/tts/types.d.ts.map +1 -1
package/package.json +1 -2
package/src/NativeSherpaOnnx.ts +95 -0
package/src/index.tsx +2 -2
package/src/stt/index.ts +17 -0
package/src/stt/streaming.ts +361 -0
package/src/stt/streamingTypes.ts +151 -0
package/src/tts/index.ts +6 -66
package/src/tts/streaming.ts +336 -0
package/src/tts/streamingTypes.ts +54 -0
package/src/tts/types.ts +20 -10
package/android/codegen.gradle +0 -57

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # react-native-sherpa-onnx
-React Native SDK for sherpa-onnx - providing offline speech processing capabilities
+React Native SDK for sherpa-onnx – offline and streaming speech processing
 <div align="center">
   <img src="./docs/images/banner.png" alt="Banner" width="560" />
@@ -19,7 +19,7 @@ React Native SDK for sherpa-onnx - providing offline speech processing capabilit
 > **⚠️ SDK 0.3.0 – Breaking changes from 0.2.0**
 > Since the last release I have restructured and improved the SDK significantly: full iOS support, smoother behaviour, fewer failure points, and a much smaller footprint (~95% size reduction). As a result, **logic and the public API have changed**. If you are upgrading from 0.2.x, please follow the [Breaking changes (upgrading to 0.3.0)](docs/migration.md#breaking-changes-upgrading-to-030) section and the updated API documentation
-A React Native TurboModule that provides offline speech processing capabilities using [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). The SDK aims to support all functionalities that sherpa-onnx offers, including offline speech-to-text, text-to-speech, speaker diarization, speech enhancement, source separation, and VAD (Voice Activity Detection).
+A React Native TurboModule that provides offline and streaming speech processing capabilities using [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). The SDK aims to support all functionalities that sherpa-onnx offers, including offline and **online (streaming)** speech-to-text, text-to-speech (batch and streaming), speaker diarization, speech enhancement, source separation, and VAD (Voice Activity Detection).
 ## Table of contents
@@ -47,8 +47,10 @@ A React Native TurboModule that provides offline speech processing capabilities
 | Feature | Status | Notes |
 |---------|--------|-------|
-| Offline Speech-to-Text | ✅ **Supported** | No internet required; multiple model types (Zipformer, Paraformer, Whisper, etc.). See [Supported Model Types](#supported-model-types). |
-| Text-to-Speech | ✅ **Supported** | Multiple model types (VITS, Matcha, Kokoro, etc.). See [Supported Model Types](#supported-model-types). |
+| Offline Speech-to-Text | ✅ **Supported** | No internet required; multiple model types (Zipformer, Paraformer, Whisper, etc.). See [Supported Model Types](#supported-model-types) and [STT documentation](./docs/stt.md). |
+| Online (streaming) Speech-to-Text | ✅ **Supported** | Real-time recognition from microphone or stream; partial results, endpoint detection. Use streaming-capable models (e.g. transducer, paraformer). See [Streaming STT](./docs/stt_streaming.md). |
+| Text-to-Speech | ✅ **Supported** | Multiple model types (VITS, Matcha, Kokoro, etc.). See [Supported Model Types](#supported-model-types) and [TTS documentation](./docs/tts.md). |
+| Streaming Text-to-Speech | ✅ **Supported** | Incremental speech generation for low time-to-first-byte and playback while generating. See [Streaming TTS](./docs/tts_streaming.md). |
 | Execution providers (CPU, NNAPI, XNNPACK, Core ML, QNN) | ✅ **Supported** | See [Execution provider support](./docs/execution-providers.md). |
 | Play Asset Delivery (PAD) | ✅ **Supported** | Android only. See [Model Setup](./docs/MODEL_SETUP.md). |
 | Automatic Model type detection | ✅ **Supported** | `detectSttModel()` and `detectTtsModel()` for a path. See [Model Setup: Model type detection](./docs/MODEL_SETUP.md#model-type-detection-without-initialization). |
@@ -80,6 +82,9 @@ A React Native TurboModule that provides offline speech processing capabilities
 | **WeNet CTC**            | `'wenet_ctc'`     | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt`                            | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/wenet/index.html)  |
 | **SenseVoice**           | `'sense_voice'`   | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt`                            | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/sense-voice/index.html)        |
 | **FunASR Nano**          | `'funasr_nano'`   | Requires `encoder_adaptor.onnx`, `llm.onnx`, `embedding.onnx`, and `tokenizer` directory | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/funasr-nano/index.html)        |
+| **Tone CTC (t-one)**     | `'tone_ctc'`      | Single `model.onnx` + `tokens.txt`. Folder name usually contains `t-one`, `t_one` or `tone` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html) |
+For **real-time (streaming) recognition** from a microphone or audio stream, use streaming-capable model types: `transducer`, `paraformer`, `zipformer2_ctc`, `nemo_ctc`, or `tone_ctc`. See [Streaming (Online) Speech-to-Text](./docs/stt_streaming.md).
 ### Text-to-Speech (TTS) Models
@@ -92,6 +97,8 @@ A React Native TurboModule that provides offline speech processing capabilities
 | **Zipvoice**     | `'zipvoice'`      | Voice cloning capable. Requires `encoder.onnx`, `decoder.onnx`, `vocoder.onnx`, `tokens.txt`        | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/zipvoice.html) |
 | **Pocket**       | `'pocket'`        | Flow-matching TTS. Requires `lm_flow.onnx`, `lm_main.onnx`, `encoder.onnx`, `decoder.onnx`, `text_conditioner.onnx`, `vocab.json`, `token_scores.json` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
+For **streaming TTS** (incremental generation, low latency), use `createStreamingTTS()` with supported model types. See [Streaming Text-to-Speech](./docs/tts_streaming.md).
 ## Installation
 ```sh
@@ -142,7 +149,10 @@ The XCFramework must include the C++ API (`libsherpa-onnx-cxx-api.a` merged or l
 ## Documentation
-- [Text-to-Speech (TTS)](./docs/tts.md)
+- [Speech-to-Text (STT)](./docs/stt.md) – Offline transcription (file or samples)
+- [Streaming (Online) Speech-to-Text](./docs/stt_streaming.md) – Real-time recognition, partial results, endpoint detection
+- [Text-to-Speech (TTS)](./docs/tts.md) – Offline and streaming generation
+- [Streaming Text-to-Speech](./docs/tts_streaming.md) – Incremental TTS (createStreamingTTS)
 - [Execution provider support (QNN, NNAPI, XNNPACK, Core ML)](./docs/execution-providers.md) – Checking and using acceleration backends
 - [Voice Activity Detection (VAD)](./docs/vad.md)
 - [Speaker Diarization](./docs/diarization.md)
@@ -150,6 +160,8 @@ The XCFramework must include the C++ API (`libsherpa-onnx-cxx-api.a` merged or l
 - [Source Separation](./docs/separation.md)
 - [Model Setup](./docs/MODEL_SETUP.md) – Bundled assets, Play Asset Delivery (PAD), model discovery APIs, and troubleshooting
 - [Model Download Manager](./docs/download-manager.md)
+- [Disable FFMPEG](./docs/disable-ffmpeg.md)
+- [Disable LIBARCHIVE](./docs/disable-libarchive.md)
 Note: For when to use `listAssetModels()` vs `listModelsAtPath()` and how to combine bundled and PAD/file-based models, see [Model Setup](./docs/MODEL_SETUP.md).
@@ -165,11 +177,13 @@ We provide example applications to help you get started with `react-native-sherp
 ### Example App (Audio to Text)
-The example app included in this repository demonstrates basic audio-to-text transcription capabilities. It includes:
+The example app included in this repository demonstrates audio-to-text transcription, text-to-speech, and streaming features. It includes:
 - Multiple model type support (Zipformer, Paraformer, NeMo CTC, Whisper, WeNet CTC, SenseVoice, FunASR Nano)
 - Model selection and configuration
-- Audio file transcription
+- **Offline** audio file transcription
+- **Online (streaming) STT** – live transcription from the microphone with partial results
+- **Streaming TTS** – incremental speech generation and playback
 - Test audio files for different languages
 **Getting started:**

package/SherpaOnnx.podspec CHANGED Viewed

@@ -65,7 +65,7 @@ Pod::Spec.new do |s|
   device_slice = File.join(xcframework_root, "ios-arm64")
   s.pod_target_xcconfig = {
-    "HEADER_SEARCH_PATHS" => "$(inherited) \"#{pod_root}/ios\" \"#{pod_root}/ios/archive\" \"#{pod_root}/ios/model_detect\" \"#{pod_root}/ios/stt\" \"#{pod_root}/ios/tts\" \"#{libarchive_dir}\" \"#{device_headers}\" \"#{simulator_headers}\"",
+    "HEADER_SEARCH_PATHS" => "$(inherited) \"#{pod_root}/ios\" \"#{pod_root}/ios/archive\" \"#{pod_root}/ios/model_detect\" \"#{pod_root}/ios/stt\" \"#{pod_root}/ios/tts\" \"#{pod_root}/ios/online_stt\" \"#{libarchive_dir}\" \"#{device_headers}\" \"#{simulator_headers}\"",
     "GCC_PREPROCESSOR_DEFINITIONS" => '$(inherited) PLATFORM_CONFIG_H=\\"libarchive_darwin_config.h\\"',
     "CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
     "CLANG_CXX_LIBRARY" => "libc++",

package/android/build.gradle CHANGED Viewed

@@ -1,5 +1,5 @@
 // Prebuilt versions (sherpa-onnx, FFmpeg, libarchive) from prebuilt-versions.gradle.
-// Resolution: env → ANDROID_RELEASE_TAG files → project property → default.
+// Resolution: env --> ANDROID_RELEASE_TAG files --> project property --> default.
 apply from: file("${project.projectDir}/prebuilt-versions.gradle")
 // Used by dependencies below and by prebuilt-download.gradle (extractSherpaOnnxClasses, downloadNativeLibsIfNeeded).
@@ -39,13 +39,7 @@ buildscript {
 apply plugin: "com.android.library"
 apply plugin: "kotlin-android"
-// Standalone AAR build: set in root build.gradle (ext.standaloneAarBuild = true) or via -PstandaloneAarBuild=true
-def isStandaloneAarBuild = rootProject.findProperty("standaloneAarBuild") in [true, "true"]
-// Only apply React Native plugin when building inside a React Native app (e.g. example app).
-if (!isStandaloneAarBuild) {
-  apply plugin: "com.facebook.react"
-}
+apply plugin: "com.facebook.react"
 android {
   namespace "com.sherpaonnx"
@@ -55,9 +49,10 @@ android {
   // Native .so and headers: filled by prebuilt-download.gradle (local jniLibs, Maven AAR, or GitHub release).
   // Alternatively by third_party/*/copy_prebuilts_to_sdk.js or shipped in npm package.
-  // Codegen Java output: always use the flat path. For app builds RNGP writes here directly;
-  // for standalone AAR builds the codegen task normalises the nested output to this path.
-  sourceSets.main.java.srcDirs += file("${buildDir}/generated/source/codegen/java")
+  // When true, FFmpeg is not linked; convertAudioToWav16k/convertAudioToFormat will fail at runtime. See docs/disable-ffmpeg.md.
+  def sherpaOnnxDisableFfmpeg = (project.findProperty("sherpaOnnxDisableFfmpeg") ?: "false").toString().toLowerCase() in ["true", "1"]
+  // When true, libarchive is not linked; extractTarBz2/cancelExtractTarBz2 will fail at runtime. See docs/disable-libarchive.md.
+  def sherpaOnnxDisableLibarchive = (project.findProperty("sherpaOnnxDisableLibarchive") ?: "false").toString().toLowerCase() in ["true", "1"]
   defaultConfig {
     minSdkVersion getExtOrDefault("minSdkVersion")
@@ -71,7 +66,10 @@ android {
     externalNativeBuild {
       cmake {
         cppFlags "-std=c++17", "-Wall", "-Wextra", "-fvisibility=hidden"
-        arguments "-DANDROID_STL=c++_shared"
+        def cmakeArgList = ["-DANDROID_STL=c++_shared"]
+        if (sherpaOnnxDisableFfmpeg) cmakeArgList.add("-DSHERPA_ONNX_DISABLE_FFMPEG=ON")
+        if (sherpaOnnxDisableLibarchive) cmakeArgList.add("-DSHERPA_ONNX_DISABLE_LIBARCHIVE=ON")
+        arguments(*cmakeArgList)
       }
     }
@@ -99,6 +97,27 @@ android {
     }
   }
+  // When FFmpeg is disabled, exclude FFmpeg .so from our output to avoid merge conflicts with other libs (e.g. react-native-audio-api).
+  if (sherpaOnnxDisableFfmpeg) {
+    packaging {
+      jniLibs {
+        excludes += [
+          "**/libavcodec.so", "**/libavformat.so", "**/libavutil.so",
+          "**/libavfilter.so", "**/libswresample.so", "**/libshine.so"
+        ]
+      }
+    }
+  }
+  // When libarchive is disabled, exclude libarchive.so from our output to avoid merge conflicts.
+  if (sherpaOnnxDisableLibarchive) {
+    packaging {
+      jniLibs {
+        excludes += ["**/libarchive.so"]
+      }
+    }
+  }
   lint {
     disable "GradleCompatible"
   }
@@ -119,7 +138,7 @@ repositories {
   maven { url "https://xdcobra.github.io/maven" }
 }
-// Configurations used by prebuilt-download.gradle: downloadNativeLibsIfNeeded (AAR → jniLibs + headers),
+// Configurations used by prebuilt-download.gradle: downloadNativeLibsIfNeeded (AAR --> jniLibs + headers),
 // extractSherpaOnnxClasses (sherpa-onnx classes.jar), extractOnnxruntimeClasses (onnxruntime classes.jar).
 configurations { sherpaOnnxAar; ffmpegAar; libarchiveAar; onnxruntimeAar }
@@ -129,8 +148,7 @@ apply from: file("${project.projectDir}/prebuilt-download.gradle")
 def kotlin_version = getExtOrDefault("kotlinVersion")
 dependencies {
-  // React Native dependency: explicit version ensures IDE (VS Code Kotlin LSP) and standalone
-  // AAR builds can resolve com.facebook.react types. When consumed by a React Native app,
+  // React Native dependency: explicit version for IDE (VS Code Kotlin LSP). When consumed by a React Native app,
   // Gradle's dependency resolution picks the app's react-android version (typically higher).
   implementation "com.facebook.react:react-android:0.83.0"
   implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
@@ -155,13 +173,10 @@ dependencies {
   implementation "com.google.android.play:asset-delivery:2.3.0"
 }
-// TurboModule codegen; separate script to avoid Gradle 9 Groovy "source is null" with NodeTask.
-apply from: file("${project.projectDir}/codegen.gradle")
-// Wire prebuilt tasks: extractSherpaOnnxClasses → downloadNativeLibsIfNeeded when not standalone;
+// Wire prebuilt tasks: extractSherpaOnnxClasses --> downloadNativeLibsIfNeeded;
 // CMake/compile tasks depend on extractSherpaOnnxClasses and checkJniLibs.
 afterEvaluate {
-  if (!isStandaloneAarBuild && tasks.findByName("downloadNativeLibsIfNeeded") != null) {
+  if (tasks.findByName("downloadNativeLibsIfNeeded") != null) {
     tasks.named("extractSherpaOnnxClasses").configure { dependsOn tasks.named("downloadNativeLibsIfNeeded") }
     tasks.named("extractOnnxruntimeClasses").configure { dependsOn tasks.named("downloadNativeLibsIfNeeded") }
   }
@@ -170,14 +185,8 @@ afterEvaluate {
     dependsOn tasks.named("extractOnnxruntimeClasses")
     dependsOn tasks.named("checkJniLibs")
   }
-  // With includesGeneratedCode=false, codegen is always run by our custom task (not RNGP).
-  // Kotlin compile must depend on it for both standalone AAR and app-dependency builds.
-  def codegenJavaDir = project.file("${project.buildDir}/generated/source/codegen/java")
   tasks.matching { it.name == "compileReleaseKotlin" || it.name == "compileDebugKotlin" }.configureEach {
     dependsOn tasks.named("extractSherpaOnnxClasses")
     dependsOn tasks.named("extractOnnxruntimeClasses")
-    dependsOn tasks.named('generateCodegenSpec')
-    inputs.dir(codegenJavaDir)
   }
 }

package/android/prebuilt-download.gradle CHANGED Viewed

@@ -1,6 +1,11 @@
 // Prebuilt download and check logic. Applied after android {} and configurations.
 // Depends on: sherpaOnnxVersion, ffmpegVersion, libarchiveVersion, ortVersion (from prebuilt-versions.gradle)
 // and configurations.sherpaOnnxAar, ffmpegAar, libarchiveAar, onnxruntimeAar (from build.gradle).
+// When sherpaOnnxDisableFfmpeg=true (gradle.properties), FFmpeg is not required; see docs/disable-ffmpeg.md.
+// When sherpaOnnxDisableLibarchive=true (gradle.properties), libarchive is not required; see docs/disable-libarchive.md.
+def sherpaOnnxDisableFfmpeg = (project.findProperty("sherpaOnnxDisableFfmpeg") ?: "false").toString().toLowerCase() in ["true", "1"]
+def sherpaOnnxDisableLibarchive = (project.findProperty("sherpaOnnxDisableLibarchive") ?: "false").toString().toLowerCase() in ["true", "1"]
 def requiredAbis = ["arm64-v8a", "armeabi-v7a", "x86", "x86_64"]
 def requiredFfmpegSoFiles = [
@@ -129,7 +134,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
       }
     }
-    if (!hasAllFfmpegLibs() || !hasFfmpegHeaders()) {
+    if (!sherpaOnnxDisableFfmpeg && (!hasAllFfmpegLibs() || !hasFfmpegHeaders())) {
       try {
         def aarFiles = project.configurations.ffmpegAar.files
         if (!aarFiles.isEmpty()) {
@@ -159,7 +164,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
       }
     }
-    if (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders()) {
+    if (!sherpaOnnxDisableLibarchive && (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders())) {
       try {
         def aarFiles = project.configurations.libarchiveAar.files
         if (!aarFiles.isEmpty()) {
@@ -221,10 +226,14 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
     def repo = project.findProperty('prebuiltGitHubRepo') ?: getGitHubRepo()
     if (!repo) {
-      if (!hasAllFfmpegLibs() || !hasFfmpegHeaders() || !hasAllSherpaLibs() || !hasSherpaHeaders() || !hasAllLibarchiveLibs() || !hasLibarchiveHeaders()) {
+      def needFfmpeg = !sherpaOnnxDisableFfmpeg && (!hasAllFfmpegLibs() || !hasFfmpegHeaders())
+      def needLibarchive = !sherpaOnnxDisableLibarchive && (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders())
+      if (needFfmpeg || needLibarchive || !hasAllSherpaLibs() || !hasSherpaHeaders()) {
         throw new RuntimeException(
           "Native libs/headers still missing and GitHub repo unknown. Set -PprebuiltGitHubRepo=owner/repo or ensure git remote origin is a GitHub URL. " +
-          "Alternatively run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js, third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js, third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js, or use Maven (com.xdcobra.sherpa:ffmpeg / sherpa-onnx / libarchive), or ensure ANDROID_RELEASE_TAG releases exist."
+          "Alternatively run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js, third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js, third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js, or use Maven (com.xdcobra.sherpa:ffmpeg / sherpa-onnx / libarchive), or ensure ANDROID_RELEASE_TAG releases exist. " +
+          (sherpaOnnxDisableFfmpeg ? "(FFmpeg disabled via sherpaOnnxDisableFfmpeg=true.) " : "") +
+          (sherpaOnnxDisableLibarchive ? "(libarchive disabled via sherpaOnnxDisableLibarchive=true.)" : "")
         )
       }
       return
@@ -232,7 +241,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
     def baseUrl = "https://github.com/${repo}/releases/download"
     downloadDir.mkdirs()
-    if (!hasAllFfmpegLibs() || !hasFfmpegHeaders()) {
+    if (!sherpaOnnxDisableFfmpeg && (!hasAllFfmpegLibs() || !hasFfmpegHeaders())) {
       def tagFile = file("${project.projectDir.parent}/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG")
       def tag = readReleaseTag(tagFile)
       if (!tag) throw new RuntimeException("Missing or empty third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG")
@@ -256,7 +265,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
       println "Downloaded and extracted FFmpeg prebuilts (libs + include) from ${tag}"
     }
-    if (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders()) {
+    if (!sherpaOnnxDisableLibarchive && (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders())) {
       def tagFile = file("${project.projectDir.parent}/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG")
       def tag = readReleaseTag(tagFile)
       if (!tag) throw new RuntimeException("Missing or empty third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG")
@@ -330,10 +339,12 @@ project.tasks.register("checkJniLibs") {
       if (!dir.exists()) {
         throw new RuntimeException("Missing native libs for ABI ${abi} in ${dir}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js, third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js, third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js, or use Maven (com.xdcobra.sherpa:ffmpeg / sherpa-onnx / libarchive), or ensure ANDROID_RELEASE_TAG releases exist.")
       }
-      requiredFfmpegSoFiles.each { soName ->
-        def soFile = new File(dir, soName)
-        if (!soFile.exists()) {
-          throw new RuntimeException("Missing required FFmpeg library '${soName}' for ABI ${abi}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js or ensure Maven com.xdcobra.sherpa:ffmpeg is available.")
+      if (!sherpaOnnxDisableFfmpeg) {
+        requiredFfmpegSoFiles.each { soName ->
+          def soFile = new File(dir, soName)
+          if (!soFile.exists()) {
+            throw new RuntimeException("Missing required FFmpeg library '${soName}' for ABI ${abi}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js or ensure Maven com.xdcobra.sherpa:ffmpeg is available. Or set sherpaOnnxDisableFfmpeg=true in gradle.properties (see docs/disable-ffmpeg.md).")
+          }
         }
       }
       requiredSherpaOnnxSoFiles.each { soName ->
@@ -342,10 +353,12 @@ project.tasks.register("checkJniLibs") {
           throw new RuntimeException("Missing required sherpa-onnx library '${soName}' for ABI ${abi}. Run third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js")
         }
       }
-      requiredLibarchiveSoFiles.each { soName ->
-        def soFile = new File(dir, soName)
-        if (!soFile.exists()) {
-          throw new RuntimeException("Missing required libarchive library '${soName}' for ABI ${abi}. Run third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js or ensure Maven com.xdcobra.sherpa:libarchive is available.")
+      if (!sherpaOnnxDisableLibarchive) {
+        requiredLibarchiveSoFiles.each { soName ->
+          def soFile = new File(dir, soName)
+          if (!soFile.exists()) {
+            throw new RuntimeException("Missing required libarchive library '${soName}' for ABI ${abi}. Run third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js or ensure Maven com.xdcobra.sherpa:libarchive is available. Or set sherpaOnnxDisableLibarchive=true in gradle.properties (see docs/disable-libarchive.md).")
+          }
         }
       }
       requiredOnnxruntimeJniSoFiles.each { soName ->

package/android/src/main/cpp/CMakeLists.txt CHANGED Viewed

@@ -25,6 +25,14 @@ endif()
 # Calculate path relative to project root: android/src/main/cpp -> android -> project root
 get_filename_component(PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../.." ABSOLUTE)
+# Optional: disable FFmpeg (gradle.properties: sherpaOnnxDisableFfmpeg=true). See docs/disable-ffmpeg.md.
+set(USE_FFMPEG ON)
+if(SHERPA_ONNX_DISABLE_FFMPEG)
+    set(USE_FFMPEG OFF)
+    message(STATUS "FFmpeg disabled (SHERPA_ONNX_DISABLE_FFMPEG=ON). convertAudioToWav16k/convertAudioToFormat will return an error at runtime.")
+endif()
+if(USE_FFMPEG)
 # FFmpeg: libs and headers from (1) third_party/ffmpeg_prebuilt/android (local build) or (2) jniLibs + cpp/include/ffmpeg (GitHub release zip).
 # PROJECT_ROOT = android/; repo root = PROJECT_ROOT/..
 set(FFMPEG_PREBUILT_BASE "${PROJECT_ROOT}/../third_party/ffmpeg_prebuilt/android")
@@ -51,6 +59,14 @@ elseif(EXISTS "${FFMPEG_JNILIBS}/libavcodec.so")
 else()
     message(FATAL_ERROR "FFmpeg libs missing for ABI ${ANDROID_ABI}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js or ensure ANDROID_RELEASE_TAG release is downloaded.")
 endif()
+endif(USE_FFMPEG)
+# Optional: disable libarchive (gradle.properties: sherpaOnnxDisableLibarchive=true). See docs/disable-libarchive.md.
+set(USE_LIBARCHIVE ON)
+if(SHERPA_ONNX_DISABLE_LIBARCHIVE)
+    set(USE_LIBARCHIVE OFF)
+    message(STATUS "libarchive disabled (SHERPA_ONNX_DISABLE_LIBARCHIVE=ON). extractTarBz2/cancelExtractTarBz2 will return an error at runtime.")
+endif()
 # Source files by domain (see docs/NATIVE_NAMING_CONVENTION.md). Move .cpp into subdirs; .h go alongside for include path.
 set(SOURCES
@@ -68,6 +84,7 @@ set(SOURCES
     crypto/sha256.cpp
 )
+if(USE_LIBARCHIVE)
 # libarchive: (1) prebuilt from third_party/libarchive_prebuilt or jniLibs + cpp/include/libarchive (Maven/GitHub), or (2) build from source.
 set(LIBARCHIVE_PREBUILT_BASE "${PROJECT_ROOT}/../third_party/libarchive_prebuilt/android")
 set(LIBARCHIVE_PREBUILT_LIB "${LIBARCHIVE_PREBUILT_BASE}/${ANDROID_ABI}/lib")
@@ -96,6 +113,7 @@ else()
     add_subdirectory(${PROJECT_ROOT}/../third_party/libarchive ${CMAKE_BINARY_DIR}/libarchive)
     message(STATUS "libarchive built successfully")
 endif()
+endif(USE_LIBARCHIVE)
 # Create shared library
 add_library(sherpaonnx SHARED
@@ -105,7 +123,7 @@ add_library(sherpaonnx SHARED
 # sherpa-onnx C-API: link by directory + library name only (no IMPORTED target).
 # The .so and headers land in jniLibs and cpp/include/sherpa-onnx via:
 #   (1) local third_party build, (2) Maven AAR extraction (libs + c-api headers), or (3) GitHub release zip (last resort).
-# If we used IMPORTED here, AGP would also copy .so from CMake → duplicate in mergeNativeLibs.
+# If we used IMPORTED here, AGP would also copy .so from CMake --> duplicate in mergeNativeLibs.
 set(SHERPA_ONNX_PREBUILT_BASE "${PROJECT_ROOT}/../third_party/sherpa-onnx-prebuilt/android")
 set(SHERPA_ONNX_ABI_LIB "${SHERPA_ONNX_PREBUILT_BASE}/${ANDROID_ABI}/lib")
 set(SHERPA_C_API_LIB_DIR "")
@@ -130,33 +148,49 @@ target_include_directories(sherpaonnx PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/jni/audio
     ${CMAKE_CURRENT_SOURCE_DIR}/jni/tts
     ${CMAKE_CURRENT_SOURCE_DIR}/include
-    ${FFMPEG_INCLUDE_DIR}
 )
-if(USE_LIBARCHIVE_PREBUILT AND LIBARCHIVE_INCLUDE_DIR)
+if(USE_FFMPEG)
+    target_include_directories(sherpaonnx PRIVATE ${FFMPEG_INCLUDE_DIR})
+endif()
+if(USE_LIBARCHIVE AND USE_LIBARCHIVE_PREBUILT AND LIBARCHIVE_INCLUDE_DIR)
     target_include_directories(sherpaonnx PRIVATE ${LIBARCHIVE_INCLUDE_DIR})
 endif()
-target_compile_definitions(sherpaonnx PRIVATE HAVE_FFMPEG=1)
+# When USE_FFMPEG=OFF, do NOT define HAVE_FFMPEG at all. Then #ifdef HAVE_FFMPEG is false
+# in C++ and the stub (#else) branches are used — no FFmpeg headers needed.
+if(USE_FFMPEG)
+    target_compile_definitions(sherpaonnx PRIVATE HAVE_FFMPEG=1)
+endif()
+# When USE_LIBARCHIVE=OFF, do NOT define HAVE_LIBARCHIVE; extractTarBz2 stubs return an error.
+if(USE_LIBARCHIVE)
+    target_compile_definitions(sherpaonnx PRIVATE HAVE_LIBARCHIVE=1)
+endif()
 # Link libraries (Kotlin API from AAR handles STT/TTS; C-API only for Zipvoice)
-target_link_directories(sherpaonnx PRIVATE ${FFMPEG_LIB_DIR})
-if(USE_LIBARCHIVE_PREBUILT AND LIBARCHIVE_LIB_DIR)
+if(USE_FFMPEG)
+    target_link_directories(sherpaonnx PRIVATE ${FFMPEG_LIB_DIR})
+endif()
+if(USE_LIBARCHIVE AND USE_LIBARCHIVE_PREBUILT AND LIBARCHIVE_LIB_DIR)
     target_link_directories(sherpaonnx PRIVATE ${LIBARCHIVE_LIB_DIR})
 endif()
 if(SHERPA_C_API_LIB_DIR)
     target_link_directories(sherpaonnx PRIVATE ${SHERPA_C_API_LIB_DIR})
     target_link_libraries(sherpaonnx PRIVATE sherpa-onnx-c-api)
 endif()
-target_link_libraries(sherpaonnx
-    PRIVATE
-    archive
-    avcodec
-    avformat
-    avutil
-    avfilter
-    swresample
-    log
-    android
-)
+# Link archive only when USE_LIBARCHIVE (otherwise libsherpaonnx.so would depend on libarchive.so at load time).
+if(USE_FFMPEG)
+    target_link_libraries(sherpaonnx
+        PRIVATE
+        avcodec
+        avformat
+        avutil
+        avfilter
+        swresample
+    )
+endif()
+if(USE_LIBARCHIVE)
+    target_link_libraries(sherpaonnx PRIVATE archive)
+endif()
+target_link_libraries(sherpaonnx PRIVATE log android)
 # Compiler flags for our library
 target_compile_options(sherpaonnx PRIVATE

package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp CHANGED Viewed

@@ -6,8 +6,10 @@
  */
 #include "sherpa-onnx-archive-helper.h"
+#ifdef HAVE_LIBARCHIVE
 #include <archive.h>
 #include <archive_entry.h>
+#endif
 #include <array>
 #include <atomic>
 #include <cerrno>
@@ -23,6 +25,7 @@
 std::atomic<bool> ArchiveHelper::cancel_requested_(false);
 namespace {
+#ifdef HAVE_LIBARCHIVE
 struct ArchiveReadContext {
   FILE* file = nullptr;
   std::array<unsigned char, 64 * 1024> buffer{};
@@ -72,6 +75,7 @@ static void DrainRemainingAndClose(ArchiveReadContext* ctx) {
   fclose(ctx->file);
   ctx->file = nullptr;
 }
+#endif  // HAVE_LIBARCHIVE
 static std::string ToHex(const unsigned char* data, size_t size) {
   static const char* kHex = "0123456789abcdef";
@@ -103,6 +107,15 @@ bool ArchiveHelper::ExtractTarBz2(
   std::string* out_sha256) {
   cancel_requested_.store(false);
+#ifndef HAVE_LIBARCHIVE
+  (void)source_path;
+  (void)target_path;
+  (void)force;
+  (void)on_progress;
+  (void)out_sha256;
+  if (out_error) *out_error = "libarchive not available. Build with libarchive or set sherpaOnnxDisableLibarchive=false in gradle.properties. See docs/disable-libarchive.md.";
+  return false;
+#else
   // Validate source file exists
   if (!std::filesystem::exists(source_path)) {
     if (out_error) *out_error = "Source file does not exist";
@@ -360,6 +373,7 @@ bool ArchiveHelper::ExtractTarBz2(
   }
   return true;
+#endif  // HAVE_LIBARCHIVE
 }
 bool ArchiveHelper::ComputeFileSha256(

package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp CHANGED Viewed

@@ -419,5 +419,21 @@ std::string FindLargestOnnxExcludingTokens(
     return ChooseLargest(files, excludeTokens, false, false);
 }
+bool ContainsWord(const std::string& haystack, const std::string& word) {  // True when `word` occurs in `haystack` bounded on both sides by a separator character or by a string edge.
+    if (word.empty()) return false;  // An empty word never counts as a match.
+    size_t pos = 0;  // Offset from which the next find() call starts searching.
+    auto isSep = [](char c) {  // Boundary test; '\0' doubles as the sentinel used below for "before the start" / "past the end".
+        return c == '\0' || c == '/' || c == '-' || c == '_' || c == '.' || c == ' ';
+    };
+    while ((pos = haystack.find(word, pos)) != std::string::npos) {  // Examine each occurrence in turn, overlapping ones included.
+        char before = (pos == 0) ? '\0' : haystack[pos - 1];  // Sentinel when the occurrence begins the string.
+        size_t afterPos = pos + word.size();  // Index of the first character following the occurrence.
+        char after = (afterPos >= haystack.size()) ? '\0' : haystack[afterPos];  // Sentinel when the occurrence ends the string.
+        if (isSep(before) && isSep(after)) return true;  // Both neighbours are boundaries: standalone token found.
+        pos++;  // Step one character forward so the next search starts just after this occurrence's first character.
+    }
+    return false;  // No occurrence was delimited on both sides.
+}
 } // namespace model_detect
 } // namespace sherpaonnx

package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h CHANGED Viewed

@@ -49,6 +49,9 @@ std::string FindLargestOnnxExcludingTokens(
     const std::vector<std::string>& excludeTokens
 );
+/** Returns true if \p word appears in \p haystack as a standalone token: each side of the match must be a separator (/ - _ . space) or the start/end of \p haystack. An empty \p word never matches. */
+bool ContainsWord(const std::string& haystack, const std::string& word);
 } // namespace model_detect
 } // namespace sherpaonnx

package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp CHANGED Viewed

@@ -35,6 +35,7 @@ SttModelKind ParseSttModelType(const std::string& modelType) {
     if (modelType == "omnilingual") return SttModelKind::kOmnilingual;
     if (modelType == "medasr") return SttModelKind::kMedAsr;
     if (modelType == "telespeech_ctc") return SttModelKind::kTeleSpeechCtc;
+    if (modelType == "tone_ctc") return SttModelKind::kToneCtc;
     return SttModelKind::kUnknown;
 }
@@ -163,6 +164,10 @@ SttDetectResult DetectSttModel(
     bool isLikelyOmnilingual = modelDirLower.find("omnilingual") != std::string::npos;
     bool isLikelyMedAsr = modelDirLower.find("medasr") != std::string::npos;
     bool isLikelyTeleSpeech = modelDirLower.find("telespeech") != std::string::npos;
+    // Tone CTC: match "tone" only as standalone word (not e.g. "cantonese"); also accept "t-one" / "t_one"
+    bool isLikelyToneCtc = modelDirLower.find("t-one") != std::string::npos ||
+                           modelDirLower.find("t_one") != std::string::npos ||
+                           model_detect::ContainsWord(modelDirLower, "tone");
     bool hasMoonshine = !moonshinePreprocessor.empty() && !moonshineUncachedDecoder.empty() &&
                         !moonshineCachedDecoder.empty() && !moonshineEncoder.empty();
@@ -173,6 +178,7 @@ SttDetectResult DetectSttModel(
     bool hasOmnilingual = !ctcModelPath.empty() && isLikelyOmnilingual;
     bool hasMedAsr = !ctcModelPath.empty() && isLikelyMedAsr;
     bool hasTeleSpeechCtc = (!ctcModelPath.empty() || !paraformerModelPath.empty()) && isLikelyTeleSpeech;
+    bool hasToneCtc = !ctcModelPath.empty() && isLikelyToneCtc;
     if (hasTransducer) {
         if (isLikelyNemo || isLikelyTdt) {
@@ -224,6 +230,9 @@ SttDetectResult DetectSttModel(
     if (hasTeleSpeechCtc) {
         result.detectedModels.push_back({"telespeech_ctc", modelDir});
     }
+    if (hasToneCtc) {
+        result.detectedModels.push_back({"tone_ctc", modelDir});
+    }
     SttModelKind selected = SttModelKind::kUnknown;
@@ -247,7 +256,8 @@ SttDetectResult DetectSttModel(
             return result;
         }
         if ((selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
-             selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc) &&
+             selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc ||
+             selected == SttModelKind::kToneCtc) &&
             ctcModelPath.empty()) {
             result.error = "CTC model requested but model file not found in " + modelDir;
             return result;
@@ -288,6 +298,10 @@ SttDetectResult DetectSttModel(
             result.error = "TeleSpeech CTC model requested but model not found in " + modelDir;
             return result;
         }
+        if (selected == SttModelKind::kToneCtc && !hasToneCtc) {
+            result.error = "Tone CTC model requested but path does not contain 'tone' (as a word), 't-one', or 't_one' (e.g. sherpa-onnx-streaming-t-one-*) in " + modelDir;
+            return result;
+        }
     } else {
         if (hasTransducer) {
             selected = (isLikelyNemo || isLikelyTdt) ? SttModelKind::kNemoTransducer : SttModelKind::kTransducer;
@@ -321,6 +335,8 @@ SttDetectResult DetectSttModel(
             selected = SttModelKind::kMedAsr;
         } else if (hasTeleSpeechCtc) {
             selected = SttModelKind::kTeleSpeechCtc;
+        } else if (hasToneCtc) {
+            selected = SttModelKind::kToneCtc;
         } else if (!ctcModelPath.empty()) {
             selected = SttModelKind::kZipformerCtc;
         }
@@ -346,7 +362,8 @@ SttDetectResult DetectSttModel(
     } else if (selected == SttModelKind::kParaformer) {
         result.paths.paraformerModel = paraformerModelPath;
     } else if (selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
-               selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc) {
+               selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc ||
+               selected == SttModelKind::kToneCtc) {
         result.paths.ctcModel = ctcModelPath;
     } else if (selected == SttModelKind::kWhisper) {
         result.paths.whisperEncoder = encoderPath;

package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h CHANGED Viewed

@@ -25,7 +25,8 @@ enum class SttModelKind {
     kCanary,
     kOmnilingual,
     kMedAsr,
-    kTeleSpeechCtc
+    kTeleSpeechCtc,
+    kToneCtc
 };
 enum class TtsModelKind {

package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp CHANGED Viewed

@@ -30,6 +30,7 @@ const char* SttModelKindToString(SttModelKind k) {
     case SttModelKind::kOmnilingual: return "omnilingual";
     case SttModelKind::kMedAsr: return "medasr";
     case SttModelKind::kTeleSpeechCtc: return "telespeech_ctc";
+    case SttModelKind::kToneCtc: return "tone_ctc";
     default: return "unknown";
   }
 }