react-native-sherpa-onnx 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/SherpaOnnx.podspec +1 -1
- package/android/build.gradle +35 -26
- package/android/prebuilt-download.gradle +27 -14
- package/android/src/main/cpp/CMakeLists.txt +51 -17
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +14 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +19 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +2 -1
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +1 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +114 -8
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +535 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +10 -10
- package/ios/SherpaOnnx+OnlineSTT.mm +365 -0
- package/ios/SherpaOnnx+TTS.mm +35 -9
- package/ios/SherpaOnnx.mm +6 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +3 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +16 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +19 -2
- package/ios/model_detect/sherpa-onnx-model-detect.h +2 -1
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +85 -0
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +270 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/index.js +2 -2
- package/lib/module/stt/index.js +4 -0
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +257 -0
- package/lib/module/stt/streaming.js.map +1 -0
- package/lib/module/stt/streamingTypes.js +38 -0
- package/lib/module/stt/streamingTypes.js.map +1 -0
- package/lib/module/tts/index.js +4 -43
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +220 -0
- package/lib/module/tts/streaming.js.map +1 -0
- package/lib/module/tts/streamingTypes.js +4 -0
- package/lib/module/tts/streamingTypes.js.map +1 -0
- package/lib/module/tts/types.js +8 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +66 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +3 -0
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts +42 -0
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -0
- package/lib/typescript/src/stt/streamingTypes.d.ts +122 -0
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -0
- package/lib/typescript/src/tts/index.d.ts +3 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts +24 -0
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -0
- package/lib/typescript/src/tts/streamingTypes.d.ts +27 -0
- package/lib/typescript/src/tts/streamingTypes.d.ts.map +1 -0
- package/lib/typescript/src/tts/types.d.ts +19 -6
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/package.json +1 -2
- package/src/NativeSherpaOnnx.ts +95 -0
- package/src/index.tsx +2 -2
- package/src/stt/index.ts +17 -0
- package/src/stt/streaming.ts +361 -0
- package/src/stt/streamingTypes.ts +151 -0
- package/src/tts/index.ts +6 -66
- package/src/tts/streaming.ts +336 -0
- package/src/tts/streamingTypes.ts +54 -0
- package/src/tts/types.ts +20 -10
- package/android/codegen.gradle +0 -57
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# react-native-sherpa-onnx
|
|
2
2
|
|
|
3
|
-
React Native SDK for sherpa-onnx
|
|
3
|
+
React Native SDK for sherpa-onnx – offline and streaming speech processing
|
|
4
4
|
|
|
5
5
|
<div align="center">
|
|
6
6
|
<img src="./docs/images/banner.png" alt="Banner" width="560" />
|
|
@@ -19,7 +19,7 @@ React Native SDK for sherpa-onnx - providing offline speech processing capabilit
|
|
|
19
19
|
> **⚠️ SDK 0.3.0 – Breaking changes from 0.2.0**
|
|
20
20
|
> Since the last release I have restructured and improved the SDK significantly: full iOS support, smoother behaviour, fewer failure points, and a much smaller footprint (~95% size reduction). As a result, **logic and the public API have changed**. If you are upgrading from 0.2.x, please follow the [Breaking changes (upgrading to 0.3.0)](docs/migration.md#breaking-changes-upgrading-to-030) section and the updated API documentation.
|
|
21
21
|
|
|
22
|
-
A React Native TurboModule that provides offline speech processing capabilities using [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). The SDK aims to support all functionalities that sherpa-onnx offers, including offline speech-to-text, text-to-speech, speaker diarization, speech enhancement, source separation, and VAD (Voice Activity Detection).
|
|
22
|
+
A React Native TurboModule that provides offline and streaming speech processing capabilities using [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). The SDK aims to support all functionalities that sherpa-onnx offers, including offline and **online (streaming)** speech-to-text, text-to-speech (batch and streaming), speaker diarization, speech enhancement, source separation, and VAD (Voice Activity Detection).
|
|
23
23
|
|
|
24
24
|
## Table of contents
|
|
25
25
|
|
|
@@ -47,8 +47,10 @@ A React Native TurboModule that provides offline speech processing capabilities
|
|
|
47
47
|
|
|
48
48
|
| Feature | Status | Notes |
|
|
49
49
|
|---------|--------|-------|
|
|
50
|
-
| Offline Speech-to-Text | ✅ **Supported** | No internet required; multiple model types (Zipformer, Paraformer, Whisper, etc.). See [Supported Model Types](#supported-model-types). |
|
|
51
|
-
|
|
|
50
|
+
| Offline Speech-to-Text | ✅ **Supported** | No internet required; multiple model types (Zipformer, Paraformer, Whisper, etc.). See [Supported Model Types](#supported-model-types) and [STT documentation](./docs/stt.md). |
|
|
51
|
+
| Online (streaming) Speech-to-Text | ✅ **Supported** | Real-time recognition from microphone or stream; partial results, endpoint detection. Use streaming-capable models (e.g. transducer, paraformer). See [Streaming STT](./docs/stt_streaming.md). |
|
|
52
|
+
| Text-to-Speech | ✅ **Supported** | Multiple model types (VITS, Matcha, Kokoro, etc.). See [Supported Model Types](#supported-model-types) and [TTS documentation](./docs/tts.md). |
|
|
53
|
+
| Streaming Text-to-Speech | ✅ **Supported** | Incremental speech generation for low time-to-first-byte and playback while generating. See [Streaming TTS](./docs/tts_streaming.md). |
|
|
52
54
|
| Execution providers (CPU, NNAPI, XNNPACK, Core ML, QNN) | ✅ **Supported** | See [Execution provider support](./docs/execution-providers.md). |
|
|
53
55
|
| Play Asset Delivery (PAD) | ✅ **Supported** | Android only. See [Model Setup](./docs/MODEL_SETUP.md). |
|
|
54
56
|
| Automatic Model type detection | ✅ **Supported** | `detectSttModel()` and `detectTtsModel()` for a path. See [Model Setup: Model type detection](./docs/MODEL_SETUP.md#model-type-detection-without-initialization). |
|
|
@@ -80,6 +82,9 @@ A React Native TurboModule that provides offline speech processing capabilities
|
|
|
80
82
|
| **WeNet CTC** | `'wenet_ctc'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/wenet/index.html) |
|
|
81
83
|
| **SenseVoice** | `'sense_voice'` | Requires `model.onnx` (or `model.int8.onnx`) and `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/sense-voice/index.html) |
|
|
82
84
|
| **FunASR Nano** | `'funasr_nano'` | Requires `encoder_adaptor.onnx`, `llm.onnx`, `embedding.onnx`, and `tokenizer` directory | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/funasr-nano/index.html) |
|
|
85
|
+
| **Tone CTC (t-one)** | `'tone_ctc'` | Single `model.onnx` + `tokens.txt`. Folder name usually contains `t-one`, `t_one` or `tone` | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html) |
|
|
86
|
+
|
|
87
|
+
For **real-time (streaming) recognition** from a microphone or audio stream, use streaming-capable model types: `transducer`, `paraformer`, `zipformer2_ctc`, `nemo_ctc`, or `tone_ctc`. See [Streaming (Online) Speech-to-Text](./docs/stt_streaming.md).
|
|
83
88
|
|
|
84
89
|
### Text-to-Speech (TTS) Models
|
|
85
90
|
|
|
@@ -92,6 +97,8 @@ A React Native TurboModule that provides offline speech processing capabilities
|
|
|
92
97
|
| **Zipvoice** | `'zipvoice'` | Voice cloning capable. Requires `encoder.onnx`, `decoder.onnx`, `vocoder.onnx`, `tokens.txt` | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/zipvoice.html) |
|
|
93
98
|
| **Pocket** | `'pocket'` | Flow-matching TTS. Requires `lm_flow.onnx`, `lm_main.onnx`, `encoder.onnx`, `decoder.onnx`, `text_conditioner.onnx`, `vocab.json`, `token_scores.json` | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
|
|
94
99
|
|
|
100
|
+
For **streaming TTS** (incremental generation, low latency), use `createStreamingTTS()` with supported model types. See [Streaming Text-to-Speech](./docs/tts_streaming.md).
|
|
101
|
+
|
|
95
102
|
## Installation
|
|
96
103
|
|
|
97
104
|
```sh
|
|
@@ -142,7 +149,10 @@ The XCFramework must include the C++ API (`libsherpa-onnx-cxx-api.a` merged or l
|
|
|
142
149
|
|
|
143
150
|
## Documentation
|
|
144
151
|
|
|
145
|
-
- [
|
|
152
|
+
- [Speech-to-Text (STT)](./docs/stt.md) – Offline transcription (file or samples)
|
|
153
|
+
- [Streaming (Online) Speech-to-Text](./docs/stt_streaming.md) – Real-time recognition, partial results, endpoint detection
|
|
154
|
+
- [Text-to-Speech (TTS)](./docs/tts.md) – Offline and streaming generation
|
|
155
|
+
- [Streaming Text-to-Speech](./docs/tts_streaming.md) – Incremental TTS (createStreamingTTS)
|
|
146
156
|
- [Execution provider support (QNN, NNAPI, XNNPACK, Core ML)](./docs/execution-providers.md) – Checking and using acceleration backends
|
|
147
157
|
- [Voice Activity Detection (VAD)](./docs/vad.md)
|
|
148
158
|
- [Speaker Diarization](./docs/diarization.md)
|
|
@@ -150,6 +160,8 @@ The XCFramework must include the C++ API (`libsherpa-onnx-cxx-api.a` merged or l
|
|
|
150
160
|
- [Source Separation](./docs/separation.md)
|
|
151
161
|
- [Model Setup](./docs/MODEL_SETUP.md) – Bundled assets, Play Asset Delivery (PAD), model discovery APIs, and troubleshooting
|
|
152
162
|
- [Model Download Manager](./docs/download-manager.md)
|
|
163
|
+
- [Disable FFMPEG](./docs/disable-ffmpeg.md)
|
|
164
|
+
- [Disable LIBARCHIVE](./docs/disable-libarchive.md)
|
|
153
165
|
|
|
154
166
|
Note: For guidance on when to use `listAssetModels()` vs `listModelsAtPath()`, and on how to combine bundled and PAD/file-based models, see [Model Setup](./docs/MODEL_SETUP.md).
|
|
155
167
|
|
|
@@ -165,11 +177,13 @@ We provide example applications to help you get started with `react-native-sherp
|
|
|
165
177
|
|
|
166
178
|
### Example App (Audio to Text)
|
|
167
179
|
|
|
168
|
-
The example app included in this repository demonstrates
|
|
180
|
+
The example app included in this repository demonstrates audio-to-text transcription, text-to-speech, and streaming features. It includes:
|
|
169
181
|
|
|
170
182
|
- Multiple model type support (Zipformer, Paraformer, NeMo CTC, Whisper, WeNet CTC, SenseVoice, FunASR Nano)
|
|
171
183
|
- Model selection and configuration
|
|
172
|
-
-
|
|
184
|
+
- **Offline** audio file transcription
|
|
185
|
+
- **Online (streaming) STT** – live transcription from the microphone with partial results
|
|
186
|
+
- **Streaming TTS** – incremental speech generation and playback
|
|
173
187
|
- Test audio files for different languages
|
|
174
188
|
|
|
175
189
|
**Getting started:**
|
package/SherpaOnnx.podspec
CHANGED
|
@@ -65,7 +65,7 @@ Pod::Spec.new do |s|
|
|
|
65
65
|
device_slice = File.join(xcframework_root, "ios-arm64")
|
|
66
66
|
|
|
67
67
|
s.pod_target_xcconfig = {
|
|
68
|
-
"HEADER_SEARCH_PATHS" => "$(inherited) \"#{pod_root}/ios\" \"#{pod_root}/ios/archive\" \"#{pod_root}/ios/model_detect\" \"#{pod_root}/ios/stt\" \"#{pod_root}/ios/tts\" \"#{libarchive_dir}\" \"#{device_headers}\" \"#{simulator_headers}\"",
|
|
68
|
+
"HEADER_SEARCH_PATHS" => "$(inherited) \"#{pod_root}/ios\" \"#{pod_root}/ios/archive\" \"#{pod_root}/ios/model_detect\" \"#{pod_root}/ios/stt\" \"#{pod_root}/ios/tts\" \"#{pod_root}/ios/online_stt\" \"#{libarchive_dir}\" \"#{device_headers}\" \"#{simulator_headers}\"",
|
|
69
69
|
"GCC_PREPROCESSOR_DEFINITIONS" => '$(inherited) PLATFORM_CONFIG_H=\\"libarchive_darwin_config.h\\"',
|
|
70
70
|
"CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
|
|
71
71
|
"CLANG_CXX_LIBRARY" => "libc++",
|
package/android/build.gradle
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// Prebuilt versions (sherpa-onnx, FFmpeg, libarchive) from prebuilt-versions.gradle.
|
|
2
|
-
// Resolution: env
|
|
2
|
+
// Resolution: env --> ANDROID_RELEASE_TAG files --> project property --> default.
|
|
3
3
|
apply from: file("${project.projectDir}/prebuilt-versions.gradle")
|
|
4
4
|
|
|
5
5
|
// Used by dependencies below and by prebuilt-download.gradle (extractSherpaOnnxClasses, downloadNativeLibsIfNeeded).
|
|
@@ -39,13 +39,7 @@ buildscript {
|
|
|
39
39
|
|
|
40
40
|
apply plugin: "com.android.library"
|
|
41
41
|
apply plugin: "kotlin-android"
|
|
42
|
-
|
|
43
|
-
// Standalone AAR build: set in root build.gradle (ext.standaloneAarBuild = true) or via -PstandaloneAarBuild=true
|
|
44
|
-
def isStandaloneAarBuild = rootProject.findProperty("standaloneAarBuild") in [true, "true"]
|
|
45
|
-
// Only apply React Native plugin when building inside a React Native app (e.g. example app).
|
|
46
|
-
if (!isStandaloneAarBuild) {
|
|
47
|
-
apply plugin: "com.facebook.react"
|
|
48
|
-
}
|
|
42
|
+
apply plugin: "com.facebook.react"
|
|
49
43
|
|
|
50
44
|
android {
|
|
51
45
|
namespace "com.sherpaonnx"
|
|
@@ -55,9 +49,10 @@ android {
|
|
|
55
49
|
// Native .so and headers: filled by prebuilt-download.gradle (local jniLibs, Maven AAR, or GitHub release).
|
|
56
50
|
// Alternatively by third_party/*/copy_prebuilts_to_sdk.js or shipped in npm package.
|
|
57
51
|
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
|
|
52
|
+
// When true, FFmpeg is not linked; convertAudioToWav16k/convertAudioToFormat will fail at runtime. See docs/disable-ffmpeg.md.
|
|
53
|
+
def sherpaOnnxDisableFfmpeg = (project.findProperty("sherpaOnnxDisableFfmpeg") ?: "false").toString().toLowerCase() in ["true", "1"]
|
|
54
|
+
// When true, libarchive is not linked; extractTarBz2/cancelExtractTarBz2 will fail at runtime. See docs/disable-libarchive.md.
|
|
55
|
+
def sherpaOnnxDisableLibarchive = (project.findProperty("sherpaOnnxDisableLibarchive") ?: "false").toString().toLowerCase() in ["true", "1"]
|
|
61
56
|
|
|
62
57
|
defaultConfig {
|
|
63
58
|
minSdkVersion getExtOrDefault("minSdkVersion")
|
|
@@ -71,7 +66,10 @@ android {
|
|
|
71
66
|
externalNativeBuild {
|
|
72
67
|
cmake {
|
|
73
68
|
cppFlags "-std=c++17", "-Wall", "-Wextra", "-fvisibility=hidden"
|
|
74
|
-
|
|
69
|
+
def cmakeArgList = ["-DANDROID_STL=c++_shared"]
|
|
70
|
+
if (sherpaOnnxDisableFfmpeg) cmakeArgList.add("-DSHERPA_ONNX_DISABLE_FFMPEG=ON")
|
|
71
|
+
if (sherpaOnnxDisableLibarchive) cmakeArgList.add("-DSHERPA_ONNX_DISABLE_LIBARCHIVE=ON")
|
|
72
|
+
arguments(*cmakeArgList)
|
|
75
73
|
}
|
|
76
74
|
}
|
|
77
75
|
|
|
@@ -99,6 +97,27 @@ android {
|
|
|
99
97
|
}
|
|
100
98
|
}
|
|
101
99
|
|
|
100
|
+
// When FFmpeg is disabled, exclude FFmpeg .so from our output to avoid merge conflicts with other libs (e.g. react-native-audio-api).
|
|
101
|
+
if (sherpaOnnxDisableFfmpeg) {
|
|
102
|
+
packaging {
|
|
103
|
+
jniLibs {
|
|
104
|
+
excludes += [
|
|
105
|
+
"**/libavcodec.so", "**/libavformat.so", "**/libavutil.so",
|
|
106
|
+
"**/libavfilter.so", "**/libswresample.so", "**/libshine.so"
|
|
107
|
+
]
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// When libarchive is disabled, exclude libarchive.so from our output to avoid merge conflicts.
|
|
113
|
+
if (sherpaOnnxDisableLibarchive) {
|
|
114
|
+
packaging {
|
|
115
|
+
jniLibs {
|
|
116
|
+
excludes += ["**/libarchive.so"]
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
102
121
|
lint {
|
|
103
122
|
disable "GradleCompatible"
|
|
104
123
|
}
|
|
@@ -119,7 +138,7 @@ repositories {
|
|
|
119
138
|
maven { url "https://xdcobra.github.io/maven" }
|
|
120
139
|
}
|
|
121
140
|
|
|
122
|
-
// Configurations used by prebuilt-download.gradle: downloadNativeLibsIfNeeded (AAR
|
|
141
|
+
// Configurations used by prebuilt-download.gradle: downloadNativeLibsIfNeeded (AAR --> jniLibs + headers),
|
|
123
142
|
// extractSherpaOnnxClasses (sherpa-onnx classes.jar), extractOnnxruntimeClasses (onnxruntime classes.jar).
|
|
124
143
|
configurations { sherpaOnnxAar; ffmpegAar; libarchiveAar; onnxruntimeAar }
|
|
125
144
|
|
|
@@ -129,8 +148,7 @@ apply from: file("${project.projectDir}/prebuilt-download.gradle")
|
|
|
129
148
|
def kotlin_version = getExtOrDefault("kotlinVersion")
|
|
130
149
|
|
|
131
150
|
dependencies {
|
|
132
|
-
// React Native dependency: explicit version
|
|
133
|
-
// AAR builds can resolve com.facebook.react types. When consumed by a React Native app,
|
|
151
|
+
// React Native dependency: explicit version for IDE (VS Code Kotlin LSP). When consumed by a React Native app,
|
|
134
152
|
// Gradle's dependency resolution picks the app's react-android version (typically higher).
|
|
135
153
|
implementation "com.facebook.react:react-android:0.83.0"
|
|
136
154
|
implementation "org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version"
|
|
@@ -155,13 +173,10 @@ dependencies {
|
|
|
155
173
|
implementation "com.google.android.play:asset-delivery:2.3.0"
|
|
156
174
|
}
|
|
157
175
|
|
|
158
|
-
//
|
|
159
|
-
apply from: file("${project.projectDir}/codegen.gradle")
|
|
160
|
-
|
|
161
|
-
// Wire prebuilt tasks: extractSherpaOnnxClasses → downloadNativeLibsIfNeeded when not standalone;
|
|
176
|
+
// Wire prebuilt tasks: extractSherpaOnnxClasses --> downloadNativeLibsIfNeeded;
|
|
162
177
|
// CMake/compile tasks depend on extractSherpaOnnxClasses and checkJniLibs.
|
|
163
178
|
afterEvaluate {
|
|
164
|
-
if (
|
|
179
|
+
if (tasks.findByName("downloadNativeLibsIfNeeded") != null) {
|
|
165
180
|
tasks.named("extractSherpaOnnxClasses").configure { dependsOn tasks.named("downloadNativeLibsIfNeeded") }
|
|
166
181
|
tasks.named("extractOnnxruntimeClasses").configure { dependsOn tasks.named("downloadNativeLibsIfNeeded") }
|
|
167
182
|
}
|
|
@@ -170,14 +185,8 @@ afterEvaluate {
|
|
|
170
185
|
dependsOn tasks.named("extractOnnxruntimeClasses")
|
|
171
186
|
dependsOn tasks.named("checkJniLibs")
|
|
172
187
|
}
|
|
173
|
-
|
|
174
|
-
// With includesGeneratedCode=false, codegen is always run by our custom task (not RNGP).
|
|
175
|
-
// Kotlin compile must depend on it for both standalone AAR and app-dependency builds.
|
|
176
|
-
def codegenJavaDir = project.file("${project.buildDir}/generated/source/codegen/java")
|
|
177
188
|
tasks.matching { it.name == "compileReleaseKotlin" || it.name == "compileDebugKotlin" }.configureEach {
|
|
178
189
|
dependsOn tasks.named("extractSherpaOnnxClasses")
|
|
179
190
|
dependsOn tasks.named("extractOnnxruntimeClasses")
|
|
180
|
-
dependsOn tasks.named('generateCodegenSpec')
|
|
181
|
-
inputs.dir(codegenJavaDir)
|
|
182
191
|
}
|
|
183
192
|
}
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
// Prebuilt download and check logic. Applied after android {} and configurations.
|
|
2
2
|
// Depends on: sherpaOnnxVersion, ffmpegVersion, libarchiveVersion, ortVersion (from prebuilt-versions.gradle)
|
|
3
3
|
// and configurations.sherpaOnnxAar, ffmpegAar, libarchiveAar, onnxruntimeAar (from build.gradle).
|
|
4
|
+
// When sherpaOnnxDisableFfmpeg=true (gradle.properties), FFmpeg is not required; see docs/disable-ffmpeg.md.
|
|
5
|
+
// When sherpaOnnxDisableLibarchive=true (gradle.properties), libarchive is not required; see docs/disable-libarchive.md.
|
|
6
|
+
|
|
7
|
+
def sherpaOnnxDisableFfmpeg = (project.findProperty("sherpaOnnxDisableFfmpeg") ?: "false").toString().toLowerCase() in ["true", "1"]
|
|
8
|
+
def sherpaOnnxDisableLibarchive = (project.findProperty("sherpaOnnxDisableLibarchive") ?: "false").toString().toLowerCase() in ["true", "1"]
|
|
4
9
|
|
|
5
10
|
def requiredAbis = ["arm64-v8a", "armeabi-v7a", "x86", "x86_64"]
|
|
6
11
|
def requiredFfmpegSoFiles = [
|
|
@@ -129,7 +134,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
129
134
|
}
|
|
130
135
|
}
|
|
131
136
|
|
|
132
|
-
if (!hasAllFfmpegLibs() || !hasFfmpegHeaders()) {
|
|
137
|
+
if (!sherpaOnnxDisableFfmpeg && (!hasAllFfmpegLibs() || !hasFfmpegHeaders())) {
|
|
133
138
|
try {
|
|
134
139
|
def aarFiles = project.configurations.ffmpegAar.files
|
|
135
140
|
if (!aarFiles.isEmpty()) {
|
|
@@ -159,7 +164,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
159
164
|
}
|
|
160
165
|
}
|
|
161
166
|
|
|
162
|
-
if (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders()) {
|
|
167
|
+
if (!sherpaOnnxDisableLibarchive && (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders())) {
|
|
163
168
|
try {
|
|
164
169
|
def aarFiles = project.configurations.libarchiveAar.files
|
|
165
170
|
if (!aarFiles.isEmpty()) {
|
|
@@ -221,10 +226,14 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
221
226
|
|
|
222
227
|
def repo = project.findProperty('prebuiltGitHubRepo') ?: getGitHubRepo()
|
|
223
228
|
if (!repo) {
|
|
224
|
-
|
|
229
|
+
def needFfmpeg = !sherpaOnnxDisableFfmpeg && (!hasAllFfmpegLibs() || !hasFfmpegHeaders())
|
|
230
|
+
def needLibarchive = !sherpaOnnxDisableLibarchive && (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders())
|
|
231
|
+
if (needFfmpeg || needLibarchive || !hasAllSherpaLibs() || !hasSherpaHeaders()) {
|
|
225
232
|
throw new RuntimeException(
|
|
226
233
|
"Native libs/headers still missing and GitHub repo unknown. Set -PprebuiltGitHubRepo=owner/repo or ensure git remote origin is a GitHub URL. " +
|
|
227
|
-
"Alternatively run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js, third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js, third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js, or use Maven (com.xdcobra.sherpa:ffmpeg / sherpa-onnx / libarchive), or ensure ANDROID_RELEASE_TAG releases exist."
|
|
234
|
+
"Alternatively run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js, third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js, third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js, or use Maven (com.xdcobra.sherpa:ffmpeg / sherpa-onnx / libarchive), or ensure ANDROID_RELEASE_TAG releases exist. " +
|
|
235
|
+
(sherpaOnnxDisableFfmpeg ? "(FFmpeg disabled via sherpaOnnxDisableFfmpeg=true.) " : "") +
|
|
236
|
+
(sherpaOnnxDisableLibarchive ? "(libarchive disabled via sherpaOnnxDisableLibarchive=true.)" : "")
|
|
228
237
|
)
|
|
229
238
|
}
|
|
230
239
|
return
|
|
@@ -232,7 +241,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
232
241
|
def baseUrl = "https://github.com/${repo}/releases/download"
|
|
233
242
|
downloadDir.mkdirs()
|
|
234
243
|
|
|
235
|
-
if (!hasAllFfmpegLibs() || !hasFfmpegHeaders()) {
|
|
244
|
+
if (!sherpaOnnxDisableFfmpeg && (!hasAllFfmpegLibs() || !hasFfmpegHeaders())) {
|
|
236
245
|
def tagFile = file("${project.projectDir.parent}/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG")
|
|
237
246
|
def tag = readReleaseTag(tagFile)
|
|
238
247
|
if (!tag) throw new RuntimeException("Missing or empty third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG")
|
|
@@ -256,7 +265,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
256
265
|
println "Downloaded and extracted FFmpeg prebuilts (libs + include) from ${tag}"
|
|
257
266
|
}
|
|
258
267
|
|
|
259
|
-
if (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders()) {
|
|
268
|
+
if (!sherpaOnnxDisableLibarchive && (!hasAllLibarchiveLibs() || !hasLibarchiveHeaders())) {
|
|
260
269
|
def tagFile = file("${project.projectDir.parent}/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG")
|
|
261
270
|
def tag = readReleaseTag(tagFile)
|
|
262
271
|
if (!tag) throw new RuntimeException("Missing or empty third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG")
|
|
@@ -330,10 +339,12 @@ project.tasks.register("checkJniLibs") {
|
|
|
330
339
|
if (!dir.exists()) {
|
|
331
340
|
throw new RuntimeException("Missing native libs for ABI ${abi} in ${dir}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js, third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js, third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js, or use Maven (com.xdcobra.sherpa:ffmpeg / sherpa-onnx / libarchive), or ensure ANDROID_RELEASE_TAG releases exist.")
|
|
332
341
|
}
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
342
|
+
if (!sherpaOnnxDisableFfmpeg) {
|
|
343
|
+
requiredFfmpegSoFiles.each { soName ->
|
|
344
|
+
def soFile = new File(dir, soName)
|
|
345
|
+
if (!soFile.exists()) {
|
|
346
|
+
throw new RuntimeException("Missing required FFmpeg library '${soName}' for ABI ${abi}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js or ensure Maven com.xdcobra.sherpa:ffmpeg is available. Or set sherpaOnnxDisableFfmpeg=true in gradle.properties (see docs/disable-ffmpeg.md).")
|
|
347
|
+
}
|
|
337
348
|
}
|
|
338
349
|
}
|
|
339
350
|
requiredSherpaOnnxSoFiles.each { soName ->
|
|
@@ -342,10 +353,12 @@ project.tasks.register("checkJniLibs") {
|
|
|
342
353
|
throw new RuntimeException("Missing required sherpa-onnx library '${soName}' for ABI ${abi}. Run third_party/sherpa-onnx-prebuilt/copy_prebuilts_to_sdk.js")
|
|
343
354
|
}
|
|
344
355
|
}
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
356
|
+
if (!sherpaOnnxDisableLibarchive) {
|
|
357
|
+
requiredLibarchiveSoFiles.each { soName ->
|
|
358
|
+
def soFile = new File(dir, soName)
|
|
359
|
+
if (!soFile.exists()) {
|
|
360
|
+
throw new RuntimeException("Missing required libarchive library '${soName}' for ABI ${abi}. Run third_party/libarchive_prebuilt/copy_prebuilts_to_sdk.js or ensure Maven com.xdcobra.sherpa:libarchive is available. Or set sherpaOnnxDisableLibarchive=true in gradle.properties (see docs/disable-libarchive.md).")
|
|
361
|
+
}
|
|
349
362
|
}
|
|
350
363
|
}
|
|
351
364
|
requiredOnnxruntimeJniSoFiles.each { soName ->
|
|
@@ -25,6 +25,14 @@ endif()
|
|
|
25
25
|
# Calculate path relative to project root: android/src/main/cpp -> android -> project root
|
|
26
26
|
get_filename_component(PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../.." ABSOLUTE)
|
|
27
27
|
|
|
28
|
+
# Optional: disable FFmpeg (gradle.properties: sherpaOnnxDisableFfmpeg=true). See docs/disable-ffmpeg.md.
|
|
29
|
+
set(USE_FFMPEG ON)
|
|
30
|
+
if(SHERPA_ONNX_DISABLE_FFMPEG)
|
|
31
|
+
set(USE_FFMPEG OFF)
|
|
32
|
+
message(STATUS "FFmpeg disabled (SHERPA_ONNX_DISABLE_FFMPEG=ON). convertAudioToWav16k/convertAudioToFormat will return an error at runtime.")
|
|
33
|
+
endif()
|
|
34
|
+
|
|
35
|
+
if(USE_FFMPEG)
|
|
28
36
|
# FFmpeg: libs and headers from (1) third_party/ffmpeg_prebuilt/android (local build) or (2) jniLibs + cpp/include/ffmpeg (GitHub release zip).
|
|
29
37
|
# PROJECT_ROOT = android/; repo root = PROJECT_ROOT/..
|
|
30
38
|
set(FFMPEG_PREBUILT_BASE "${PROJECT_ROOT}/../third_party/ffmpeg_prebuilt/android")
|
|
@@ -51,6 +59,14 @@ elseif(EXISTS "${FFMPEG_JNILIBS}/libavcodec.so")
|
|
|
51
59
|
else()
|
|
52
60
|
message(FATAL_ERROR "FFmpeg libs missing for ABI ${ANDROID_ABI}. Run third_party/ffmpeg_prebuilt/copy_prebuilts_to_sdk.js or ensure ANDROID_RELEASE_TAG release is downloaded.")
|
|
53
61
|
endif()
|
|
62
|
+
endif(USE_FFMPEG)
|
|
63
|
+
|
|
64
|
+
# Optional: disable libarchive (gradle.properties: sherpaOnnxDisableLibarchive=true). See docs/disable-libarchive.md.
|
|
65
|
+
set(USE_LIBARCHIVE ON)
|
|
66
|
+
if(SHERPA_ONNX_DISABLE_LIBARCHIVE)
|
|
67
|
+
set(USE_LIBARCHIVE OFF)
|
|
68
|
+
message(STATUS "libarchive disabled (SHERPA_ONNX_DISABLE_LIBARCHIVE=ON). extractTarBz2/cancelExtractTarBz2 will return an error at runtime.")
|
|
69
|
+
endif()
|
|
54
70
|
|
|
55
71
|
# Source files by domain (see docs/NATIVE_NAMING_CONVENTION.md). Move .cpp into subdirs; .h go alongside for include path.
|
|
56
72
|
set(SOURCES
|
|
@@ -68,6 +84,7 @@ set(SOURCES
|
|
|
68
84
|
crypto/sha256.cpp
|
|
69
85
|
)
|
|
70
86
|
|
|
87
|
+
if(USE_LIBARCHIVE)
|
|
71
88
|
# libarchive: (1) prebuilt from third_party/libarchive_prebuilt or jniLibs + cpp/include/libarchive (Maven/GitHub), or (2) build from source.
|
|
72
89
|
set(LIBARCHIVE_PREBUILT_BASE "${PROJECT_ROOT}/../third_party/libarchive_prebuilt/android")
|
|
73
90
|
set(LIBARCHIVE_PREBUILT_LIB "${LIBARCHIVE_PREBUILT_BASE}/${ANDROID_ABI}/lib")
|
|
@@ -96,6 +113,7 @@ else()
|
|
|
96
113
|
add_subdirectory(${PROJECT_ROOT}/../third_party/libarchive ${CMAKE_BINARY_DIR}/libarchive)
|
|
97
114
|
message(STATUS "libarchive built successfully")
|
|
98
115
|
endif()
|
|
116
|
+
endif(USE_LIBARCHIVE)
|
|
99
117
|
|
|
100
118
|
# Create shared library
|
|
101
119
|
add_library(sherpaonnx SHARED
|
|
@@ -105,7 +123,7 @@ add_library(sherpaonnx SHARED
|
|
|
105
123
|
# sherpa-onnx C-API: link by directory + library name only (no IMPORTED target).
|
|
106
124
|
# The .so and headers land in jniLibs and cpp/include/sherpa-onnx via:
|
|
107
125
|
# (1) local third_party build, (2) Maven AAR extraction (libs + c-api headers), or (3) GitHub release zip (last resort).
|
|
108
|
-
# If we used IMPORTED here, AGP would also copy .so from CMake
|
|
126
|
+
# If we used IMPORTED here, AGP would also copy .so from CMake --> duplicate in mergeNativeLibs.
|
|
109
127
|
set(SHERPA_ONNX_PREBUILT_BASE "${PROJECT_ROOT}/../third_party/sherpa-onnx-prebuilt/android")
|
|
110
128
|
set(SHERPA_ONNX_ABI_LIB "${SHERPA_ONNX_PREBUILT_BASE}/${ANDROID_ABI}/lib")
|
|
111
129
|
set(SHERPA_C_API_LIB_DIR "")
|
|
@@ -130,33 +148,49 @@ target_include_directories(sherpaonnx PRIVATE
|
|
|
130
148
|
${CMAKE_CURRENT_SOURCE_DIR}/jni/audio
|
|
131
149
|
${CMAKE_CURRENT_SOURCE_DIR}/jni/tts
|
|
132
150
|
${CMAKE_CURRENT_SOURCE_DIR}/include
|
|
133
|
-
${FFMPEG_INCLUDE_DIR}
|
|
134
151
|
)
|
|
135
|
-
if(
|
|
152
|
+
if(USE_FFMPEG)
|
|
153
|
+
target_include_directories(sherpaonnx PRIVATE ${FFMPEG_INCLUDE_DIR})
|
|
154
|
+
endif()
|
|
155
|
+
if(USE_LIBARCHIVE AND USE_LIBARCHIVE_PREBUILT AND LIBARCHIVE_INCLUDE_DIR)
|
|
136
156
|
target_include_directories(sherpaonnx PRIVATE ${LIBARCHIVE_INCLUDE_DIR})
|
|
137
157
|
endif()
|
|
138
|
-
|
|
158
|
+
# When USE_FFMPEG=OFF, do NOT define HAVE_FFMPEG at all. Then #ifdef HAVE_FFMPEG is false
|
|
159
|
+
# in C++ and the stub (#else) branches are used — no FFmpeg headers needed.
|
|
160
|
+
if(USE_FFMPEG)
|
|
161
|
+
target_compile_definitions(sherpaonnx PRIVATE HAVE_FFMPEG=1)
|
|
162
|
+
endif()
|
|
163
|
+
# When USE_LIBARCHIVE=OFF, do NOT define HAVE_LIBARCHIVE; extractTarBz2 stubs return an error.
|
|
164
|
+
if(USE_LIBARCHIVE)
|
|
165
|
+
target_compile_definitions(sherpaonnx PRIVATE HAVE_LIBARCHIVE=1)
|
|
166
|
+
endif()
|
|
139
167
|
|
|
140
168
|
# Link libraries (Kotlin API from AAR handles STT/TTS; C-API only for Zipvoice)
|
|
141
|
-
|
|
142
|
-
|
|
169
|
+
if(USE_FFMPEG)
|
|
170
|
+
target_link_directories(sherpaonnx PRIVATE ${FFMPEG_LIB_DIR})
|
|
171
|
+
endif()
|
|
172
|
+
if(USE_LIBARCHIVE AND USE_LIBARCHIVE_PREBUILT AND LIBARCHIVE_LIB_DIR)
|
|
143
173
|
target_link_directories(sherpaonnx PRIVATE ${LIBARCHIVE_LIB_DIR})
|
|
144
174
|
endif()
|
|
145
175
|
if(SHERPA_C_API_LIB_DIR)
|
|
146
176
|
target_link_directories(sherpaonnx PRIVATE ${SHERPA_C_API_LIB_DIR})
|
|
147
177
|
target_link_libraries(sherpaonnx PRIVATE sherpa-onnx-c-api)
|
|
148
178
|
endif()
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
)
|
|
179
|
+
# Link FFmpeg only when USE_FFMPEG and archive only when USE_LIBARCHIVE (otherwise libsherpaonnx.so would depend on the FFmpeg libraries / libarchive.so at load time).
|
|
180
|
+
if(USE_FFMPEG)
|
|
181
|
+
target_link_libraries(sherpaonnx
|
|
182
|
+
PRIVATE
|
|
183
|
+
avcodec
|
|
184
|
+
avformat
|
|
185
|
+
avutil
|
|
186
|
+
avfilter
|
|
187
|
+
swresample
|
|
188
|
+
)
|
|
189
|
+
endif()
|
|
190
|
+
if(USE_LIBARCHIVE)
|
|
191
|
+
target_link_libraries(sherpaonnx PRIVATE archive)
|
|
192
|
+
endif()
|
|
193
|
+
target_link_libraries(sherpaonnx PRIVATE log android)
|
|
160
194
|
|
|
161
195
|
# Compiler flags for our library
|
|
162
196
|
target_compile_options(sherpaonnx PRIVATE
|
|
@@ -6,8 +6,10 @@
|
|
|
6
6
|
*/
|
|
7
7
|
#include "sherpa-onnx-archive-helper.h"
|
|
8
8
|
|
|
9
|
+
#ifdef HAVE_LIBARCHIVE
|
|
9
10
|
#include <archive.h>
|
|
10
11
|
#include <archive_entry.h>
|
|
12
|
+
#endif
|
|
11
13
|
#include <array>
|
|
12
14
|
#include <atomic>
|
|
13
15
|
#include <cerrno>
|
|
@@ -23,6 +25,7 @@
|
|
|
23
25
|
std::atomic<bool> ArchiveHelper::cancel_requested_(false);
|
|
24
26
|
|
|
25
27
|
namespace {
|
|
28
|
+
#ifdef HAVE_LIBARCHIVE
|
|
26
29
|
struct ArchiveReadContext {
|
|
27
30
|
FILE* file = nullptr;
|
|
28
31
|
std::array<unsigned char, 64 * 1024> buffer{};
|
|
@@ -72,6 +75,7 @@ static void DrainRemainingAndClose(ArchiveReadContext* ctx) {
|
|
|
72
75
|
fclose(ctx->file);
|
|
73
76
|
ctx->file = nullptr;
|
|
74
77
|
}
|
|
78
|
+
#endif // HAVE_LIBARCHIVE
|
|
75
79
|
|
|
76
80
|
static std::string ToHex(const unsigned char* data, size_t size) {
|
|
77
81
|
static const char* kHex = "0123456789abcdef";
|
|
@@ -103,6 +107,15 @@ bool ArchiveHelper::ExtractTarBz2(
|
|
|
103
107
|
std::string* out_sha256) {
|
|
104
108
|
cancel_requested_.store(false);
|
|
105
109
|
|
|
110
|
+
#ifndef HAVE_LIBARCHIVE
|
|
111
|
+
(void)source_path;
|
|
112
|
+
(void)target_path;
|
|
113
|
+
(void)force;
|
|
114
|
+
(void)on_progress;
|
|
115
|
+
(void)out_sha256;
|
|
116
|
+
if (out_error) *out_error = "libarchive not available. Build with libarchive or set sherpaOnnxDisableLibarchive=false in gradle.properties. See docs/disable-libarchive.md.";
|
|
117
|
+
return false;
|
|
118
|
+
#else
|
|
106
119
|
// Validate source file exists
|
|
107
120
|
if (!std::filesystem::exists(source_path)) {
|
|
108
121
|
if (out_error) *out_error = "Source file does not exist";
|
|
@@ -360,6 +373,7 @@ bool ArchiveHelper::ExtractTarBz2(
|
|
|
360
373
|
}
|
|
361
374
|
|
|
362
375
|
return true;
|
|
376
|
+
#endif // HAVE_LIBARCHIVE
|
|
363
377
|
}
|
|
364
378
|
|
|
365
379
|
bool ArchiveHelper::ComputeFileSha256(
|
|
@@ -419,5 +419,21 @@ std::string FindLargestOnnxExcludingTokens(
|
|
|
419
419
|
return ChooseLargest(files, excludeTokens, false, false);
|
|
420
420
|
}
|
|
421
421
|
|
|
422
|
+
/**
 * Reports whether `word` occurs in `haystack` as a standalone token.
 *
 * An occurrence qualifies when the character immediately before it and the
 * character immediately after it are each a separator ('/', '-', '_', '.',
 * ' ' or NUL), where the start and the end of `haystack` also count as
 * boundaries. Overlapping occurrences are all examined.
 *
 * @param haystack Text to search.
 * @param word     Token to look for; an empty token never matches.
 * @return true if at least one occurrence is delimited on both sides.
 */
bool ContainsWord(const std::string& haystack, const std::string& word) {
  if (word.empty()) {
    return false;
  }

  // NUL doubles as the sentinel used for "no neighbour" at either end of the string.
  const auto isBoundary = [](char ch) {
    switch (ch) {
      case '\0':
      case '/':
      case '-':
      case '_':
      case '.':
      case ' ':
        return true;
      default:
        return false;
    }
  };

  // Resume the search one character past each rejected hit so overlapping matches are seen.
  for (size_t hit = haystack.find(word); hit != std::string::npos;
       hit = haystack.find(word, hit + 1)) {
    const size_t tail = hit + word.size();
    const char left = (hit > 0) ? haystack[hit - 1] : '\0';
    const char right = (tail < haystack.size()) ? haystack[tail] : '\0';
    if (isBoundary(left) && isBoundary(right)) {
      return true;
    }
  }
  return false;
}
|
|
437
|
+
|
|
422
438
|
} // namespace model_detect
|
|
423
439
|
} // namespace sherpaonnx
|
|
@@ -49,6 +49,9 @@ std::string FindLargestOnnxExcludingTokens(
|
|
|
49
49
|
const std::vector<std::string>& excludeTokens
|
|
50
50
|
);
|
|
51
51
|
|
|
52
|
+
/** Returns true if \p word appears in \p haystack as a standalone token, i.e. bounded on each side by a separator (/ - _ . space) or by the start/end of the string. An empty \p word never matches. */
|
|
53
|
+
bool ContainsWord(const std::string& haystack, const std::string& word);
|
|
54
|
+
|
|
52
55
|
} // namespace model_detect
|
|
53
56
|
} // namespace sherpaonnx
|
|
54
57
|
|
|
@@ -35,6 +35,7 @@ SttModelKind ParseSttModelType(const std::string& modelType) {
|
|
|
35
35
|
if (modelType == "omnilingual") return SttModelKind::kOmnilingual;
|
|
36
36
|
if (modelType == "medasr") return SttModelKind::kMedAsr;
|
|
37
37
|
if (modelType == "telespeech_ctc") return SttModelKind::kTeleSpeechCtc;
|
|
38
|
+
if (modelType == "tone_ctc") return SttModelKind::kToneCtc;
|
|
38
39
|
return SttModelKind::kUnknown;
|
|
39
40
|
}
|
|
40
41
|
|
|
@@ -163,6 +164,10 @@ SttDetectResult DetectSttModel(
|
|
|
163
164
|
bool isLikelyOmnilingual = modelDirLower.find("omnilingual") != std::string::npos;
|
|
164
165
|
bool isLikelyMedAsr = modelDirLower.find("medasr") != std::string::npos;
|
|
165
166
|
bool isLikelyTeleSpeech = modelDirLower.find("telespeech") != std::string::npos;
|
|
167
|
+
// Tone CTC: match "tone" only as standalone word (not e.g. "cantonese"); also accept "t-one" / "t_one"
|
|
168
|
+
bool isLikelyToneCtc = modelDirLower.find("t-one") != std::string::npos ||
|
|
169
|
+
modelDirLower.find("t_one") != std::string::npos ||
|
|
170
|
+
model_detect::ContainsWord(modelDirLower, "tone");
|
|
166
171
|
|
|
167
172
|
bool hasMoonshine = !moonshinePreprocessor.empty() && !moonshineUncachedDecoder.empty() &&
|
|
168
173
|
!moonshineCachedDecoder.empty() && !moonshineEncoder.empty();
|
|
@@ -173,6 +178,7 @@ SttDetectResult DetectSttModel(
|
|
|
173
178
|
bool hasOmnilingual = !ctcModelPath.empty() && isLikelyOmnilingual;
|
|
174
179
|
bool hasMedAsr = !ctcModelPath.empty() && isLikelyMedAsr;
|
|
175
180
|
bool hasTeleSpeechCtc = (!ctcModelPath.empty() || !paraformerModelPath.empty()) && isLikelyTeleSpeech;
|
|
181
|
+
bool hasToneCtc = !ctcModelPath.empty() && isLikelyToneCtc;
|
|
176
182
|
|
|
177
183
|
if (hasTransducer) {
|
|
178
184
|
if (isLikelyNemo || isLikelyTdt) {
|
|
@@ -224,6 +230,9 @@ SttDetectResult DetectSttModel(
|
|
|
224
230
|
if (hasTeleSpeechCtc) {
|
|
225
231
|
result.detectedModels.push_back({"telespeech_ctc", modelDir});
|
|
226
232
|
}
|
|
233
|
+
if (hasToneCtc) {
|
|
234
|
+
result.detectedModels.push_back({"tone_ctc", modelDir});
|
|
235
|
+
}
|
|
227
236
|
|
|
228
237
|
SttModelKind selected = SttModelKind::kUnknown;
|
|
229
238
|
|
|
@@ -247,7 +256,8 @@ SttDetectResult DetectSttModel(
|
|
|
247
256
|
return result;
|
|
248
257
|
}
|
|
249
258
|
if ((selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
|
|
250
|
-
selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc
|
|
259
|
+
selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc ||
|
|
260
|
+
selected == SttModelKind::kToneCtc) &&
|
|
251
261
|
ctcModelPath.empty()) {
|
|
252
262
|
result.error = "CTC model requested but model file not found in " + modelDir;
|
|
253
263
|
return result;
|
|
@@ -288,6 +298,10 @@ SttDetectResult DetectSttModel(
|
|
|
288
298
|
result.error = "TeleSpeech CTC model requested but model not found in " + modelDir;
|
|
289
299
|
return result;
|
|
290
300
|
}
|
|
301
|
+
if (selected == SttModelKind::kToneCtc && !hasToneCtc) {
|
|
302
|
+
result.error = "Tone CTC model requested but path does not contain 'tone' (as a word), 't-one', or 't_one' (e.g. sherpa-onnx-streaming-t-one-*) in " + modelDir;
|
|
303
|
+
return result;
|
|
304
|
+
}
|
|
291
305
|
} else {
|
|
292
306
|
if (hasTransducer) {
|
|
293
307
|
selected = (isLikelyNemo || isLikelyTdt) ? SttModelKind::kNemoTransducer : SttModelKind::kTransducer;
|
|
@@ -321,6 +335,8 @@ SttDetectResult DetectSttModel(
|
|
|
321
335
|
selected = SttModelKind::kMedAsr;
|
|
322
336
|
} else if (hasTeleSpeechCtc) {
|
|
323
337
|
selected = SttModelKind::kTeleSpeechCtc;
|
|
338
|
+
} else if (hasToneCtc) {
|
|
339
|
+
selected = SttModelKind::kToneCtc;
|
|
324
340
|
} else if (!ctcModelPath.empty()) {
|
|
325
341
|
selected = SttModelKind::kZipformerCtc;
|
|
326
342
|
}
|
|
@@ -346,7 +362,8 @@ SttDetectResult DetectSttModel(
|
|
|
346
362
|
} else if (selected == SttModelKind::kParaformer) {
|
|
347
363
|
result.paths.paraformerModel = paraformerModelPath;
|
|
348
364
|
} else if (selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
|
|
349
|
-
selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc
|
|
365
|
+
selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc ||
|
|
366
|
+
selected == SttModelKind::kToneCtc) {
|
|
350
367
|
result.paths.ctcModel = ctcModelPath;
|
|
351
368
|
} else if (selected == SttModelKind::kWhisper) {
|
|
352
369
|
result.paths.whisperEncoder = encoderPath;
|
|
@@ -30,6 +30,7 @@ const char* SttModelKindToString(SttModelKind k) {
|
|
|
30
30
|
case SttModelKind::kOmnilingual: return "omnilingual";
|
|
31
31
|
case SttModelKind::kMedAsr: return "medasr";
|
|
32
32
|
case SttModelKind::kTeleSpeechCtc: return "telespeech_ctc";
|
|
33
|
+
case SttModelKind::kToneCtc: return "tone_ctc";
|
|
33
34
|
default: return "unknown";
|
|
34
35
|
}
|
|
35
36
|
}
|