react-native-sherpa-onnx 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/SherpaOnnx.podspec +4 -1
- package/android/prebuilt-download.gradle +23 -23
- package/android/src/main/assets/model_licenses/asr-models-license-status.csv +1 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +23 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +9 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +51 -8
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +10 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +5 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +11 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +110 -35
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxExtractionNotificationHelper.kt +102 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +92 -18
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +22 -0
- package/ios/Resources/model_licenses/asr-models-license-status.csv +1 -0
- package/ios/SherpaOnnx+STT.mm +13 -1
- package/ios/SherpaOnnx.mm +87 -17
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +5 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +23 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +51 -7
- package/ios/model_detect/sherpa-onnx-model-detect.h +10 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +11 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +11 -1
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +30 -2
- package/ios/tts/sherpa-onnx-tts-wrapper.mm +16 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/download/postDownloadProcessing.js +17 -4
- package/lib/module/download/postDownloadProcessing.js.map +1 -1
- package/lib/module/extraction/extractTarBz2.js +2 -2
- package/lib/module/extraction/extractTarBz2.js.map +1 -1
- package/lib/module/extraction/extractTarZst.js +2 -2
- package/lib/module/extraction/extractTarZst.js.map +1 -1
- package/lib/module/extraction/index.js +10 -5
- package/lib/module/extraction/index.js.map +1 -1
- package/lib/module/stt/index.js +4 -2
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +2 -1
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/stt/types.js +3 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +4 -2
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +3 -1
- package/lib/module/tts/streaming.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +25 -9
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/download/postDownloadProcessing.d.ts +9 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts +2 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarZst.d.ts +2 -1
- package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -1
- package/lib/typescript/src/extraction/index.d.ts +1 -1
- package/lib/typescript/src/extraction/index.d.ts.map +1 -1
- package/lib/typescript/src/extraction/types.d.ts +12 -0
- package/lib/typescript/src/extraction/types.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +1 -1
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +16 -1
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
- package/package.json +1 -1
- package/scripts/ci/update_model_license_csv.sh +16 -16
- package/src/NativeSherpaOnnx.ts +37 -10
- package/src/download/postDownloadProcessing.ts +24 -1
- package/src/extraction/extractTarBz2.ts +7 -2
- package/src/extraction/extractTarZst.ts +7 -2
- package/src/extraction/index.ts +29 -6
- package/src/extraction/types.ts +16 -0
- package/src/stt/index.ts +8 -7
- package/src/stt/streaming.ts +7 -1
- package/src/stt/types.ts +18 -0
- package/src/tts/index.ts +7 -7
- package/src/tts/streaming.ts +6 -3
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
package/README.md
CHANGED
|
@@ -126,8 +126,8 @@ Full step-by-step: [Download manager – Setup (iOS & Android)](docs/download-ma
|
|
|
126
126
|
| Model quantization | ✅ **Supported** | [Model setup](./docs/model-setup.md) | Automatic detection and preference for quantized (int8) models. |
|
|
127
127
|
| Flexible model loading | ✅ **Supported** | [Model setup](./docs/model-setup.md) | Asset models, file system models, or auto-detection. |
|
|
128
128
|
| TypeScript | ✅ **Supported** | — | Full type definitions included. |
|
|
129
|
-
|
|
|
130
|
-
|
|
|
129
|
+
| Speech Enhancement | ❌ Not yet supported | [Enhancement](./docs/enhancement.md) | Scheduled for release 0.4.0 |
|
|
130
|
+
| Speaker Diarization | ❌ Not yet supported | [Diarization](./docs/diarization.md) | Scheduled for release 0.5.0 |
|
|
131
131
|
| Source Separation | ❌ Not yet supported | [Separation](./docs/separation.md) | Scheduled for release 0.6.0 |
|
|
132
132
|
| VAD (Voice Activity Detection) | ❌ Not yet supported | [VAD](./docs/vad.md) | Scheduled for release 0.7.0 |
|
|
133
133
|
|
|
@@ -148,6 +148,7 @@ Full step-by-step: [Download manager – Setup (iOS & Android)](docs/download-ma
|
|
|
148
148
|
|
|
149
149
|
| Model Type | `modelType` Value | Description | Download Links |
|
|
150
150
|
| ------------------------ | ----------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ |
|
|
151
|
+
| **Auto Detect** | `'auto'` | Automatically detects model layout/type from files in the model folder and picks the best supported STT type. | n/a |
|
|
151
152
|
| **Zipformer/Transducer** | `'transducer'` | Encoder–decoder–joiner (e.g. icefall). Good balance of speed and accuracy. Folder name should contain **zipformer** or **transducer** for auto-detection. | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html) |
|
|
152
153
|
| **LSTM Transducer** | `'transducer'` | Same layout as Zipformer (encoder–decoder–joiner). LSTM-based streaming ASR; detected as transducer. Folder name may contain **lstm**. | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/lstm-transducer-models.html) |
|
|
153
154
|
| **Paraformer** | `'paraformer'` | Single-model non-autoregressive ASR; fast and accurate. Detected by `model.onnx`; no folder token required. | [Download](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html) |
|
|
@@ -172,6 +173,7 @@ For **real-time (streaming) recognition** from a microphone or audio stream, use
|
|
|
172
173
|
|
|
173
174
|
| Model Type | `modelType` Value | Description | Download Links |
|
|
174
175
|
| ---------------- | ----------------- | ---------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
|
|
176
|
+
| **Auto Detect** | `'auto'` | Automatically detects the TTS model layout from files in the model folder and selects the matching supported type. | n/a |
|
|
175
177
|
| **VITS** | `'vits'` | Fast, high-quality TTS (Piper, Coqui, MeloTTS, MMS). Folder name should contain **vits** if used with other voice models. | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
|
|
176
178
|
| **Matcha** | `'matcha'` | High-quality acoustic model + vocoder. Detected by acoustic_model + vocoder; no folder token required. | [Download](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html) |
|
|
177
179
|
| **Kokoro** | `'kokoro'` | Multi-speaker, multi-language. Folder name should contain **kokoro** (not kitten) for auto-detection. | [Download](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) |
|
package/SherpaOnnx.podspec
CHANGED
|
@@ -140,7 +140,10 @@ Pod::Spec.new do |s|
|
|
|
140
140
|
s.libraries = "c++", "z", "iconv", "bz2"
|
|
141
141
|
|
|
142
142
|
# Per-release-model license metadata (synced from CI; same CSV as android/src/main/assets/model_licenses/).
|
|
143
|
-
|
|
143
|
+
# Use resource_bundles so assets are packaged reliably across CocoaPods integration modes.
|
|
144
|
+
s.resource_bundles = {
|
|
145
|
+
"SherpaOnnxResources" => ["ios/Resources/model_licenses/*.csv"]
|
|
146
|
+
}
|
|
144
147
|
|
|
145
148
|
install_modules_dependencies(s)
|
|
146
149
|
end
|
|
@@ -222,14 +222,14 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
222
222
|
sherpaVersionFile.text = currentSherpaVersion
|
|
223
223
|
sherpaResolved = true
|
|
224
224
|
println "[sherpa-onnx] jniLibs (*.so per ABI) .............. MAVEN_AAR ${aar.name}"
|
|
225
|
-
println "[sherpa-onnx] install: jni/<abi>/*.so
|
|
226
|
-
println "[sherpa-onnx] C headers (sherpa-onnx) ............. ${copiedHeaders ? 'MAVEN_AAR c-api/**
|
|
225
|
+
println "[sherpa-onnx] install: jni/<abi>/*.so --> ${jniLibsHuman}/<abi>/"
|
|
226
|
+
println "[sherpa-onnx] C headers (sherpa-onnx) ............. ${copiedHeaders ? 'MAVEN_AAR c-api/** --> ' + includeSherpaDir : 'unchanged (no c-api/ in AAR; existing tree kept)'}"
|
|
227
227
|
println "[sherpa-onnx] version stamp ...................... written ${sherpaVersionFile.name}=${currentSherpaVersion}"
|
|
228
228
|
} else {
|
|
229
|
-
println "[sherpa-onnx] MAVEN_AAR: sherpaOnnxAar empty
|
|
229
|
+
println "[sherpa-onnx] MAVEN_AAR: sherpaOnnxAar empty --> trying GITHUB_RELEASE"
|
|
230
230
|
}
|
|
231
231
|
} catch (Exception e) {
|
|
232
|
-
println "[sherpa-onnx] MAVEN_AAR failed: ${e.message}
|
|
232
|
+
println "[sherpa-onnx] MAVEN_AAR failed: ${e.message} --> trying GITHUB_RELEASE"
|
|
233
233
|
}
|
|
234
234
|
}
|
|
235
235
|
} else {
|
|
@@ -290,14 +290,14 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
290
290
|
ffmpegVersionFile.text = currentFfmpegVersion
|
|
291
291
|
ffmpegResolved = true
|
|
292
292
|
println "[FFmpeg] jniLibs .................................. MAVEN_AAR ${aar.name}"
|
|
293
|
-
println "[FFmpeg] install: jni/<abi>/*.so
|
|
294
|
-
println "[FFmpeg] C headers ................................ ${copiedHdr ? 'MAVEN_AAR include/**
|
|
293
|
+
println "[FFmpeg] install: jni/<abi>/*.so --> ${jniLibsHuman}/<abi>/"
|
|
294
|
+
println "[FFmpeg] C headers ................................ ${copiedHdr ? 'MAVEN_AAR include/** --> ' + ffmpegIncludeDir : 'unchanged (no include/ in AAR)'}"
|
|
295
295
|
println "[FFmpeg] version stamp ............................ written ${ffmpegVersionFile.name}=${currentFfmpegVersion}"
|
|
296
296
|
} else {
|
|
297
|
-
println "[FFmpeg] MAVEN_AAR: ffmpegAar empty
|
|
297
|
+
println "[FFmpeg] MAVEN_AAR: ffmpegAar empty --> trying GITHUB_RELEASE"
|
|
298
298
|
}
|
|
299
299
|
} catch (Exception e) {
|
|
300
|
-
println "[FFmpeg] MAVEN_AAR failed: ${e.message}
|
|
300
|
+
println "[FFmpeg] MAVEN_AAR failed: ${e.message} --> trying GITHUB_RELEASE"
|
|
301
301
|
}
|
|
302
302
|
}
|
|
303
303
|
} else {
|
|
@@ -358,14 +358,14 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
358
358
|
libarchiveVersionFile.text = currentLibarchiveVersion
|
|
359
359
|
libarchiveResolved = true
|
|
360
360
|
println "[libarchive] jniLibs .............................. MAVEN_AAR ${aar.name}"
|
|
361
|
-
println "[libarchive] install: jni/<abi>/*.so
|
|
362
|
-
println "[libarchive] C headers ............................ ${copiedHdr ? 'MAVEN_AAR include/**
|
|
361
|
+
println "[libarchive] install: jni/<abi>/*.so --> ${jniLibsHuman}/<abi>/"
|
|
362
|
+
println "[libarchive] C headers ............................ ${copiedHdr ? 'MAVEN_AAR include/** --> ' + libarchiveIncludeDir : 'unchanged (no include/ in AAR)'}"
|
|
363
363
|
println "[libarchive] version stamp ........................ written ${libarchiveVersionFile.name}=${currentLibarchiveVersion}"
|
|
364
364
|
} else {
|
|
365
|
-
println "[libarchive] MAVEN_AAR: libarchiveAar empty
|
|
365
|
+
println "[libarchive] MAVEN_AAR: libarchiveAar empty --> trying GITHUB_RELEASE"
|
|
366
366
|
}
|
|
367
367
|
} catch (Exception e) {
|
|
368
|
-
println "[libarchive] MAVEN_AAR failed: ${e.message}
|
|
368
|
+
println "[libarchive] MAVEN_AAR failed: ${e.message} --> trying GITHUB_RELEASE"
|
|
369
369
|
}
|
|
370
370
|
}
|
|
371
371
|
} else {
|
|
@@ -415,7 +415,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
415
415
|
}
|
|
416
416
|
ortJniResolved = true
|
|
417
417
|
println "[onnxruntime] libonnxruntime4j_jni.so .......... MAVEN_AAR ${aar.name}"
|
|
418
|
-
println "[onnxruntime] install: per ABI
|
|
418
|
+
println "[onnxruntime] install: per ABI --> ${jniLibsHuman}/<abi>/ (only JNI bridge; libonnxruntime.so from sherpa prebuilts)"
|
|
419
419
|
} else {
|
|
420
420
|
println "[onnxruntime] MAVEN_AAR: onnxruntimeAar empty — libonnxruntime4j_jni.so still missing"
|
|
421
421
|
}
|
|
@@ -436,7 +436,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
436
436
|
def needLibarchive = !sherpaOnnxDisableLibarchive && !libarchiveResolved
|
|
437
437
|
def needSherpa = !sherpaResolved
|
|
438
438
|
if (needFfmpeg || needLibarchive || needSherpa) {
|
|
439
|
-
println "[prebuilt] GITHUB_RELEASE: skipped (no repo). Set -PprebuiltGitHubRepo=owner/repo or git remote origin
|
|
439
|
+
println "[prebuilt] GITHUB_RELEASE: skipped (no repo). Set -PprebuiltGitHubRepo=owner/repo or git remote origin --> github.com"
|
|
440
440
|
println "[prebuilt] still need: sherpa=${needSherpa}, ffmpeg=${needFfmpeg}, libarchive=${needLibarchive}"
|
|
441
441
|
def diag = [
|
|
442
442
|
"prebuiltGitHubRepo=${project.findProperty('prebuiltGitHubRepo') ?: '(not set)'}",
|
|
@@ -492,7 +492,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
492
492
|
ffmpegVersionFile.text = currentFfmpegVersion
|
|
493
493
|
println "[FFmpeg] jniLibs + C headers ...................... GITHUB_RELEASE tag=${tag}"
|
|
494
494
|
println "[FFmpeg] url: ${url}"
|
|
495
|
-
println "[FFmpeg] install: <abi>/*.so
|
|
495
|
+
println "[FFmpeg] install: <abi>/*.so --> ${jniLibsHuman}/"
|
|
496
496
|
println "[FFmpeg] version stamp .......................... written ${ffmpegVersionFile.name}=${currentFfmpegVersion}"
|
|
497
497
|
}
|
|
498
498
|
|
|
@@ -522,7 +522,7 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
522
522
|
libarchiveVersionFile.text = currentLibarchiveVersion
|
|
523
523
|
println "[libarchive] jniLibs + C headers .................. GITHUB_RELEASE tag=${tag}"
|
|
524
524
|
println "[libarchive] url: ${url}"
|
|
525
|
-
println "[libarchive] install: <abi>/*.so
|
|
525
|
+
println "[libarchive] install: <abi>/*.so --> ${jniLibsHuman}/"
|
|
526
526
|
println "[libarchive] version stamp ........................ written ${libarchiveVersionFile.name}=${currentLibarchiveVersion}"
|
|
527
527
|
}
|
|
528
528
|
|
|
@@ -560,8 +560,8 @@ project.tasks.register("downloadNativeLibsIfNeeded") {
|
|
|
560
560
|
sherpaVersionFile.text = currentSherpaVersion
|
|
561
561
|
println "[sherpa-onnx] jniLibs + C headers ................. GITHUB_RELEASE tag=${tag}"
|
|
562
562
|
println "[sherpa-onnx] url: ${url}"
|
|
563
|
-
println "[sherpa-onnx] install: <abi>/*.so
|
|
564
|
-
println "[sherpa-onnx] classes.jar ......................... ${sherpaJavaJar.exists() ? 'GITHUB_RELEASE
|
|
563
|
+
println "[sherpa-onnx] install: <abi>/*.so --> ${jniLibsHuman}/"
|
|
564
|
+
println "[sherpa-onnx] classes.jar ......................... ${sherpaJavaJar.exists() ? 'GITHUB_RELEASE --> ' + sherpaOnnxClassesDir : 'not in zip (use extractSherpaOnnxClasses)'}"
|
|
565
565
|
println "[sherpa-onnx] version stamp ...................... written ${sherpaVersionFile.name}=${currentSherpaVersion}"
|
|
566
566
|
}
|
|
567
567
|
println ""
|
|
@@ -612,7 +612,7 @@ project.afterEvaluate {
|
|
|
612
612
|
project.tasks.findByName('preBuild')?.dependsOn(project.tasks.findByName('checkJniLibs'))
|
|
613
613
|
}
|
|
614
614
|
|
|
615
|
-
// sherpa-onnx classes.jar: resolution order THIRD_PARTY
|
|
615
|
+
// sherpa-onnx classes.jar: resolution order THIRD_PARTY --> MAVEN_AAR --> GITHUB_EXTRACT
|
|
616
616
|
def sherpaLocalJar = file("${project.projectDir.parent}/third_party/sherpa-onnx-prebuilt/android/java/classes.jar")
|
|
617
617
|
def sherpaExtractedJar = file("${project.buildDir}/prebuilt-downloads/sherpa-onnx-extract/java/classes.jar")
|
|
618
618
|
|
|
@@ -627,7 +627,7 @@ project.tasks.register("extractSherpaOnnxClasses") {
|
|
|
627
627
|
copy { from sherpaLocalJar; into sherpaOnnxClassesDir }
|
|
628
628
|
println "[prebuilt] extractSherpaOnnxClasses"
|
|
629
629
|
println "[sherpa-onnx] classes.jar (Kotlin API) .......... THIRD_PARTY"
|
|
630
|
-
println "[sherpa-onnx] ${sherpaLocalJar.absolutePath}
|
|
630
|
+
println "[sherpa-onnx] ${sherpaLocalJar.absolutePath} --> ${sherpaOnnxClassesDir}"
|
|
631
631
|
return
|
|
632
632
|
}
|
|
633
633
|
def aarFiles = project.configurations.sherpaOnnxAar.files
|
|
@@ -640,14 +640,14 @@ project.tasks.register("extractSherpaOnnxClasses") {
|
|
|
640
640
|
}
|
|
641
641
|
println "[prebuilt] extractSherpaOnnxClasses"
|
|
642
642
|
println "[sherpa-onnx] classes.jar (Kotlin API) .......... MAVEN_AAR ${aar.name}"
|
|
643
|
-
println "[sherpa-onnx] classes.jar
|
|
643
|
+
println "[sherpa-onnx] classes.jar --> ${sherpaOnnxClassesDir}"
|
|
644
644
|
return
|
|
645
645
|
}
|
|
646
646
|
if (sherpaExtractedJar.exists()) {
|
|
647
647
|
copy { from sherpaExtractedJar; into sherpaOnnxClassesDir }
|
|
648
648
|
println "[prebuilt] extractSherpaOnnxClasses"
|
|
649
649
|
println "[sherpa-onnx] classes.jar (Kotlin API) .......... GITHUB_EXTRACT"
|
|
650
|
-
println "[sherpa-onnx] ${sherpaExtractedJar.absolutePath}
|
|
650
|
+
println "[sherpa-onnx] ${sherpaExtractedJar.absolutePath} --> ${sherpaOnnxClassesDir}"
|
|
651
651
|
return
|
|
652
652
|
}
|
|
653
653
|
throw new RuntimeException(
|
|
@@ -678,7 +678,7 @@ project.tasks.register("extractOnnxruntimeClasses") {
|
|
|
678
678
|
}
|
|
679
679
|
println "[prebuilt] extractOnnxruntimeClasses"
|
|
680
680
|
println "[onnxruntime] classes.jar (Java API) ........... MAVEN_AAR ${aar.name}"
|
|
681
|
-
println "[onnxruntime] renamed to onnxruntime-classes.jar
|
|
681
|
+
println "[onnxruntime] renamed to onnxruntime-classes.jar --> ${onnxruntimeClassesDir}"
|
|
682
682
|
return
|
|
683
683
|
}
|
|
684
684
|
throw new RuntimeException(
|
|
@@ -397,6 +397,7 @@ sherpa-onnx-rk3576-streaming-zipformer-en-2023-06-26.tar.bz2,apache-2.0,yes,high
|
|
|
397
397
|
sherpa-onnx-rk3568-streaming-zipformer-en-2023-06-26.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
|
|
398
398
|
sherpa-onnx-rk3566-streaming-zipformer-en-2023-06-26.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
|
|
399
399
|
sherpa-onnx-rk3562-streaming-zipformer-en-2023-06-26.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
|
|
400
|
+
sherpa-onnx-qwen3-asr-0.6B-int8-2026-03-25.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/Qwen/Qwen3-ASR-0.6B
|
|
400
401
|
sherpa-onnx-rk3588-streaming-zipformer-small-bilingual-zh-en-2023-02-16.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
|
|
401
402
|
sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
|
|
402
403
|
sherpa-onnx-rk3576-streaming-zipformer-small-bilingual-zh-en-2023-02-16.tar.bz2,apache-2.0,yes,high,manual,https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
|
|
@@ -389,5 +389,28 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
|
|
|
389
389
|
return candidates;
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
+
bool Qwen3TokenizerDirHasVocabAndMerges(
|
|
393
|
+
const std::vector<FileEntry>& files,
|
|
394
|
+
const std::string& dirRaw
|
|
395
|
+
) {
|
|
396
|
+
std::string dir = dirRaw;
|
|
397
|
+
while (!dir.empty() && (dir.back() == '/' || dir.back() == '\\'))
|
|
398
|
+
dir.pop_back();
|
|
399
|
+
if (dir.empty()) return false;
|
|
400
|
+
bool hasVocab = false;
|
|
401
|
+
bool hasMerges = false;
|
|
402
|
+
const std::string prefix = dir + "/";
|
|
403
|
+
for (const auto& e : files) {
|
|
404
|
+
if (e.path.size() <= prefix.size()) continue;
|
|
405
|
+
if (e.path.compare(0, prefix.size(), prefix) != 0) continue;
|
|
406
|
+
std::string rest = e.path.substr(prefix.size());
|
|
407
|
+
if (rest.find('/') != std::string::npos || rest.find('\\') != std::string::npos) continue;
|
|
408
|
+
if (e.nameLower == "vocab.json") hasVocab = true;
|
|
409
|
+
if (e.nameLower == "merges.txt") hasMerges = true;
|
|
410
|
+
}
|
|
411
|
+
if (hasVocab && hasMerges) return true;
|
|
412
|
+
return FileExists(dir + "/vocab.json") && FileExists(dir + "/merges.txt");
|
|
413
|
+
}
|
|
414
|
+
|
|
392
415
|
} // namespace model_detect
|
|
393
416
|
} // namespace sherpaonnx
|
|
@@ -88,6 +88,15 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
|
|
|
88
88
|
const std::string& rootDir
|
|
89
89
|
);
|
|
90
90
|
|
|
91
|
+
/**
|
|
92
|
+
* True if `dir` contains vocab.json and merges.txt: listed in `files` (fixture / synthetic trees)
|
|
93
|
+
* or present on disk. Used for Qwen3-ASR tokenizer directory detection.
|
|
94
|
+
*/
|
|
95
|
+
bool Qwen3TokenizerDirHasVocabAndMerges(
|
|
96
|
+
const std::vector<FileEntry>& files,
|
|
97
|
+
const std::string& dir
|
|
98
|
+
);
|
|
99
|
+
|
|
91
100
|
} // namespace model_detect
|
|
92
101
|
} // namespace sherpaonnx
|
|
93
102
|
|
|
@@ -61,6 +61,7 @@ static const char* KindToName(SttModelKind k) {
|
|
|
61
61
|
case SttModelKind::kZipformerCtc: return "zipformer_ctc";
|
|
62
62
|
case SttModelKind::kWhisper: return "whisper";
|
|
63
63
|
case SttModelKind::kFunAsrNano: return "funasr_nano";
|
|
64
|
+
case SttModelKind::kQwen3Asr: return "qwen3_asr";
|
|
64
65
|
case SttModelKind::kFireRedAsr: return "fire_red_asr";
|
|
65
66
|
case SttModelKind::kMoonshine: return "moonshine";
|
|
66
67
|
case SttModelKind::kMoonshineV2: return "moonshine_v2";
|
|
@@ -88,6 +89,7 @@ SttModelKind ParseSttModelType(const std::string& modelType) {
|
|
|
88
89
|
if (modelType == "zipformer_ctc" || modelType == "ctc") return SttModelKind::kZipformerCtc;
|
|
89
90
|
if (modelType == "whisper") return SttModelKind::kWhisper;
|
|
90
91
|
if (modelType == "funasr_nano") return SttModelKind::kFunAsrNano;
|
|
92
|
+
if (modelType == "qwen3_asr") return SttModelKind::kQwen3Asr;
|
|
91
93
|
if (modelType == "fire_red_asr") return SttModelKind::kFireRedAsr;
|
|
92
94
|
if (modelType == "moonshine") return SttModelKind::kMoonshine;
|
|
93
95
|
if (modelType == "moonshine_v2") return SttModelKind::kMoonshineV2;
|
|
@@ -126,6 +128,8 @@ static bool CapabilitySupportsKind(
|
|
|
126
128
|
return cap.hasWhisper;
|
|
127
129
|
case SttModelKind::kFunAsrNano:
|
|
128
130
|
return cap.hasFunAsrNano;
|
|
131
|
+
case SttModelKind::kQwen3Asr:
|
|
132
|
+
return cap.hasQwen3Asr;
|
|
129
133
|
case SttModelKind::kFireRedAsr:
|
|
130
134
|
return cap.hasFireRedAsr;
|
|
131
135
|
case SttModelKind::kMoonshine:
|
|
@@ -189,6 +193,8 @@ static std::vector<SttModelKind> GetKindsFromDirName(const std::string& modelDir
|
|
|
189
193
|
add(SttModelKind::kTransducer);
|
|
190
194
|
add(SttModelKind::kZipformerCtc);
|
|
191
195
|
}
|
|
196
|
+
if (lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos)
|
|
197
|
+
add(SttModelKind::kQwen3Asr);
|
|
192
198
|
if (lower.find("funasr") != std::string::npos)
|
|
193
199
|
add(SttModelKind::kFunAsrNano);
|
|
194
200
|
if (lower.find("canary") != std::string::npos)
|
|
@@ -249,6 +255,19 @@ static SttCandidatePaths GatherSttCandidatePaths(
|
|
|
249
255
|
p.funasrTokenizerDir = vocabInSubdir.substr(0, lastSlash);
|
|
250
256
|
}
|
|
251
257
|
}
|
|
258
|
+
p.qwen3ConvFrontend = FindOnnxByAnyToken(files, {"conv_frontend"}, preferInt8);
|
|
259
|
+
{
|
|
260
|
+
for (const auto& entry : files) {
|
|
261
|
+
if (entry.nameLower != "tokenizer_config.json") continue;
|
|
262
|
+
size_t slash = entry.path.find_last_of("/\\");
|
|
263
|
+
if (slash == std::string::npos) continue;
|
|
264
|
+
std::string dir = entry.path.substr(0, slash);
|
|
265
|
+
if (Qwen3TokenizerDirHasVocabAndMerges(files, dir)) {
|
|
266
|
+
p.qwen3TokenizerDir = dir;
|
|
267
|
+
break;
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
252
271
|
p.moonshinePreprocessor = FindOnnxByAnyToken(files, {"preprocess", "preprocessor"}, preferInt8);
|
|
253
272
|
p.moonshineEncoder = FindOnnxByAnyToken(files, {"encode", "encoder_model"}, preferInt8);
|
|
254
273
|
p.moonshineUncachedDecoder = FindOnnxByAnyToken(files, {"uncached_decode", "uncached"}, preferInt8);
|
|
@@ -258,7 +277,8 @@ static SttCandidatePaths GatherSttCandidatePaths(
|
|
|
258
277
|
static const std::vector<std::string> modelExcludes = {
|
|
259
278
|
"encoder", "decoder", "joiner", "vocoder", "acoustic", "embedding", "llm",
|
|
260
279
|
"encoder_adaptor", "encoder-adaptor", "encoder_model", "decoder_model",
|
|
261
|
-
"merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached"
|
|
280
|
+
"merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached",
|
|
281
|
+
"conv_frontend"
|
|
262
282
|
};
|
|
263
283
|
p.paraformerModel = FindOnnxByAnyToken(files, {"model"}, preferInt8);
|
|
264
284
|
if (!p.paraformerModel.empty()) {
|
|
@@ -302,6 +322,7 @@ static SttPathHints GetSttPathHints(const std::string& modelDir) {
|
|
|
302
322
|
h.isLikelyWenetCtc = lower.find("wenet") != std::string::npos;
|
|
303
323
|
h.isLikelySenseVoice = lower.find("sense") != std::string::npos || lower.find("sensevoice") != std::string::npos;
|
|
304
324
|
h.isLikelyFunAsrNano = lower.find("funasr") != std::string::npos || lower.find("funasr-nano") != std::string::npos;
|
|
325
|
+
h.isLikelyQwen3Asr = lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos;
|
|
305
326
|
h.isLikelyZipformer = lower.find("zipformer") != std::string::npos;
|
|
306
327
|
h.isLikelyMoonshine = lower.find("moonshine") != std::string::npos;
|
|
307
328
|
h.isLikelyDolphin = lower.find("dolphin") != std::string::npos;
|
|
@@ -404,7 +425,9 @@ static SttCapabilities ComputeSttCapabilities(const SttCandidatePaths& paths, co
|
|
|
404
425
|
c.hasTransducer = !paths.encoder.empty() && !paths.decoder.empty() && !paths.joiner.empty();
|
|
405
426
|
bool hasWhisperEnc = !paths.encoder.empty();
|
|
406
427
|
bool hasWhisperDec = !paths.decoder.empty();
|
|
407
|
-
|
|
428
|
+
bool hasQwen3Tok = !paths.qwen3TokenizerDir.empty();
|
|
429
|
+
c.hasQwen3Asr = !paths.qwen3ConvFrontend.empty() && hasWhisperEnc && hasWhisperDec && hasQwen3Tok;
|
|
430
|
+
c.hasWhisper = hasWhisperEnc && hasWhisperDec && paths.joiner.empty() && !c.hasQwen3Asr;
|
|
408
431
|
bool hasFunAsrTok = !paths.funasrTokenizerDir.empty();
|
|
409
432
|
c.hasFunAsrNano = !paths.funasrEncoderAdaptor.empty() && !paths.funasrLLM.empty() &&
|
|
410
433
|
!paths.funasrEmbedding.empty() && hasFunAsrTok;
|
|
@@ -446,6 +469,7 @@ static void CollectDetectedModels(
|
|
|
446
469
|
out.push_back({"paraformer", modelDir});
|
|
447
470
|
}
|
|
448
471
|
if (cap.hasWhisper) out.push_back({"whisper", modelDir});
|
|
472
|
+
if (cap.hasQwen3Asr) out.push_back({"qwen3_asr", modelDir});
|
|
449
473
|
if (cap.hasFunAsrNano) out.push_back({"funasr_nano", modelDir});
|
|
450
474
|
if (cap.hasMoonshine) out.push_back({"moonshine", modelDir});
|
|
451
475
|
if (cap.hasMoonshineV2) out.push_back({"moonshine_v2", modelDir});
|
|
@@ -507,6 +531,10 @@ static SttModelKind ResolveSttKind(
|
|
|
507
531
|
outError = "FunASR Nano model requested but required files not found in " + modelDir;
|
|
508
532
|
return SttModelKind::kUnknown;
|
|
509
533
|
}
|
|
534
|
+
if (selected == SttModelKind::kQwen3Asr && !cap.hasQwen3Asr) {
|
|
535
|
+
outError = "Qwen3-ASR model requested but conv_frontend/encoder/decoder/tokenizer not found in " + modelDir;
|
|
536
|
+
return SttModelKind::kUnknown;
|
|
537
|
+
}
|
|
510
538
|
if (selected == SttModelKind::kMoonshine && !cap.hasMoonshine) {
|
|
511
539
|
outError = "Moonshine v1 model requested but preprocess/encode/uncached_decode/cached_decode not found in " + modelDir;
|
|
512
540
|
return SttModelKind::kUnknown;
|
|
@@ -573,7 +601,9 @@ static SttModelKind ResolveSttKind(
|
|
|
573
601
|
if (!paths.paraformerModel.empty()) return SttModelKind::kParaformer;
|
|
574
602
|
if (cap.hasCanary) return SttModelKind::kCanary;
|
|
575
603
|
if (cap.hasFireRedAsr) return SttModelKind::kFireRedAsr;
|
|
604
|
+
if (cap.hasQwen3Asr && hints.isLikelyQwen3Asr) return SttModelKind::kQwen3Asr;
|
|
576
605
|
if (cap.hasWhisper) return SttModelKind::kWhisper;
|
|
606
|
+
if (cap.hasQwen3Asr) return SttModelKind::kQwen3Asr;
|
|
577
607
|
if (cap.hasFunAsrNano) return SttModelKind::kFunAsrNano;
|
|
578
608
|
if (cap.hasMoonshineV2) return SttModelKind::kMoonshineV2;
|
|
579
609
|
if (cap.hasDolphin) return SttModelKind::kDolphin;
|
|
@@ -618,6 +648,12 @@ static void ApplyPathsForSttKind(SttModelKind kind, const SttCandidatePaths& can
|
|
|
618
648
|
resultPaths.funasrEmbedding = candidate.funasrEmbedding;
|
|
619
649
|
resultPaths.funasrTokenizer = candidate.funasrTokenizerDir;
|
|
620
650
|
break;
|
|
651
|
+
case SttModelKind::kQwen3Asr:
|
|
652
|
+
resultPaths.qwen3ConvFrontend = candidate.qwen3ConvFrontend;
|
|
653
|
+
resultPaths.qwen3Encoder = candidate.encoder;
|
|
654
|
+
resultPaths.qwen3Decoder = candidate.decoder;
|
|
655
|
+
resultPaths.qwen3Tokenizer = candidate.qwen3TokenizerDir;
|
|
656
|
+
break;
|
|
621
657
|
case SttModelKind::kMoonshine:
|
|
622
658
|
resultPaths.moonshinePreprocessor = candidate.moonshinePreprocessor;
|
|
623
659
|
resultPaths.moonshineEncoder = candidate.moonshineEncoder;
|
|
@@ -711,13 +747,13 @@ SttDetectResult DetectSttModel(
|
|
|
711
747
|
EmptyOrPath(candidate.encoder), EmptyOrPath(candidate.decoder));
|
|
712
748
|
LOGI("DetectSttModel: funasr encoderAdaptor=%s llm=%s embedding=%s tokenizerDir=%s",
|
|
713
749
|
EmptyOrPath(candidate.funasrEncoderAdaptor), EmptyOrPath(candidate.funasrLLM), EmptyOrPath(candidate.funasrEmbedding), EmptyOrPath(candidate.funasrTokenizerDir));
|
|
714
|
-
LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
|
|
750
|
+
LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasQwen3Asr=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
|
|
715
751
|
(int)cap.hasTransducer, (int)cap.hasWhisper, (int)cap.hasMoonshine, (int)cap.hasMoonshineV2,
|
|
716
|
-
(int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
|
|
752
|
+
(int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasQwen3Asr, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
|
|
717
753
|
(int)cap.hasCanary, (int)cap.hasOmnilingual, (int)cap.hasMedAsr, (int)cap.hasTeleSpeechCtc, (int)cap.hasToneCtc);
|
|
718
|
-
LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
|
|
754
|
+
LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyQwen3Asr=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
|
|
719
755
|
(int)hints.isLikelyNemo, (int)hints.isLikelyTdt, (int)hints.isLikelyWenetCtc, (int)hints.isLikelySenseVoice,
|
|
720
|
-
(int)hints.isLikelyFunAsrNano, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
|
|
756
|
+
(int)hints.isLikelyFunAsrNano, (int)hints.isLikelyQwen3Asr, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
|
|
721
757
|
(int)hints.isLikelyFireRedAsr, (int)hints.isLikelyCanary, (int)hints.isLikelyOmnilingual, (int)hints.isLikelyMedAsr,
|
|
722
758
|
(int)hints.isLikelyTeleSpeech, (int)hints.isLikelyToneCtc, (int)hints.isLikelyParaformer, (int)hints.isLikelyVad, (int)hints.isLikelyTdnn);
|
|
723
759
|
}
|
|
@@ -747,7 +783,8 @@ SttDetectResult DetectSttModel(
|
|
|
747
783
|
}
|
|
748
784
|
|
|
749
785
|
LOGI("DetectSttModel: selected kind=%d (%s)", static_cast<int>(result.selectedKind), KindToName(result.selectedKind));
|
|
750
|
-
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano
|
|
786
|
+
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
|
|
787
|
+
result.selectedKind != SttModelKind::kQwen3Asr);
|
|
751
788
|
ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
|
|
752
789
|
|
|
753
790
|
if (!candidate.tokens.empty() && FileExists(candidate.tokens)) {
|
|
@@ -808,6 +845,11 @@ SttDetectResult DetectSttModel(
|
|
|
808
845
|
EmptyOrPath(result.paths.funasrEncoderAdaptor), EmptyOrPath(result.paths.funasrLLM),
|
|
809
846
|
EmptyOrPath(result.paths.funasrEmbedding), EmptyOrPath(result.paths.funasrTokenizer));
|
|
810
847
|
break;
|
|
848
|
+
case SttModelKind::kQwen3Asr:
|
|
849
|
+
LOGI("DetectSttModel: paths set qwen3_asr conv=%s encoder=%s decoder=%s tokenizer=%s",
|
|
850
|
+
EmptyOrPath(result.paths.qwen3ConvFrontend), EmptyOrPath(result.paths.qwen3Encoder),
|
|
851
|
+
EmptyOrPath(result.paths.qwen3Decoder), EmptyOrPath(result.paths.qwen3Tokenizer));
|
|
852
|
+
break;
|
|
811
853
|
default:
|
|
812
854
|
break;
|
|
813
855
|
}
|
|
@@ -854,7 +896,8 @@ SttDetectResult DetectSttModelFromFileList(
|
|
|
854
896
|
return result;
|
|
855
897
|
}
|
|
856
898
|
|
|
857
|
-
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano
|
|
899
|
+
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
|
|
900
|
+
result.selectedKind != SttModelKind::kQwen3Asr);
|
|
858
901
|
ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
|
|
859
902
|
|
|
860
903
|
result.paths.tokens = candidate.tokens;
|
|
@@ -20,6 +20,7 @@ enum class SttModelKind {
|
|
|
20
20
|
kZipformerCtc,
|
|
21
21
|
kWhisper,
|
|
22
22
|
kFunAsrNano,
|
|
23
|
+
kQwen3Asr,
|
|
23
24
|
kFireRedAsr,
|
|
24
25
|
kMoonshine,
|
|
25
26
|
kMoonshineV2,
|
|
@@ -57,6 +58,11 @@ struct SttModelPaths {
|
|
|
57
58
|
std::string funasrLLM;
|
|
58
59
|
std::string funasrEmbedding;
|
|
59
60
|
std::string funasrTokenizer;
|
|
61
|
+
/** Qwen3-ASR: conv_frontend + encoder + decoder + tokenizer directory. */
|
|
62
|
+
std::string qwen3ConvFrontend;
|
|
63
|
+
std::string qwen3Encoder;
|
|
64
|
+
std::string qwen3Decoder;
|
|
65
|
+
std::string qwen3Tokenizer;
|
|
60
66
|
// Moonshine
|
|
61
67
|
std::string moonshinePreprocessor;
|
|
62
68
|
std::string moonshineEncoder;
|
|
@@ -89,6 +95,8 @@ struct SttCandidatePaths {
|
|
|
89
95
|
std::string funasrLLM;
|
|
90
96
|
std::string funasrEmbedding;
|
|
91
97
|
std::string funasrTokenizerDir;
|
|
98
|
+
std::string qwen3ConvFrontend;
|
|
99
|
+
std::string qwen3TokenizerDir;
|
|
92
100
|
std::string moonshinePreprocessor;
|
|
93
101
|
std::string moonshineEncoder;
|
|
94
102
|
std::string moonshineUncachedDecoder;
|
|
@@ -104,6 +112,7 @@ struct SttPathHints {
|
|
|
104
112
|
bool isLikelyWenetCtc = false;
|
|
105
113
|
bool isLikelySenseVoice = false;
|
|
106
114
|
bool isLikelyFunAsrNano = false;
|
|
115
|
+
bool isLikelyQwen3Asr = false;
|
|
107
116
|
bool isLikelyZipformer = false;
|
|
108
117
|
bool isLikelyMoonshine = false;
|
|
109
118
|
bool isLikelyDolphin = false;
|
|
@@ -128,6 +137,7 @@ struct SttCapabilities {
|
|
|
128
137
|
bool hasMoonshineV2 = false;
|
|
129
138
|
bool hasParaformer = false;
|
|
130
139
|
bool hasFunAsrNano = false;
|
|
140
|
+
bool hasQwen3Asr = false;
|
|
131
141
|
bool hasDolphin = false;
|
|
132
142
|
bool hasFireRedAsr = false;
|
|
133
143
|
/** True when dir name suggests Fire Red but only a single CTC/paraformer model (no encoder/decoder). Use zipformer_ctc. */
|
|
@@ -23,6 +23,7 @@ const char* SttModelKindToString(SttModelKind k) {
|
|
|
23
23
|
case SttModelKind::kZipformerCtc: return "zipformer_ctc";
|
|
24
24
|
case SttModelKind::kWhisper: return "whisper";
|
|
25
25
|
case SttModelKind::kFunAsrNano: return "funasr_nano";
|
|
26
|
+
case SttModelKind::kQwen3Asr: return "qwen3_asr";
|
|
26
27
|
case SttModelKind::kFireRedAsr: return "fire_red_asr";
|
|
27
28
|
case SttModelKind::kMoonshine: return "moonshine";
|
|
28
29
|
case SttModelKind::kMoonshineV2: return "moonshine_v2";
|
|
@@ -79,6 +80,10 @@ jobject SttDetectResultToJava(JNIEnv* env, const SttDetectResult& result) {
|
|
|
79
80
|
PutString(env, pathsMap, mapPut, "funasrLLM", result.paths.funasrLLM);
|
|
80
81
|
PutString(env, pathsMap, mapPut, "funasrEmbedding", result.paths.funasrEmbedding);
|
|
81
82
|
PutString(env, pathsMap, mapPut, "funasrTokenizer", result.paths.funasrTokenizer);
|
|
83
|
+
PutString(env, pathsMap, mapPut, "qwen3ConvFrontend", result.paths.qwen3ConvFrontend);
|
|
84
|
+
PutString(env, pathsMap, mapPut, "qwen3Encoder", result.paths.qwen3Encoder);
|
|
85
|
+
PutString(env, pathsMap, mapPut, "qwen3Decoder", result.paths.qwen3Decoder);
|
|
86
|
+
PutString(env, pathsMap, mapPut, "qwen3Tokenizer", result.paths.qwen3Tokenizer);
|
|
82
87
|
PutString(env, pathsMap, mapPut, "moonshinePreprocessor", result.paths.moonshinePreprocessor);
|
|
83
88
|
PutString(env, pathsMap, mapPut, "moonshineEncoder", result.paths.moonshineEncoder);
|
|
84
89
|
PutString(env, pathsMap, mapPut, "moonshineUncachedDecoder", result.paths.moonshineUncachedDecoder);
|
|
@@ -52,6 +52,13 @@ static const SttFieldRequirement kFunAsrNanoReqs[] = {
|
|
|
52
52
|
{"funasrTokenizer", &SttModelPaths::funasrTokenizer, true},
|
|
53
53
|
};
|
|
54
54
|
|
|
55
|
+
static const SttFieldRequirement kQwen3AsrReqs[] = {
|
|
56
|
+
{"qwen3ConvFrontend", &SttModelPaths::qwen3ConvFrontend, true},
|
|
57
|
+
{"qwen3Encoder", &SttModelPaths::qwen3Encoder, true},
|
|
58
|
+
{"qwen3Decoder", &SttModelPaths::qwen3Decoder, true},
|
|
59
|
+
{"qwen3Tokenizer", &SttModelPaths::qwen3Tokenizer, true},
|
|
60
|
+
};
|
|
61
|
+
|
|
55
62
|
static const SttFieldRequirement kMoonshineReqs[] = {
|
|
56
63
|
{"moonshinePreprocessor", &SttModelPaths::moonshinePreprocessor, true},
|
|
57
64
|
{"moonshineEncoder", &SttModelPaths::moonshineEncoder, true},
|
|
@@ -120,6 +127,9 @@ static const SttFieldRequirement* GetRequirements(SttModelKind kind, size_t& cou
|
|
|
120
127
|
case SttModelKind::kFunAsrNano:
|
|
121
128
|
count = std::size(kFunAsrNanoReqs);
|
|
122
129
|
return kFunAsrNanoReqs;
|
|
130
|
+
case SttModelKind::kQwen3Asr:
|
|
131
|
+
count = std::size(kQwen3AsrReqs);
|
|
132
|
+
return kQwen3AsrReqs;
|
|
123
133
|
case SttModelKind::kMoonshine:
|
|
124
134
|
count = std::size(kMoonshineReqs);
|
|
125
135
|
return kMoonshineReqs;
|
|
@@ -161,6 +171,7 @@ static const char* SttKindToName(SttModelKind k) {
|
|
|
161
171
|
case SttModelKind::kZipformerCtc: return "Zipformer CTC";
|
|
162
172
|
case SttModelKind::kWhisper: return "Whisper";
|
|
163
173
|
case SttModelKind::kFunAsrNano: return "FunASR Nano";
|
|
174
|
+
case SttModelKind::kQwen3Asr: return "Qwen3 ASR";
|
|
164
175
|
case SttModelKind::kFireRedAsr: return "Fire Red ASR";
|
|
165
176
|
case SttModelKind::kMoonshine: return "Moonshine";
|
|
166
177
|
case SttModelKind::kMoonshineV2: return "Moonshine v2";
|