react-native-sherpa-onnx 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -5
- package/SherpaOnnx.podspec +5 -1
- package/android/prebuilt-download.gradle +89 -49
- package/android/prebuilt-versions.gradle +1 -1
- package/android/src/main/assets/model_licenses/asr-models-license-status.csv +1 -0
- package/android/src/main/assets/model_licenses/speech-enhancement-models-license-status.csv +7 -0
- package/android/src/main/cpp/CMakeLists.txt +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-enhancement-wrapper.cpp +68 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-enhancement-wrapper.h +17 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-enhancement.cpp +119 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +23 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +9 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +51 -8
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +41 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +5 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-enhancement.cpp +68 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-enhancement.h +30 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +11 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +21 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +110 -35
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +6 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxEnhancementHelper.kt +377 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxExtractionNotificationHelper.kt +102 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +198 -18
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +22 -0
- package/ios/Resources/model_licenses/asr-models-license-status.csv +1 -0
- package/ios/Resources/model_licenses/speech-enhancement-models-license-status.csv +7 -0
- package/ios/SherpaOnnx+Assets.mm +5 -0
- package/ios/SherpaOnnx+Enhancement.mm +435 -0
- package/ios/SherpaOnnx+STT.mm +13 -1
- package/ios/SherpaOnnx.mm +87 -17
- package/ios/enhancement/sherpa-onnx-enhancement-wrapper.h +85 -0
- package/ios/enhancement/sherpa-onnx-enhancement-wrapper.mm +218 -0
- package/ios/model_detect/sherpa-onnx-model-detect-enhancement.mm +92 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +5 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +23 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +51 -7
- package/ios/model_detect/sherpa-onnx-model-detect.h +33 -0
- package/ios/model_detect/sherpa-onnx-validate-enhancement.h +30 -0
- package/ios/model_detect/sherpa-onnx-validate-enhancement.mm +69 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +11 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +11 -1
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +30 -2
- package/ios/tts/sherpa-onnx-tts-wrapper.mm +16 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/download/localModels.js +2 -3
- package/lib/module/download/localModels.js.map +1 -1
- package/lib/module/download/paths.js +2 -1
- package/lib/module/download/paths.js.map +1 -1
- package/lib/module/download/postDownloadProcessing.js +17 -4
- package/lib/module/download/postDownloadProcessing.js.map +1 -1
- package/lib/module/enhancement/index.js +63 -48
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/enhancement/streaming.js +60 -0
- package/lib/module/enhancement/streaming.js.map +1 -0
- package/lib/module/enhancement/streamingTypes.js +4 -0
- package/lib/module/enhancement/streamingTypes.js.map +1 -0
- package/lib/module/enhancement/types.js +4 -0
- package/lib/module/enhancement/types.js.map +1 -0
- package/lib/module/extraction/extractTarBz2.js +2 -2
- package/lib/module/extraction/extractTarBz2.js.map +1 -1
- package/lib/module/extraction/extractTarZst.js +2 -2
- package/lib/module/extraction/extractTarZst.js.map +1 -1
- package/lib/module/extraction/index.js +10 -5
- package/lib/module/extraction/index.js.map +1 -1
- package/lib/module/licenses.js +9 -3
- package/lib/module/licenses.js.map +1 -1
- package/lib/module/stt/index.js +4 -2
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +2 -1
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/stt/types.js +3 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +4 -2
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +3 -1
- package/lib/module/tts/streaming.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +70 -9
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/download/localModels.d.ts.map +1 -1
- package/lib/typescript/src/download/paths.d.ts +2 -1
- package/lib/typescript/src/download/paths.d.ts.map +1 -1
- package/lib/typescript/src/download/postDownloadProcessing.d.ts +9 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -1
- package/lib/typescript/src/enhancement/index.d.ts +9 -46
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/enhancement/streaming.d.ts +6 -0
- package/lib/typescript/src/enhancement/streaming.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/streamingTypes.d.ts +12 -0
- package/lib/typescript/src/enhancement/streamingTypes.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/types.d.ts +31 -0
- package/lib/typescript/src/enhancement/types.d.ts.map +1 -0
- package/lib/typescript/src/extraction/extractTarBz2.d.ts +2 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarZst.d.ts +2 -1
- package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -1
- package/lib/typescript/src/extraction/index.d.ts +1 -1
- package/lib/typescript/src/extraction/index.d.ts.map +1 -1
- package/lib/typescript/src/extraction/types.d.ts +12 -0
- package/lib/typescript/src/extraction/types.d.ts.map +1 -1
- package/lib/typescript/src/licenses.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +1 -1
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +16 -1
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
- package/package.json +1 -1
- package/scripts/ci/check-model-csvs.sh +27 -2
- package/scripts/ci/collect_all_sherpa_model_streams.sh +3 -1
- package/scripts/ci/collect_one_sherpa_release_stream.sh +3 -1
- package/scripts/ci/sherpa_speech_enhancement_model_release_streams.json +13 -0
- package/scripts/ci/update_model_license_csv.sh +17 -17
- package/src/NativeSherpaOnnx.ts +108 -10
- package/src/download/localModels.ts +1 -3
- package/src/download/paths.ts +2 -1
- package/src/download/postDownloadProcessing.ts +24 -1
- package/src/enhancement/index.ts +120 -58
- package/src/enhancement/streaming.ts +105 -0
- package/src/enhancement/streamingTypes.ts +14 -0
- package/src/enhancement/types.ts +36 -0
- package/src/extraction/extractTarBz2.ts +7 -2
- package/src/extraction/extractTarZst.ts +7 -2
- package/src/extraction/index.ts +29 -6
- package/src/extraction/types.ts +16 -0
- package/src/licenses.ts +13 -2
- package/src/stt/index.ts +8 -7
- package/src/stt/streaming.ts +7 -1
- package/src/stt/types.ts +18 -0
- package/src/tts/index.ts +7 -7
- package/src/tts/streaming.ts +6 -3
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
|
@@ -389,5 +389,28 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
|
|
|
389
389
|
return candidates;
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
+
bool Qwen3TokenizerDirHasVocabAndMerges(
|
|
393
|
+
const std::vector<FileEntry>& files,
|
|
394
|
+
const std::string& dirRaw
|
|
395
|
+
) {
|
|
396
|
+
std::string dir = dirRaw;
|
|
397
|
+
while (!dir.empty() && (dir.back() == '/' || dir.back() == '\\'))
|
|
398
|
+
dir.pop_back();
|
|
399
|
+
if (dir.empty()) return false;
|
|
400
|
+
bool hasVocab = false;
|
|
401
|
+
bool hasMerges = false;
|
|
402
|
+
const std::string prefix = dir + "/";
|
|
403
|
+
for (const auto& e : files) {
|
|
404
|
+
if (e.path.size() <= prefix.size()) continue;
|
|
405
|
+
if (e.path.compare(0, prefix.size(), prefix) != 0) continue;
|
|
406
|
+
std::string rest = e.path.substr(prefix.size());
|
|
407
|
+
if (rest.find('/') != std::string::npos || rest.find('\\') != std::string::npos) continue;
|
|
408
|
+
if (e.nameLower == "vocab.json") hasVocab = true;
|
|
409
|
+
if (e.nameLower == "merges.txt") hasMerges = true;
|
|
410
|
+
}
|
|
411
|
+
if (hasVocab && hasMerges) return true;
|
|
412
|
+
return FileExists(dir + "/vocab.json") && FileExists(dir + "/merges.txt");
|
|
413
|
+
}
|
|
414
|
+
|
|
392
415
|
} // namespace model_detect
|
|
393
416
|
} // namespace sherpaonnx
|
|
@@ -88,6 +88,15 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
|
|
|
88
88
|
const std::string& rootDir
|
|
89
89
|
);
|
|
90
90
|
|
|
91
|
+
/**
|
|
92
|
+
* True if `dir` contains vocab.json and merges.txt: listed in `files` (fixture / synthetic trees)
|
|
93
|
+
* or present on disk. Used for Qwen3-ASR tokenizer directory detection.
|
|
94
|
+
*/
|
|
95
|
+
bool Qwen3TokenizerDirHasVocabAndMerges(
|
|
96
|
+
const std::vector<FileEntry>& files,
|
|
97
|
+
const std::string& dir
|
|
98
|
+
);
|
|
99
|
+
|
|
91
100
|
} // namespace model_detect
|
|
92
101
|
} // namespace sherpaonnx
|
|
93
102
|
|
|
@@ -61,6 +61,7 @@ static const char* KindToName(SttModelKind k) {
|
|
|
61
61
|
case SttModelKind::kZipformerCtc: return "zipformer_ctc";
|
|
62
62
|
case SttModelKind::kWhisper: return "whisper";
|
|
63
63
|
case SttModelKind::kFunAsrNano: return "funasr_nano";
|
|
64
|
+
case SttModelKind::kQwen3Asr: return "qwen3_asr";
|
|
64
65
|
case SttModelKind::kFireRedAsr: return "fire_red_asr";
|
|
65
66
|
case SttModelKind::kMoonshine: return "moonshine";
|
|
66
67
|
case SttModelKind::kMoonshineV2: return "moonshine_v2";
|
|
@@ -88,6 +89,7 @@ SttModelKind ParseSttModelType(const std::string& modelType) {
|
|
|
88
89
|
if (modelType == "zipformer_ctc" || modelType == "ctc") return SttModelKind::kZipformerCtc;
|
|
89
90
|
if (modelType == "whisper") return SttModelKind::kWhisper;
|
|
90
91
|
if (modelType == "funasr_nano") return SttModelKind::kFunAsrNano;
|
|
92
|
+
if (modelType == "qwen3_asr") return SttModelKind::kQwen3Asr;
|
|
91
93
|
if (modelType == "fire_red_asr") return SttModelKind::kFireRedAsr;
|
|
92
94
|
if (modelType == "moonshine") return SttModelKind::kMoonshine;
|
|
93
95
|
if (modelType == "moonshine_v2") return SttModelKind::kMoonshineV2;
|
|
@@ -126,6 +128,8 @@ static bool CapabilitySupportsKind(
|
|
|
126
128
|
return cap.hasWhisper;
|
|
127
129
|
case SttModelKind::kFunAsrNano:
|
|
128
130
|
return cap.hasFunAsrNano;
|
|
131
|
+
case SttModelKind::kQwen3Asr:
|
|
132
|
+
return cap.hasQwen3Asr;
|
|
129
133
|
case SttModelKind::kFireRedAsr:
|
|
130
134
|
return cap.hasFireRedAsr;
|
|
131
135
|
case SttModelKind::kMoonshine:
|
|
@@ -189,6 +193,8 @@ static std::vector<SttModelKind> GetKindsFromDirName(const std::string& modelDir
|
|
|
189
193
|
add(SttModelKind::kTransducer);
|
|
190
194
|
add(SttModelKind::kZipformerCtc);
|
|
191
195
|
}
|
|
196
|
+
if (lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos)
|
|
197
|
+
add(SttModelKind::kQwen3Asr);
|
|
192
198
|
if (lower.find("funasr") != std::string::npos)
|
|
193
199
|
add(SttModelKind::kFunAsrNano);
|
|
194
200
|
if (lower.find("canary") != std::string::npos)
|
|
@@ -249,6 +255,19 @@ static SttCandidatePaths GatherSttCandidatePaths(
|
|
|
249
255
|
p.funasrTokenizerDir = vocabInSubdir.substr(0, lastSlash);
|
|
250
256
|
}
|
|
251
257
|
}
|
|
258
|
+
p.qwen3ConvFrontend = FindOnnxByAnyToken(files, {"conv_frontend"}, preferInt8);
|
|
259
|
+
{
|
|
260
|
+
for (const auto& entry : files) {
|
|
261
|
+
if (entry.nameLower != "tokenizer_config.json") continue;
|
|
262
|
+
size_t slash = entry.path.find_last_of("/\\");
|
|
263
|
+
if (slash == std::string::npos) continue;
|
|
264
|
+
std::string dir = entry.path.substr(0, slash);
|
|
265
|
+
if (Qwen3TokenizerDirHasVocabAndMerges(files, dir)) {
|
|
266
|
+
p.qwen3TokenizerDir = dir;
|
|
267
|
+
break;
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
252
271
|
p.moonshinePreprocessor = FindOnnxByAnyToken(files, {"preprocess", "preprocessor"}, preferInt8);
|
|
253
272
|
p.moonshineEncoder = FindOnnxByAnyToken(files, {"encode", "encoder_model"}, preferInt8);
|
|
254
273
|
p.moonshineUncachedDecoder = FindOnnxByAnyToken(files, {"uncached_decode", "uncached"}, preferInt8);
|
|
@@ -258,7 +277,8 @@ static SttCandidatePaths GatherSttCandidatePaths(
|
|
|
258
277
|
static const std::vector<std::string> modelExcludes = {
|
|
259
278
|
"encoder", "decoder", "joiner", "vocoder", "acoustic", "embedding", "llm",
|
|
260
279
|
"encoder_adaptor", "encoder-adaptor", "encoder_model", "decoder_model",
|
|
261
|
-
"merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached"
|
|
280
|
+
"merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached",
|
|
281
|
+
"conv_frontend"
|
|
262
282
|
};
|
|
263
283
|
p.paraformerModel = FindOnnxByAnyToken(files, {"model"}, preferInt8);
|
|
264
284
|
if (!p.paraformerModel.empty()) {
|
|
@@ -302,6 +322,7 @@ static SttPathHints GetSttPathHints(const std::string& modelDir) {
|
|
|
302
322
|
h.isLikelyWenetCtc = lower.find("wenet") != std::string::npos;
|
|
303
323
|
h.isLikelySenseVoice = lower.find("sense") != std::string::npos || lower.find("sensevoice") != std::string::npos;
|
|
304
324
|
h.isLikelyFunAsrNano = lower.find("funasr") != std::string::npos || lower.find("funasr-nano") != std::string::npos;
|
|
325
|
+
h.isLikelyQwen3Asr = lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos;
|
|
305
326
|
h.isLikelyZipformer = lower.find("zipformer") != std::string::npos;
|
|
306
327
|
h.isLikelyMoonshine = lower.find("moonshine") != std::string::npos;
|
|
307
328
|
h.isLikelyDolphin = lower.find("dolphin") != std::string::npos;
|
|
@@ -404,7 +425,9 @@ static SttCapabilities ComputeSttCapabilities(const SttCandidatePaths& paths, co
|
|
|
404
425
|
c.hasTransducer = !paths.encoder.empty() && !paths.decoder.empty() && !paths.joiner.empty();
|
|
405
426
|
bool hasWhisperEnc = !paths.encoder.empty();
|
|
406
427
|
bool hasWhisperDec = !paths.decoder.empty();
|
|
407
|
-
|
|
428
|
+
bool hasQwen3Tok = !paths.qwen3TokenizerDir.empty();
|
|
429
|
+
c.hasQwen3Asr = !paths.qwen3ConvFrontend.empty() && hasWhisperEnc && hasWhisperDec && hasQwen3Tok;
|
|
430
|
+
c.hasWhisper = hasWhisperEnc && hasWhisperDec && paths.joiner.empty() && !c.hasQwen3Asr;
|
|
408
431
|
bool hasFunAsrTok = !paths.funasrTokenizerDir.empty();
|
|
409
432
|
c.hasFunAsrNano = !paths.funasrEncoderAdaptor.empty() && !paths.funasrLLM.empty() &&
|
|
410
433
|
!paths.funasrEmbedding.empty() && hasFunAsrTok;
|
|
@@ -446,6 +469,7 @@ static void CollectDetectedModels(
|
|
|
446
469
|
out.push_back({"paraformer", modelDir});
|
|
447
470
|
}
|
|
448
471
|
if (cap.hasWhisper) out.push_back({"whisper", modelDir});
|
|
472
|
+
if (cap.hasQwen3Asr) out.push_back({"qwen3_asr", modelDir});
|
|
449
473
|
if (cap.hasFunAsrNano) out.push_back({"funasr_nano", modelDir});
|
|
450
474
|
if (cap.hasMoonshine) out.push_back({"moonshine", modelDir});
|
|
451
475
|
if (cap.hasMoonshineV2) out.push_back({"moonshine_v2", modelDir});
|
|
@@ -507,6 +531,10 @@ static SttModelKind ResolveSttKind(
|
|
|
507
531
|
outError = "FunASR Nano model requested but required files not found in " + modelDir;
|
|
508
532
|
return SttModelKind::kUnknown;
|
|
509
533
|
}
|
|
534
|
+
if (selected == SttModelKind::kQwen3Asr && !cap.hasQwen3Asr) {
|
|
535
|
+
outError = "Qwen3-ASR model requested but conv_frontend/encoder/decoder/tokenizer not found in " + modelDir;
|
|
536
|
+
return SttModelKind::kUnknown;
|
|
537
|
+
}
|
|
510
538
|
if (selected == SttModelKind::kMoonshine && !cap.hasMoonshine) {
|
|
511
539
|
outError = "Moonshine v1 model requested but preprocess/encode/uncached_decode/cached_decode not found in " + modelDir;
|
|
512
540
|
return SttModelKind::kUnknown;
|
|
@@ -573,7 +601,9 @@ static SttModelKind ResolveSttKind(
|
|
|
573
601
|
if (!paths.paraformerModel.empty()) return SttModelKind::kParaformer;
|
|
574
602
|
if (cap.hasCanary) return SttModelKind::kCanary;
|
|
575
603
|
if (cap.hasFireRedAsr) return SttModelKind::kFireRedAsr;
|
|
604
|
+
if (cap.hasQwen3Asr && hints.isLikelyQwen3Asr) return SttModelKind::kQwen3Asr;
|
|
576
605
|
if (cap.hasWhisper) return SttModelKind::kWhisper;
|
|
606
|
+
if (cap.hasQwen3Asr) return SttModelKind::kQwen3Asr;
|
|
577
607
|
if (cap.hasFunAsrNano) return SttModelKind::kFunAsrNano;
|
|
578
608
|
if (cap.hasMoonshineV2) return SttModelKind::kMoonshineV2;
|
|
579
609
|
if (cap.hasDolphin) return SttModelKind::kDolphin;
|
|
@@ -618,6 +648,12 @@ static void ApplyPathsForSttKind(SttModelKind kind, const SttCandidatePaths& can
|
|
|
618
648
|
resultPaths.funasrEmbedding = candidate.funasrEmbedding;
|
|
619
649
|
resultPaths.funasrTokenizer = candidate.funasrTokenizerDir;
|
|
620
650
|
break;
|
|
651
|
+
case SttModelKind::kQwen3Asr:
|
|
652
|
+
resultPaths.qwen3ConvFrontend = candidate.qwen3ConvFrontend;
|
|
653
|
+
resultPaths.qwen3Encoder = candidate.encoder;
|
|
654
|
+
resultPaths.qwen3Decoder = candidate.decoder;
|
|
655
|
+
resultPaths.qwen3Tokenizer = candidate.qwen3TokenizerDir;
|
|
656
|
+
break;
|
|
621
657
|
case SttModelKind::kMoonshine:
|
|
622
658
|
resultPaths.moonshinePreprocessor = candidate.moonshinePreprocessor;
|
|
623
659
|
resultPaths.moonshineEncoder = candidate.moonshineEncoder;
|
|
@@ -711,13 +747,13 @@ SttDetectResult DetectSttModel(
|
|
|
711
747
|
EmptyOrPath(candidate.encoder), EmptyOrPath(candidate.decoder));
|
|
712
748
|
LOGI("DetectSttModel: funasr encoderAdaptor=%s llm=%s embedding=%s tokenizerDir=%s",
|
|
713
749
|
EmptyOrPath(candidate.funasrEncoderAdaptor), EmptyOrPath(candidate.funasrLLM), EmptyOrPath(candidate.funasrEmbedding), EmptyOrPath(candidate.funasrTokenizerDir));
|
|
714
|
-
LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
|
|
750
|
+
LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasQwen3Asr=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
|
|
715
751
|
(int)cap.hasTransducer, (int)cap.hasWhisper, (int)cap.hasMoonshine, (int)cap.hasMoonshineV2,
|
|
716
|
-
(int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
|
|
752
|
+
(int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasQwen3Asr, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
|
|
717
753
|
(int)cap.hasCanary, (int)cap.hasOmnilingual, (int)cap.hasMedAsr, (int)cap.hasTeleSpeechCtc, (int)cap.hasToneCtc);
|
|
718
|
-
LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
|
|
754
|
+
LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyQwen3Asr=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
|
|
719
755
|
(int)hints.isLikelyNemo, (int)hints.isLikelyTdt, (int)hints.isLikelyWenetCtc, (int)hints.isLikelySenseVoice,
|
|
720
|
-
(int)hints.isLikelyFunAsrNano, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
|
|
756
|
+
(int)hints.isLikelyFunAsrNano, (int)hints.isLikelyQwen3Asr, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
|
|
721
757
|
(int)hints.isLikelyFireRedAsr, (int)hints.isLikelyCanary, (int)hints.isLikelyOmnilingual, (int)hints.isLikelyMedAsr,
|
|
722
758
|
(int)hints.isLikelyTeleSpeech, (int)hints.isLikelyToneCtc, (int)hints.isLikelyParaformer, (int)hints.isLikelyVad, (int)hints.isLikelyTdnn);
|
|
723
759
|
}
|
|
@@ -747,7 +783,8 @@ SttDetectResult DetectSttModel(
|
|
|
747
783
|
}
|
|
748
784
|
|
|
749
785
|
LOGI("DetectSttModel: selected kind=%d (%s)", static_cast<int>(result.selectedKind), KindToName(result.selectedKind));
|
|
750
|
-
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano
|
|
786
|
+
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
|
|
787
|
+
result.selectedKind != SttModelKind::kQwen3Asr);
|
|
751
788
|
ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
|
|
752
789
|
|
|
753
790
|
if (!candidate.tokens.empty() && FileExists(candidate.tokens)) {
|
|
@@ -808,6 +845,11 @@ SttDetectResult DetectSttModel(
|
|
|
808
845
|
EmptyOrPath(result.paths.funasrEncoderAdaptor), EmptyOrPath(result.paths.funasrLLM),
|
|
809
846
|
EmptyOrPath(result.paths.funasrEmbedding), EmptyOrPath(result.paths.funasrTokenizer));
|
|
810
847
|
break;
|
|
848
|
+
case SttModelKind::kQwen3Asr:
|
|
849
|
+
LOGI("DetectSttModel: paths set qwen3_asr conv=%s encoder=%s decoder=%s tokenizer=%s",
|
|
850
|
+
EmptyOrPath(result.paths.qwen3ConvFrontend), EmptyOrPath(result.paths.qwen3Encoder),
|
|
851
|
+
EmptyOrPath(result.paths.qwen3Decoder), EmptyOrPath(result.paths.qwen3Tokenizer));
|
|
852
|
+
break;
|
|
811
853
|
default:
|
|
812
854
|
break;
|
|
813
855
|
}
|
|
@@ -854,7 +896,8 @@ SttDetectResult DetectSttModelFromFileList(
|
|
|
854
896
|
return result;
|
|
855
897
|
}
|
|
856
898
|
|
|
857
|
-
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano
|
|
899
|
+
result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
|
|
900
|
+
result.selectedKind != SttModelKind::kQwen3Asr);
|
|
858
901
|
ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
|
|
859
902
|
|
|
860
903
|
result.paths.tokens = candidate.tokens;
|
|
@@ -20,6 +20,7 @@ enum class SttModelKind {
|
|
|
20
20
|
kZipformerCtc,
|
|
21
21
|
kWhisper,
|
|
22
22
|
kFunAsrNano,
|
|
23
|
+
kQwen3Asr,
|
|
23
24
|
kFireRedAsr,
|
|
24
25
|
kMoonshine,
|
|
25
26
|
kMoonshineV2,
|
|
@@ -42,6 +43,12 @@ enum class TtsModelKind {
|
|
|
42
43
|
kSupertonic
|
|
43
44
|
};
|
|
44
45
|
|
|
46
|
+
/** Speech-enhancement model families distinguished by detection and validation. */
enum class EnhancementModelKind {
|
|
47
|
+
/** No recognised kind; enhancement path validation defines no requirements for it. */
kUnknown,
|
|
48
|
+
/** Named "GTCRN" in validation error messages; requires the `model` path. */
kGtcrn,
|
|
49
|
+
/** Named "DPDFNet" in validation error messages; requires the `model` path. */
kDpdfNet
|
|
50
|
+
};
|
|
51
|
+
|
|
45
52
|
struct SttModelPaths {
|
|
46
53
|
std::string encoder;
|
|
47
54
|
std::string decoder;
|
|
@@ -57,6 +64,11 @@ struct SttModelPaths {
|
|
|
57
64
|
std::string funasrLLM;
|
|
58
65
|
std::string funasrEmbedding;
|
|
59
66
|
std::string funasrTokenizer;
|
|
67
|
+
/** Qwen3-ASR: conv_frontend + encoder + decoder + tokenizer directory. */
|
|
68
|
+
std::string qwen3ConvFrontend;
|
|
69
|
+
std::string qwen3Encoder;
|
|
70
|
+
std::string qwen3Decoder;
|
|
71
|
+
std::string qwen3Tokenizer;
|
|
60
72
|
// Moonshine
|
|
61
73
|
std::string moonshinePreprocessor;
|
|
62
74
|
std::string moonshineEncoder;
|
|
@@ -89,6 +101,8 @@ struct SttCandidatePaths {
|
|
|
89
101
|
std::string funasrLLM;
|
|
90
102
|
std::string funasrEmbedding;
|
|
91
103
|
std::string funasrTokenizerDir;
|
|
104
|
+
std::string qwen3ConvFrontend;
|
|
105
|
+
std::string qwen3TokenizerDir;
|
|
92
106
|
std::string moonshinePreprocessor;
|
|
93
107
|
std::string moonshineEncoder;
|
|
94
108
|
std::string moonshineUncachedDecoder;
|
|
@@ -104,6 +118,7 @@ struct SttPathHints {
|
|
|
104
118
|
bool isLikelyWenetCtc = false;
|
|
105
119
|
bool isLikelySenseVoice = false;
|
|
106
120
|
bool isLikelyFunAsrNano = false;
|
|
121
|
+
bool isLikelyQwen3Asr = false;
|
|
107
122
|
bool isLikelyZipformer = false;
|
|
108
123
|
bool isLikelyMoonshine = false;
|
|
109
124
|
bool isLikelyDolphin = false;
|
|
@@ -128,6 +143,7 @@ struct SttCapabilities {
|
|
|
128
143
|
bool hasMoonshineV2 = false;
|
|
129
144
|
bool hasParaformer = false;
|
|
130
145
|
bool hasFunAsrNano = false;
|
|
146
|
+
bool hasQwen3Asr = false;
|
|
131
147
|
bool hasDolphin = false;
|
|
132
148
|
bool hasFireRedAsr = false;
|
|
133
149
|
/** True when dir name suggests Fire Red but only a single CTC/paraformer model (no encoder/decoder). Use zipformer_ctc. */
|
|
@@ -164,6 +180,10 @@ struct TtsModelPaths {
|
|
|
164
180
|
std::string voiceStyle;
|
|
165
181
|
};
|
|
166
182
|
|
|
183
|
+
/** Resolved file paths for a speech-enhancement model. */
struct EnhancementModelPaths {
|
|
184
|
+
/** Path of the model file; the single field checked by enhancement path validation. */
std::string model;
|
|
185
|
+
};
|
|
186
|
+
|
|
167
187
|
struct SttDetectResult {
|
|
168
188
|
bool ok = false;
|
|
169
189
|
std::string error;
|
|
@@ -185,6 +205,14 @@ struct TtsDetectResult {
|
|
|
185
205
|
std::vector<std::string> lexiconLanguageCandidates;
|
|
186
206
|
};
|
|
187
207
|
|
|
208
|
+
/** Result type returned by DetectEnhancementModel and DetectEnhancementModelFromFileList. */
struct EnhancementDetectResult {
|
|
209
|
+
/** Success flag; defaults to false. */
bool ok = false;
|
|
210
|
+
/** Error text; presumably filled only when detection fails (confirm in the detector). */
std::string error;
|
|
211
|
+
/** Models found during detection. */
std::vector<DetectedModel> detectedModels;
|
|
212
|
+
/** Kind chosen for loading; stays kUnknown unless detection selects one. */
EnhancementModelKind selectedKind = EnhancementModelKind::kUnknown;
|
|
213
|
+
/** File paths associated with the selected kind. */
EnhancementModelPaths paths;
|
|
214
|
+
};
|
|
215
|
+
|
|
188
216
|
SttDetectResult DetectSttModel(
|
|
189
217
|
const std::string& modelDir,
|
|
190
218
|
const std::optional<bool>& preferInt8,
|
|
@@ -218,6 +246,19 @@ TtsDetectResult DetectTtsModelFromFileList(
|
|
|
218
246
|
const std::string& modelType = "auto"
|
|
219
247
|
);
|
|
220
248
|
|
|
249
|
+
EnhancementDetectResult DetectEnhancementModel(
|
|
250
|
+
const std::string& modelDir,
|
|
251
|
+
const std::string& modelType
|
|
252
|
+
);
|
|
253
|
+
|
|
254
|
+
/** Test-only: Like DetectEnhancementModel but takes a pre-built file list; no filesystem access.
|
|
255
|
+
* Only used by the host-side C++ test suite (test/cpp/model_detect_test.cpp). */
|
|
256
|
+
EnhancementDetectResult DetectEnhancementModelFromFileList(
|
|
257
|
+
const std::vector<model_detect::FileEntry>& files,
|
|
258
|
+
const std::string& modelDir,
|
|
259
|
+
const std::string& modelType = "auto"
|
|
260
|
+
);
|
|
261
|
+
|
|
221
262
|
} // namespace sherpaonnx
|
|
222
263
|
|
|
223
264
|
#endif // SHERPA_ONNX_MODEL_DETECT_H
|
|
@@ -23,6 +23,7 @@ const char* SttModelKindToString(SttModelKind k) {
|
|
|
23
23
|
case SttModelKind::kZipformerCtc: return "zipformer_ctc";
|
|
24
24
|
case SttModelKind::kWhisper: return "whisper";
|
|
25
25
|
case SttModelKind::kFunAsrNano: return "funasr_nano";
|
|
26
|
+
case SttModelKind::kQwen3Asr: return "qwen3_asr";
|
|
26
27
|
case SttModelKind::kFireRedAsr: return "fire_red_asr";
|
|
27
28
|
case SttModelKind::kMoonshine: return "moonshine";
|
|
28
29
|
case SttModelKind::kMoonshineV2: return "moonshine_v2";
|
|
@@ -79,6 +80,10 @@ jobject SttDetectResultToJava(JNIEnv* env, const SttDetectResult& result) {
|
|
|
79
80
|
PutString(env, pathsMap, mapPut, "funasrLLM", result.paths.funasrLLM);
|
|
80
81
|
PutString(env, pathsMap, mapPut, "funasrEmbedding", result.paths.funasrEmbedding);
|
|
81
82
|
PutString(env, pathsMap, mapPut, "funasrTokenizer", result.paths.funasrTokenizer);
|
|
83
|
+
PutString(env, pathsMap, mapPut, "qwen3ConvFrontend", result.paths.qwen3ConvFrontend);
|
|
84
|
+
PutString(env, pathsMap, mapPut, "qwen3Encoder", result.paths.qwen3Encoder);
|
|
85
|
+
PutString(env, pathsMap, mapPut, "qwen3Decoder", result.paths.qwen3Decoder);
|
|
86
|
+
PutString(env, pathsMap, mapPut, "qwen3Tokenizer", result.paths.qwen3Tokenizer);
|
|
82
87
|
PutString(env, pathsMap, mapPut, "moonshinePreprocessor", result.paths.moonshinePreprocessor);
|
|
83
88
|
PutString(env, pathsMap, mapPut, "moonshineEncoder", result.paths.moonshineEncoder);
|
|
84
89
|
PutString(env, pathsMap, mapPut, "moonshineUncachedDecoder", result.paths.moonshineUncachedDecoder);
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#include "sherpa-onnx-validate-enhancement.h"
|
|
2
|
+
|
|
3
|
+
#include <cstddef>
|
|
4
|
+
|
|
5
|
+
namespace sherpaonnx {
|
|
6
|
+
namespace {
|
|
7
|
+
|
|
8
|
+
/** Requirements shared by every supported enhancement kind: only `model` is mandatory. */
static const EnhancementFieldRequirement kGenericReqs[] = {
    {"model", &EnhancementModelPaths::model, true},
};

/**
 * Looks up the field requirements for an enhancement model kind.
 *
 * @param kind  Kind to look up.
 * @param count Receives the number of entries in the returned array (0 when none).
 * @return Pointer to the requirement table, or nullptr for kinds without one.
 */
static const EnhancementFieldRequirement* GetRequirements(
    EnhancementModelKind kind,
    size_t& count
) {
    const bool usesGenericTable = kind == EnhancementModelKind::kGtcrn ||
                                  kind == EnhancementModelKind::kDpdfNet;
    if (!usesGenericTable) {
        count = 0;
        return nullptr;
    }
    count = std::size(kGenericReqs);
    return kGenericReqs;
}
|
|
26
|
+
|
|
27
|
+
/** Maps an enhancement model kind to the display name used in validation error messages. */
static const char* EnhancementKindToName(EnhancementModelKind kind) {
    if (kind == EnhancementModelKind::kGtcrn) {
        return "GTCRN";
    }
    if (kind == EnhancementModelKind::kDpdfNet) {
        return "DPDFNet";
    }
    return "Unknown";
}
|
|
37
|
+
|
|
38
|
+
} // namespace
|
|
39
|
+
|
|
40
|
+
/**
 * Checks that every required path for the given enhancement kind is non-empty.
 *
 * @param kind     Enhancement model kind whose requirement table is applied.
 * @param paths    Detected paths to validate.
 * @param modelDir Model directory, used only in the error message.
 * @return A result with ok == true when nothing is missing (including kinds
 *         that have no requirement table); otherwise ok == false,
 *         missingRequired lists the empty field names and error describes them.
 */
EnhancementValidationResult ValidateEnhancementPaths(
    EnhancementModelKind kind,
    const EnhancementModelPaths& paths,
    const std::string& modelDir
) {
    EnhancementValidationResult result;

    size_t reqCount = 0;
    const EnhancementFieldRequirement* const table = GetRequirements(kind, reqCount);
    if (table == nullptr) {
        // No requirement table for this kind: nothing to validate.
        return result;
    }

    // Collect the names of required fields whose path is empty.
    for (const EnhancementFieldRequirement* req = table; req != table + reqCount; ++req) {
        if (!req->required) continue;
        const std::string& value = paths.*(req->field);
        if (value.empty()) {
            result.missingRequired.push_back(req->fieldName);
        }
    }

    if (result.missingRequired.empty()) {
        return result;
    }

    result.ok = false;
    result.error = "Enhancement ";
    result.error += EnhancementKindToName(kind);
    result.error += ": missing required files in ";
    result.error += modelDir;
    result.error += ": ";
    // Comma-separate the missing field names.
    const char* separator = "";
    for (const std::string& name : result.missingRequired) {
        result.error += separator;
        result.error += name;
        separator = ", ";
    }
    return result;
}
|
|
67
|
+
|
|
68
|
+
} // namespace sherpaonnx
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#ifndef SHERPA_ONNX_VALIDATE_ENHANCEMENT_H
|
|
2
|
+
#define SHERPA_ONNX_VALIDATE_ENHANCEMENT_H
|
|
3
|
+
|
|
4
|
+
#include "sherpa-onnx-model-detect.h"
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
namespace sherpaonnx {
|
|
9
|
+
|
|
10
|
+
/** One entry of a requirement table consulted by ValidateEnhancementPaths. */
struct EnhancementFieldRequirement {
|
|
11
|
+
/** Name reported in EnhancementValidationResult::missingRequired when the field is empty. */
const char* fieldName;
|
|
12
|
+
/** Pointer-to-member selecting which path string of EnhancementModelPaths is checked. */
std::string EnhancementModelPaths::* field;
|
|
13
|
+
/** When true, an empty path for this field is reported as missing. */
bool required;
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
/** Outcome of ValidateEnhancementPaths. */
struct EnhancementValidationResult {
|
|
17
|
+
/** True unless at least one required field is empty; defaults to true. */
bool ok = true;
|
|
18
|
+
/** Field names of required paths that were empty. */
std::vector<std::string> missingRequired;
|
|
19
|
+
/** Human-readable message listing the missing fields; empty when ok is true. */
std::string error;
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
/**
 * Verifies that all paths required for `kind` are non-empty in `paths`.
 * `modelDir` is used only to build the error message. Kinds without a
 * requirement table yield a result with ok == true.
 */
EnhancementValidationResult ValidateEnhancementPaths(
|
|
23
|
+
EnhancementModelKind kind,
|
|
24
|
+
const EnhancementModelPaths& paths,
|
|
25
|
+
const std::string& modelDir
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
} // namespace sherpaonnx
|
|
29
|
+
|
|
30
|
+
#endif // SHERPA_ONNX_VALIDATE_ENHANCEMENT_H
|
|
@@ -52,6 +52,13 @@ static const SttFieldRequirement kFunAsrNanoReqs[] = {
|
|
|
52
52
|
{"funasrTokenizer", &SttModelPaths::funasrTokenizer, true},
|
|
53
53
|
};
|
|
54
54
|
|
|
55
|
+
/** Qwen3-ASR: conv frontend, encoder, decoder and tokenizer directory are all required. */
static const SttFieldRequirement kQwen3AsrReqs[] = {
|
|
56
|
+
{"qwen3ConvFrontend", &SttModelPaths::qwen3ConvFrontend, true},
|
|
57
|
+
{"qwen3Encoder", &SttModelPaths::qwen3Encoder, true},
|
|
58
|
+
{"qwen3Decoder", &SttModelPaths::qwen3Decoder, true},
|
|
59
|
+
{"qwen3Tokenizer", &SttModelPaths::qwen3Tokenizer, true},
|
|
60
|
+
};
|
|
61
|
+
|
|
55
62
|
static const SttFieldRequirement kMoonshineReqs[] = {
|
|
56
63
|
{"moonshinePreprocessor", &SttModelPaths::moonshinePreprocessor, true},
|
|
57
64
|
{"moonshineEncoder", &SttModelPaths::moonshineEncoder, true},
|
|
@@ -120,6 +127,9 @@ static const SttFieldRequirement* GetRequirements(SttModelKind kind, size_t& cou
|
|
|
120
127
|
case SttModelKind::kFunAsrNano:
|
|
121
128
|
count = std::size(kFunAsrNanoReqs);
|
|
122
129
|
return kFunAsrNanoReqs;
|
|
130
|
+
case SttModelKind::kQwen3Asr:
|
|
131
|
+
count = std::size(kQwen3AsrReqs);
|
|
132
|
+
return kQwen3AsrReqs;
|
|
123
133
|
case SttModelKind::kMoonshine:
|
|
124
134
|
count = std::size(kMoonshineReqs);
|
|
125
135
|
return kMoonshineReqs;
|
|
@@ -161,6 +171,7 @@ static const char* SttKindToName(SttModelKind k) {
|
|
|
161
171
|
case SttModelKind::kZipformerCtc: return "Zipformer CTC";
|
|
162
172
|
case SttModelKind::kWhisper: return "Whisper";
|
|
163
173
|
case SttModelKind::kFunAsrNano: return "FunASR Nano";
|
|
174
|
+
case SttModelKind::kQwen3Asr: return "Qwen3 ASR";
|
|
164
175
|
case SttModelKind::kFireRedAsr: return "Fire Red ASR";
|
|
165
176
|
case SttModelKind::kMoonshine: return "Moonshine";
|
|
166
177
|
case SttModelKind::kMoonshineV2: return "Moonshine v2";
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
#include "sherpa-onnx-model-detect.h"
|
|
21
21
|
#include "sherpa-onnx-stt-wrapper.h"
|
|
22
22
|
#include "sherpa-onnx-tts-wrapper.h"
|
|
23
|
+
#include "sherpa-onnx-enhancement-wrapper.h"
|
|
23
24
|
|
|
24
25
|
extern "C" {
|
|
25
26
|
|
|
@@ -187,4 +188,24 @@ Java_com_sherpaonnx_SherpaOnnxModule_nativeDetectTtsModel(
|
|
|
187
188
|
return sherpaonnx::TtsDetectResultToJava(env, result);
|
|
188
189
|
}
|
|
189
190
|
|
|
191
|
+
// Detect enhancement model in directory. Returns HashMap with success, error, detectedModels, modelType, paths.
|
|
192
|
+
JNIEXPORT jobject JNICALL
|
|
193
|
+
Java_com_sherpaonnx_SherpaOnnxModule_nativeDetectEnhancementModel(
|
|
194
|
+
JNIEnv* env,
|
|
195
|
+
jobject /* this */,
|
|
196
|
+
jstring j_model_dir,
|
|
197
|
+
jstring j_model_type) {
|
|
198
|
+
const char* model_dir_c = env->GetStringUTFChars(j_model_dir, nullptr);
|
|
199
|
+
const char* model_type_c =
|
|
200
|
+
j_model_type ? env->GetStringUTFChars(j_model_type, nullptr) : nullptr;
|
|
201
|
+
std::string model_dir(model_dir_c ? model_dir_c : "");
|
|
202
|
+
std::string model_type(model_type_c ? model_type_c : "auto");
|
|
203
|
+
env->ReleaseStringUTFChars(j_model_dir, model_dir_c);
|
|
204
|
+
if (model_type_c) env->ReleaseStringUTFChars(j_model_type, model_type_c);
|
|
205
|
+
|
|
206
|
+
sherpaonnx::EnhancementDetectResult result =
|
|
207
|
+
sherpaonnx::DetectEnhancementModel(model_dir, model_type);
|
|
208
|
+
return sherpaonnx::EnhancementDetectResultToJava(env, result);
|
|
209
|
+
}
|
|
210
|
+
|
|
190
211
|
} // extern "C"
|