react-native-sherpa-onnx 0.3.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/README.md +20 -5
  2. package/SherpaOnnx.podspec +5 -1
  3. package/android/prebuilt-download.gradle +89 -49
  4. package/android/prebuilt-versions.gradle +1 -1
  5. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +1 -0
  6. package/android/src/main/assets/model_licenses/speech-enhancement-models-license-status.csv +7 -0
  7. package/android/src/main/cpp/CMakeLists.txt +3 -0
  8. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-enhancement-wrapper.cpp +68 -0
  9. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-enhancement-wrapper.h +17 -0
  10. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-enhancement.cpp +119 -0
  11. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +23 -0
  12. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +9 -0
  13. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +51 -8
  14. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +41 -0
  15. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +5 -0
  16. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-enhancement.cpp +68 -0
  17. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-enhancement.h +30 -0
  18. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +11 -0
  19. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +21 -0
  20. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +110 -35
  21. package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +6 -0
  22. package/android/src/main/java/com/sherpaonnx/SherpaOnnxEnhancementHelper.kt +377 -0
  23. package/android/src/main/java/com/sherpaonnx/SherpaOnnxExtractionNotificationHelper.kt +102 -0
  24. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +198 -18
  25. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +22 -0
  26. package/ios/Resources/model_licenses/asr-models-license-status.csv +1 -0
  27. package/ios/Resources/model_licenses/speech-enhancement-models-license-status.csv +7 -0
  28. package/ios/SherpaOnnx+Assets.mm +5 -0
  29. package/ios/SherpaOnnx+Enhancement.mm +435 -0
  30. package/ios/SherpaOnnx+STT.mm +13 -1
  31. package/ios/SherpaOnnx.mm +87 -17
  32. package/ios/enhancement/sherpa-onnx-enhancement-wrapper.h +85 -0
  33. package/ios/enhancement/sherpa-onnx-enhancement-wrapper.mm +218 -0
  34. package/ios/model_detect/sherpa-onnx-model-detect-enhancement.mm +92 -0
  35. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +5 -0
  36. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +23 -0
  37. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +51 -7
  38. package/ios/model_detect/sherpa-onnx-model-detect.h +33 -0
  39. package/ios/model_detect/sherpa-onnx-validate-enhancement.h +30 -0
  40. package/ios/model_detect/sherpa-onnx-validate-enhancement.mm +69 -0
  41. package/ios/model_detect/sherpa-onnx-validate-stt.mm +11 -0
  42. package/ios/stt/sherpa-onnx-stt-wrapper.h +11 -1
  43. package/ios/stt/sherpa-onnx-stt-wrapper.mm +30 -2
  44. package/ios/tts/sherpa-onnx-tts-wrapper.mm +16 -0
  45. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  46. package/lib/module/download/localModels.js +2 -3
  47. package/lib/module/download/localModels.js.map +1 -1
  48. package/lib/module/download/paths.js +2 -1
  49. package/lib/module/download/paths.js.map +1 -1
  50. package/lib/module/download/postDownloadProcessing.js +17 -4
  51. package/lib/module/download/postDownloadProcessing.js.map +1 -1
  52. package/lib/module/enhancement/index.js +63 -48
  53. package/lib/module/enhancement/index.js.map +1 -1
  54. package/lib/module/enhancement/streaming.js +60 -0
  55. package/lib/module/enhancement/streaming.js.map +1 -0
  56. package/lib/module/enhancement/streamingTypes.js +4 -0
  57. package/lib/module/enhancement/streamingTypes.js.map +1 -0
  58. package/lib/module/enhancement/types.js +4 -0
  59. package/lib/module/enhancement/types.js.map +1 -0
  60. package/lib/module/extraction/extractTarBz2.js +2 -2
  61. package/lib/module/extraction/extractTarBz2.js.map +1 -1
  62. package/lib/module/extraction/extractTarZst.js +2 -2
  63. package/lib/module/extraction/extractTarZst.js.map +1 -1
  64. package/lib/module/extraction/index.js +10 -5
  65. package/lib/module/extraction/index.js.map +1 -1
  66. package/lib/module/licenses.js +9 -3
  67. package/lib/module/licenses.js.map +1 -1
  68. package/lib/module/stt/index.js +4 -2
  69. package/lib/module/stt/index.js.map +1 -1
  70. package/lib/module/stt/streaming.js +2 -1
  71. package/lib/module/stt/streaming.js.map +1 -1
  72. package/lib/module/stt/types.js +3 -1
  73. package/lib/module/stt/types.js.map +1 -1
  74. package/lib/module/tts/index.js +4 -2
  75. package/lib/module/tts/index.js.map +1 -1
  76. package/lib/module/tts/streaming.js +3 -1
  77. package/lib/module/tts/streaming.js.map +1 -1
  78. package/lib/typescript/src/NativeSherpaOnnx.d.ts +70 -9
  79. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  80. package/lib/typescript/src/download/localModels.d.ts.map +1 -1
  81. package/lib/typescript/src/download/paths.d.ts +2 -1
  82. package/lib/typescript/src/download/paths.d.ts.map +1 -1
  83. package/lib/typescript/src/download/postDownloadProcessing.d.ts +9 -0
  84. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -1
  85. package/lib/typescript/src/enhancement/index.d.ts +9 -46
  86. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  87. package/lib/typescript/src/enhancement/streaming.d.ts +6 -0
  88. package/lib/typescript/src/enhancement/streaming.d.ts.map +1 -0
  89. package/lib/typescript/src/enhancement/streamingTypes.d.ts +12 -0
  90. package/lib/typescript/src/enhancement/streamingTypes.d.ts.map +1 -0
  91. package/lib/typescript/src/enhancement/types.d.ts +31 -0
  92. package/lib/typescript/src/enhancement/types.d.ts.map +1 -0
  93. package/lib/typescript/src/extraction/extractTarBz2.d.ts +2 -1
  94. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -1
  95. package/lib/typescript/src/extraction/extractTarZst.d.ts +2 -1
  96. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -1
  97. package/lib/typescript/src/extraction/index.d.ts +1 -1
  98. package/lib/typescript/src/extraction/index.d.ts.map +1 -1
  99. package/lib/typescript/src/extraction/types.d.ts +12 -0
  100. package/lib/typescript/src/extraction/types.d.ts.map +1 -1
  101. package/lib/typescript/src/licenses.d.ts.map +1 -1
  102. package/lib/typescript/src/stt/index.d.ts +1 -1
  103. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  104. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  105. package/lib/typescript/src/stt/types.d.ts +16 -1
  106. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  107. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  108. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  109. package/package.json +1 -1
  110. package/scripts/ci/check-model-csvs.sh +27 -2
  111. package/scripts/ci/collect_all_sherpa_model_streams.sh +3 -1
  112. package/scripts/ci/collect_one_sherpa_release_stream.sh +3 -1
  113. package/scripts/ci/sherpa_speech_enhancement_model_release_streams.json +13 -0
  114. package/scripts/ci/update_model_license_csv.sh +17 -17
  115. package/src/NativeSherpaOnnx.ts +108 -10
  116. package/src/download/localModels.ts +1 -3
  117. package/src/download/paths.ts +2 -1
  118. package/src/download/postDownloadProcessing.ts +24 -1
  119. package/src/enhancement/index.ts +120 -58
  120. package/src/enhancement/streaming.ts +105 -0
  121. package/src/enhancement/streamingTypes.ts +14 -0
  122. package/src/enhancement/types.ts +36 -0
  123. package/src/extraction/extractTarBz2.ts +7 -2
  124. package/src/extraction/extractTarZst.ts +7 -2
  125. package/src/extraction/index.ts +29 -6
  126. package/src/extraction/types.ts +16 -0
  127. package/src/licenses.ts +13 -2
  128. package/src/stt/index.ts +8 -7
  129. package/src/stt/streaming.ts +7 -1
  130. package/src/stt/types.ts +18 -0
  131. package/src/tts/index.ts +7 -7
  132. package/src/tts/streaming.ts +6 -3
  133. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  134. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
@@ -389,5 +389,28 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
389
389
  return candidates;
390
390
  }
391
391
 
392
+ bool Qwen3TokenizerDirHasVocabAndMerges(
393
+ const std::vector<FileEntry>& files,
394
+ const std::string& dirRaw
395
+ ) {
396
+ std::string dir = dirRaw;
397
+ while (!dir.empty() && (dir.back() == '/' || dir.back() == '\\'))
398
+ dir.pop_back();
399
+ if (dir.empty()) return false;
400
+ bool hasVocab = false;
401
+ bool hasMerges = false;
402
+ const std::string prefix = dir + "/";
403
+ for (const auto& e : files) {
404
+ if (e.path.size() <= prefix.size()) continue;
405
+ if (e.path.compare(0, prefix.size(), prefix) != 0) continue;
406
+ std::string rest = e.path.substr(prefix.size());
407
+ if (rest.find('/') != std::string::npos || rest.find('\\') != std::string::npos) continue;
408
+ if (e.nameLower == "vocab.json") hasVocab = true;
409
+ if (e.nameLower == "merges.txt") hasMerges = true;
410
+ }
411
+ if (hasVocab && hasMerges) return true;
412
+ return FileExists(dir + "/vocab.json") && FileExists(dir + "/merges.txt");
413
+ }
414
+
392
415
  } // namespace model_detect
393
416
  } // namespace sherpaonnx
@@ -88,6 +88,15 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
88
88
  const std::string& rootDir
89
89
  );
90
90
 
91
+ /**
92
+ * True if `dir` contains vocab.json and merges.txt: listed in `files` (fixture / synthetic trees)
93
+ * or present on disk. Used for Qwen3-ASR tokenizer directory detection.
94
+ */
95
+ bool Qwen3TokenizerDirHasVocabAndMerges(
96
+ const std::vector<FileEntry>& files,
97
+ const std::string& dir
98
+ );
99
+
91
100
  } // namespace model_detect
92
101
  } // namespace sherpaonnx
93
102
 
@@ -61,6 +61,7 @@ static const char* KindToName(SttModelKind k) {
61
61
  case SttModelKind::kZipformerCtc: return "zipformer_ctc";
62
62
  case SttModelKind::kWhisper: return "whisper";
63
63
  case SttModelKind::kFunAsrNano: return "funasr_nano";
64
+ case SttModelKind::kQwen3Asr: return "qwen3_asr";
64
65
  case SttModelKind::kFireRedAsr: return "fire_red_asr";
65
66
  case SttModelKind::kMoonshine: return "moonshine";
66
67
  case SttModelKind::kMoonshineV2: return "moonshine_v2";
@@ -88,6 +89,7 @@ SttModelKind ParseSttModelType(const std::string& modelType) {
88
89
  if (modelType == "zipformer_ctc" || modelType == "ctc") return SttModelKind::kZipformerCtc;
89
90
  if (modelType == "whisper") return SttModelKind::kWhisper;
90
91
  if (modelType == "funasr_nano") return SttModelKind::kFunAsrNano;
92
+ if (modelType == "qwen3_asr") return SttModelKind::kQwen3Asr;
91
93
  if (modelType == "fire_red_asr") return SttModelKind::kFireRedAsr;
92
94
  if (modelType == "moonshine") return SttModelKind::kMoonshine;
93
95
  if (modelType == "moonshine_v2") return SttModelKind::kMoonshineV2;
@@ -126,6 +128,8 @@ static bool CapabilitySupportsKind(
126
128
  return cap.hasWhisper;
127
129
  case SttModelKind::kFunAsrNano:
128
130
  return cap.hasFunAsrNano;
131
+ case SttModelKind::kQwen3Asr:
132
+ return cap.hasQwen3Asr;
129
133
  case SttModelKind::kFireRedAsr:
130
134
  return cap.hasFireRedAsr;
131
135
  case SttModelKind::kMoonshine:
@@ -189,6 +193,8 @@ static std::vector<SttModelKind> GetKindsFromDirName(const std::string& modelDir
189
193
  add(SttModelKind::kTransducer);
190
194
  add(SttModelKind::kZipformerCtc);
191
195
  }
196
+ if (lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos)
197
+ add(SttModelKind::kQwen3Asr);
192
198
  if (lower.find("funasr") != std::string::npos)
193
199
  add(SttModelKind::kFunAsrNano);
194
200
  if (lower.find("canary") != std::string::npos)
@@ -249,6 +255,19 @@ static SttCandidatePaths GatherSttCandidatePaths(
249
255
  p.funasrTokenizerDir = vocabInSubdir.substr(0, lastSlash);
250
256
  }
251
257
  }
258
+ p.qwen3ConvFrontend = FindOnnxByAnyToken(files, {"conv_frontend"}, preferInt8);
259
+ {
260
+ for (const auto& entry : files) {
261
+ if (entry.nameLower != "tokenizer_config.json") continue;
262
+ size_t slash = entry.path.find_last_of("/\\");
263
+ if (slash == std::string::npos) continue;
264
+ std::string dir = entry.path.substr(0, slash);
265
+ if (Qwen3TokenizerDirHasVocabAndMerges(files, dir)) {
266
+ p.qwen3TokenizerDir = dir;
267
+ break;
268
+ }
269
+ }
270
+ }
252
271
  p.moonshinePreprocessor = FindOnnxByAnyToken(files, {"preprocess", "preprocessor"}, preferInt8);
253
272
  p.moonshineEncoder = FindOnnxByAnyToken(files, {"encode", "encoder_model"}, preferInt8);
254
273
  p.moonshineUncachedDecoder = FindOnnxByAnyToken(files, {"uncached_decode", "uncached"}, preferInt8);
@@ -258,7 +277,8 @@ static SttCandidatePaths GatherSttCandidatePaths(
258
277
  static const std::vector<std::string> modelExcludes = {
259
278
  "encoder", "decoder", "joiner", "vocoder", "acoustic", "embedding", "llm",
260
279
  "encoder_adaptor", "encoder-adaptor", "encoder_model", "decoder_model",
261
- "merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached"
280
+ "merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached",
281
+ "conv_frontend"
262
282
  };
263
283
  p.paraformerModel = FindOnnxByAnyToken(files, {"model"}, preferInt8);
264
284
  if (!p.paraformerModel.empty()) {
@@ -302,6 +322,7 @@ static SttPathHints GetSttPathHints(const std::string& modelDir) {
302
322
  h.isLikelyWenetCtc = lower.find("wenet") != std::string::npos;
303
323
  h.isLikelySenseVoice = lower.find("sense") != std::string::npos || lower.find("sensevoice") != std::string::npos;
304
324
  h.isLikelyFunAsrNano = lower.find("funasr") != std::string::npos || lower.find("funasr-nano") != std::string::npos;
325
+ h.isLikelyQwen3Asr = lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos;
305
326
  h.isLikelyZipformer = lower.find("zipformer") != std::string::npos;
306
327
  h.isLikelyMoonshine = lower.find("moonshine") != std::string::npos;
307
328
  h.isLikelyDolphin = lower.find("dolphin") != std::string::npos;
@@ -404,7 +425,9 @@ static SttCapabilities ComputeSttCapabilities(const SttCandidatePaths& paths, co
404
425
  c.hasTransducer = !paths.encoder.empty() && !paths.decoder.empty() && !paths.joiner.empty();
405
426
  bool hasWhisperEnc = !paths.encoder.empty();
406
427
  bool hasWhisperDec = !paths.decoder.empty();
407
- c.hasWhisper = hasWhisperEnc && hasWhisperDec && paths.joiner.empty();
428
+ bool hasQwen3Tok = !paths.qwen3TokenizerDir.empty();
429
+ c.hasQwen3Asr = !paths.qwen3ConvFrontend.empty() && hasWhisperEnc && hasWhisperDec && hasQwen3Tok;
430
+ c.hasWhisper = hasWhisperEnc && hasWhisperDec && paths.joiner.empty() && !c.hasQwen3Asr;
408
431
  bool hasFunAsrTok = !paths.funasrTokenizerDir.empty();
409
432
  c.hasFunAsrNano = !paths.funasrEncoderAdaptor.empty() && !paths.funasrLLM.empty() &&
410
433
  !paths.funasrEmbedding.empty() && hasFunAsrTok;
@@ -446,6 +469,7 @@ static void CollectDetectedModels(
446
469
  out.push_back({"paraformer", modelDir});
447
470
  }
448
471
  if (cap.hasWhisper) out.push_back({"whisper", modelDir});
472
+ if (cap.hasQwen3Asr) out.push_back({"qwen3_asr", modelDir});
449
473
  if (cap.hasFunAsrNano) out.push_back({"funasr_nano", modelDir});
450
474
  if (cap.hasMoonshine) out.push_back({"moonshine", modelDir});
451
475
  if (cap.hasMoonshineV2) out.push_back({"moonshine_v2", modelDir});
@@ -507,6 +531,10 @@ static SttModelKind ResolveSttKind(
507
531
  outError = "FunASR Nano model requested but required files not found in " + modelDir;
508
532
  return SttModelKind::kUnknown;
509
533
  }
534
+ if (selected == SttModelKind::kQwen3Asr && !cap.hasQwen3Asr) {
535
+ outError = "Qwen3-ASR model requested but conv_frontend/encoder/decoder/tokenizer not found in " + modelDir;
536
+ return SttModelKind::kUnknown;
537
+ }
510
538
  if (selected == SttModelKind::kMoonshine && !cap.hasMoonshine) {
511
539
  outError = "Moonshine v1 model requested but preprocess/encode/uncached_decode/cached_decode not found in " + modelDir;
512
540
  return SttModelKind::kUnknown;
@@ -573,7 +601,9 @@ static SttModelKind ResolveSttKind(
573
601
  if (!paths.paraformerModel.empty()) return SttModelKind::kParaformer;
574
602
  if (cap.hasCanary) return SttModelKind::kCanary;
575
603
  if (cap.hasFireRedAsr) return SttModelKind::kFireRedAsr;
604
+ if (cap.hasQwen3Asr && hints.isLikelyQwen3Asr) return SttModelKind::kQwen3Asr;
576
605
  if (cap.hasWhisper) return SttModelKind::kWhisper;
606
+ if (cap.hasQwen3Asr) return SttModelKind::kQwen3Asr;
577
607
  if (cap.hasFunAsrNano) return SttModelKind::kFunAsrNano;
578
608
  if (cap.hasMoonshineV2) return SttModelKind::kMoonshineV2;
579
609
  if (cap.hasDolphin) return SttModelKind::kDolphin;
@@ -618,6 +648,12 @@ static void ApplyPathsForSttKind(SttModelKind kind, const SttCandidatePaths& can
618
648
  resultPaths.funasrEmbedding = candidate.funasrEmbedding;
619
649
  resultPaths.funasrTokenizer = candidate.funasrTokenizerDir;
620
650
  break;
651
+ case SttModelKind::kQwen3Asr:
652
+ resultPaths.qwen3ConvFrontend = candidate.qwen3ConvFrontend;
653
+ resultPaths.qwen3Encoder = candidate.encoder;
654
+ resultPaths.qwen3Decoder = candidate.decoder;
655
+ resultPaths.qwen3Tokenizer = candidate.qwen3TokenizerDir;
656
+ break;
621
657
  case SttModelKind::kMoonshine:
622
658
  resultPaths.moonshinePreprocessor = candidate.moonshinePreprocessor;
623
659
  resultPaths.moonshineEncoder = candidate.moonshineEncoder;
@@ -711,13 +747,13 @@ SttDetectResult DetectSttModel(
711
747
  EmptyOrPath(candidate.encoder), EmptyOrPath(candidate.decoder));
712
748
  LOGI("DetectSttModel: funasr encoderAdaptor=%s llm=%s embedding=%s tokenizerDir=%s",
713
749
  EmptyOrPath(candidate.funasrEncoderAdaptor), EmptyOrPath(candidate.funasrLLM), EmptyOrPath(candidate.funasrEmbedding), EmptyOrPath(candidate.funasrTokenizerDir));
714
- LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
750
+ LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasQwen3Asr=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
715
751
  (int)cap.hasTransducer, (int)cap.hasWhisper, (int)cap.hasMoonshine, (int)cap.hasMoonshineV2,
716
- (int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
752
+ (int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasQwen3Asr, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
717
753
  (int)cap.hasCanary, (int)cap.hasOmnilingual, (int)cap.hasMedAsr, (int)cap.hasTeleSpeechCtc, (int)cap.hasToneCtc);
718
- LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
754
+ LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyQwen3Asr=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
719
755
  (int)hints.isLikelyNemo, (int)hints.isLikelyTdt, (int)hints.isLikelyWenetCtc, (int)hints.isLikelySenseVoice,
720
- (int)hints.isLikelyFunAsrNano, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
756
+ (int)hints.isLikelyFunAsrNano, (int)hints.isLikelyQwen3Asr, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
721
757
  (int)hints.isLikelyFireRedAsr, (int)hints.isLikelyCanary, (int)hints.isLikelyOmnilingual, (int)hints.isLikelyMedAsr,
722
758
  (int)hints.isLikelyTeleSpeech, (int)hints.isLikelyToneCtc, (int)hints.isLikelyParaformer, (int)hints.isLikelyVad, (int)hints.isLikelyTdnn);
723
759
  }
@@ -747,7 +783,8 @@ SttDetectResult DetectSttModel(
747
783
  }
748
784
 
749
785
  LOGI("DetectSttModel: selected kind=%d (%s)", static_cast<int>(result.selectedKind), KindToName(result.selectedKind));
750
- result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano);
786
+ result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
787
+ result.selectedKind != SttModelKind::kQwen3Asr);
751
788
  ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
752
789
 
753
790
  if (!candidate.tokens.empty() && FileExists(candidate.tokens)) {
@@ -808,6 +845,11 @@ SttDetectResult DetectSttModel(
808
845
  EmptyOrPath(result.paths.funasrEncoderAdaptor), EmptyOrPath(result.paths.funasrLLM),
809
846
  EmptyOrPath(result.paths.funasrEmbedding), EmptyOrPath(result.paths.funasrTokenizer));
810
847
  break;
848
+ case SttModelKind::kQwen3Asr:
849
+ LOGI("DetectSttModel: paths set qwen3_asr conv=%s encoder=%s decoder=%s tokenizer=%s",
850
+ EmptyOrPath(result.paths.qwen3ConvFrontend), EmptyOrPath(result.paths.qwen3Encoder),
851
+ EmptyOrPath(result.paths.qwen3Decoder), EmptyOrPath(result.paths.qwen3Tokenizer));
852
+ break;
811
853
  default:
812
854
  break;
813
855
  }
@@ -854,7 +896,8 @@ SttDetectResult DetectSttModelFromFileList(
854
896
  return result;
855
897
  }
856
898
 
857
- result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano);
899
+ result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
900
+ result.selectedKind != SttModelKind::kQwen3Asr);
858
901
  ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
859
902
 
860
903
  result.paths.tokens = candidate.tokens;
@@ -20,6 +20,7 @@ enum class SttModelKind {
20
20
  kZipformerCtc,
21
21
  kWhisper,
22
22
  kFunAsrNano,
23
+ kQwen3Asr,
23
24
  kFireRedAsr,
24
25
  kMoonshine,
25
26
  kMoonshineV2,
@@ -42,6 +43,12 @@ enum class TtsModelKind {
42
43
  kSupertonic
43
44
  };
44
45
 
46
/** Speech-enhancement model families distinguished by detection and validation. */
enum class EnhancementModelKind {
    kUnknown,  // No recognised kind; also the default for a detection result.
    kGtcrn,    // Reported under the display name "GTCRN".
    kDpdfNet   // Reported under the display name "DPDFNet".
};
51
+
45
52
  struct SttModelPaths {
46
53
  std::string encoder;
47
54
  std::string decoder;
@@ -57,6 +64,11 @@ struct SttModelPaths {
57
64
  std::string funasrLLM;
58
65
  std::string funasrEmbedding;
59
66
  std::string funasrTokenizer;
67
+ /** Qwen3-ASR: conv_frontend + encoder + decoder + tokenizer directory. */
68
+ std::string qwen3ConvFrontend;
69
+ std::string qwen3Encoder;
70
+ std::string qwen3Decoder;
71
+ std::string qwen3Tokenizer;
60
72
  // Moonshine
61
73
  std::string moonshinePreprocessor;
62
74
  std::string moonshineEncoder;
@@ -89,6 +101,8 @@ struct SttCandidatePaths {
89
101
  std::string funasrLLM;
90
102
  std::string funasrEmbedding;
91
103
  std::string funasrTokenizerDir;
104
+ std::string qwen3ConvFrontend;
105
+ std::string qwen3TokenizerDir;
92
106
  std::string moonshinePreprocessor;
93
107
  std::string moonshineEncoder;
94
108
  std::string moonshineUncachedDecoder;
@@ -104,6 +118,7 @@ struct SttPathHints {
104
118
  bool isLikelyWenetCtc = false;
105
119
  bool isLikelySenseVoice = false;
106
120
  bool isLikelyFunAsrNano = false;
121
+ bool isLikelyQwen3Asr = false;
107
122
  bool isLikelyZipformer = false;
108
123
  bool isLikelyMoonshine = false;
109
124
  bool isLikelyDolphin = false;
@@ -128,6 +143,7 @@ struct SttCapabilities {
128
143
  bool hasMoonshineV2 = false;
129
144
  bool hasParaformer = false;
130
145
  bool hasFunAsrNano = false;
146
+ bool hasQwen3Asr = false;
131
147
  bool hasDolphin = false;
132
148
  bool hasFireRedAsr = false;
133
149
  /** True when dir name suggests Fire Red but only a single CTC/paraformer model (no encoder/decoder). Use zipformer_ctc. */
@@ -164,6 +180,10 @@ struct TtsModelPaths {
164
180
  std::string voiceStyle;
165
181
  };
166
182
 
183
/** Resolved file paths for a speech-enhancement model. */
struct EnhancementModelPaths {
    /** Path of the model file; empty until one has been resolved. */
    std::string model;
};
186
+
167
187
  struct SttDetectResult {
168
188
  bool ok = false;
169
189
  std::string error;
@@ -185,6 +205,14 @@ struct TtsDetectResult {
185
205
  std::vector<std::string> lexiconLanguageCandidates;
186
206
  };
187
207
 
208
+ struct EnhancementDetectResult {
209
+ bool ok = false;
210
+ std::string error;
211
+ std::vector<DetectedModel> detectedModels;
212
+ EnhancementModelKind selectedKind = EnhancementModelKind::kUnknown;
213
+ EnhancementModelPaths paths;
214
+ };
215
+
188
216
  SttDetectResult DetectSttModel(
189
217
  const std::string& modelDir,
190
218
  const std::optional<bool>& preferInt8,
@@ -218,6 +246,19 @@ TtsDetectResult DetectTtsModelFromFileList(
218
246
  const std::string& modelType = "auto"
219
247
  );
220
248
 
249
+ EnhancementDetectResult DetectEnhancementModel(
250
+ const std::string& modelDir,
251
+ const std::string& modelType
252
+ );
253
+
254
+ /** Test-only: Like DetectEnhancementModel but takes a pre-built file list; no filesystem access.
255
+ * Only used by the host-side C++ test suite (test/cpp/model_detect_test.cpp). */
256
+ EnhancementDetectResult DetectEnhancementModelFromFileList(
257
+ const std::vector<model_detect::FileEntry>& files,
258
+ const std::string& modelDir,
259
+ const std::string& modelType = "auto"
260
+ );
261
+
221
262
  } // namespace sherpaonnx
222
263
 
223
264
  #endif // SHERPA_ONNX_MODEL_DETECT_H
@@ -23,6 +23,7 @@ const char* SttModelKindToString(SttModelKind k) {
23
23
  case SttModelKind::kZipformerCtc: return "zipformer_ctc";
24
24
  case SttModelKind::kWhisper: return "whisper";
25
25
  case SttModelKind::kFunAsrNano: return "funasr_nano";
26
+ case SttModelKind::kQwen3Asr: return "qwen3_asr";
26
27
  case SttModelKind::kFireRedAsr: return "fire_red_asr";
27
28
  case SttModelKind::kMoonshine: return "moonshine";
28
29
  case SttModelKind::kMoonshineV2: return "moonshine_v2";
@@ -79,6 +80,10 @@ jobject SttDetectResultToJava(JNIEnv* env, const SttDetectResult& result) {
79
80
  PutString(env, pathsMap, mapPut, "funasrLLM", result.paths.funasrLLM);
80
81
  PutString(env, pathsMap, mapPut, "funasrEmbedding", result.paths.funasrEmbedding);
81
82
  PutString(env, pathsMap, mapPut, "funasrTokenizer", result.paths.funasrTokenizer);
83
+ PutString(env, pathsMap, mapPut, "qwen3ConvFrontend", result.paths.qwen3ConvFrontend);
84
+ PutString(env, pathsMap, mapPut, "qwen3Encoder", result.paths.qwen3Encoder);
85
+ PutString(env, pathsMap, mapPut, "qwen3Decoder", result.paths.qwen3Decoder);
86
+ PutString(env, pathsMap, mapPut, "qwen3Tokenizer", result.paths.qwen3Tokenizer);
82
87
  PutString(env, pathsMap, mapPut, "moonshinePreprocessor", result.paths.moonshinePreprocessor);
83
88
  PutString(env, pathsMap, mapPut, "moonshineEncoder", result.paths.moonshineEncoder);
84
89
  PutString(env, pathsMap, mapPut, "moonshineUncachedDecoder", result.paths.moonshineUncachedDecoder);
@@ -0,0 +1,68 @@
1
+ #include "sherpa-onnx-validate-enhancement.h"
2
+
3
+ #include <cstddef>
4
+
5
+ namespace sherpaonnx {
6
+ namespace {
7
+
8
+ static const EnhancementFieldRequirement kGenericReqs[] = {
9
+ {"model", &EnhancementModelPaths::model, true},
10
+ };
11
+
12
+ static const EnhancementFieldRequirement* GetRequirements(
13
+ EnhancementModelKind kind,
14
+ size_t& count
15
+ ) {
16
+ switch (kind) {
17
+ case EnhancementModelKind::kGtcrn:
18
+ case EnhancementModelKind::kDpdfNet:
19
+ count = std::size(kGenericReqs);
20
+ return kGenericReqs;
21
+ default:
22
+ count = 0;
23
+ return nullptr;
24
+ }
25
+ }
26
+
27
+ static const char* EnhancementKindToName(EnhancementModelKind kind) {
28
+ switch (kind) {
29
+ case EnhancementModelKind::kGtcrn:
30
+ return "GTCRN";
31
+ case EnhancementModelKind::kDpdfNet:
32
+ return "DPDFNet";
33
+ default:
34
+ return "Unknown";
35
+ }
36
+ }
37
+
38
+ } // namespace
39
+
40
+ EnhancementValidationResult ValidateEnhancementPaths(
41
+ EnhancementModelKind kind,
42
+ const EnhancementModelPaths& paths,
43
+ const std::string& modelDir
44
+ ) {
45
+ EnhancementValidationResult result;
46
+ size_t count = 0;
47
+ const auto* reqs = GetRequirements(kind, count);
48
+ if (!reqs) return result;
49
+
50
+ for (size_t i = 0; i < count; ++i) {
51
+ if (reqs[i].required && (paths.*(reqs[i].field)).empty()) {
52
+ result.missingRequired.push_back(reqs[i].fieldName);
53
+ }
54
+ }
55
+
56
+ if (!result.missingRequired.empty()) {
57
+ result.ok = false;
58
+ result.error = std::string("Enhancement ") + EnhancementKindToName(kind) +
59
+ ": missing required files in " + modelDir + ": ";
60
+ for (size_t i = 0; i < result.missingRequired.size(); ++i) {
61
+ if (i > 0) result.error += ", ";
62
+ result.error += result.missingRequired[i];
63
+ }
64
+ }
65
+ return result;
66
+ }
67
+
68
+ } // namespace sherpaonnx
@@ -0,0 +1,30 @@
1
+ #ifndef SHERPA_ONNX_VALIDATE_ENHANCEMENT_H
2
+ #define SHERPA_ONNX_VALIDATE_ENHANCEMENT_H
3
+
4
+ #include "sherpa-onnx-model-detect.h"
5
+ #include <string>
6
+ #include <vector>
7
+
8
+ namespace sherpaonnx {
9
+
10
+ struct EnhancementFieldRequirement {
11
+ const char* fieldName;
12
+ std::string EnhancementModelPaths::* field;
13
+ bool required;
14
+ };
15
+
16
/** Result of validating enhancement model paths. */
struct EnhancementValidationResult {
    /** Starts out true; set to false when a required field is missing. */
    bool ok = true;
    /** Names of required fields that were empty. */
    std::vector<std::string> missingRequired;
    /** Readable error summary; empty when validation passed. */
    std::string error;
};
21
+
22
+ EnhancementValidationResult ValidateEnhancementPaths(
23
+ EnhancementModelKind kind,
24
+ const EnhancementModelPaths& paths,
25
+ const std::string& modelDir
26
+ );
27
+
28
+ } // namespace sherpaonnx
29
+
30
+ #endif // SHERPA_ONNX_VALIDATE_ENHANCEMENT_H
@@ -52,6 +52,13 @@ static const SttFieldRequirement kFunAsrNanoReqs[] = {
52
52
  {"funasrTokenizer", &SttModelPaths::funasrTokenizer, true},
53
53
  };
54
54
 
55
+ static const SttFieldRequirement kQwen3AsrReqs[] = {
56
+ {"qwen3ConvFrontend", &SttModelPaths::qwen3ConvFrontend, true},
57
+ {"qwen3Encoder", &SttModelPaths::qwen3Encoder, true},
58
+ {"qwen3Decoder", &SttModelPaths::qwen3Decoder, true},
59
+ {"qwen3Tokenizer", &SttModelPaths::qwen3Tokenizer, true},
60
+ };
61
+
55
62
  static const SttFieldRequirement kMoonshineReqs[] = {
56
63
  {"moonshinePreprocessor", &SttModelPaths::moonshinePreprocessor, true},
57
64
  {"moonshineEncoder", &SttModelPaths::moonshineEncoder, true},
@@ -120,6 +127,9 @@ static const SttFieldRequirement* GetRequirements(SttModelKind kind, size_t& cou
120
127
  case SttModelKind::kFunAsrNano:
121
128
  count = std::size(kFunAsrNanoReqs);
122
129
  return kFunAsrNanoReqs;
130
+ case SttModelKind::kQwen3Asr:
131
+ count = std::size(kQwen3AsrReqs);
132
+ return kQwen3AsrReqs;
123
133
  case SttModelKind::kMoonshine:
124
134
  count = std::size(kMoonshineReqs);
125
135
  return kMoonshineReqs;
@@ -161,6 +171,7 @@ static const char* SttKindToName(SttModelKind k) {
161
171
  case SttModelKind::kZipformerCtc: return "Zipformer CTC";
162
172
  case SttModelKind::kWhisper: return "Whisper";
163
173
  case SttModelKind::kFunAsrNano: return "FunASR Nano";
174
+ case SttModelKind::kQwen3Asr: return "Qwen3 ASR";
164
175
  case SttModelKind::kFireRedAsr: return "Fire Red ASR";
165
176
  case SttModelKind::kMoonshine: return "Moonshine";
166
177
  case SttModelKind::kMoonshineV2: return "Moonshine v2";
@@ -20,6 +20,7 @@
20
20
  #include "sherpa-onnx-model-detect.h"
21
21
  #include "sherpa-onnx-stt-wrapper.h"
22
22
  #include "sherpa-onnx-tts-wrapper.h"
23
+ #include "sherpa-onnx-enhancement-wrapper.h"
23
24
 
24
25
  extern "C" {
25
26
 
@@ -187,4 +188,24 @@ Java_com_sherpaonnx_SherpaOnnxModule_nativeDetectTtsModel(
187
188
  return sherpaonnx::TtsDetectResultToJava(env, result);
188
189
  }
189
190
 
191
+ // Detect enhancement model in directory. Returns HashMap with success, error, detectedModels, modelType, paths.
192
+ JNIEXPORT jobject JNICALL
193
+ Java_com_sherpaonnx_SherpaOnnxModule_nativeDetectEnhancementModel(
194
+ JNIEnv* env,
195
+ jobject /* this */,
196
+ jstring j_model_dir,
197
+ jstring j_model_type) {
198
+ const char* model_dir_c = env->GetStringUTFChars(j_model_dir, nullptr);
199
+ const char* model_type_c =
200
+ j_model_type ? env->GetStringUTFChars(j_model_type, nullptr) : nullptr;
201
+ std::string model_dir(model_dir_c ? model_dir_c : "");
202
+ std::string model_type(model_type_c ? model_type_c : "auto");
203
+ env->ReleaseStringUTFChars(j_model_dir, model_dir_c);
204
+ if (model_type_c) env->ReleaseStringUTFChars(j_model_type, model_type_c);
205
+
206
+ sherpaonnx::EnhancementDetectResult result =
207
+ sherpaonnx::DetectEnhancementModel(model_dir, model_type);
208
+ return sherpaonnx::EnhancementDetectResultToJava(env, result);
209
+ }
210
+
190
211
  } // extern "C"