react-native-sherpa-onnx 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +7 -2
  2. package/SherpaOnnx.podspec +4 -1
  3. package/android/prebuilt-download.gradle +23 -23
  4. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +1 -0
  5. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +23 -0
  6. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +9 -0
  7. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +51 -8
  8. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +31 -4
  9. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +19 -1
  10. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +5 -0
  11. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +7 -0
  12. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +11 -0
  13. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +14 -0
  14. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +110 -35
  15. package/android/src/main/java/com/sherpaonnx/SherpaOnnxExtractionNotificationHelper.kt +102 -0
  16. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +92 -18
  17. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +22 -0
  18. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +15 -0
  19. package/ios/Resources/model_licenses/asr-models-license-status.csv +1 -0
  20. package/ios/SherpaOnnx+STT.mm +13 -1
  21. package/ios/SherpaOnnx+TTS.mm +1 -0
  22. package/ios/SherpaOnnx.mm +87 -17
  23. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +5 -0
  24. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +23 -0
  25. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +51 -7
  26. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +36 -4
  27. package/ios/model_detect/sherpa-onnx-model-detect.h +19 -1
  28. package/ios/model_detect/sherpa-onnx-validate-stt.mm +11 -0
  29. package/ios/model_detect/sherpa-onnx-validate-tts.mm +14 -0
  30. package/ios/stt/sherpa-onnx-stt-wrapper.h +11 -1
  31. package/ios/stt/sherpa-onnx-stt-wrapper.mm +30 -2
  32. package/ios/tts/sherpa-onnx-tts-wrapper.mm +25 -0
  33. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  34. package/lib/module/download/ModelDownloadManager.js +1 -1
  35. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  36. package/lib/module/download/background-downloader-types.js +2 -0
  37. package/lib/module/download/background-downloader-types.js.map +1 -0
  38. package/lib/module/download/downloadTask.js +54 -1
  39. package/lib/module/download/downloadTask.js.map +1 -1
  40. package/lib/module/download/index.js +1 -1
  41. package/lib/module/download/index.js.map +1 -1
  42. package/lib/module/download/postDownloadProcessing.js +17 -4
  43. package/lib/module/download/postDownloadProcessing.js.map +1 -1
  44. package/lib/module/download/registry.js +1 -0
  45. package/lib/module/download/registry.js.map +1 -1
  46. package/lib/module/extraction/extractTarBz2.js +2 -2
  47. package/lib/module/extraction/extractTarBz2.js.map +1 -1
  48. package/lib/module/extraction/extractTarZst.js +2 -2
  49. package/lib/module/extraction/extractTarZst.js.map +1 -1
  50. package/lib/module/extraction/index.js +10 -5
  51. package/lib/module/extraction/index.js.map +1 -1
  52. package/lib/module/stt/index.js +4 -2
  53. package/lib/module/stt/index.js.map +1 -1
  54. package/lib/module/stt/streaming.js +2 -1
  55. package/lib/module/stt/streaming.js.map +1 -1
  56. package/lib/module/stt/types.js +3 -1
  57. package/lib/module/stt/types.js.map +1 -1
  58. package/lib/module/tts/index.js +5 -3
  59. package/lib/module/tts/index.js.map +1 -1
  60. package/lib/module/tts/streaming.js +4 -2
  61. package/lib/module/tts/streaming.js.map +1 -1
  62. package/lib/module/tts/types.js +4 -1
  63. package/lib/module/tts/types.js.map +1 -1
  64. package/lib/typescript/src/NativeSherpaOnnx.d.ts +26 -10
  65. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  66. package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
  67. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  68. package/lib/typescript/src/download/background-downloader-types.d.ts +64 -0
  69. package/lib/typescript/src/download/background-downloader-types.d.ts.map +1 -0
  70. package/lib/typescript/src/download/downloadTask.d.ts +10 -0
  71. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -1
  72. package/lib/typescript/src/download/index.d.ts +2 -2
  73. package/lib/typescript/src/download/index.d.ts.map +1 -1
  74. package/lib/typescript/src/download/postDownloadProcessing.d.ts +9 -0
  75. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -1
  76. package/lib/typescript/src/download/registry.d.ts.map +1 -1
  77. package/lib/typescript/src/extraction/extractTarBz2.d.ts +2 -1
  78. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -1
  79. package/lib/typescript/src/extraction/extractTarZst.d.ts +2 -1
  80. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -1
  81. package/lib/typescript/src/extraction/index.d.ts +1 -1
  82. package/lib/typescript/src/extraction/index.d.ts.map +1 -1
  83. package/lib/typescript/src/extraction/types.d.ts +12 -0
  84. package/lib/typescript/src/extraction/types.d.ts.map +1 -1
  85. package/lib/typescript/src/stt/index.d.ts +1 -1
  86. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  87. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  88. package/lib/typescript/src/stt/types.d.ts +16 -1
  89. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  90. package/lib/typescript/src/tts/index.d.ts +1 -1
  91. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  92. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  93. package/lib/typescript/src/tts/types.d.ts +6 -1
  94. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  95. package/package.json +1 -1
  96. package/scripts/ci/update_model_license_csv.sh +16 -16
  97. package/src/NativeSherpaOnnx.ts +38 -11
  98. package/src/download/ModelDownloadManager.ts +2 -0
  99. package/src/download/background-downloader-types.ts +73 -0
  100. package/src/download/downloadTask.ts +68 -0
  101. package/src/download/index.ts +2 -0
  102. package/src/download/postDownloadProcessing.ts +24 -1
  103. package/src/download/registry.ts +1 -0
  104. package/src/extraction/extractTarBz2.ts +7 -2
  105. package/src/extraction/extractTarZst.ts +7 -2
  106. package/src/extraction/index.ts +29 -6
  107. package/src/extraction/types.ts +16 -0
  108. package/src/stt/index.ts +8 -7
  109. package/src/stt/streaming.ts +7 -1
  110. package/src/stt/types.ts +18 -0
  111. package/src/tts/index.ts +10 -7
  112. package/src/tts/streaming.ts +8 -3
  113. package/src/tts/types.ts +9 -0
  114. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  115. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  116. package/lib/module/download/background-downloader.d.js +0 -2
  117. package/lib/module/download/background-downloader.d.js.map +0 -1
  118. package/src/download/background-downloader.d.ts +0 -43
@@ -36,6 +36,7 @@ static NSString *sttModelKindToNSString(sherpaonnx::SttModelKind kind) {
36
36
  case K::kZipformerCtc: return @"zipformer_ctc";
37
37
  case K::kWhisper: return @"whisper";
38
38
  case K::kFunAsrNano: return @"funasr_nano";
39
+ case K::kQwen3Asr: return @"qwen3_asr";
39
40
  case K::kFireRedAsr: return @"fire_red_asr";
40
41
  case K::kMoonshine: return @"moonshine";
41
42
  case K::kMoonshineV2: return @"moonshine_v2";
@@ -164,10 +165,12 @@ static NSDictionary *sttResultToDict(const sherpaonnx::SttRecognitionResult& r)
164
165
  sherpaonnx::SttSenseVoiceOptions senseVoiceOpts;
165
166
  sherpaonnx::SttCanaryOptions canaryOpts;
166
167
  sherpaonnx::SttFunAsrNanoOptions funasrNanoOpts;
168
+ sherpaonnx::SttQwen3AsrOptions qwen3AsrOpts;
167
169
  const sherpaonnx::SttWhisperOptions *whisperOptsPtr = nullptr;
168
170
  const sherpaonnx::SttSenseVoiceOptions *senseVoiceOptsPtr = nullptr;
169
171
  const sherpaonnx::SttCanaryOptions *canaryOptsPtr = nullptr;
170
172
  const sherpaonnx::SttFunAsrNanoOptions *funasrNanoOptsPtr = nullptr;
173
+ const sherpaonnx::SttQwen3AsrOptions *qwen3AsrOptsPtr = nullptr;
171
174
  if (modelOptions != nil && [modelOptions isKindOfClass:[NSDictionary class]]) {
172
175
  NSDictionary *w = modelOptions[@"whisper"];
173
176
  if ([w isKindOfClass:[NSDictionary class]]) {
@@ -202,12 +205,21 @@ static NSDictionary *sttResultToDict(const sherpaonnx::SttRecognitionResult& r)
202
205
  if (fn[@"hotwords"] != nil) funasrNanoOpts.hotwords = std::string([(NSString *)fn[@"hotwords"] UTF8String]);
203
206
  funasrNanoOptsPtr = &funasrNanoOpts;
204
207
  }
208
+ NSDictionary *q3 = modelOptions[@"qwen3Asr"];
209
+ if ([q3 isKindOfClass:[NSDictionary class]]) {
210
+ if (q3[@"maxTotalLen"] != nil) qwen3AsrOpts.max_total_len = [(NSNumber *)q3[@"maxTotalLen"] intValue];
211
+ if (q3[@"maxNewTokens"] != nil) qwen3AsrOpts.max_new_tokens = [(NSNumber *)q3[@"maxNewTokens"] intValue];
212
+ if (q3[@"temperature"] != nil) qwen3AsrOpts.temperature = [(NSNumber *)q3[@"temperature"] floatValue];
213
+ if (q3[@"topP"] != nil) qwen3AsrOpts.top_p = [(NSNumber *)q3[@"topP"] floatValue];
214
+ if (q3[@"seed"] != nil) qwen3AsrOpts.seed = [(NSNumber *)q3[@"seed"] intValue];
215
+ qwen3AsrOptsPtr = &qwen3AsrOpts;
216
+ }
205
217
  }
206
218
 
207
219
  sherpaonnx::SttInitializeResult result = inst->wrapper->initialize(
208
220
  modelDirStr, preferInt8Opt, modelTypeOpt, debugVal, hotwordsFileOpt, hotwordsScoreOpt,
209
221
  numThreadsOpt, providerOpt, ruleFstsOpt, ruleFarsOpt, ditherOpt,
210
- whisperOptsPtr, senseVoiceOptsPtr, canaryOptsPtr, funasrNanoOptsPtr);
222
+ whisperOptsPtr, senseVoiceOptsPtr, canaryOptsPtr, funasrNanoOptsPtr, qwen3AsrOptsPtr);
211
223
 
212
224
  if (result.success) {
213
225
  RCTLogInfo(@"Sherpa-onnx initialized successfully");
@@ -58,6 +58,7 @@ static NSString *ttsModelKindToNSString(sherpaonnx::TtsModelKind kind) {
58
58
  case K::kKitten: return @"kitten";
59
59
  case K::kPocket: return @"pocket";
60
60
  case K::kZipvoice: return @"zipvoice";
61
+ case K::kSupertonic: return @"supertonic";
61
62
  default: return @"unknown";
62
63
  }
63
64
  }
package/ios/SherpaOnnx.mm CHANGED
@@ -138,9 +138,15 @@
138
138
  - (void)extractTarBz2:(NSString *)sourcePath
139
139
  targetPath:(NSString *)targetPath
140
140
  force:(BOOL)force
141
- resolve:(RCTPromiseResolveBlock)resolve
142
- reject:(RCTPromiseRejectBlock)reject
141
+ showNotificationsEnabled:(NSNumber *)showNotificationsEnabled
142
+ notificationTitle:(NSString *)notificationTitle
143
+ notificationText:(NSString *)notificationText
144
+ resolve:(RCTPromiseResolveBlock)resolve
145
+ reject:(RCTPromiseRejectBlock)reject
143
146
  {
147
+ (void)showNotificationsEnabled;
148
+ (void)notificationTitle;
149
+ (void)notificationText;
144
150
  SherpaOnnxArchiveHelper *helper = [SherpaOnnxArchiveHelper new];
145
151
  NSDictionary *result = [helper extractTarBz2:sourcePath
146
152
  targetPath:targetPath
@@ -165,9 +171,15 @@
165
171
  - (void)extractTarZst:(NSString *)sourcePath
166
172
  targetPath:(NSString *)targetPath
167
173
  force:(BOOL)force
168
- resolve:(RCTPromiseResolveBlock)resolve
169
- reject:(RCTPromiseRejectBlock)reject
174
+ showNotificationsEnabled:(NSNumber *)showNotificationsEnabled
175
+ notificationTitle:(NSString *)notificationTitle
176
+ notificationText:(NSString *)notificationText
177
+ resolve:(RCTPromiseResolveBlock)resolve
178
+ reject:(RCTPromiseRejectBlock)reject
170
179
  {
180
+ (void)showNotificationsEnabled;
181
+ (void)notificationTitle;
182
+ (void)notificationText;
171
183
  SherpaOnnxArchiveHelper *helper = [SherpaOnnxArchiveHelper new];
172
184
  NSDictionary *result = [helper extractTarZst:sourcePath
173
185
  targetPath:targetPath
@@ -229,19 +241,33 @@
229
241
 
230
242
  - (void)extractTarZstFromAsset:(NSString *)assetPath
231
243
  targetPath:(NSString *)targetPath
232
- force:(NSNumber *)force
233
- resolve:(RCTPromiseResolveBlock)resolve
234
- reject:(RCTPromiseRejectBlock)reject
244
+ force:(BOOL)force
245
+ showNotificationsEnabled:(NSNumber *)showNotificationsEnabled
246
+ notificationTitle:(NSString *)notificationTitle
247
+ notificationText:(NSString *)notificationText
248
+ resolve:(RCTPromiseResolveBlock)resolve
249
+ reject:(RCTPromiseRejectBlock)reject
235
250
  {
251
+ (void)force;
252
+ (void)showNotificationsEnabled;
253
+ (void)notificationTitle;
254
+ (void)notificationText;
236
255
  resolve(@{ @"success": @NO, @"reason": @"Not supported on iOS; use path-based extraction." });
237
256
  }
238
257
 
239
258
  - (void)extractTarBz2FromAsset:(NSString *)assetPath
240
259
  targetPath:(NSString *)targetPath
241
- force:(NSNumber *)force
242
- resolve:(RCTPromiseResolveBlock)resolve
243
- reject:(RCTPromiseRejectBlock)reject
260
+ force:(BOOL)force
261
+ showNotificationsEnabled:(NSNumber *)showNotificationsEnabled
262
+ notificationTitle:(NSString *)notificationTitle
263
+ notificationText:(NSString *)notificationText
264
+ resolve:(RCTPromiseResolveBlock)resolve
265
+ reject:(RCTPromiseRejectBlock)reject
244
266
  {
267
+ (void)force;
268
+ (void)showNotificationsEnabled;
269
+ (void)notificationTitle;
270
+ (void)notificationText;
245
271
  resolve(@{ @"success": @NO, @"reason": @"Not supported on iOS; use path-based extraction." });
246
272
  }
247
273
 
@@ -329,15 +355,59 @@
329
355
  nil);
330
356
  return;
331
357
  }
332
- NSString *resourcePath = [[NSBundle mainBundle] resourcePath];
333
- NSString *fullPath = [resourcePath stringByAppendingPathComponent:assetPath];
358
+ NSString *fullPath = nil;
359
+ NSBundle *mainBundle = [NSBundle mainBundle];
360
+ NSString *assetDir = [assetPath stringByDeletingLastPathComponent];
361
+ NSString *assetNameWithExt = [assetPath lastPathComponent];
362
+ NSString *assetName = [assetNameWithExt stringByDeletingPathExtension];
363
+ NSString *assetExt = [assetNameWithExt pathExtension];
364
+
365
+ // 1) App bundle: regular nested path (keeps generic asset support)
366
+ NSString *mainPath = [mainBundle pathForResource:assetName
367
+ ofType:assetExt.length > 0 ? assetExt : nil
368
+ inDirectory:assetDir.length > 0 ? assetDir : nil];
369
+ if (mainPath.length > 0) {
370
+ fullPath = mainPath;
371
+ }
372
+
373
+ // 2) CocoaPods resource bundle: files are flattened into bundle root
374
+ if (!fullPath) {
375
+ NSString *resBundlePath = [mainBundle pathForResource:@"SherpaOnnxResources"
376
+ ofType:@"bundle"];
377
+ if (resBundlePath.length > 0) {
378
+ NSBundle *resBundle = [NSBundle bundleWithPath:resBundlePath];
379
+ if (resBundle) {
380
+ NSString *bundleRootPath = [resBundle pathForResource:assetName
381
+ ofType:assetExt.length > 0 ? assetExt : nil];
382
+ if (bundleRootPath.length > 0) {
383
+ fullPath = bundleRootPath;
384
+ }
385
+ }
386
+ }
387
+ }
388
+
389
+ if (!fullPath) {
390
+ reject(@"ASSET_READ_ERROR",
391
+ [NSString stringWithFormat:@"Failed to locate asset %@", assetPath],
392
+ nil);
393
+ return;
394
+ }
395
+
334
396
  NSError *error = nil;
335
- NSString *content = [NSString stringWithContentsOfFile:fullPath encoding:NSUTF8StringEncoding error:&error];
336
- if (error) {
337
- reject(@"ASSET_READ_ERROR", [NSString stringWithFormat:@"Failed to read asset %@: %@", assetPath, error.localizedDescription], error);
338
- } else {
339
- resolve(content);
397
+ NSString *content = [NSString stringWithContentsOfFile:fullPath
398
+ encoding:NSUTF8StringEncoding
399
+ error:&error];
400
+ if (error || content == nil) {
401
+ reject(@"ASSET_READ_ERROR",
402
+ [NSString stringWithFormat:@"Failed to read asset %@ at %@: %@",
403
+ assetPath,
404
+ fullPath,
405
+ error.localizedDescription ?: @"Unknown error"],
406
+ error);
407
+ return;
340
408
  }
409
+
410
+ resolve(content);
341
411
  }
342
412
 
343
413
  @end
@@ -80,6 +80,11 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
80
80
  const std::string& rootDir
81
81
  );
82
82
 
83
+ bool Qwen3TokenizerDirHasVocabAndMerges(
84
+ const std::vector<FileEntry>& files,
85
+ const std::string& dir
86
+ );
87
+
83
88
  } // namespace model_detect
84
89
  } // namespace sherpaonnx
85
90
 
@@ -257,5 +257,28 @@ std::vector<LexiconCandidate> FindLexiconCandidates(
257
257
  return candidates;
258
258
  }
259
259
 
260
+ bool Qwen3TokenizerDirHasVocabAndMerges(
261
+ const std::vector<FileEntry>& files,
262
+ const std::string& dirRaw
263
+ ) {
264
+ std::string dir = dirRaw;
265
+ while (!dir.empty() && (dir.back() == '/' || dir.back() == '\\'))
266
+ dir.pop_back();
267
+ if (dir.empty()) return false;
268
+ bool hasVocab = false;
269
+ bool hasMerges = false;
270
+ const std::string prefix = dir + "/";
271
+ for (const auto& e : files) {
272
+ if (e.path.size() <= prefix.size()) continue;
273
+ if (e.path.compare(0, prefix.size(), prefix) != 0) continue;
274
+ std::string rest = e.path.substr(prefix.size());
275
+ if (rest.find('/') != std::string::npos || rest.find('\\') != std::string::npos) continue;
276
+ if (e.nameLower == "vocab.json") hasVocab = true;
277
+ if (e.nameLower == "merges.txt") hasMerges = true;
278
+ }
279
+ if (hasVocab && hasMerges) return true;
280
+ return FileExists(dir + "/vocab.json") && FileExists(dir + "/merges.txt");
281
+ }
282
+
260
283
  } // namespace model_detect
261
284
  } // namespace sherpaonnx
@@ -58,6 +58,7 @@ static const char* KindToName(SttModelKind k) {
58
58
  case SttModelKind::kZipformerCtc: return "zipformer_ctc";
59
59
  case SttModelKind::kWhisper: return "whisper";
60
60
  case SttModelKind::kFunAsrNano: return "funasr_nano";
61
+ case SttModelKind::kQwen3Asr: return "qwen3_asr";
61
62
  case SttModelKind::kFireRedAsr: return "fire_red_asr";
62
63
  case SttModelKind::kMoonshine: return "moonshine";
63
64
  case SttModelKind::kMoonshineV2: return "moonshine_v2";
@@ -85,6 +86,7 @@ SttModelKind ParseSttModelType(const std::string& modelType) {
85
86
  if (modelType == "zipformer_ctc" || modelType == "ctc") return SttModelKind::kZipformerCtc;
86
87
  if (modelType == "whisper") return SttModelKind::kWhisper;
87
88
  if (modelType == "funasr_nano") return SttModelKind::kFunAsrNano;
89
+ if (modelType == "qwen3_asr") return SttModelKind::kQwen3Asr;
88
90
  if (modelType == "fire_red_asr") return SttModelKind::kFireRedAsr;
89
91
  if (modelType == "moonshine") return SttModelKind::kMoonshine;
90
92
  if (modelType == "moonshine_v2") return SttModelKind::kMoonshineV2;
@@ -123,6 +125,8 @@ static bool CapabilitySupportsKind(
123
125
  return cap.hasWhisper;
124
126
  case SttModelKind::kFunAsrNano:
125
127
  return cap.hasFunAsrNano;
128
+ case SttModelKind::kQwen3Asr:
129
+ return cap.hasQwen3Asr;
126
130
  case SttModelKind::kFireRedAsr:
127
131
  return cap.hasFireRedAsr;
128
132
  case SttModelKind::kMoonshine:
@@ -185,6 +189,8 @@ static std::vector<SttModelKind> GetKindsFromDirName(const std::string& modelDir
185
189
  add(SttModelKind::kTransducer);
186
190
  add(SttModelKind::kZipformerCtc);
187
191
  }
192
+ if (lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos)
193
+ add(SttModelKind::kQwen3Asr);
188
194
  if (lower.find("funasr") != std::string::npos)
189
195
  add(SttModelKind::kFunAsrNano);
190
196
  if (lower.find("canary") != std::string::npos)
@@ -245,6 +251,19 @@ static SttCandidatePaths GatherSttCandidatePaths(
245
251
  p.funasrTokenizerDir = vocabInSubdir.substr(0, lastSlash);
246
252
  }
247
253
  }
254
+ p.qwen3ConvFrontend = FindOnnxByAnyToken(files, {"conv_frontend"}, preferInt8);
255
+ {
256
+ for (const auto& entry : files) {
257
+ if (entry.nameLower != "tokenizer_config.json") continue;
258
+ size_t slash = entry.path.find_last_of("/\\");
259
+ if (slash == std::string::npos) continue;
260
+ std::string dir = entry.path.substr(0, slash);
261
+ if (Qwen3TokenizerDirHasVocabAndMerges(files, dir)) {
262
+ p.qwen3TokenizerDir = dir;
263
+ break;
264
+ }
265
+ }
266
+ }
248
267
  p.moonshinePreprocessor = FindOnnxByAnyToken(files, {"preprocess", "preprocessor"}, preferInt8);
249
268
  p.moonshineEncoder = FindOnnxByAnyToken(files, {"encode", "encoder_model"}, preferInt8);
250
269
  p.moonshineUncachedDecoder = FindOnnxByAnyToken(files, {"uncached_decode", "uncached"}, preferInt8);
@@ -254,7 +273,8 @@ static SttCandidatePaths GatherSttCandidatePaths(
254
273
  static const std::vector<std::string> modelExcludes = {
255
274
  "encoder", "decoder", "joiner", "vocoder", "acoustic", "embedding", "llm",
256
275
  "encoder_adaptor", "encoder-adaptor", "encoder_model", "decoder_model",
257
- "merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached"
276
+ "merged_decoder", "decoder_model_merged", "preprocess", "encode", "uncached", "cached",
277
+ "conv_frontend"
258
278
  };
259
279
  p.paraformerModel = FindOnnxByAnyToken(files, {"model"}, preferInt8);
260
280
  if (!p.paraformerModel.empty()) {
@@ -297,6 +317,7 @@ static SttPathHints GetSttPathHints(const std::string& modelDir) {
297
317
  h.isLikelyWenetCtc = lower.find("wenet") != std::string::npos;
298
318
  h.isLikelySenseVoice = lower.find("sense") != std::string::npos || lower.find("sensevoice") != std::string::npos;
299
319
  h.isLikelyFunAsrNano = lower.find("funasr") != std::string::npos || lower.find("funasr-nano") != std::string::npos;
320
+ h.isLikelyQwen3Asr = lower.find("qwen3-asr") != std::string::npos || lower.find("qwen3_asr") != std::string::npos;
300
321
  h.isLikelyZipformer = lower.find("zipformer") != std::string::npos;
301
322
  h.isLikelyMoonshine = lower.find("moonshine") != std::string::npos;
302
323
  h.isLikelyDolphin = lower.find("dolphin") != std::string::npos;
@@ -338,7 +359,9 @@ static SttCapabilities ComputeSttCapabilities(const SttCandidatePaths& paths, co
338
359
  c.hasTransducer = !paths.encoder.empty() && !paths.decoder.empty() && !paths.joiner.empty();
339
360
  bool hasWhisperEnc = !paths.encoder.empty();
340
361
  bool hasWhisperDec = !paths.decoder.empty();
341
- c.hasWhisper = hasWhisperEnc && hasWhisperDec && paths.joiner.empty();
362
+ bool hasQwen3Tok = !paths.qwen3TokenizerDir.empty();
363
+ c.hasQwen3Asr = !paths.qwen3ConvFrontend.empty() && hasWhisperEnc && hasWhisperDec && hasQwen3Tok;
364
+ c.hasWhisper = hasWhisperEnc && hasWhisperDec && paths.joiner.empty() && !c.hasQwen3Asr;
342
365
  bool hasFunAsrTok = !paths.funasrTokenizerDir.empty();
343
366
  c.hasFunAsrNano = !paths.funasrEncoderAdaptor.empty() && !paths.funasrLLM.empty() &&
344
367
  !paths.funasrEmbedding.empty() && hasFunAsrTok;
@@ -378,6 +401,7 @@ static void CollectDetectedModels(
378
401
  out.push_back({"paraformer", modelDir});
379
402
  }
380
403
  if (cap.hasWhisper) out.push_back({"whisper", modelDir});
404
+ if (cap.hasQwen3Asr) out.push_back({"qwen3_asr", modelDir});
381
405
  if (cap.hasFunAsrNano) out.push_back({"funasr_nano", modelDir});
382
406
  if (cap.hasMoonshine) out.push_back({"moonshine", modelDir});
383
407
  if (cap.hasMoonshineV2) out.push_back({"moonshine_v2", modelDir});
@@ -439,6 +463,10 @@ static SttModelKind ResolveSttKind(
439
463
  outError = "FunASR Nano model requested but required files not found in " + modelDir;
440
464
  return SttModelKind::kUnknown;
441
465
  }
466
+ if (selected == SttModelKind::kQwen3Asr && !cap.hasQwen3Asr) {
467
+ outError = "Qwen3-ASR model requested but conv_frontend/encoder/decoder/tokenizer not found in " + modelDir;
468
+ return SttModelKind::kUnknown;
469
+ }
442
470
  if (selected == SttModelKind::kMoonshine && !cap.hasMoonshine) {
443
471
  outError = "Moonshine v1 model requested but preprocess/encode/uncached_decode/cached_decode not found in " + modelDir;
444
472
  return SttModelKind::kUnknown;
@@ -505,7 +533,9 @@ static SttModelKind ResolveSttKind(
505
533
  if (!paths.paraformerModel.empty()) return SttModelKind::kParaformer;
506
534
  if (cap.hasCanary) return SttModelKind::kCanary;
507
535
  if (cap.hasFireRedAsr) return SttModelKind::kFireRedAsr;
536
+ if (cap.hasQwen3Asr && hints.isLikelyQwen3Asr) return SttModelKind::kQwen3Asr;
508
537
  if (cap.hasWhisper) return SttModelKind::kWhisper;
538
+ if (cap.hasQwen3Asr) return SttModelKind::kQwen3Asr;
509
539
  if (cap.hasFunAsrNano) return SttModelKind::kFunAsrNano;
510
540
  if (cap.hasMoonshineV2) return SttModelKind::kMoonshineV2;
511
541
  if (cap.hasDolphin) return SttModelKind::kDolphin;
@@ -551,6 +581,12 @@ static void ApplyPathsForSttKind(SttModelKind kind, const SttCandidatePaths& can
551
581
  resultPaths.funasrEmbedding = candidate.funasrEmbedding;
552
582
  resultPaths.funasrTokenizer = candidate.funasrTokenizerDir;
553
583
  break;
584
+ case SttModelKind::kQwen3Asr:
585
+ resultPaths.qwen3ConvFrontend = candidate.qwen3ConvFrontend;
586
+ resultPaths.qwen3Encoder = candidate.encoder;
587
+ resultPaths.qwen3Decoder = candidate.decoder;
588
+ resultPaths.qwen3Tokenizer = candidate.qwen3TokenizerDir;
589
+ break;
554
590
  case SttModelKind::kMoonshine:
555
591
  resultPaths.moonshinePreprocessor = candidate.moonshinePreprocessor;
556
592
  resultPaths.moonshineEncoder = candidate.moonshineEncoder;
@@ -624,13 +660,15 @@ SttDetectResult DetectSttModel(
624
660
  EmptyOrPath(candidate.encoder), EmptyOrPath(candidate.decoder));
625
661
  LOGI("DetectSttModel: funasr encoderAdaptor=%s llm=%s embedding=%s tokenizerDir=%s",
626
662
  EmptyOrPath(candidate.funasrEncoderAdaptor), EmptyOrPath(candidate.funasrLLM), EmptyOrPath(candidate.funasrEmbedding), EmptyOrPath(candidate.funasrTokenizerDir));
627
- LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
663
+ LOGI("DetectSttModel: qwen3_asr conv=%s tokenizerDir=%s",
664
+ EmptyOrPath(candidate.qwen3ConvFrontend), EmptyOrPath(candidate.qwen3TokenizerDir));
665
+ LOGI("DetectSttModel: hasTransducer=%d hasWhisper=%d hasMoonshine=%d hasMoonshineV2=%d hasParaformer=%d hasFunAsrNano=%d hasQwen3Asr=%d hasDolphin=%d hasFireRedAsr=%d hasFireRedCtc=%d hasCanary=%d hasOmnilingual=%d hasMedAsr=%d hasTeleSpeechCtc=%d hasToneCtc=%d",
628
666
  (int)cap.hasTransducer, (int)cap.hasWhisper, (int)cap.hasMoonshine, (int)cap.hasMoonshineV2,
629
- (int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
667
+ (int)cap.hasParaformer, (int)cap.hasFunAsrNano, (int)cap.hasQwen3Asr, (int)cap.hasDolphin, (int)cap.hasFireRedAsr, (int)cap.hasFireRedCtc,
630
668
  (int)cap.hasCanary, (int)cap.hasOmnilingual, (int)cap.hasMedAsr, (int)cap.hasTeleSpeechCtc, (int)cap.hasToneCtc);
631
- LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
669
+ LOGI("DetectSttModel: hints isLikelyNemo=%d isLikelyTdt=%d isLikelyWenetCtc=%d isLikelySenseVoice=%d isLikelyFunAsrNano=%d isLikelyQwen3Asr=%d isLikelyZipformer=%d isLikelyMoonshine=%d isLikelyDolphin=%d isLikelyFireRedAsr=%d isLikelyCanary=%d isLikelyOmnilingual=%d isLikelyMedAsr=%d isLikelyTeleSpeech=%d isLikelyToneCtc=%d isLikelyParaformer=%d isLikelyVad=%d isLikelyTdnn=%d",
632
670
  (int)hints.isLikelyNemo, (int)hints.isLikelyTdt, (int)hints.isLikelyWenetCtc, (int)hints.isLikelySenseVoice,
633
- (int)hints.isLikelyFunAsrNano, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
671
+ (int)hints.isLikelyFunAsrNano, (int)hints.isLikelyQwen3Asr, (int)hints.isLikelyZipformer, (int)hints.isLikelyMoonshine, (int)hints.isLikelyDolphin,
634
672
  (int)hints.isLikelyFireRedAsr, (int)hints.isLikelyCanary, (int)hints.isLikelyOmnilingual, (int)hints.isLikelyMedAsr,
635
673
  (int)hints.isLikelyTeleSpeech, (int)hints.isLikelyToneCtc, (int)hints.isLikelyParaformer, (int)hints.isLikelyVad, (int)hints.isLikelyTdnn);
636
674
  }
@@ -653,7 +691,8 @@ SttDetectResult DetectSttModel(
653
691
  }
654
692
 
655
693
  LOGI("DetectSttModel: selected kind=%d (%s)", static_cast<int>(result.selectedKind), KindToName(result.selectedKind));
656
- result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano);
694
+ result.tokensRequired = (result.selectedKind != SttModelKind::kFunAsrNano &&
695
+ result.selectedKind != SttModelKind::kQwen3Asr);
657
696
  ApplyPathsForSttKind(result.selectedKind, candidate, result.paths);
658
697
 
659
698
  if (!candidate.tokens.empty() && FileExists(candidate.tokens)) {
@@ -711,6 +750,11 @@ SttDetectResult DetectSttModel(
711
750
  EmptyOrPath(result.paths.funasrEncoderAdaptor), EmptyOrPath(result.paths.funasrLLM),
712
751
  EmptyOrPath(result.paths.funasrEmbedding), EmptyOrPath(result.paths.funasrTokenizer));
713
752
  break;
753
+ case SttModelKind::kQwen3Asr:
754
+ LOGI("DetectSttModel: paths set qwen3_asr conv=%s encoder=%s decoder=%s tokenizer=%s",
755
+ EmptyOrPath(result.paths.qwen3ConvFrontend), EmptyOrPath(result.paths.qwen3Encoder),
756
+ EmptyOrPath(result.paths.qwen3Decoder), EmptyOrPath(result.paths.qwen3Tokenizer));
757
+ break;
714
758
  default:
715
759
  break;
716
760
  }
@@ -2,7 +2,7 @@
2
2
  * sherpa-onnx-model-detect-tts.mm
3
3
  *
4
4
  * Purpose: Detects TTS (text-to-speech) model type and fills TtsModelPaths from a model directory.
5
- * Used by the TTS wrapper on iOS. Supports Vits, Matcha, Kokoro, Kitten, Pocket, Zipvoice.
5
+ * Used by the TTS wrapper on iOS. Supports Vits, Matcha, Kokoro, Kitten, Pocket, Zipvoice, Supertonic.
6
6
  *
7
7
  * --- Detection pipeline (overview) ---
8
8
  *
@@ -58,6 +58,7 @@ TtsModelKind ParseTtsModelType(const std::string& modelType) {
58
58
  if (modelType == "kitten") return TtsModelKind::kKitten;
59
59
  if (modelType == "pocket") return TtsModelKind::kPocket;
60
60
  if (modelType == "zipvoice") return TtsModelKind::kZipvoice;
61
+ if (modelType == "supertonic") return TtsModelKind::kSupertonic;
61
62
  return TtsModelKind::kUnknown;
62
63
  }
63
64
 
@@ -70,6 +71,7 @@ static bool CapabilitySupportsTtsKind(
70
71
  bool hasMatcha,
71
72
  bool hasPocket,
72
73
  bool hasZipvoice,
74
+ bool hasSupertonic,
73
75
  bool hasVoicesFile,
74
76
  bool hasDataDir
75
77
  ) {
@@ -85,6 +87,8 @@ static bool CapabilitySupportsTtsKind(
85
87
  return hasPocket;
86
88
  case TtsModelKind::kZipvoice:
87
89
  return hasZipvoice;
90
+ case TtsModelKind::kSupertonic:
91
+ return hasSupertonic;
88
92
  default:
89
93
  return false;
90
94
  }
@@ -109,6 +113,7 @@ static std::vector<TtsModelKind> GetKindsFromDirNameTts(const std::string& model
109
113
  if (lower.find("matcha") != std::string::npos) add(TtsModelKind::kMatcha);
110
114
  if (lower.find("pocket") != std::string::npos) add(TtsModelKind::kPocket);
111
115
  if (lower.find("zipvoice") != std::string::npos) add(TtsModelKind::kZipvoice);
116
+ if (lower.find("supertonic") != std::string::npos) add(TtsModelKind::kSupertonic);
112
117
  if (lower.find("kokoro") != std::string::npos) add(TtsModelKind::kKokoro);
113
118
  if (lower.find("kitten") != std::string::npos) add(TtsModelKind::kKitten);
114
119
  if (lower.find("vits") != std::string::npos) add(TtsModelKind::kVits);
@@ -154,14 +159,27 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
154
159
  std::string textConditioner = FindOnnxByAnyToken(files, {"text_conditioner", "text-conditioner"}, std::nullopt);
155
160
  std::string vocabJsonFile = FindFileByName(files, "vocab.json");
156
161
  std::string tokenScoresJsonFile = FindFileByName(files, "token_scores.json");
162
+ std::string durationPredictor = FindOnnxByAnyToken(files, {"duration_predictor", "duration-predictor"}, std::nullopt);
163
+ std::string textEncoderSupertonic = FindOnnxByAnyToken(files, {"text_encoder", "text-encoder"}, std::nullopt);
164
+ std::string vectorEstimator = FindOnnxByAnyToken(files, {"vector_estimator", "vector-estimator"}, std::nullopt);
165
+ std::string ttsJsonFile = FindFileByName(files, "tts.json");
166
+ std::string unicodeIndexerFile = FindFileByName(files, "unicode_indexer.bin");
167
+ std::string voiceStyleFile = FindFileByName(files, "voice.bin");
157
168
 
158
- std::vector<std::string> modelExcludes = {"acoustic", "vocoder", "encoder", "decoder", "joiner"};
169
+ std::vector<std::string> modelExcludes = {
170
+ "acoustic", "vocoder", "encoder", "decoder", "joiner",
171
+ // Supertonic component models are not VITS monolithic model.onnx files.
172
+ "duration_predictor", "duration-predictor",
173
+ "text_encoder", "text-encoder",
174
+ "vector_estimator", "vector-estimator"
175
+ };
159
176
  std::string ttsModel = FindOnnxByAnyToken(files, {"model"}, std::nullopt);
160
177
  if (ttsModel.empty()) {
161
178
  ttsModel = FindLargestOnnxExcludingTokens(files, modelExcludes);
162
179
  }
163
180
 
164
- bool hasVits = !ttsModel.empty();
181
+ // VITS requires both model.onnx-like file and tokens.txt
182
+ bool hasVits = !ttsModel.empty() && !tokensFile.empty();
165
183
  std::string modelDirLower = ToLower(modelDir);
166
184
  bool isLikelyMatcha = modelDirLower.find("matcha") != std::string::npos;
167
185
  bool hasMatcha = (!acousticModel.empty() && !vocoder.empty())
@@ -178,6 +196,9 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
178
196
  }
179
197
  bool hasPocket = !lmFlow.empty() && !lmMain.empty() && !encoder.empty() && !decoder.empty() &&
180
198
  !textConditioner.empty() && !vocabJsonFile.empty() && !tokenScoresJsonFile.empty();
199
+ bool hasSupertonic = !durationPredictor.empty() && !textEncoderSupertonic.empty() &&
200
+ !vectorEstimator.empty() && !vocoder.empty() && !ttsJsonFile.empty() &&
201
+ !unicodeIndexerFile.empty() && !voiceStyleFile.empty();
181
202
  bool hasDataDir = !dataDirPath.empty();
182
203
 
183
204
  bool isLikelyKitten = modelDirLower.find("kitten") != std::string::npos;
@@ -192,6 +213,9 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
192
213
  if (hasZipvoice && !hasMatcha) {
193
214
  result.detectedModels.push_back({"zipvoice", modelDir});
194
215
  }
216
+ if (hasSupertonic) {
217
+ result.detectedModels.push_back({"supertonic", modelDir});
218
+ }
195
219
  if (hasVoicesFile) {
196
220
  if (isLikelyKitten && !isLikelyKokoro) {
197
221
  result.detectedModels.push_back({"kitten", modelDir});
@@ -228,7 +252,7 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
228
252
  std::vector<TtsModelKind> nameCandidates = GetKindsFromDirNameTts(modelDir);
229
253
  if (!nameCandidates.empty()) {
230
254
  for (TtsModelKind k : nameCandidates) {
231
- if (CapabilitySupportsTtsKind(k, hasVits, hasMatcha, hasPocket, hasZipvoice,
255
+ if (CapabilitySupportsTtsKind(k, hasVits, hasMatcha, hasPocket, hasZipvoice, hasSupertonic,
232
256
  hasVoicesFile, hasDataDir)) {
233
257
  selected = k;
234
258
  break;
@@ -243,6 +267,8 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
243
267
  selected = TtsModelKind::kPocket;
244
268
  } else if (hasZipvoice) {
245
269
  selected = TtsModelKind::kZipvoice;
270
+ } else if (hasSupertonic) {
271
+ selected = TtsModelKind::kSupertonic;
246
272
  } else if (hasVoicesFile) {
247
273
  if (isLikelyKitten && !isLikelyKokoro) {
248
274
  selected = TtsModelKind::kKitten;
@@ -289,6 +315,12 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
289
315
  result.paths.textConditioner = textConditioner;
290
316
  result.paths.vocabJson = vocabJsonFile;
291
317
  result.paths.tokenScoresJson = tokenScoresJsonFile;
318
+ result.paths.durationPredictor = durationPredictor;
319
+ result.paths.textEncoder = textEncoderSupertonic;
320
+ result.paths.vectorEstimator = vectorEstimator;
321
+ result.paths.ttsJson = ttsJsonFile;
322
+ result.paths.unicodeIndexer = unicodeIndexerFile;
323
+ result.paths.voiceStyle = voiceStyleFile;
292
324
 
293
325
  auto validation = ValidateTtsPaths(selected, result.paths, modelDir);
294
326
  if (!validation.ok) {
@@ -19,6 +19,7 @@ enum class SttModelKind {
19
19
  kZipformerCtc,
20
20
  kWhisper,
21
21
  kFunAsrNano,
22
+ kQwen3Asr,
22
23
  kFireRedAsr,
23
24
  kMoonshine,
24
25
  kMoonshineV2,
@@ -37,7 +38,8 @@ enum class TtsModelKind {
37
38
  kKokoro,
38
39
  kKitten,
39
40
  kPocket,
40
- kZipvoice
41
+ kZipvoice,
42
+ kSupertonic
41
43
  };
42
44
 
43
45
  struct SttModelPaths {
@@ -55,6 +57,11 @@ struct SttModelPaths {
55
57
  std::string funasrLLM;
56
58
  std::string funasrEmbedding;
57
59
  std::string funasrTokenizer;
60
+ /** Qwen3-ASR: conv_frontend.onnx + encoder + decoder + tokenizer dir (vocab.json, merges.txt, tokenizer_config.json). */
61
+ std::string qwen3ConvFrontend;
62
+ std::string qwen3Encoder;
63
+ std::string qwen3Decoder;
64
+ std::string qwen3Tokenizer;
58
65
  std::string moonshinePreprocessor;
59
66
  std::string moonshineEncoder;
60
67
  std::string moonshineUncachedDecoder;
@@ -84,6 +91,8 @@ struct SttCandidatePaths {
84
91
  std::string funasrLLM;
85
92
  std::string funasrEmbedding;
86
93
  std::string funasrTokenizerDir;
94
+ std::string qwen3ConvFrontend;
95
+ std::string qwen3TokenizerDir;
87
96
  std::string moonshinePreprocessor;
88
97
  std::string moonshineEncoder;
89
98
  std::string moonshineUncachedDecoder;
@@ -99,6 +108,7 @@ struct SttPathHints {
99
108
  bool isLikelyWenetCtc = false;
100
109
  bool isLikelySenseVoice = false;
101
110
  bool isLikelyFunAsrNano = false;
111
+ bool isLikelyQwen3Asr = false;
102
112
  bool isLikelyZipformer = false;
103
113
  bool isLikelyMoonshine = false;
104
114
  bool isLikelyDolphin = false;
@@ -123,6 +133,7 @@ struct SttCapabilities {
123
133
  bool hasMoonshineV2 = false;
124
134
  bool hasParaformer = false;
125
135
  bool hasFunAsrNano = false;
136
+ bool hasQwen3Asr = false;
126
137
  bool hasDolphin = false;
127
138
  bool hasFireRedAsr = false;
128
139
  /** True when dir name suggests Fire Red but only a single CTC/paraformer model (no encoder/decoder). Use zipformer_ctc. */
@@ -150,6 +161,13 @@ struct TtsModelPaths {
150
161
  std::string textConditioner;
151
162
  std::string vocabJson;
152
163
  std::string tokenScoresJson;
164
+ // Supertonic TTS
165
+ std::string durationPredictor;
166
+ std::string textEncoder;
167
+ std::string vectorEstimator;
168
+ std::string ttsJson;
169
+ std::string unicodeIndexer;
170
+ std::string voiceStyle;
153
171
  };
154
172
 
155
173
  struct SttDetectResult {
@@ -52,6 +52,13 @@ static const SttFieldRequirement kFunAsrNanoReqs[] = {
52
52
  {"funasrTokenizer", &SttModelPaths::funasrTokenizer, true},
53
53
  };
54
54
 
55
+ static const SttFieldRequirement kQwen3AsrReqs[] = {
56
+ {"qwen3ConvFrontend", &SttModelPaths::qwen3ConvFrontend, true},
57
+ {"qwen3Encoder", &SttModelPaths::qwen3Encoder, true},
58
+ {"qwen3Decoder", &SttModelPaths::qwen3Decoder, true},
59
+ {"qwen3Tokenizer", &SttModelPaths::qwen3Tokenizer, true},
60
+ };
61
+
55
62
  static const SttFieldRequirement kMoonshineReqs[] = {
56
63
  {"moonshinePreprocessor", &SttModelPaths::moonshinePreprocessor, true},
57
64
  {"moonshineEncoder", &SttModelPaths::moonshineEncoder, true},
@@ -120,6 +127,9 @@ static const SttFieldRequirement* GetRequirements(SttModelKind kind, size_t& cou
120
127
  case SttModelKind::kFunAsrNano:
121
128
  count = std::size(kFunAsrNanoReqs);
122
129
  return kFunAsrNanoReqs;
130
+ case SttModelKind::kQwen3Asr:
131
+ count = std::size(kQwen3AsrReqs);
132
+ return kQwen3AsrReqs;
123
133
  case SttModelKind::kMoonshine:
124
134
  count = std::size(kMoonshineReqs);
125
135
  return kMoonshineReqs;
@@ -161,6 +171,7 @@ static const char* SttKindToName(SttModelKind k) {
161
171
  case SttModelKind::kZipformerCtc: return "Zipformer CTC";
162
172
  case SttModelKind::kWhisper: return "Whisper";
163
173
  case SttModelKind::kFunAsrNano: return "FunASR Nano";
174
+ case SttModelKind::kQwen3Asr: return "Qwen3 ASR";
164
175
  case SttModelKind::kFireRedAsr: return "Fire Red ASR";
165
176
  case SttModelKind::kMoonshine: return "Moonshine";
166
177
  case SttModelKind::kMoonshineV2: return "Moonshine v2";
@@ -59,6 +59,16 @@ static const TtsFieldRequirement kZipvoiceReqs[] = {
59
59
  {"lexicon", &TtsModelPaths::lexicon, true},
60
60
  };
61
61
 
62
+ static const TtsFieldRequirement kSupertonicReqs[] = {
63
+ {"durationPredictor", &TtsModelPaths::durationPredictor, true},
64
+ {"textEncoder", &TtsModelPaths::textEncoder, true},
65
+ {"vectorEstimator", &TtsModelPaths::vectorEstimator, true},
66
+ {"vocoder", &TtsModelPaths::vocoder, true},
67
+ {"ttsJson", &TtsModelPaths::ttsJson, true},
68
+ {"unicodeIndexer", &TtsModelPaths::unicodeIndexer, true},
69
+ {"voiceStyle", &TtsModelPaths::voiceStyle, true},
70
+ };
71
+
62
72
  // ============================================================
63
73
 
64
74
  static const TtsFieldRequirement* GetRequirements(TtsModelKind kind, size_t& count) {
@@ -79,6 +89,9 @@ static const TtsFieldRequirement* GetRequirements(TtsModelKind kind, size_t& cou
79
89
  case TtsModelKind::kZipvoice:
80
90
  count = std::size(kZipvoiceReqs);
81
91
  return kZipvoiceReqs;
92
+ case TtsModelKind::kSupertonic:
93
+ count = std::size(kSupertonicReqs);
94
+ return kSupertonicReqs;
82
95
  default:
83
96
  count = 0;
84
97
  return nullptr;
@@ -93,6 +106,7 @@ static const char* TtsKindToName(TtsModelKind k) {
93
106
  case TtsModelKind::kKitten: return "Kitten";
94
107
  case TtsModelKind::kPocket: return "Pocket";
95
108
  case TtsModelKind::kZipvoice: return "Zipvoice";
109
+ case TtsModelKind::kSupertonic: return "Supertonic";
96
110
  default: return "Unknown";
97
111
  }
98
112
  }