react-native-sherpa-onnx 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. package/LICENSE +1 -0
  2. package/README.md +92 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +344 -152
  15. package/android/prebuilt-versions.gradle +1 -1
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +2 -2
  21. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +37 -6
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +9 -1
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +7 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +18 -2
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +40 -10
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +99 -0
  28. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  29. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +127 -97
  30. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  31. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  32. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  33. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  34. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  35. package/ios/SherpaOnnx+TTS.mm +179 -20
  36. package/ios/SherpaOnnx.mm +54 -0
  37. package/ios/SherpaOnnxAudioConvert.h +10 -0
  38. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  39. package/ios/archive/sherpa-onnx-archive-helper.h +3 -0
  40. package/ios/archive/sherpa-onnx-archive-helper.mm +39 -6
  41. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +49 -6
  42. package/ios/model_detect/sherpa-onnx-model-detect.h +9 -1
  43. package/ios/model_detect/sherpa-onnx-validate-tts.mm +18 -2
  44. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  45. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  46. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  47. package/ios/tts/sherpa-onnx-tts-wrapper.mm +158 -3
  48. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  49. package/lib/module/audio/index.js +8 -0
  50. package/lib/module/audio/index.js.map +1 -1
  51. package/lib/module/download/ModelDownloadManager.js +10 -929
  52. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  53. package/lib/module/download/activeModelOperations.js +26 -0
  54. package/lib/module/download/activeModelOperations.js.map +1 -0
  55. package/lib/module/download/background-downloader-types.js +2 -0
  56. package/lib/module/download/background-downloader-types.js.map +1 -0
  57. package/lib/module/download/bulkPurge.js +72 -0
  58. package/lib/module/download/bulkPurge.js.map +1 -0
  59. package/lib/module/download/checksumPrompt.js +19 -0
  60. package/lib/module/download/checksumPrompt.js.map +1 -0
  61. package/lib/module/download/constants.js +7 -0
  62. package/lib/module/download/constants.js.map +1 -0
  63. package/lib/module/download/downloadEvents.js +35 -0
  64. package/lib/module/download/downloadEvents.js.map +1 -0
  65. package/lib/module/download/downloadTask.js +438 -0
  66. package/lib/module/download/downloadTask.js.map +1 -0
  67. package/lib/module/download/ensureModel.js +89 -0
  68. package/lib/module/download/ensureModel.js.map +1 -0
  69. package/lib/module/download/index.js +4 -4
  70. package/lib/module/download/index.js.map +1 -1
  71. package/lib/module/download/localModels.js +151 -0
  72. package/lib/module/download/localModels.js.map +1 -0
  73. package/lib/module/download/modelExtraction.js +174 -0
  74. package/lib/module/download/modelExtraction.js.map +1 -0
  75. package/lib/module/download/paths.js +98 -0
  76. package/lib/module/download/paths.js.map +1 -0
  77. package/lib/module/download/postDownloadProcessing.js +206 -0
  78. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  79. package/lib/module/download/protectedModelKeys.js +31 -0
  80. package/lib/module/download/protectedModelKeys.js.map +1 -0
  81. package/lib/module/download/registry.js +268 -0
  82. package/lib/module/download/registry.js.map +1 -0
  83. package/lib/module/download/retry.js +59 -0
  84. package/lib/module/download/retry.js.map +1 -0
  85. package/lib/module/download/types.js +17 -0
  86. package/lib/module/download/types.js.map +1 -0
  87. package/lib/module/download/validation.js +101 -5
  88. package/lib/module/download/validation.js.map +1 -1
  89. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  90. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  91. package/lib/module/{download → extraction}/extractTarZst.js +3 -1
  92. package/lib/module/extraction/extractTarZst.js.map +1 -0
  93. package/lib/module/extraction/index.js +3 -4
  94. package/lib/module/extraction/index.js.map +1 -1
  95. package/lib/module/index.js +1 -1
  96. package/lib/module/index.js.map +1 -1
  97. package/lib/module/licenses.js +63 -0
  98. package/lib/module/licenses.js.map +1 -0
  99. package/lib/module/stt/index.js +16 -2
  100. package/lib/module/stt/index.js.map +1 -1
  101. package/lib/module/stt/streaming.js +2 -0
  102. package/lib/module/stt/streaming.js.map +1 -1
  103. package/lib/module/stt/streamingTypes.js.map +1 -1
  104. package/lib/module/stt/types.js.map +1 -1
  105. package/lib/module/tts/index.js +21 -3
  106. package/lib/module/tts/index.js.map +1 -1
  107. package/lib/module/tts/streaming.js +5 -1
  108. package/lib/module/tts/streaming.js.map +1 -1
  109. package/lib/module/tts/types.js +4 -1
  110. package/lib/module/tts/types.js.map +1 -1
  111. package/lib/module/utils.js +16 -1
  112. package/lib/module/utils.js.map +1 -1
  113. package/lib/typescript/src/NativeSherpaOnnx.d.ts +34 -6
  114. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  115. package/lib/typescript/src/audio/index.d.ts +10 -0
  116. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  117. package/lib/typescript/src/download/ModelDownloadManager.d.ts +11 -108
  118. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  119. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  120. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  121. package/lib/typescript/src/download/background-downloader-types.d.ts +64 -0
  122. package/lib/typescript/src/download/background-downloader-types.d.ts.map +1 -0
  123. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  124. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  125. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  126. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  127. package/lib/typescript/src/download/constants.d.ts +5 -0
  128. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  129. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  130. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  131. package/lib/typescript/src/download/downloadTask.d.ts +30 -0
  132. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  133. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  134. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  135. package/lib/typescript/src/download/index.d.ts +7 -7
  136. package/lib/typescript/src/download/index.d.ts.map +1 -1
  137. package/lib/typescript/src/download/localModels.d.ts +15 -0
  138. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  139. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  140. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  141. package/lib/typescript/src/download/paths.d.ts +28 -0
  142. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  143. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  144. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  145. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  146. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  147. package/lib/typescript/src/download/registry.d.ts +14 -0
  148. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  149. package/lib/typescript/src/download/retry.d.ts +15 -0
  150. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  151. package/lib/typescript/src/download/types.d.ts +96 -0
  152. package/lib/typescript/src/download/types.d.ts.map +1 -0
  153. package/lib/typescript/src/download/validation.d.ts +19 -0
  154. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  155. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  156. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  157. package/lib/typescript/src/index.d.ts +1 -0
  158. package/lib/typescript/src/index.d.ts.map +1 -1
  159. package/lib/typescript/src/licenses.d.ts +10 -0
  160. package/lib/typescript/src/licenses.d.ts.map +1 -0
  161. package/lib/typescript/src/stt/index.d.ts +4 -1
  162. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  163. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  164. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  165. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  166. package/lib/typescript/src/stt/types.d.ts +3 -1
  167. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  168. package/lib/typescript/src/tts/index.d.ts +4 -2
  169. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  170. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  171. package/lib/typescript/src/tts/types.d.ts +12 -6
  172. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  173. package/lib/typescript/src/utils.d.ts +5 -0
  174. package/lib/typescript/src/utils.d.ts.map +1 -1
  175. package/package.json +6 -1
  176. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  177. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  178. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  179. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  180. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  181. package/scripts/ci/update_model_license_csv.sh +765 -0
  182. package/scripts/setup-ios-framework.sh +14 -11
  183. package/scripts/update_commercial_use.js +73 -0
  184. package/src/NativeSherpaOnnx.ts +37 -6
  185. package/src/audio/index.ts +20 -0
  186. package/src/download/ModelDownloadManager.ts +57 -1343
  187. package/src/download/activeModelOperations.ts +38 -0
  188. package/src/download/background-downloader-types.ts +73 -0
  189. package/src/download/bulkPurge.ts +102 -0
  190. package/src/download/checksumPrompt.ts +25 -0
  191. package/src/download/constants.ts +5 -0
  192. package/src/download/downloadEvents.ts +55 -0
  193. package/src/download/downloadTask.ts +565 -0
  194. package/src/download/ensureModel.ts +124 -0
  195. package/src/download/index.ts +21 -4
  196. package/src/download/localModels.ts +234 -0
  197. package/src/download/modelExtraction.ts +244 -0
  198. package/src/download/paths.ts +134 -0
  199. package/src/download/postDownloadProcessing.ts +292 -0
  200. package/src/download/protectedModelKeys.ts +30 -0
  201. package/src/download/registry.ts +405 -0
  202. package/src/download/retry.ts +76 -0
  203. package/src/download/types.ts +120 -0
  204. package/src/download/validation.ts +114 -8
  205. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  206. package/src/{download → extraction}/extractTarZst.ts +3 -1
  207. package/src/extraction/index.ts +3 -7
  208. package/src/index.tsx +1 -0
  209. package/src/licenses.ts +100 -0
  210. package/src/stt/index.ts +20 -2
  211. package/src/stt/streaming.ts +3 -0
  212. package/src/stt/streamingTypes.ts +5 -0
  213. package/src/stt/types.ts +3 -1
  214. package/src/tts/index.ts +33 -2
  215. package/src/tts/streaming.ts +12 -0
  216. package/src/tts/types.ts +15 -5
  217. package/src/utils.ts +22 -1
  218. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  219. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  220. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  221. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  222. package/lib/module/download/extractTarBz2.js.map +0 -1
  223. package/lib/module/download/extractTarZst.js.map +0 -1
  224. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  225. package/lib/typescript/src/download/extractTarZst.d.ts.map +0 -1
  226. package/scripts/check-qnn-support.sh +0 -78
  227. /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
  228. /package/lib/typescript/src/{download → extraction}/extractTarZst.d.ts +0 -0
@@ -2,6 +2,7 @@
2
2
  #import <React/RCTLog.h>
3
3
  #include <string>
4
4
  #include <sys/stat.h>
5
+ #include <vector>
5
6
 
6
7
  #ifdef HAVE_FFMPEG
7
8
  extern "C" {
@@ -12,11 +13,14 @@ extern "C" {
12
13
  #include <libswresample/swresample.h>
13
14
  }
14
15
  #include <cstdio>
15
- #include <vector>
16
16
  #endif
17
17
 
18
18
  // Forward declaration — convertToFormat handles all formats including WAV (16 kHz mono).
19
19
  static std::string convertToFormat(const char* inputPath, const char* outputPath, const char* formatHint, int outputSampleRateHz);
20
+ static std::string decodeAudioFileToFloatMono(const char* inputPath,
21
+ int targetSampleRateHz,
22
+ std::vector<float>* outSamples,
23
+ int* outSampleRate);
20
24
 
21
25
  // Convenience: convert any audio to 16 kHz mono WAV via the main convertToFormat pipeline.
22
26
  static std::string convertToWav16kMono(const char* inputPath, const char* outputPath) {
@@ -659,6 +663,222 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
659
663
  #endif
660
664
  }
661
665
 
666
/**
 * Decodes the first audio stream of a file into mono float PCM via FFmpeg.
 *
 * @param inputPath           Path handed to avformat_open_input(). Must not be null.
 * @param targetSampleRateHz  Output sample rate; values <= 0 keep the input stream's rate.
 * @param outSamples          Receives the decoded samples (AV_SAMPLE_FMT_FLT, one channel).
 *                            Cleared on entry and left empty on failure.
 * @param outSampleRate       Receives the output sample rate on success, 0 on failure.
 * @return Empty string on success, otherwise a human-readable error message.
 *
 * When built without HAVE_FFMPEG the function always returns an error.
 */
static std::string decodeAudioFileToFloatMono(const char* inputPath,
                                              int targetSampleRateHz,
                                              std::vector<float>* outSamples,
                                              int* outSampleRate) {
    // Reset whichever out-params were supplied, then reject missing ones instead of dereferencing null.
    if (outSamples) outSamples->clear();
    if (outSampleRate) *outSampleRate = 0;
    if (!outSamples || !outSampleRate) {
        return std::string("outSamples/outSampleRate is null");
    }
#ifndef HAVE_FFMPEG
    (void)inputPath;
    (void)targetSampleRateHz;
    return std::string("FFmpeg not available. Build prebuilts with third_party/ffmpeg_prebuilt/build_ffmpeg_ios.sh.");
#else
    if (!inputPath) {
        return std::string("inputPath is null");
    }

    // Owns every FFmpeg object created below so each early return releases them exactly once.
    struct DecodeResources {
        AVFormatContext* inFmt = nullptr;
        AVCodecContext* decCtx = nullptr;
        SwrContext* swr = nullptr;
        AVPacket* pkt = nullptr;
        AVFrame* frame = nullptr;
        AVChannelLayout inLayout{};
        ~DecodeResources() {
            if (pkt) av_packet_free(&pkt);
            if (frame) av_frame_free(&frame);
            if (swr) swr_free(&swr);
            av_channel_layout_uninit(&inLayout);
            if (decCtx) avcodec_free_context(&decCtx);
            if (inFmt) avformat_close_input(&inFmt);
        }
    } res;

    if (avformat_open_input(&res.inFmt, inputPath, nullptr, nullptr) < 0) {
        return std::string("Failed to open input file");
    }
    if (avformat_find_stream_info(res.inFmt, nullptr) < 0) {
        return std::string("Failed to find stream info");
    }

    // Pick the first audio stream in the container.
    int audioStreamIndex = -1;
    for (unsigned i = 0; i < res.inFmt->nb_streams; ++i) {
        if (res.inFmt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audioStreamIndex = static_cast<int>(i);
            break;
        }
    }
    if (audioStreamIndex < 0) {
        return std::string("No audio stream found in input");
    }

    AVStream* inStream = res.inFmt->streams[audioStreamIndex];
    const AVCodec* decoder = avcodec_find_decoder(inStream->codecpar->codec_id);
    if (!decoder) {
        return std::string("Unsupported input codec");
    }

    res.decCtx = avcodec_alloc_context3(decoder);
    if (!res.decCtx) {
        return std::string("Failed to allocate decoder context");
    }
    if (avcodec_parameters_to_context(res.decCtx, inStream->codecpar) < 0) {
        return std::string("Failed to copy codec parameters");
    }
    if (avcodec_open2(res.decCtx, decoder, nullptr) < 0) {
        return std::string("Failed to open decoder");
    }

    // Prefer the container's sample rate, fall back to the decoder's. This single validated value
    // is used for the resampler setup and for every delay computation below.
    const int inRate = (inStream->codecpar->sample_rate > 0) ? inStream->codecpar->sample_rate
                                                             : res.decCtx->sample_rate;
    if (inRate <= 0) {
        return std::string("Invalid input sample rate");
    }
    const int outRate = (targetSampleRateHz > 0) ? targetSampleRateHz : inRate;

    // Prefer the container's channel layout, fall back to the decoder's.
    if (inStream->codecpar->ch_layout.nb_channels > 0) {
        if (av_channel_layout_copy(&res.inLayout, &inStream->codecpar->ch_layout) < 0) {
            return std::string("Failed to copy input channel layout");
        }
    } else {
        if (av_channel_layout_copy(&res.inLayout, &res.decCtx->ch_layout) < 0) {
            return std::string("Failed to get decoder channel layout");
        }
    }

    AVChannelLayout outLayout = AV_CHANNEL_LAYOUT_MONO;
    if (swr_alloc_set_opts2(&res.swr,
                            &outLayout,
                            AV_SAMPLE_FMT_FLT,
                            outRate,
                            &res.inLayout,
                            res.decCtx->sample_fmt,
                            inRate,
                            0,
                            nullptr) < 0 ||
        !res.swr) {
        return std::string("Failed to initialize resampler");
    }
    if (swr_init(res.swr) < 0) {
        return std::string("Failed to initialize resampler (swr_init)");
    }

    res.pkt = av_packet_alloc();
    res.frame = av_frame_alloc();
    if (!res.pkt || !res.frame) {
        return std::string("Out of memory");
    }

    std::string conversionError;

    // Runs one swr_convert() call into a temporary buffer and appends the produced floats.
    // `in == nullptr` with `inCount == 0` flushes the resampler.
    auto resampleInto = [&](const uint8_t** in, int inCount, int outCapacity) -> bool {
        uint8_t* outBuf = nullptr;
        if (av_samples_alloc(&outBuf, nullptr, 1, outCapacity, AV_SAMPLE_FMT_FLT, 0) < 0) {
            conversionError = "Out of memory";
            return false;
        }
        const int produced = swr_convert(res.swr, &outBuf, outCapacity, in, inCount);
        if (produced > 0) {
            const float* f = reinterpret_cast<const float*>(outBuf);
            outSamples->insert(outSamples->end(), f, f + produced);
        }
        av_freep(&outBuf);
        if (produced < 0) {
            conversionError = "Failed to resample audio";
            return false;
        }
        return true;
    };

    // Reused across frames so the plane-pointer copy does not allocate per frame.
    std::vector<const uint8_t*> planes;

    auto convertOneFrame = [&](AVFrame* fr) -> bool {
        // Copy plane pointers so we can pass const uint8_t** to swr_convert without
        // reinterpret_cast(uint8_t** -> const uint8_t**), which triggers -Wcast-qual.
        uint8_t** src = fr->extended_data ? fr->extended_data : fr->data;

        // Packed audio has a single data plane; only planar audio has one plane per channel.
        // Reading nb_channels pointers for packed data would run past AVFrame::data when the
        // stream has more than AV_NUM_DATA_POINTERS channels.
        int nplanes = 1;
        if (av_sample_fmt_is_planar(static_cast<AVSampleFormat>(fr->format))) {
            nplanes = fr->ch_layout.nb_channels;
            if (nplanes <= 0) nplanes = AV_NUM_DATA_POINTERS;
        }
        planes.assign(src, src + nplanes);

        int64_t maxOut = av_rescale_rnd(
            swr_get_delay(res.swr, inRate) + static_cast<int64_t>(fr->nb_samples),
            outRate, inRate, AV_ROUND_UP);
        if (maxOut < 1) maxOut = 1;
        return resampleInto(planes.data(), fr->nb_samples, static_cast<int>(maxOut));
    };

    // Pulls every frame the decoder currently has ready and resamples it.
    auto drainDecoder = [&]() -> bool {
        while (avcodec_receive_frame(res.decCtx, res.frame) == 0) {
            const bool converted = convertOneFrame(res.frame);
            av_frame_unref(res.frame);
            if (!converted) return false;
        }
        return true;
    };

    bool ok = true;
    while (ok && av_read_frame(res.inFmt, res.pkt) >= 0) {
        // Packets the decoder rejects are skipped (best effort), as are other streams' packets.
        if (res.pkt->stream_index == audioStreamIndex &&
            avcodec_send_packet(res.decCtx, res.pkt) == 0) {
            ok = drainDecoder();
        }
        av_packet_unref(res.pkt);
    }

    // A null packet puts the decoder into draining mode so buffered frames are emitted.
    if (ok && avcodec_send_packet(res.decCtx, nullptr) == 0) {
        ok = drainDecoder();
    }

    if (ok) {
        // Flush samples still buffered in the resampler; the delay is queried in output-rate
        // units because it sizes an output buffer.
        const int tailCap = static_cast<int>(swr_get_delay(res.swr, outRate)) + 4096;
        ok = resampleInto(nullptr, 0, tailCap);
    }

    if (!ok) {
        // Do not hand back truncated audio as if decoding had succeeded.
        outSamples->clear();
        return conversionError;
    }

    *outSampleRate = outRate;
    return std::string();
#endif
}
881
+
662
882
  @implementation SherpaOnnxAudioConvert
663
883
 
664
884
  + (BOOL)convertAudioToWav16k:(NSString *)inputPath
@@ -695,4 +915,40 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
695
915
  return YES;
696
916
  }
697
917
 
918
/**
 * Decodes an audio file through decodeAudioFileToFloatMono and boxes the resulting
 * float samples into an NSArray of NSNumber.
 *
 * Returns NO with error code -2 when either out-parameter is missing, and NO with
 * error code -1 (carrying the decoder's message) when decoding fails. On success
 * *outSamples and *outSampleRate are filled in and YES is returned.
 */
+ (BOOL)decodeAudioFileToFloatSamples:(NSString *)inputPath
                   targetSampleRateHz:(int)targetSampleRateHz
                           outSamples:(NSArray<NSNumber *> **)outSamples
                        outSampleRate:(int *)outSampleRate
                                error:(NSError **)error
{
    NSInteger failureCode = 0;
    NSString *failureReason = nil;
    std::vector<float> decoded;
    int decodedRate = 0;

    if (outSamples == NULL || outSampleRate == NULL) {
        failureCode = -2;
        failureReason = @"outSamples/outSampleRate required";
    } else {
        // Give the caller well-defined values even if decoding fails below.
        *outSamples = nil;
        *outSampleRate = 0;
        const std::string decodeError =
            decodeAudioFileToFloatMono([inputPath UTF8String], targetSampleRateHz, &decoded, &decodedRate);
        if (!decodeError.empty()) {
            failureCode = -1;
            failureReason = [NSString stringWithUTF8String:decodeError.c_str()];
        }
    }

    // Single failure exit: populate the NSError only when the caller asked for one.
    if (failureCode != 0) {
        if (error) {
            *error = [NSError errorWithDomain:@"SherpaOnnxAudioConvert"
                                         code:failureCode
                                     userInfo:@{NSLocalizedDescriptionKey: failureReason}];
        }
        return NO;
    }

    NSMutableArray<NSNumber *> *boxed = [NSMutableArray arrayWithCapacity:decoded.size()];
    for (const float sample : decoded) {
        [boxed addObject:@(sample)];
    }
    *outSamples = boxed;
    *outSampleRate = decodedRate;
    return YES;
}
953
+
698
954
  @end
@@ -23,6 +23,9 @@ typedef void (^SherpaOnnxArchiveProgressBlock)(long long bytes, long long totalB
23
23
 
24
24
  + (void)cancelExtractTarZst;
25
25
 
26
+ /** Cancel extraction for a specific source archive path (per-operation cancel for parallel extractions). */
27
+ + (void)cancelExtractForPath:(NSString *)sourcePath;
28
+
26
29
  @end
27
30
 
28
31
  NS_ASSUME_NONNULL_END
@@ -14,9 +14,24 @@
14
14
  #include <array>
15
15
  #include <atomic>
16
16
  #include <cstdio>
17
+ #include <mutex>
18
+ #include <set>
17
19
  #include <string>
18
20
 
19
- static std::atomic_bool g_cancelExtract(false);
21
// Cancellation registry shared by the extraction entry points.
// g_cancelledPaths holds the source archive paths whose extraction should stop;
// the empty string is a marker meaning "cancel every extraction" (legacy global cancel).
// All access goes through g_cancelMutex.
static std::mutex g_cancelMutex;
static std::set<std::string> g_cancelledPaths;

/** Returns true when `path` itself, or the global empty-string marker, is registered as cancelled. */
static bool isPathCancelled(const std::string& path) {
    const std::lock_guard<std::mutex> guard(g_cancelMutex);
    if (g_cancelledPaths.find(std::string()) != g_cancelledPaths.end()) {
        return true;
    }
    return g_cancelledPaths.find(path) != g_cancelledPaths.end();
}

/**
 * Removes the cancel request for `path` and also removes the global empty-string marker.
 * NOTE(review): because the global marker is dropped here too, a pending "cancel all"
 * stops applying to other extractions once any one of them calls this — confirm intended.
 */
static void clearCancelForPath(const std::string& path) {
    const std::lock_guard<std::mutex> guard(g_cancelMutex);
    g_cancelledPaths.erase(std::string());
    g_cancelledPaths.erase(path);
}
20
35
 
21
36
  namespace {
22
37
  #ifdef HAVE_LIBARCHIVE
@@ -127,7 +142,8 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
127
142
  + (void)cancelExtractTarBz2
128
143
  {
129
144
  #ifdef HAVE_LIBARCHIVE
130
- g_cancelExtract.store(true);
145
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
146
+ g_cancelledPaths.insert(""); // Empty string = cancel ALL
131
147
  #else
132
148
  // feature disabled
133
149
  #endif
@@ -136,7 +152,21 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
136
152
  + (void)cancelExtractTarZst
137
153
  {
138
154
  #ifdef HAVE_LIBARCHIVE
139
- g_cancelExtract.store(true);
155
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
156
+ g_cancelledPaths.insert(""); // Empty string = cancel ALL
157
+ #else
158
+ // feature disabled
159
+ #endif
160
+ }
161
+
162
+ + (void)cancelExtractForPath:(NSString *)sourcePath
163
+ {
164
+ #ifdef HAVE_LIBARCHIVE
165
+ std::string path = [sourcePath UTF8String] ?: "";
166
+ if (!path.empty()) {
167
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
168
+ g_cancelledPaths.insert(path);
169
+ }
140
170
  #else
141
171
  // feature disabled
142
172
  #endif
@@ -150,7 +180,8 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
150
180
  #ifndef HAVE_LIBARCHIVE
151
181
  return @{ @"success": @NO, @"reason": @"libarchive is disabled in this build. Rebuild without SHERPA_ONNX_DISABLE_LIBARCHIVE=1." };
152
182
  #else
153
- g_cancelExtract.store(false);
183
+ std::string sourcePathStr = [sourcePath UTF8String] ?: "";
184
+ clearCancelForPath(sourcePathStr);
154
185
  NSFileManager *fileManager = [NSFileManager defaultManager];
155
186
 
156
187
  if (![fileManager fileExistsAtPath:sourcePath]) {
@@ -213,10 +244,11 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
213
244
  int lastPercent = -1;
214
245
  long long lastEmitBytes = 0;
215
246
  while ((result = archive_read_next_header(archive, &entry)) == ARCHIVE_OK) {
216
- if (g_cancelExtract.load()) {
247
+ if (isPathCancelled(sourcePathStr)) {
217
248
  archive_read_free(archive);
218
249
  archive_write_free(disk);
219
250
  close_reader();
251
+ clearCancelForPath(sourcePathStr);
220
252
  return @{ @"success": @NO, @"reason": @"Extraction cancelled" };
221
253
  }
222
254
  const char *currentPath = archive_entry_pathname(entry);
@@ -245,10 +277,11 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
245
277
  size_t size = 0;
246
278
  la_int64_t offset = 0;
247
279
  while ((result = archive_read_data_block(archive, &buff, &size, &offset)) == ARCHIVE_OK) {
248
- if (g_cancelExtract.load()) {
280
+ if (isPathCancelled(sourcePathStr)) {
249
281
  archive_read_free(archive);
250
282
  archive_write_free(disk);
251
283
  close_reader();
284
+ clearCancelForPath(sourcePathStr);
252
285
  return @{ @"success": @NO, @"reason": @"Extraction cancelled" };
253
286
  }
254
287
  la_ssize_t writeResult = archive_write_data_block(disk, buff, size, offset);
@@ -2,7 +2,7 @@
2
2
  * sherpa-onnx-model-detect-tts.mm
3
3
  *
4
4
  * Purpose: Detects TTS (text-to-speech) model type and fills TtsModelPaths from a model directory.
5
- * Used by the TTS wrapper on iOS. Supports Vits, Matcha, Kokoro, Kitten, Pocket, Zipvoice.
5
+ * Used by the TTS wrapper on iOS. Supports Vits, Matcha, Kokoro, Kitten, Pocket, Zipvoice, Supertonic.
6
6
  *
7
7
  * --- Detection pipeline (overview) ---
8
8
  *
@@ -39,6 +39,13 @@
39
39
  #include <string>
40
40
  #include <vector>
41
41
 
42
+ #if defined(__APPLE__)
43
+ #include <Foundation/Foundation.h>
44
+ #define TTS_DETECT_LOGI(fmt, ...) NSLog(@"[TtsModelDetect] " fmt, ##__VA_ARGS__)
45
+ #else
46
+ #define TTS_DETECT_LOGI(fmt, ...) ((void)0)
47
+ #endif
48
+
42
49
  namespace sherpaonnx {
43
50
  namespace {
44
51
 
@@ -51,18 +58,20 @@ TtsModelKind ParseTtsModelType(const std::string& modelType) {
51
58
  if (modelType == "kitten") return TtsModelKind::kKitten;
52
59
  if (modelType == "pocket") return TtsModelKind::kPocket;
53
60
  if (modelType == "zipvoice") return TtsModelKind::kZipvoice;
61
+ if (modelType == "supertonic") return TtsModelKind::kSupertonic;
54
62
  return TtsModelKind::kUnknown;
55
63
  }
56
64
 
57
65
  /** Returns true if the given kind is supported by the current paths and hints (required files present).
58
- * data_dir (espeak-ng-data) is required only for Kitten and Kokoro (sherpa-onnx config Validate());
59
- * VITS, Matcha, Zipvoice use it optionally; Pocket does not use it. */
66
+ * data_dir (espeak-ng-data) is required for Kitten, Kokoro, and Zipvoice (Zipvoice uses MatchaTtsLexicon + espeak).
67
+ * VITS and Matcha use dataDir optionally in this detector; Pocket does not use it. */
60
68
  static bool CapabilitySupportsTtsKind(
61
69
  TtsModelKind kind,
62
70
  bool hasVits,
63
71
  bool hasMatcha,
64
72
  bool hasPocket,
65
73
  bool hasZipvoice,
74
+ bool hasSupertonic,
66
75
  bool hasVoicesFile,
67
76
  bool hasDataDir
68
77
  ) {
@@ -78,6 +87,8 @@ static bool CapabilitySupportsTtsKind(
78
87
  return hasPocket;
79
88
  case TtsModelKind::kZipvoice:
80
89
  return hasZipvoice;
90
+ case TtsModelKind::kSupertonic:
91
+ return hasSupertonic;
81
92
  default:
82
93
  return false;
83
94
  }
@@ -102,6 +113,7 @@ static std::vector<TtsModelKind> GetKindsFromDirNameTts(const std::string& model
102
113
  if (lower.find("matcha") != std::string::npos) add(TtsModelKind::kMatcha);
103
114
  if (lower.find("pocket") != std::string::npos) add(TtsModelKind::kPocket);
104
115
  if (lower.find("zipvoice") != std::string::npos) add(TtsModelKind::kZipvoice);
116
+ if (lower.find("supertonic") != std::string::npos) add(TtsModelKind::kSupertonic);
105
117
  if (lower.find("kokoro") != std::string::npos) add(TtsModelKind::kKokoro);
106
118
  if (lower.find("kitten") != std::string::npos) add(TtsModelKind::kKitten);
107
119
  if (lower.find("vits") != std::string::npos) add(TtsModelKind::kVits);
@@ -132,6 +144,10 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
132
144
  std::string tokensFile = FindFileByName(files, "tokens.txt");
133
145
  std::vector<LexiconCandidate> lexiconCandidates = FindLexiconCandidates(files, modelDir);
134
146
  std::string dataDirPath = FindDirectoryUnderRoot(files, modelDir, "espeak-ng-data");
147
+ TTS_DETECT_LOGI("DetectTtsModel: modelDir=%s espeak-ng dataDir=%s (empty=%d)",
148
+ modelDir.c_str(),
149
+ dataDirPath.empty() ? "(empty)" : dataDirPath.c_str(),
150
+ (int)dataDirPath.empty());
135
151
  std::string voicesFile = FindFileByName(files, "voices.bin");
136
152
 
137
153
  std::string acousticModel = FindOnnxByAnyToken(files, {"acoustic_model", "acoustic-model"}, std::nullopt);
@@ -143,14 +159,27 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
143
159
  std::string textConditioner = FindOnnxByAnyToken(files, {"text_conditioner", "text-conditioner"}, std::nullopt);
144
160
  std::string vocabJsonFile = FindFileByName(files, "vocab.json");
145
161
  std::string tokenScoresJsonFile = FindFileByName(files, "token_scores.json");
162
+ std::string durationPredictor = FindOnnxByAnyToken(files, {"duration_predictor", "duration-predictor"}, std::nullopt);
163
+ std::string textEncoderSupertonic = FindOnnxByAnyToken(files, {"text_encoder", "text-encoder"}, std::nullopt);
164
+ std::string vectorEstimator = FindOnnxByAnyToken(files, {"vector_estimator", "vector-estimator"}, std::nullopt);
165
+ std::string ttsJsonFile = FindFileByName(files, "tts.json");
166
+ std::string unicodeIndexerFile = FindFileByName(files, "unicode_indexer.bin");
167
+ std::string voiceStyleFile = FindFileByName(files, "voice.bin");
146
168
 
147
- std::vector<std::string> modelExcludes = {"acoustic", "vocoder", "encoder", "decoder", "joiner"};
169
+ std::vector<std::string> modelExcludes = {
170
+ "acoustic", "vocoder", "encoder", "decoder", "joiner",
171
+ // Supertonic component models are not VITS monolithic model.onnx files.
172
+ "duration_predictor", "duration-predictor",
173
+ "text_encoder", "text-encoder",
174
+ "vector_estimator", "vector-estimator"
175
+ };
148
176
  std::string ttsModel = FindOnnxByAnyToken(files, {"model"}, std::nullopt);
149
177
  if (ttsModel.empty()) {
150
178
  ttsModel = FindLargestOnnxExcludingTokens(files, modelExcludes);
151
179
  }
152
180
 
153
- bool hasVits = !ttsModel.empty();
181
+ // VITS requires both model.onnx-like file and tokens.txt
182
+ bool hasVits = !ttsModel.empty() && !tokensFile.empty();
154
183
  std::string modelDirLower = ToLower(modelDir);
155
184
  bool isLikelyMatcha = modelDirLower.find("matcha") != std::string::npos;
156
185
  bool hasMatcha = (!acousticModel.empty() && !vocoder.empty())
@@ -167,6 +196,9 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
167
196
  }
168
197
  bool hasPocket = !lmFlow.empty() && !lmMain.empty() && !encoder.empty() && !decoder.empty() &&
169
198
  !textConditioner.empty() && !vocabJsonFile.empty() && !tokenScoresJsonFile.empty();
199
+ bool hasSupertonic = !durationPredictor.empty() && !textEncoderSupertonic.empty() &&
200
+ !vectorEstimator.empty() && !vocoder.empty() && !ttsJsonFile.empty() &&
201
+ !unicodeIndexerFile.empty() && !voiceStyleFile.empty();
170
202
  bool hasDataDir = !dataDirPath.empty();
171
203
 
172
204
  bool isLikelyKitten = modelDirLower.find("kitten") != std::string::npos;
@@ -181,6 +213,9 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
181
213
  if (hasZipvoice && !hasMatcha) {
182
214
  result.detectedModels.push_back({"zipvoice", modelDir});
183
215
  }
216
+ if (hasSupertonic) {
217
+ result.detectedModels.push_back({"supertonic", modelDir});
218
+ }
184
219
  if (hasVoicesFile) {
185
220
  if (isLikelyKitten && !isLikelyKokoro) {
186
221
  result.detectedModels.push_back({"kitten", modelDir});
@@ -217,7 +252,7 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
217
252
  std::vector<TtsModelKind> nameCandidates = GetKindsFromDirNameTts(modelDir);
218
253
  if (!nameCandidates.empty()) {
219
254
  for (TtsModelKind k : nameCandidates) {
220
- if (CapabilitySupportsTtsKind(k, hasVits, hasMatcha, hasPocket, hasZipvoice,
255
+ if (CapabilitySupportsTtsKind(k, hasVits, hasMatcha, hasPocket, hasZipvoice, hasSupertonic,
221
256
  hasVoicesFile, hasDataDir)) {
222
257
  selected = k;
223
258
  break;
@@ -232,6 +267,8 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
232
267
  selected = TtsModelKind::kPocket;
233
268
  } else if (hasZipvoice) {
234
269
  selected = TtsModelKind::kZipvoice;
270
+ } else if (hasSupertonic) {
271
+ selected = TtsModelKind::kSupertonic;
235
272
  } else if (hasVoicesFile) {
236
273
  if (isLikelyKitten && !isLikelyKokoro) {
237
274
  selected = TtsModelKind::kKitten;
@@ -278,6 +315,12 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
278
315
  result.paths.textConditioner = textConditioner;
279
316
  result.paths.vocabJson = vocabJsonFile;
280
317
  result.paths.tokenScoresJson = tokenScoresJsonFile;
318
+ result.paths.durationPredictor = durationPredictor;
319
+ result.paths.textEncoder = textEncoderSupertonic;
320
+ result.paths.vectorEstimator = vectorEstimator;
321
+ result.paths.ttsJson = ttsJsonFile;
322
+ result.paths.unicodeIndexer = unicodeIndexerFile;
323
+ result.paths.voiceStyle = voiceStyleFile;
281
324
 
282
325
  auto validation = ValidateTtsPaths(selected, result.paths, modelDir);
283
326
  if (!validation.ok) {
@@ -37,7 +37,8 @@ enum class TtsModelKind {
37
37
  kKokoro,
38
38
  kKitten,
39
39
  kPocket,
40
- kZipvoice
40
+ kZipvoice,
41
+ kSupertonic
41
42
  };
42
43
 
43
44
  struct SttModelPaths {
@@ -150,6 +151,13 @@ struct TtsModelPaths {
150
151
  std::string textConditioner;
151
152
  std::string vocabJson;
152
153
  std::string tokenScoresJson;
154
+ // Supertonic TTS
155
+ std::string durationPredictor;
156
+ std::string textEncoder;
157
+ std::string vectorEstimator;
158
+ std::string ttsJson;
159
+ std::string unicodeIndexer;
160
+ std::string voiceStyle;
153
161
  };
154
162
 
155
163
  struct SttDetectResult {
@@ -55,8 +55,18 @@ static const TtsFieldRequirement kZipvoiceReqs[] = {
55
55
  {"decoder", &TtsModelPaths::decoder, true},
56
56
  {"vocoder", &TtsModelPaths::vocoder, true},
57
57
  {"tokens", &TtsModelPaths::tokens, true},
58
- {"dataDir", &TtsModelPaths::dataDir, false},
59
- {"lexicon", &TtsModelPaths::lexicon, false},
58
+ {"dataDir", &TtsModelPaths::dataDir, true},
59
+ {"lexicon", &TtsModelPaths::lexicon, true},
60
+ };
61
+
62
+ static const TtsFieldRequirement kSupertonicReqs[] = {
63
+ {"durationPredictor", &TtsModelPaths::durationPredictor, true},
64
+ {"textEncoder", &TtsModelPaths::textEncoder, true},
65
+ {"vectorEstimator", &TtsModelPaths::vectorEstimator, true},
66
+ {"vocoder", &TtsModelPaths::vocoder, true},
67
+ {"ttsJson", &TtsModelPaths::ttsJson, true},
68
+ {"unicodeIndexer", &TtsModelPaths::unicodeIndexer, true},
69
+ {"voiceStyle", &TtsModelPaths::voiceStyle, true},
60
70
  };
61
71
 
62
72
  // ============================================================
@@ -79,6 +89,9 @@ static const TtsFieldRequirement* GetRequirements(TtsModelKind kind, size_t& cou
79
89
  case TtsModelKind::kZipvoice:
80
90
  count = std::size(kZipvoiceReqs);
81
91
  return kZipvoiceReqs;
92
+ case TtsModelKind::kSupertonic:
93
+ count = std::size(kSupertonicReqs);
94
+ return kSupertonicReqs;
82
95
  default:
83
96
  count = 0;
84
97
  return nullptr;
@@ -93,6 +106,7 @@ static const char* TtsKindToName(TtsModelKind k) {
93
106
  case TtsModelKind::kKitten: return "Kitten";
94
107
  case TtsModelKind::kPocket: return "Pocket";
95
108
  case TtsModelKind::kZipvoice: return "Zipvoice";
109
+ case TtsModelKind::kSupertonic: return "Supertonic";
96
110
  default: return "Unknown";
97
111
  }
98
112
  }
@@ -102,6 +116,8 @@ static const char* GetFieldHint(const char* fieldName) {
102
116
  return "Copy espeak-ng-data into the model directory.";
103
117
  if (std::strcmp(fieldName, "tokens") == 0)
104
118
  return "Ensure tokens.txt is present in the model directory.";
119
+ if (std::strcmp(fieldName, "lexicon") == 0)
120
+ return "Add lexicon.txt (or lexicon-<lang>.txt) from the official sherpa-onnx Zipvoice/Matcha release; without it the native engine aborts.";
105
121
  return nullptr;
106
122
  }
107
123
 
@@ -49,6 +49,7 @@ public:
49
49
  const std::string& provider,
50
50
  const std::string& ruleFsts,
51
51
  const std::string& ruleFars,
52
+ float dither,
52
53
  float blankPenalty,
53
54
  bool debug,
54
55
  bool rule1MustContainNonSilence,
@@ -103,6 +103,7 @@ OnlineSttInitResult OnlineSttWrapper::initialize(
103
103
  const std::string& provider,
104
104
  const std::string& ruleFsts,
105
105
  const std::string& ruleFars,
106
+ float dither,
106
107
  float blankPenalty,
107
108
  bool debug,
108
109
  // NOTE: rule*MustContainNonSilence, rule1/2MinUtteranceLength, and
@@ -138,6 +139,9 @@ OnlineSttInitResult OnlineSttWrapper::initialize(
138
139
  sherpa_onnx::cxx::OnlineRecognizerConfig config;
139
140
  config.feat_config.sample_rate = 16000;
140
141
  config.feat_config.feature_dim = 80;
142
+ // Dither is not exposed on cxx::FeatureConfig in the bundled sherpa-onnx headers;
143
+ // Android applies it via JNI. iOS uses the library default (no dither from JS).
144
+ (void)dither;
141
145
  config.decoding_method = decodingMethod.empty() ? "greedy_search" : decodingMethod;
142
146
  config.max_active_paths = maxActivePaths;
143
147
  config.enable_endpoint = enableEndpoint;