react-native-sherpa-onnx 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/LICENSE +1 -0
  2. package/README.md +90 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +345 -153
  15. package/android/prebuilt-versions.gradle +2 -2
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +306 -6
  21. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +33 -4
  22. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +266 -7
  23. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +137 -7
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +51 -6
  28. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +159 -0
  29. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  30. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
  31. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  32. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  33. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  34. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  35. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  36. package/ios/SherpaOnnx+TTS.mm +178 -20
  37. package/ios/SherpaOnnx.mm +108 -1
  38. package/ios/SherpaOnnxAudioConvert.h +10 -0
  39. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  40. package/ios/archive/sherpa-onnx-archive-helper.h +10 -0
  41. package/ios/archive/sherpa-onnx-archive-helper.mm +56 -5
  42. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
  43. package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
  44. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  45. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  46. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  47. package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
  48. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  49. package/lib/module/audio/index.js +8 -0
  50. package/lib/module/audio/index.js.map +1 -1
  51. package/lib/module/download/ModelDownloadManager.js +10 -929
  52. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  53. package/lib/module/download/activeModelOperations.js +26 -0
  54. package/lib/module/download/activeModelOperations.js.map +1 -0
  55. package/lib/module/download/background-downloader.d.js +2 -0
  56. package/lib/module/download/background-downloader.d.js.map +1 -0
  57. package/lib/module/download/bulkPurge.js +72 -0
  58. package/lib/module/download/bulkPurge.js.map +1 -0
  59. package/lib/module/download/checksumPrompt.js +19 -0
  60. package/lib/module/download/checksumPrompt.js.map +1 -0
  61. package/lib/module/download/constants.js +7 -0
  62. package/lib/module/download/constants.js.map +1 -0
  63. package/lib/module/download/downloadEvents.js +35 -0
  64. package/lib/module/download/downloadEvents.js.map +1 -0
  65. package/lib/module/download/downloadTask.js +385 -0
  66. package/lib/module/download/downloadTask.js.map +1 -0
  67. package/lib/module/download/ensureModel.js +89 -0
  68. package/lib/module/download/ensureModel.js.map +1 -0
  69. package/lib/module/download/index.js +4 -3
  70. package/lib/module/download/index.js.map +1 -1
  71. package/lib/module/download/localModels.js +151 -0
  72. package/lib/module/download/localModels.js.map +1 -0
  73. package/lib/module/download/modelExtraction.js +174 -0
  74. package/lib/module/download/modelExtraction.js.map +1 -0
  75. package/lib/module/download/paths.js +98 -0
  76. package/lib/module/download/paths.js.map +1 -0
  77. package/lib/module/download/postDownloadProcessing.js +206 -0
  78. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  79. package/lib/module/download/protectedModelKeys.js +31 -0
  80. package/lib/module/download/protectedModelKeys.js.map +1 -0
  81. package/lib/module/download/registry.js +267 -0
  82. package/lib/module/download/registry.js.map +1 -0
  83. package/lib/module/download/retry.js +59 -0
  84. package/lib/module/download/retry.js.map +1 -0
  85. package/lib/module/download/types.js +17 -0
  86. package/lib/module/download/types.js.map +1 -0
  87. package/lib/module/download/validation.js +101 -5
  88. package/lib/module/download/validation.js.map +1 -1
  89. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  90. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  91. package/lib/module/extraction/extractTarZst.js +54 -0
  92. package/lib/module/extraction/extractTarZst.js.map +1 -0
  93. package/lib/module/extraction/index.js +190 -0
  94. package/lib/module/extraction/index.js.map +1 -0
  95. package/lib/module/extraction/types.js +2 -0
  96. package/lib/module/extraction/types.js.map +1 -0
  97. package/lib/module/index.js +2 -1
  98. package/lib/module/index.js.map +1 -1
  99. package/lib/module/licenses.js +63 -0
  100. package/lib/module/licenses.js.map +1 -0
  101. package/lib/module/stt/index.js +16 -2
  102. package/lib/module/stt/index.js.map +1 -1
  103. package/lib/module/stt/streaming.js +2 -0
  104. package/lib/module/stt/streaming.js.map +1 -1
  105. package/lib/module/stt/streamingTypes.js.map +1 -1
  106. package/lib/module/stt/types.js.map +1 -1
  107. package/lib/module/tts/index.js +20 -2
  108. package/lib/module/tts/index.js.map +1 -1
  109. package/lib/module/tts/streaming.js +4 -0
  110. package/lib/module/tts/streaming.js.map +1 -1
  111. package/lib/module/tts/types.js.map +1 -1
  112. package/lib/module/utils.js +16 -1
  113. package/lib/module/utils.js.map +1 -1
  114. package/lib/typescript/src/NativeSherpaOnnx.d.ts +72 -5
  115. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  116. package/lib/typescript/src/audio/index.d.ts +10 -0
  117. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  118. package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
  119. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  120. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  121. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  122. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  123. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  124. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  125. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  126. package/lib/typescript/src/download/constants.d.ts +5 -0
  127. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  128. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  129. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  130. package/lib/typescript/src/download/downloadTask.d.ts +20 -0
  131. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  132. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  133. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  134. package/lib/typescript/src/download/index.d.ts +7 -5
  135. package/lib/typescript/src/download/index.d.ts.map +1 -1
  136. package/lib/typescript/src/download/localModels.d.ts +15 -0
  137. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  138. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  139. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  140. package/lib/typescript/src/download/paths.d.ts +28 -0
  141. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  142. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  143. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  144. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  145. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  146. package/lib/typescript/src/download/registry.d.ts +14 -0
  147. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  148. package/lib/typescript/src/download/retry.d.ts +15 -0
  149. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  150. package/lib/typescript/src/download/types.d.ts +96 -0
  151. package/lib/typescript/src/download/types.d.ts.map +1 -0
  152. package/lib/typescript/src/download/validation.d.ts +19 -0
  153. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  154. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  155. package/lib/typescript/src/extraction/extractTarZst.d.ts +14 -0
  156. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  157. package/lib/typescript/src/extraction/index.d.ts +50 -0
  158. package/lib/typescript/src/extraction/index.d.ts.map +1 -0
  159. package/lib/typescript/src/extraction/types.d.ts +60 -0
  160. package/lib/typescript/src/extraction/types.d.ts.map +1 -0
  161. package/lib/typescript/src/index.d.ts +1 -0
  162. package/lib/typescript/src/index.d.ts.map +1 -1
  163. package/lib/typescript/src/licenses.d.ts +10 -0
  164. package/lib/typescript/src/licenses.d.ts.map +1 -0
  165. package/lib/typescript/src/stt/index.d.ts +4 -1
  166. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  167. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  168. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  169. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  170. package/lib/typescript/src/stt/types.d.ts +3 -1
  171. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  172. package/lib/typescript/src/tts/index.d.ts +3 -1
  173. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  174. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  175. package/lib/typescript/src/tts/types.d.ts +6 -5
  176. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  177. package/lib/typescript/src/utils.d.ts +5 -0
  178. package/lib/typescript/src/utils.d.ts.map +1 -1
  179. package/package.json +11 -1
  180. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  181. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  182. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  183. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  184. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  185. package/scripts/ci/update_model_license_csv.sh +765 -0
  186. package/scripts/setup-ios-framework.sh +14 -11
  187. package/scripts/update_commercial_use.js +73 -0
  188. package/src/NativeSherpaOnnx.ts +92 -5
  189. package/src/audio/index.ts +20 -0
  190. package/src/download/ModelDownloadManager.ts +55 -1343
  191. package/src/download/activeModelOperations.ts +38 -0
  192. package/src/download/background-downloader.d.ts +43 -0
  193. package/src/download/bulkPurge.ts +102 -0
  194. package/src/download/checksumPrompt.ts +25 -0
  195. package/src/download/constants.ts +5 -0
  196. package/src/download/downloadEvents.ts +55 -0
  197. package/src/download/downloadTask.ts +497 -0
  198. package/src/download/ensureModel.ts +124 -0
  199. package/src/download/index.ts +19 -2
  200. package/src/download/localModels.ts +234 -0
  201. package/src/download/modelExtraction.ts +244 -0
  202. package/src/download/paths.ts +134 -0
  203. package/src/download/postDownloadProcessing.ts +292 -0
  204. package/src/download/protectedModelKeys.ts +30 -0
  205. package/src/download/registry.ts +404 -0
  206. package/src/download/retry.ts +76 -0
  207. package/src/download/types.ts +120 -0
  208. package/src/download/validation.ts +114 -8
  209. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  210. package/src/extraction/extractTarZst.ts +79 -0
  211. package/src/extraction/index.ts +269 -0
  212. package/src/extraction/types.ts +63 -0
  213. package/src/index.tsx +2 -0
  214. package/src/licenses.ts +100 -0
  215. package/src/stt/index.ts +20 -2
  216. package/src/stt/streaming.ts +3 -0
  217. package/src/stt/streamingTypes.ts +5 -0
  218. package/src/stt/types.ts +3 -1
  219. package/src/tts/index.ts +30 -2
  220. package/src/tts/streaming.ts +10 -0
  221. package/src/tts/types.ts +6 -5
  222. package/src/utils.ts +22 -1
  223. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
  224. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
  225. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  226. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  227. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  228. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  229. package/lib/module/download/extractTarBz2.js.map +0 -1
  230. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  231. package/scripts/check-qnn-support.sh +0 -78
  232. package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
@@ -2,6 +2,7 @@
2
2
  #import <React/RCTLog.h>
3
3
  #include <string>
4
4
  #include <sys/stat.h>
5
+ #include <vector>
5
6
 
6
7
  #ifdef HAVE_FFMPEG
7
8
  extern "C" {
@@ -12,11 +13,14 @@ extern "C" {
12
13
  #include <libswresample/swresample.h>
13
14
  }
14
15
  #include <cstdio>
15
- #include <vector>
16
16
  #endif
17
17
 
18
18
  // Forward declaration — convertToFormat handles all formats including WAV (16 kHz mono).
19
19
  static std::string convertToFormat(const char* inputPath, const char* outputPath, const char* formatHint, int outputSampleRateHz);
20
+ static std::string decodeAudioFileToFloatMono(const char* inputPath,
21
+ int targetSampleRateHz,
22
+ std::vector<float>* outSamples,
23
+ int* outSampleRate);
20
24
 
21
25
  // Convenience: convert any audio to 16 kHz mono WAV via the main convertToFormat pipeline.
22
26
  static std::string convertToWav16kMono(const char* inputPath, const char* outputPath) {
@@ -659,6 +663,222 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
659
663
  #endif
660
664
  }
661
665
 
666
+ static std::string decodeAudioFileToFloatMono(const char* inputPath,
667
+ int targetSampleRateHz,
668
+ std::vector<float>* outSamples,
669
+ int* outSampleRate) {
670
+ outSamples->clear();
671
+ *outSampleRate = 0;
672
+ #ifndef HAVE_FFMPEG
673
+ (void)inputPath;
674
+ (void)targetSampleRateHz;
675
+ return std::string("FFmpeg not available. Build prebuilts with third_party/ffmpeg_prebuilt/build_ffmpeg_ios.sh.");
676
+ #else
677
+ if (!inputPath) {
678
+ return std::string("inputPath is null");
679
+ }
680
+
681
+ AVFormatContext* inFmt = nullptr;
682
+ if (avformat_open_input(&inFmt, inputPath, nullptr, nullptr) < 0) {
683
+ return std::string("Failed to open input file");
684
+ }
685
+ if (avformat_find_stream_info(inFmt, nullptr) < 0) {
686
+ avformat_close_input(&inFmt);
687
+ return std::string("Failed to find stream info");
688
+ }
689
+
690
+ int audioStreamIndex = -1;
691
+ for (unsigned i = 0; i < inFmt->nb_streams; ++i) {
692
+ if (inFmt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
693
+ audioStreamIndex = (int)i;
694
+ break;
695
+ }
696
+ }
697
+ if (audioStreamIndex < 0) {
698
+ avformat_close_input(&inFmt);
699
+ return std::string("No audio stream found in input");
700
+ }
701
+
702
+ AVStream* inStream = inFmt->streams[audioStreamIndex];
703
+ const AVCodec* decoder = avcodec_find_decoder(inStream->codecpar->codec_id);
704
+ if (!decoder) {
705
+ avformat_close_input(&inFmt);
706
+ return std::string("Unsupported input codec");
707
+ }
708
+
709
+ AVCodecContext* decCtx = avcodec_alloc_context3(decoder);
710
+ if (!decCtx) {
711
+ avformat_close_input(&inFmt);
712
+ return std::string("Failed to allocate decoder context");
713
+ }
714
+ if (avcodec_parameters_to_context(decCtx, inStream->codecpar) < 0) {
715
+ avcodec_free_context(&decCtx);
716
+ avformat_close_input(&inFmt);
717
+ return std::string("Failed to copy codec parameters");
718
+ }
719
+ if (avcodec_open2(decCtx, decoder, nullptr) < 0) {
720
+ avcodec_free_context(&decCtx);
721
+ avformat_close_input(&inFmt);
722
+ return std::string("Failed to open decoder");
723
+ }
724
+
725
+ int in_sr = decCtx->sample_rate;
726
+ if (inStream->codecpar->sample_rate > 0) {
727
+ in_sr = inStream->codecpar->sample_rate;
728
+ }
729
+ if (in_sr <= 0) {
730
+ avcodec_free_context(&decCtx);
731
+ avformat_close_input(&inFmt);
732
+ return std::string("Invalid input sample rate");
733
+ }
734
+
735
+ int out_sr = (targetSampleRateHz > 0) ? targetSampleRateHz : in_sr;
736
+ if (out_sr <= 0) {
737
+ avcodec_free_context(&decCtx);
738
+ avformat_close_input(&inFmt);
739
+ return std::string("Invalid output sample rate");
740
+ }
741
+
742
+ AVChannelLayout in_layout{};
743
+ if (inStream->codecpar->ch_layout.nb_channels > 0) {
744
+ if (av_channel_layout_copy(&in_layout, &inStream->codecpar->ch_layout) < 0) {
745
+ avcodec_free_context(&decCtx);
746
+ avformat_close_input(&inFmt);
747
+ return std::string("Failed to copy input channel layout");
748
+ }
749
+ } else {
750
+ if (av_channel_layout_copy(&in_layout, &decCtx->ch_layout) < 0) {
751
+ avcodec_free_context(&decCtx);
752
+ avformat_close_input(&inFmt);
753
+ return std::string("Failed to get decoder channel layout");
754
+ }
755
+ }
756
+
757
+ AVChannelLayout out_layout = AV_CHANNEL_LAYOUT_MONO;
758
+ SwrContext* swr = nullptr;
759
+ if (swr_alloc_set_opts2(&swr,
760
+ &out_layout,
761
+ AV_SAMPLE_FMT_FLT,
762
+ out_sr,
763
+ &in_layout,
764
+ decCtx->sample_fmt,
765
+ in_sr,
766
+ 0,
767
+ nullptr) < 0 ||
768
+ !swr) {
769
+ av_channel_layout_uninit(&in_layout);
770
+ avcodec_free_context(&decCtx);
771
+ avformat_close_input(&inFmt);
772
+ return std::string("Failed to initialize resampler");
773
+ }
774
+ if (swr_init(swr) < 0) {
775
+ av_channel_layout_uninit(&in_layout);
776
+ swr_free(&swr);
777
+ avcodec_free_context(&decCtx);
778
+ avformat_close_input(&inFmt);
779
+ return std::string("Failed to initialize resampler (swr_init)");
780
+ }
781
+ av_channel_layout_uninit(&in_layout);
782
+
783
+ AVPacket* pkt = av_packet_alloc();
784
+ AVFrame* frame = av_frame_alloc();
785
+ if (!pkt || !frame) {
786
+ if (pkt) av_packet_free(&pkt);
787
+ if (frame) av_frame_free(&frame);
788
+ swr_free(&swr);
789
+ avcodec_free_context(&decCtx);
790
+ avformat_close_input(&inFmt);
791
+ return std::string("Out of memory");
792
+ }
793
+
794
+ auto appendConverted = [&](uint8_t* buf, int nbFloats) {
795
+ if (!buf || nbFloats <= 0) return;
796
+ const float* f = reinterpret_cast<const float*>(buf);
797
+ outSamples->insert(outSamples->end(), f, f + nbFloats);
798
+ };
799
+
800
+ auto convertOneFrame = [&](AVFrame* fr) {
801
+ // Copy plane pointers so we can pass const uint8_t** to swr_convert without
802
+ // reinterpret_cast(uint8_t** -> const uint8_t**), which triggers -Wcast-qual.
803
+ uint8_t** src = fr->extended_data ? fr->extended_data : fr->data;
804
+ int nplanes = fr->ch_layout.nb_channels;
805
+ if (nplanes <= 0) nplanes = AV_NUM_DATA_POINTERS;
806
+
807
+ const uint8_t* in_stack[AV_NUM_DATA_POINTERS] = {};
808
+ std::vector<const uint8_t*> in_heap;
809
+ const uint8_t** in_arg;
810
+ if (nplanes > AV_NUM_DATA_POINTERS) {
811
+ in_heap.resize(static_cast<size_t>(nplanes));
812
+ for (int i = 0; i < nplanes; ++i) {
813
+ in_heap[static_cast<size_t>(i)] = src[i];
814
+ }
815
+ in_arg = in_heap.data();
816
+ } else {
817
+ for (int i = 0; i < nplanes; ++i) {
818
+ in_stack[i] = src[i];
819
+ }
820
+ in_arg = in_stack;
821
+ }
822
+
823
+ int in_sr2 = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
824
+ int64_t max_out =
825
+ av_rescale_rnd(swr_get_delay(swr, in_sr2) + (int64_t)fr->nb_samples, out_sr, in_sr2, AV_ROUND_UP);
826
+ if (max_out < 1) max_out = 1;
827
+ uint8_t* out_buf = nullptr;
828
+ if (av_samples_alloc(&out_buf, nullptr, 1, (int)max_out, AV_SAMPLE_FMT_FLT, 0) < 0) {
829
+ return;
830
+ }
831
+ int converted = swr_convert(swr, &out_buf, (int)max_out, in_arg, fr->nb_samples);
832
+ if (converted > 0) {
833
+ appendConverted(out_buf, converted);
834
+ }
835
+ av_freep(&out_buf);
836
+ };
837
+
838
+ while (av_read_frame(inFmt, pkt) >= 0) {
839
+ if (pkt->stream_index == audioStreamIndex) {
840
+ if (avcodec_send_packet(decCtx, pkt) == 0) {
841
+ while (avcodec_receive_frame(decCtx, frame) == 0) {
842
+ convertOneFrame(frame);
843
+ av_frame_unref(frame);
844
+ }
845
+ }
846
+ }
847
+ av_packet_unref(pkt);
848
+ }
849
+
850
+ if (avcodec_send_packet(decCtx, nullptr) == 0) {
851
+ while (avcodec_receive_frame(decCtx, frame) == 0) {
852
+ convertOneFrame(frame);
853
+ av_frame_unref(frame);
854
+ }
855
+ }
856
+
857
+ {
858
+ int in_sr2 = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
859
+ int tailCap = (int)swr_get_delay(swr, in_sr2) + 4096;
860
+ if (tailCap < 16) tailCap = 16;
861
+ uint8_t* tailData = nullptr;
862
+ if (av_samples_alloc(&tailData, nullptr, 1, tailCap, AV_SAMPLE_FMT_FLT, 0) >= 0) {
863
+ int tailConverted = swr_convert(swr, &tailData, tailCap, nullptr, 0);
864
+ if (tailConverted > 0) {
865
+ appendConverted(tailData, tailConverted);
866
+ }
867
+ av_freep(&tailData);
868
+ }
869
+ }
870
+
871
+ av_packet_free(&pkt);
872
+ av_frame_free(&frame);
873
+ swr_free(&swr);
874
+ avcodec_free_context(&decCtx);
875
+ avformat_close_input(&inFmt);
876
+
877
+ *outSampleRate = out_sr;
878
+ return std::string("");
879
+ #endif
880
+ }
881
+
662
882
  @implementation SherpaOnnxAudioConvert
663
883
 
664
884
  + (BOOL)convertAudioToWav16k:(NSString *)inputPath
@@ -695,4 +915,40 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
695
915
  return YES;
696
916
  }
697
917
 
918
+ + (BOOL)decodeAudioFileToFloatSamples:(NSString *)inputPath
919
+ targetSampleRateHz:(int)targetSampleRateHz
920
+ outSamples:(NSArray<NSNumber *> **)outSamples
921
+ outSampleRate:(int *)outSampleRate
922
+ error:(NSError **)error
923
+ {
924
+ if (!outSamples || !outSampleRate) {
925
+ if (error) {
926
+ *error = [NSError errorWithDomain:@"SherpaOnnxAudioConvert"
927
+ code:-2
928
+ userInfo:@{NSLocalizedDescriptionKey: @"outSamples/outSampleRate required"}];
929
+ }
930
+ return NO;
931
+ }
932
+ *outSamples = nil;
933
+ *outSampleRate = 0;
934
+ std::vector<float> v;
935
+ int sr = 0;
936
+ std::string err = decodeAudioFileToFloatMono(inputPath.UTF8String, targetSampleRateHz, &v, &sr);
937
+ if (!err.empty()) {
938
+ if (error) {
939
+ *error = [NSError errorWithDomain:@"SherpaOnnxAudioConvert"
940
+ code:-1
941
+ userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithUTF8String:err.c_str()]}];
942
+ }
943
+ return NO;
944
+ }
945
+ NSMutableArray<NSNumber *> *arr = [NSMutableArray arrayWithCapacity:v.size()];
946
+ for (size_t i = 0; i < v.size(); ++i) {
947
+ [arr addObject:@(v[i])];
948
+ }
949
+ *outSamples = arr;
950
+ *outSampleRate = sr;
951
+ return YES;
952
+ }
953
+
698
954
  @end
@@ -11,11 +11,21 @@ typedef void (^SherpaOnnxArchiveProgressBlock)(long long bytes, long long totalB
11
11
  force:(BOOL)force
12
12
  progress:(nullable SherpaOnnxArchiveProgressBlock)progress;
13
13
 
14
+ - (NSDictionary *)extractTarZst:(NSString *)sourcePath
15
+ targetPath:(NSString *)targetPath
16
+ force:(BOOL)force
17
+ progress:(nullable SherpaOnnxArchiveProgressBlock)progress;
18
+
14
19
  - (nullable NSString *)computeFileSha256:(NSString *)filePath
15
20
  error:(NSError * _Nullable * _Nullable)error;
16
21
 
17
22
  + (void)cancelExtractTarBz2;
18
23
 
24
+ + (void)cancelExtractTarZst;
25
+
26
+ /** Cancel extraction for a specific source archive path (per-operation cancel for parallel extractions). */
27
+ + (void)cancelExtractForPath:(NSString *)sourcePath;
28
+
19
29
  @end
20
30
 
21
31
  NS_ASSUME_NONNULL_END
@@ -14,9 +14,24 @@
14
14
  #include <array>
15
15
  #include <atomic>
16
16
  #include <cstdio>
17
+ #include <mutex>
18
+ #include <set>
17
19
  #include <string>
18
20
 
19
- static std::atomic_bool g_cancelExtract(false);
21
+ static std::mutex g_cancelMutex;
22
+ static std::set<std::string> g_cancelledPaths;
23
+
24
+ static bool isPathCancelled(const std::string& path) {
25
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
26
+ // If the set contains an empty string, ALL extractions are cancelled (legacy global cancel).
27
+ return g_cancelledPaths.count("") > 0 || g_cancelledPaths.count(path) > 0;
28
+ }
29
+
30
+ static void clearCancelForPath(const std::string& path) {
31
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
32
+ g_cancelledPaths.erase(path);
33
+ g_cancelledPaths.erase(""); // Clear the global cancel flag too
34
+ }
20
35
 
21
36
  namespace {
22
37
  #ifdef HAVE_LIBARCHIVE
@@ -127,7 +142,31 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
127
142
  + (void)cancelExtractTarBz2
128
143
  {
129
144
  #ifdef HAVE_LIBARCHIVE
130
- g_cancelExtract.store(true);
145
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
146
+ g_cancelledPaths.insert(""); // Empty string = cancel ALL
147
+ #else
148
+ // feature disabled
149
+ #endif
150
+ }
151
+
152
+ + (void)cancelExtractTarZst
153
+ {
154
+ #ifdef HAVE_LIBARCHIVE
155
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
156
+ g_cancelledPaths.insert(""); // Empty string = cancel ALL
157
+ #else
158
+ // feature disabled
159
+ #endif
160
+ }
161
+
162
+ + (void)cancelExtractForPath:(NSString *)sourcePath
163
+ {
164
+ #ifdef HAVE_LIBARCHIVE
165
+ std::string path = [sourcePath UTF8String] ?: "";
166
+ if (!path.empty()) {
167
+ std::lock_guard<std::mutex> lock(g_cancelMutex);
168
+ g_cancelledPaths.insert(path);
169
+ }
131
170
  #else
132
171
  // feature disabled
133
172
  #endif
@@ -141,7 +180,8 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
141
180
  #ifndef HAVE_LIBARCHIVE
142
181
  return @{ @"success": @NO, @"reason": @"libarchive is disabled in this build. Rebuild without SHERPA_ONNX_DISABLE_LIBARCHIVE=1." };
143
182
  #else
144
- g_cancelExtract.store(false);
183
+ std::string sourcePathStr = [sourcePath UTF8String] ?: "";
184
+ clearCancelForPath(sourcePathStr);
145
185
  NSFileManager *fileManager = [NSFileManager defaultManager];
146
186
 
147
187
  if (![fileManager fileExistsAtPath:sourcePath]) {
@@ -174,6 +214,7 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
174
214
  struct archive *archive = archive_read_new();
175
215
  archive_read_support_format_tar(archive);
176
216
  archive_read_support_filter_bzip2(archive);
217
+ archive_read_support_filter_zstd(archive);
177
218
 
178
219
  ArchiveReadContext read_ctx;
179
220
  read_ctx.file = fopen([sourcePath UTF8String], "rb");
@@ -203,10 +244,11 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
203
244
  int lastPercent = -1;
204
245
  long long lastEmitBytes = 0;
205
246
  while ((result = archive_read_next_header(archive, &entry)) == ARCHIVE_OK) {
206
- if (g_cancelExtract.load()) {
247
+ if (isPathCancelled(sourcePathStr)) {
207
248
  archive_read_free(archive);
208
249
  archive_write_free(disk);
209
250
  close_reader();
251
+ clearCancelForPath(sourcePathStr);
210
252
  return @{ @"success": @NO, @"reason": @"Extraction cancelled" };
211
253
  }
212
254
  const char *currentPath = archive_entry_pathname(entry);
@@ -235,10 +277,11 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
235
277
  size_t size = 0;
236
278
  la_int64_t offset = 0;
237
279
  while ((result = archive_read_data_block(archive, &buff, &size, &offset)) == ARCHIVE_OK) {
238
- if (g_cancelExtract.load()) {
280
+ if (isPathCancelled(sourcePathStr)) {
239
281
  archive_read_free(archive);
240
282
  archive_write_free(disk);
241
283
  close_reader();
284
+ clearCancelForPath(sourcePathStr);
242
285
  return @{ @"success": @NO, @"reason": @"Extraction cancelled" };
243
286
  }
244
287
  la_ssize_t writeResult = archive_write_data_block(disk, buff, size, offset);
@@ -299,6 +342,14 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
299
342
  #endif
300
343
  }
301
344
 
345
+ - (NSDictionary *)extractTarZst:(NSString *)sourcePath
346
+ targetPath:(NSString *)targetPath
347
+ force:(BOOL)force
348
+ progress:(SherpaOnnxArchiveProgressBlock)progress
349
+ {
350
+ return [self extractTarBz2:sourcePath targetPath:targetPath force:force progress:progress];
351
+ }
352
+
302
353
  - (NSString *)computeFileSha256:(NSString *)filePath
303
354
  error:(NSError * _Nullable * _Nullable)error
304
355
  {
@@ -39,6 +39,13 @@
39
39
  #include <string>
40
40
  #include <vector>
41
41
 
42
+ #if defined(__APPLE__)
43
+ #include <Foundation/Foundation.h>
44
+ #define TTS_DETECT_LOGI(fmt, ...) NSLog(@"[TtsModelDetect] " fmt, ##__VA_ARGS__)
45
+ #else
46
+ #define TTS_DETECT_LOGI(fmt, ...) ((void)0)
47
+ #endif
48
+
42
49
  namespace sherpaonnx {
43
50
  namespace {
44
51
 
@@ -55,8 +62,8 @@ TtsModelKind ParseTtsModelType(const std::string& modelType) {
55
62
  }
56
63
 
57
64
  /** Returns true if the given kind is supported by the current paths and hints (required files present).
58
- * data_dir (espeak-ng-data) is required only for Kitten and Kokoro (sherpa-onnx config Validate());
59
- * VITS, Matcha, Zipvoice use it optionally; Pocket does not use it. */
65
+ * data_dir (espeak-ng-data) is required for Kitten, Kokoro, and Zipvoice (Zipvoice uses MatchaTtsLexicon + espeak).
66
+ * VITS and Matcha use dataDir optionally in this detector; Pocket does not use it. */
60
67
  static bool CapabilitySupportsTtsKind(
61
68
  TtsModelKind kind,
62
69
  bool hasVits,
@@ -132,6 +139,10 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
132
139
  std::string tokensFile = FindFileByName(files, "tokens.txt");
133
140
  std::vector<LexiconCandidate> lexiconCandidates = FindLexiconCandidates(files, modelDir);
134
141
  std::string dataDirPath = FindDirectoryUnderRoot(files, modelDir, "espeak-ng-data");
142
+ TTS_DETECT_LOGI("DetectTtsModel: modelDir=%s espeak-ng dataDir=%s (empty=%d)",
143
+ modelDir.c_str(),
144
+ dataDirPath.empty() ? "(empty)" : dataDirPath.c_str(),
145
+ (int)dataDirPath.empty());
135
146
  std::string voicesFile = FindFileByName(files, "voices.bin");
136
147
 
137
148
  std::string acousticModel = FindOnnxByAnyToken(files, {"acoustic_model", "acoustic-model"}, std::nullopt);
@@ -55,8 +55,8 @@ static const TtsFieldRequirement kZipvoiceReqs[] = {
55
55
  {"decoder", &TtsModelPaths::decoder, true},
56
56
  {"vocoder", &TtsModelPaths::vocoder, true},
57
57
  {"tokens", &TtsModelPaths::tokens, true},
58
- {"dataDir", &TtsModelPaths::dataDir, false},
59
- {"lexicon", &TtsModelPaths::lexicon, false},
58
+ {"dataDir", &TtsModelPaths::dataDir, true},
59
+ {"lexicon", &TtsModelPaths::lexicon, true},
60
60
  };
61
61
 
62
62
  // ============================================================
@@ -102,6 +102,8 @@ static const char* GetFieldHint(const char* fieldName) {
102
102
  return "Copy espeak-ng-data into the model directory.";
103
103
  if (std::strcmp(fieldName, "tokens") == 0)
104
104
  return "Ensure tokens.txt is present in the model directory.";
105
+ if (std::strcmp(fieldName, "lexicon") == 0)
106
+ return "Add lexicon.txt (or lexicon-<lang>.txt) from the official sherpa-onnx Zipvoice/Matcha release; without it the native engine aborts.";
105
107
  return nullptr;
106
108
  }
107
109
 
@@ -49,6 +49,7 @@ public:
49
49
  const std::string& provider,
50
50
  const std::string& ruleFsts,
51
51
  const std::string& ruleFars,
52
+ float dither,
52
53
  float blankPenalty,
53
54
  bool debug,
54
55
  bool rule1MustContainNonSilence,
@@ -103,6 +103,7 @@ OnlineSttInitResult OnlineSttWrapper::initialize(
103
103
  const std::string& provider,
104
104
  const std::string& ruleFsts,
105
105
  const std::string& ruleFars,
106
+ float dither,
106
107
  float blankPenalty,
107
108
  bool debug,
108
109
  // NOTE: rule*MustContainNonSilence, rule1/2MinUtteranceLength, and
@@ -138,6 +139,9 @@ OnlineSttInitResult OnlineSttWrapper::initialize(
138
139
  sherpa_onnx::cxx::OnlineRecognizerConfig config;
139
140
  config.feat_config.sample_rate = 16000;
140
141
  config.feat_config.feature_dim = 80;
142
+ // Dither is not exposed on cxx::FeatureConfig in the bundled sherpa-onnx headers;
143
+ // Android applies it via JNI. iOS uses the library default (no dither from JS).
144
+ (void)dither;
141
145
  config.decoding_method = decodingMethod.empty() ? "greedy_search" : decodingMethod;
142
146
  config.max_active_paths = maxActivePaths;
143
147
  config.enable_endpoint = enableEndpoint;
@@ -2,21 +2,35 @@
2
2
  #define SHERPA_ONNX_TTS_WRAPPER_H
3
3
 
4
4
  #include "sherpa-onnx-common.h"
5
+ #include "sherpa-onnx-model-detect.h"
5
6
  #include <cstdint>
6
7
  #include <functional>
7
8
  #include <memory>
8
9
  #include <optional>
9
10
  #include <string>
11
+ #include <unordered_map>
10
12
  #include <vector>
11
13
 
12
14
  namespace sherpaonnx {
13
15
 
16
+ /** Voice cloning / zero-shot options for Zipvoice and Pocket (matches JS referenceAudio + referenceSampleRate + optional fields). A default-constructed value has empty reference_audio and reference_sample_rate == 0. */
17
+ struct VoiceCloneOptions {
18
+ std::vector<float> reference_audio; // reference audio samples (JS referenceAudio); presumably mono float PCM - TODO confirm
19
+ int32_t reference_sample_rate = 0; // sample rate of reference_audio (JS referenceSampleRate); defaults to 0
20
+ std::string reference_text; // presumably the transcript of the reference audio - TODO confirm
21
+ int32_t num_steps = 5; // defaults to 5; NOTE(review): looks like a generation step count - confirm which models read it
22
+ float silence_scale = 0.2f; // defaults to 0.2; NOTE(review): exact meaning is not visible in this header - confirm
23
+ std::unordered_map<std::string, std::string> extra; // additional string key/value options; accepted keys are not defined in this header
24
+ };
25
+
14
26
  /**
15
27
  * Result of TTS initialization.
16
28
  */
17
29
  struct TtsInitializeResult {
18
30
  bool success;
19
31
  std::vector<DetectedModel> detectedModels; // List of detected models with type and path
32
+ /** When success is false, optional error message (e.g. from DetectTtsModel or OfflineTts::Create). */
33
+ std::string error;
20
34
  };
21
35
 
22
36
  /**
@@ -59,6 +73,17 @@ public:
59
73
  float speed = 1.0f
60
74
  );
61
75
 
76
+ /**
77
+ * Overload of generate() that takes optional voice-cloning settings. When cloning is set (non-empty reference_audio and reference_sample_rate > 0), calls
78
+ * OfflineTts::Generate(text, GenerationConfig). Otherwise same as generate(text, sid, speed).
79
+ */
80
+ AudioResult generate(
81
+ const std::string& text, // text to synthesize
82
+ int32_t sid, // presumably the speaker id - TODO confirm
83
+ float speed, // forwarded like the speed argument of generate(text, sid, speed)
84
+ const std::optional<VoiceCloneOptions>& cloning // voice-cloning settings; see VoiceCloneOptions
85
+ );
86
+
62
87
  bool generateStream(
63
88
  const std::string& text,
64
89
  int32_t sid,
@@ -66,6 +91,15 @@ public:
66
91
  const TtsStreamCallback& callback
67
92
  );
68
93
 
94
+ /** Streaming overload that also takes voice-cloning settings. Intended for Pocket models with reference audio; Zipvoice combined with cloning is not supported, matching the Android implementation. NOTE(review): the meaning of the bool result is not visible here - presumably true on success, confirm against the implementation. */
95
+ bool generateStream(
96
+ const std::string& text, // text to synthesize
97
+ int32_t sid, // presumably the speaker id - TODO confirm
98
+ float speed,
99
+ const TtsStreamCallback& callback, // stream callback; NOTE(review): when and how often it is invoked is not visible here
100
+ const std::optional<VoiceCloneOptions>& cloning // voice-cloning settings; see VoiceCloneOptions
101
+ );
102
+
69
103
  static bool saveToWavFile(
70
104
  const std::vector<float>& samples,
71
105
  int32_t sampleRate,
@@ -78,6 +112,9 @@ public:
78
112
 
79
113
  bool isInitialized() const;
80
114
 
115
+ /** Returns the model kind from the last successful initialize() (used for voice-cloning validation). NOTE(review): the value returned before any successful initialize() is not visible here - confirm. */
116
+ TtsModelKind getModelKind() const;
117
+
81
118
  void release();
82
119
 
83
120
  private: