react-native-sherpa-onnx 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +90 -21
- package/SherpaOnnx.podspec +3 -0
- package/THIRD_PARTY_LICENSES/README.md +62 -0
- package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
- package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
- package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
- package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
- package/THIRD_PARTY_LICENSES/opus.txt +44 -0
- package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
- package/THIRD_PARTY_LICENSES/shine.txt +482 -0
- package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
- package/android/build.gradle +7 -3
- package/android/prebuilt-download.gradle +345 -153
- package/android/prebuilt-versions.gradle +2 -2
- package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
- package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
- package/android/src/main/cpp/CMakeLists.txt +28 -10
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +306 -6
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +33 -4
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +266 -7
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +137 -7
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +51 -6
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +159 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
- package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
- package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
- package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
- package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
- package/ios/SherpaOnnx+TTS.mm +178 -20
- package/ios/SherpaOnnx.mm +108 -1
- package/ios/SherpaOnnxAudioConvert.h +10 -0
- package/ios/SherpaOnnxAudioConvert.mm +257 -1
- package/ios/archive/sherpa-onnx-archive-helper.h +10 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +56 -5
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +8 -0
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +10 -929
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/download/activeModelOperations.js +26 -0
- package/lib/module/download/activeModelOperations.js.map +1 -0
- package/lib/module/download/background-downloader.d.js +2 -0
- package/lib/module/download/background-downloader.d.js.map +1 -0
- package/lib/module/download/bulkPurge.js +72 -0
- package/lib/module/download/bulkPurge.js.map +1 -0
- package/lib/module/download/checksumPrompt.js +19 -0
- package/lib/module/download/checksumPrompt.js.map +1 -0
- package/lib/module/download/constants.js +7 -0
- package/lib/module/download/constants.js.map +1 -0
- package/lib/module/download/downloadEvents.js +35 -0
- package/lib/module/download/downloadEvents.js.map +1 -0
- package/lib/module/download/downloadTask.js +385 -0
- package/lib/module/download/downloadTask.js.map +1 -0
- package/lib/module/download/ensureModel.js +89 -0
- package/lib/module/download/ensureModel.js.map +1 -0
- package/lib/module/download/index.js +4 -3
- package/lib/module/download/index.js.map +1 -1
- package/lib/module/download/localModels.js +151 -0
- package/lib/module/download/localModels.js.map +1 -0
- package/lib/module/download/modelExtraction.js +174 -0
- package/lib/module/download/modelExtraction.js.map +1 -0
- package/lib/module/download/paths.js +98 -0
- package/lib/module/download/paths.js.map +1 -0
- package/lib/module/download/postDownloadProcessing.js +206 -0
- package/lib/module/download/postDownloadProcessing.js.map +1 -0
- package/lib/module/download/protectedModelKeys.js +31 -0
- package/lib/module/download/protectedModelKeys.js.map +1 -0
- package/lib/module/download/registry.js +267 -0
- package/lib/module/download/registry.js.map +1 -0
- package/lib/module/download/retry.js +59 -0
- package/lib/module/download/retry.js.map +1 -0
- package/lib/module/download/types.js +17 -0
- package/lib/module/download/types.js.map +1 -0
- package/lib/module/download/validation.js +101 -5
- package/lib/module/download/validation.js.map +1 -1
- package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
- package/lib/module/extraction/extractTarBz2.js.map +1 -0
- package/lib/module/extraction/extractTarZst.js +54 -0
- package/lib/module/extraction/extractTarZst.js.map +1 -0
- package/lib/module/extraction/index.js +190 -0
- package/lib/module/extraction/index.js.map +1 -0
- package/lib/module/extraction/types.js +2 -0
- package/lib/module/extraction/types.js.map +1 -0
- package/lib/module/index.js +2 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/licenses.js +63 -0
- package/lib/module/licenses.js.map +1 -0
- package/lib/module/stt/index.js +16 -2
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +2 -0
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/stt/streamingTypes.js.map +1 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +20 -2
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +4 -0
- package/lib/module/tts/streaming.js.map +1 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/utils.js +16 -1
- package/lib/module/utils.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +72 -5
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +10 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
- package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
- package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
- package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
- package/lib/typescript/src/download/constants.d.ts +5 -0
- package/lib/typescript/src/download/constants.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
- package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadTask.d.ts +20 -0
- package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
- package/lib/typescript/src/download/ensureModel.d.ts +26 -0
- package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -5
- package/lib/typescript/src/download/index.d.ts.map +1 -1
- package/lib/typescript/src/download/localModels.d.ts +15 -0
- package/lib/typescript/src/download/localModels.d.ts.map +1 -0
- package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
- package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
- package/lib/typescript/src/download/paths.d.ts +28 -0
- package/lib/typescript/src/download/paths.d.ts.map +1 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
- package/lib/typescript/src/download/registry.d.ts +14 -0
- package/lib/typescript/src/download/registry.d.ts.map +1 -0
- package/lib/typescript/src/download/retry.d.ts +15 -0
- package/lib/typescript/src/download/retry.d.ts.map +1 -0
- package/lib/typescript/src/download/types.d.ts +96 -0
- package/lib/typescript/src/download/types.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +19 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/extraction/extractTarZst.d.ts +14 -0
- package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
- package/lib/typescript/src/extraction/index.d.ts +50 -0
- package/lib/typescript/src/extraction/index.d.ts.map +1 -0
- package/lib/typescript/src/extraction/types.d.ts +60 -0
- package/lib/typescript/src/extraction/types.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +1 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/licenses.d.ts +10 -0
- package/lib/typescript/src/licenses.d.ts.map +1 -0
- package/lib/typescript/src/stt/index.d.ts +4 -1
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +3 -1
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +3 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +6 -5
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +5 -0
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/package.json +11 -1
- package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
- package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
- package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
- package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
- package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
- package/scripts/ci/update_model_license_csv.sh +765 -0
- package/scripts/setup-ios-framework.sh +14 -11
- package/scripts/update_commercial_use.js +73 -0
- package/src/NativeSherpaOnnx.ts +92 -5
- package/src/audio/index.ts +20 -0
- package/src/download/ModelDownloadManager.ts +55 -1343
- package/src/download/activeModelOperations.ts +38 -0
- package/src/download/background-downloader.d.ts +43 -0
- package/src/download/bulkPurge.ts +102 -0
- package/src/download/checksumPrompt.ts +25 -0
- package/src/download/constants.ts +5 -0
- package/src/download/downloadEvents.ts +55 -0
- package/src/download/downloadTask.ts +497 -0
- package/src/download/ensureModel.ts +124 -0
- package/src/download/index.ts +19 -2
- package/src/download/localModels.ts +234 -0
- package/src/download/modelExtraction.ts +244 -0
- package/src/download/paths.ts +134 -0
- package/src/download/postDownloadProcessing.ts +292 -0
- package/src/download/protectedModelKeys.ts +30 -0
- package/src/download/registry.ts +404 -0
- package/src/download/retry.ts +76 -0
- package/src/download/types.ts +120 -0
- package/src/download/validation.ts +114 -8
- package/src/{download → extraction}/extractTarBz2.ts +3 -1
- package/src/extraction/extractTarZst.ts +79 -0
- package/src/extraction/index.ts +269 -0
- package/src/extraction/types.ts +63 -0
- package/src/index.tsx +2 -0
- package/src/licenses.ts +100 -0
- package/src/stt/index.ts +20 -2
- package/src/stt/streaming.ts +3 -0
- package/src/stt/streamingTypes.ts +5 -0
- package/src/stt/types.ts +3 -1
- package/src/tts/index.ts +30 -2
- package/src/tts/streaming.ts +10 -0
- package/src/tts/types.ts +6 -5
- package/src/utils.ts +22 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
- package/lib/module/download/extractTarBz2.js.map +0 -1
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
- package/scripts/check-qnn-support.sh +0 -78
- /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#import <React/RCTLog.h>
|
|
3
3
|
#include <string>
|
|
4
4
|
#include <sys/stat.h>
|
|
5
|
+
#include <vector>
|
|
5
6
|
|
|
6
7
|
#ifdef HAVE_FFMPEG
|
|
7
8
|
extern "C" {
|
|
@@ -12,11 +13,14 @@ extern "C" {
|
|
|
12
13
|
#include <libswresample/swresample.h>
|
|
13
14
|
}
|
|
14
15
|
#include <cstdio>
|
|
15
|
-
#include <vector>
|
|
16
16
|
#endif
|
|
17
17
|
|
|
18
18
|
// Forward declaration — convertToFormat handles all formats including WAV (16 kHz mono).
|
|
19
19
|
static std::string convertToFormat(const char* inputPath, const char* outputPath, const char* formatHint, int outputSampleRateHz);
|
|
20
|
+
static std::string decodeAudioFileToFloatMono(const char* inputPath,
|
|
21
|
+
int targetSampleRateHz,
|
|
22
|
+
std::vector<float>* outSamples,
|
|
23
|
+
int* outSampleRate);
|
|
20
24
|
|
|
21
25
|
// Convenience: convert any audio to 16 kHz mono WAV via the main convertToFormat pipeline.
|
|
22
26
|
static std::string convertToWav16kMono(const char* inputPath, const char* outputPath) {
|
|
@@ -659,6 +663,222 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
659
663
|
#endif
|
|
660
664
|
}
|
|
661
665
|
|
|
666
|
+
static std::string decodeAudioFileToFloatMono(const char* inputPath,
|
|
667
|
+
int targetSampleRateHz,
|
|
668
|
+
std::vector<float>* outSamples,
|
|
669
|
+
int* outSampleRate) {
|
|
670
|
+
outSamples->clear();
|
|
671
|
+
*outSampleRate = 0;
|
|
672
|
+
#ifndef HAVE_FFMPEG
|
|
673
|
+
(void)inputPath;
|
|
674
|
+
(void)targetSampleRateHz;
|
|
675
|
+
return std::string("FFmpeg not available. Build prebuilts with third_party/ffmpeg_prebuilt/build_ffmpeg_ios.sh.");
|
|
676
|
+
#else
|
|
677
|
+
if (!inputPath) {
|
|
678
|
+
return std::string("inputPath is null");
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
AVFormatContext* inFmt = nullptr;
|
|
682
|
+
if (avformat_open_input(&inFmt, inputPath, nullptr, nullptr) < 0) {
|
|
683
|
+
return std::string("Failed to open input file");
|
|
684
|
+
}
|
|
685
|
+
if (avformat_find_stream_info(inFmt, nullptr) < 0) {
|
|
686
|
+
avformat_close_input(&inFmt);
|
|
687
|
+
return std::string("Failed to find stream info");
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
int audioStreamIndex = -1;
|
|
691
|
+
for (unsigned i = 0; i < inFmt->nb_streams; ++i) {
|
|
692
|
+
if (inFmt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
|
|
693
|
+
audioStreamIndex = (int)i;
|
|
694
|
+
break;
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
if (audioStreamIndex < 0) {
|
|
698
|
+
avformat_close_input(&inFmt);
|
|
699
|
+
return std::string("No audio stream found in input");
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
AVStream* inStream = inFmt->streams[audioStreamIndex];
|
|
703
|
+
const AVCodec* decoder = avcodec_find_decoder(inStream->codecpar->codec_id);
|
|
704
|
+
if (!decoder) {
|
|
705
|
+
avformat_close_input(&inFmt);
|
|
706
|
+
return std::string("Unsupported input codec");
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
AVCodecContext* decCtx = avcodec_alloc_context3(decoder);
|
|
710
|
+
if (!decCtx) {
|
|
711
|
+
avformat_close_input(&inFmt);
|
|
712
|
+
return std::string("Failed to allocate decoder context");
|
|
713
|
+
}
|
|
714
|
+
if (avcodec_parameters_to_context(decCtx, inStream->codecpar) < 0) {
|
|
715
|
+
avcodec_free_context(&decCtx);
|
|
716
|
+
avformat_close_input(&inFmt);
|
|
717
|
+
return std::string("Failed to copy codec parameters");
|
|
718
|
+
}
|
|
719
|
+
if (avcodec_open2(decCtx, decoder, nullptr) < 0) {
|
|
720
|
+
avcodec_free_context(&decCtx);
|
|
721
|
+
avformat_close_input(&inFmt);
|
|
722
|
+
return std::string("Failed to open decoder");
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
int in_sr = decCtx->sample_rate;
|
|
726
|
+
if (inStream->codecpar->sample_rate > 0) {
|
|
727
|
+
in_sr = inStream->codecpar->sample_rate;
|
|
728
|
+
}
|
|
729
|
+
if (in_sr <= 0) {
|
|
730
|
+
avcodec_free_context(&decCtx);
|
|
731
|
+
avformat_close_input(&inFmt);
|
|
732
|
+
return std::string("Invalid input sample rate");
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
int out_sr = (targetSampleRateHz > 0) ? targetSampleRateHz : in_sr;
|
|
736
|
+
if (out_sr <= 0) {
|
|
737
|
+
avcodec_free_context(&decCtx);
|
|
738
|
+
avformat_close_input(&inFmt);
|
|
739
|
+
return std::string("Invalid output sample rate");
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
AVChannelLayout in_layout{};
|
|
743
|
+
if (inStream->codecpar->ch_layout.nb_channels > 0) {
|
|
744
|
+
if (av_channel_layout_copy(&in_layout, &inStream->codecpar->ch_layout) < 0) {
|
|
745
|
+
avcodec_free_context(&decCtx);
|
|
746
|
+
avformat_close_input(&inFmt);
|
|
747
|
+
return std::string("Failed to copy input channel layout");
|
|
748
|
+
}
|
|
749
|
+
} else {
|
|
750
|
+
if (av_channel_layout_copy(&in_layout, &decCtx->ch_layout) < 0) {
|
|
751
|
+
avcodec_free_context(&decCtx);
|
|
752
|
+
avformat_close_input(&inFmt);
|
|
753
|
+
return std::string("Failed to get decoder channel layout");
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
AVChannelLayout out_layout = AV_CHANNEL_LAYOUT_MONO;
|
|
758
|
+
SwrContext* swr = nullptr;
|
|
759
|
+
if (swr_alloc_set_opts2(&swr,
|
|
760
|
+
&out_layout,
|
|
761
|
+
AV_SAMPLE_FMT_FLT,
|
|
762
|
+
out_sr,
|
|
763
|
+
&in_layout,
|
|
764
|
+
decCtx->sample_fmt,
|
|
765
|
+
in_sr,
|
|
766
|
+
0,
|
|
767
|
+
nullptr) < 0 ||
|
|
768
|
+
!swr) {
|
|
769
|
+
av_channel_layout_uninit(&in_layout);
|
|
770
|
+
avcodec_free_context(&decCtx);
|
|
771
|
+
avformat_close_input(&inFmt);
|
|
772
|
+
return std::string("Failed to initialize resampler");
|
|
773
|
+
}
|
|
774
|
+
if (swr_init(swr) < 0) {
|
|
775
|
+
av_channel_layout_uninit(&in_layout);
|
|
776
|
+
swr_free(&swr);
|
|
777
|
+
avcodec_free_context(&decCtx);
|
|
778
|
+
avformat_close_input(&inFmt);
|
|
779
|
+
return std::string("Failed to initialize resampler (swr_init)");
|
|
780
|
+
}
|
|
781
|
+
av_channel_layout_uninit(&in_layout);
|
|
782
|
+
|
|
783
|
+
AVPacket* pkt = av_packet_alloc();
|
|
784
|
+
AVFrame* frame = av_frame_alloc();
|
|
785
|
+
if (!pkt || !frame) {
|
|
786
|
+
if (pkt) av_packet_free(&pkt);
|
|
787
|
+
if (frame) av_frame_free(&frame);
|
|
788
|
+
swr_free(&swr);
|
|
789
|
+
avcodec_free_context(&decCtx);
|
|
790
|
+
avformat_close_input(&inFmt);
|
|
791
|
+
return std::string("Out of memory");
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
auto appendConverted = [&](uint8_t* buf, int nbFloats) {
|
|
795
|
+
if (!buf || nbFloats <= 0) return;
|
|
796
|
+
const float* f = reinterpret_cast<const float*>(buf);
|
|
797
|
+
outSamples->insert(outSamples->end(), f, f + nbFloats);
|
|
798
|
+
};
|
|
799
|
+
|
|
800
|
+
auto convertOneFrame = [&](AVFrame* fr) {
|
|
801
|
+
// Copy plane pointers so we can pass const uint8_t** to swr_convert without
|
|
802
|
+
// reinterpret_cast(uint8_t** -> const uint8_t**), which triggers -Wcast-qual.
|
|
803
|
+
uint8_t** src = fr->extended_data ? fr->extended_data : fr->data;
|
|
804
|
+
int nplanes = fr->ch_layout.nb_channels;
|
|
805
|
+
if (nplanes <= 0) nplanes = AV_NUM_DATA_POINTERS;
|
|
806
|
+
|
|
807
|
+
const uint8_t* in_stack[AV_NUM_DATA_POINTERS] = {};
|
|
808
|
+
std::vector<const uint8_t*> in_heap;
|
|
809
|
+
const uint8_t** in_arg;
|
|
810
|
+
if (nplanes > AV_NUM_DATA_POINTERS) {
|
|
811
|
+
in_heap.resize(static_cast<size_t>(nplanes));
|
|
812
|
+
for (int i = 0; i < nplanes; ++i) {
|
|
813
|
+
in_heap[static_cast<size_t>(i)] = src[i];
|
|
814
|
+
}
|
|
815
|
+
in_arg = in_heap.data();
|
|
816
|
+
} else {
|
|
817
|
+
for (int i = 0; i < nplanes; ++i) {
|
|
818
|
+
in_stack[i] = src[i];
|
|
819
|
+
}
|
|
820
|
+
in_arg = in_stack;
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
int in_sr2 = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
|
|
824
|
+
int64_t max_out =
|
|
825
|
+
av_rescale_rnd(swr_get_delay(swr, in_sr2) + (int64_t)fr->nb_samples, out_sr, in_sr2, AV_ROUND_UP);
|
|
826
|
+
if (max_out < 1) max_out = 1;
|
|
827
|
+
uint8_t* out_buf = nullptr;
|
|
828
|
+
if (av_samples_alloc(&out_buf, nullptr, 1, (int)max_out, AV_SAMPLE_FMT_FLT, 0) < 0) {
|
|
829
|
+
return;
|
|
830
|
+
}
|
|
831
|
+
int converted = swr_convert(swr, &out_buf, (int)max_out, in_arg, fr->nb_samples);
|
|
832
|
+
if (converted > 0) {
|
|
833
|
+
appendConverted(out_buf, converted);
|
|
834
|
+
}
|
|
835
|
+
av_freep(&out_buf);
|
|
836
|
+
};
|
|
837
|
+
|
|
838
|
+
while (av_read_frame(inFmt, pkt) >= 0) {
|
|
839
|
+
if (pkt->stream_index == audioStreamIndex) {
|
|
840
|
+
if (avcodec_send_packet(decCtx, pkt) == 0) {
|
|
841
|
+
while (avcodec_receive_frame(decCtx, frame) == 0) {
|
|
842
|
+
convertOneFrame(frame);
|
|
843
|
+
av_frame_unref(frame);
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
av_packet_unref(pkt);
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
if (avcodec_send_packet(decCtx, nullptr) == 0) {
|
|
851
|
+
while (avcodec_receive_frame(decCtx, frame) == 0) {
|
|
852
|
+
convertOneFrame(frame);
|
|
853
|
+
av_frame_unref(frame);
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
{
|
|
858
|
+
int in_sr2 = inStream->codecpar->sample_rate ? inStream->codecpar->sample_rate : decCtx->sample_rate;
|
|
859
|
+
int tailCap = (int)swr_get_delay(swr, in_sr2) + 4096;
|
|
860
|
+
if (tailCap < 16) tailCap = 16;
|
|
861
|
+
uint8_t* tailData = nullptr;
|
|
862
|
+
if (av_samples_alloc(&tailData, nullptr, 1, tailCap, AV_SAMPLE_FMT_FLT, 0) >= 0) {
|
|
863
|
+
int tailConverted = swr_convert(swr, &tailData, tailCap, nullptr, 0);
|
|
864
|
+
if (tailConverted > 0) {
|
|
865
|
+
appendConverted(tailData, tailConverted);
|
|
866
|
+
}
|
|
867
|
+
av_freep(&tailData);
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
av_packet_free(&pkt);
|
|
872
|
+
av_frame_free(&frame);
|
|
873
|
+
swr_free(&swr);
|
|
874
|
+
avcodec_free_context(&decCtx);
|
|
875
|
+
avformat_close_input(&inFmt);
|
|
876
|
+
|
|
877
|
+
*outSampleRate = out_sr;
|
|
878
|
+
return std::string("");
|
|
879
|
+
#endif
|
|
880
|
+
}
|
|
881
|
+
|
|
662
882
|
@implementation SherpaOnnxAudioConvert
|
|
663
883
|
|
|
664
884
|
+ (BOOL)convertAudioToWav16k:(NSString *)inputPath
|
|
@@ -695,4 +915,40 @@ static std::string convertToFormat(const char* inputPath, const char* outputPath
|
|
|
695
915
|
return YES;
|
|
696
916
|
}
|
|
697
917
|
|
|
918
|
+
+ (BOOL)decodeAudioFileToFloatSamples:(NSString *)inputPath
|
|
919
|
+
targetSampleRateHz:(int)targetSampleRateHz
|
|
920
|
+
outSamples:(NSArray<NSNumber *> **)outSamples
|
|
921
|
+
outSampleRate:(int *)outSampleRate
|
|
922
|
+
error:(NSError **)error
|
|
923
|
+
{
|
|
924
|
+
if (!outSamples || !outSampleRate) {
|
|
925
|
+
if (error) {
|
|
926
|
+
*error = [NSError errorWithDomain:@"SherpaOnnxAudioConvert"
|
|
927
|
+
code:-2
|
|
928
|
+
userInfo:@{NSLocalizedDescriptionKey: @"outSamples/outSampleRate required"}];
|
|
929
|
+
}
|
|
930
|
+
return NO;
|
|
931
|
+
}
|
|
932
|
+
*outSamples = nil;
|
|
933
|
+
*outSampleRate = 0;
|
|
934
|
+
std::vector<float> v;
|
|
935
|
+
int sr = 0;
|
|
936
|
+
std::string err = decodeAudioFileToFloatMono(inputPath.UTF8String, targetSampleRateHz, &v, &sr);
|
|
937
|
+
if (!err.empty()) {
|
|
938
|
+
if (error) {
|
|
939
|
+
*error = [NSError errorWithDomain:@"SherpaOnnxAudioConvert"
|
|
940
|
+
code:-1
|
|
941
|
+
userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithUTF8String:err.c_str()]}];
|
|
942
|
+
}
|
|
943
|
+
return NO;
|
|
944
|
+
}
|
|
945
|
+
NSMutableArray<NSNumber *> *arr = [NSMutableArray arrayWithCapacity:v.size()];
|
|
946
|
+
for (size_t i = 0; i < v.size(); ++i) {
|
|
947
|
+
[arr addObject:@(v[i])];
|
|
948
|
+
}
|
|
949
|
+
*outSamples = arr;
|
|
950
|
+
*outSampleRate = sr;
|
|
951
|
+
return YES;
|
|
952
|
+
}
|
|
953
|
+
|
|
698
954
|
@end
|
|
@@ -11,11 +11,21 @@ typedef void (^SherpaOnnxArchiveProgressBlock)(long long bytes, long long totalB
|
|
|
11
11
|
force:(BOOL)force
|
|
12
12
|
progress:(nullable SherpaOnnxArchiveProgressBlock)progress;
|
|
13
13
|
|
|
14
|
+
- (NSDictionary *)extractTarZst:(NSString *)sourcePath
|
|
15
|
+
targetPath:(NSString *)targetPath
|
|
16
|
+
force:(BOOL)force
|
|
17
|
+
progress:(nullable SherpaOnnxArchiveProgressBlock)progress;
|
|
18
|
+
|
|
14
19
|
- (nullable NSString *)computeFileSha256:(NSString *)filePath
|
|
15
20
|
error:(NSError * _Nullable * _Nullable)error;
|
|
16
21
|
|
|
17
22
|
+ (void)cancelExtractTarBz2;
|
|
18
23
|
|
|
24
|
+
+ (void)cancelExtractTarZst;
|
|
25
|
+
|
|
26
|
+
/** Cancel extraction for a specific source archive path (per-operation cancel for parallel extractions). */
|
|
27
|
+
+ (void)cancelExtractForPath:(NSString *)sourcePath;
|
|
28
|
+
|
|
19
29
|
@end
|
|
20
30
|
|
|
21
31
|
NS_ASSUME_NONNULL_END
|
|
@@ -14,9 +14,24 @@
|
|
|
14
14
|
#include <array>
|
|
15
15
|
#include <atomic>
|
|
16
16
|
#include <cstdio>
|
|
17
|
+
#include <mutex>
|
|
18
|
+
#include <set>
|
|
17
19
|
#include <string>
|
|
18
20
|
|
|
19
|
-
static std::
|
|
21
|
+
static std::mutex g_cancelMutex;
|
|
22
|
+
static std::set<std::string> g_cancelledPaths;
|
|
23
|
+
|
|
24
|
+
static bool isPathCancelled(const std::string& path) {
|
|
25
|
+
std::lock_guard<std::mutex> lock(g_cancelMutex);
|
|
26
|
+
// If the set contains an empty string, ALL extractions are cancelled (legacy global cancel).
|
|
27
|
+
return g_cancelledPaths.count("") > 0 || g_cancelledPaths.count(path) > 0;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
static void clearCancelForPath(const std::string& path) {
|
|
31
|
+
std::lock_guard<std::mutex> lock(g_cancelMutex);
|
|
32
|
+
g_cancelledPaths.erase(path);
|
|
33
|
+
g_cancelledPaths.erase(""); // Clear the global cancel flag too
|
|
34
|
+
}
|
|
20
35
|
|
|
21
36
|
namespace {
|
|
22
37
|
#ifdef HAVE_LIBARCHIVE
|
|
@@ -127,7 +142,31 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
|
|
|
127
142
|
+ (void)cancelExtractTarBz2
|
|
128
143
|
{
|
|
129
144
|
#ifdef HAVE_LIBARCHIVE
|
|
130
|
-
|
|
145
|
+
std::lock_guard<std::mutex> lock(g_cancelMutex);
|
|
146
|
+
g_cancelledPaths.insert(""); // Empty string = cancel ALL
|
|
147
|
+
#else
|
|
148
|
+
// feature disabled
|
|
149
|
+
#endif
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
+ (void)cancelExtractTarZst
|
|
153
|
+
{
|
|
154
|
+
#ifdef HAVE_LIBARCHIVE
|
|
155
|
+
std::lock_guard<std::mutex> lock(g_cancelMutex);
|
|
156
|
+
g_cancelledPaths.insert(""); // Empty string = cancel ALL
|
|
157
|
+
#else
|
|
158
|
+
// feature disabled
|
|
159
|
+
#endif
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
+ (void)cancelExtractForPath:(NSString *)sourcePath
|
|
163
|
+
{
|
|
164
|
+
#ifdef HAVE_LIBARCHIVE
|
|
165
|
+
std::string path = [sourcePath UTF8String] ?: "";
|
|
166
|
+
if (!path.empty()) {
|
|
167
|
+
std::lock_guard<std::mutex> lock(g_cancelMutex);
|
|
168
|
+
g_cancelledPaths.insert(path);
|
|
169
|
+
}
|
|
131
170
|
#else
|
|
132
171
|
// feature disabled
|
|
133
172
|
#endif
|
|
@@ -141,7 +180,8 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
|
|
|
141
180
|
#ifndef HAVE_LIBARCHIVE
|
|
142
181
|
return @{ @"success": @NO, @"reason": @"libarchive is disabled in this build. Rebuild without SHERPA_ONNX_DISABLE_LIBARCHIVE=1." };
|
|
143
182
|
#else
|
|
144
|
-
|
|
183
|
+
std::string sourcePathStr = [sourcePath UTF8String] ?: "";
|
|
184
|
+
clearCancelForPath(sourcePathStr);
|
|
145
185
|
NSFileManager *fileManager = [NSFileManager defaultManager];
|
|
146
186
|
|
|
147
187
|
if (![fileManager fileExistsAtPath:sourcePath]) {
|
|
@@ -174,6 +214,7 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
|
|
|
174
214
|
struct archive *archive = archive_read_new();
|
|
175
215
|
archive_read_support_format_tar(archive);
|
|
176
216
|
archive_read_support_filter_bzip2(archive);
|
|
217
|
+
archive_read_support_filter_zstd(archive);
|
|
177
218
|
|
|
178
219
|
ArchiveReadContext read_ctx;
|
|
179
220
|
read_ctx.file = fopen([sourcePath UTF8String], "rb");
|
|
@@ -203,10 +244,11 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
|
|
|
203
244
|
int lastPercent = -1;
|
|
204
245
|
long long lastEmitBytes = 0;
|
|
205
246
|
while ((result = archive_read_next_header(archive, &entry)) == ARCHIVE_OK) {
|
|
206
|
-
if (
|
|
247
|
+
if (isPathCancelled(sourcePathStr)) {
|
|
207
248
|
archive_read_free(archive);
|
|
208
249
|
archive_write_free(disk);
|
|
209
250
|
close_reader();
|
|
251
|
+
clearCancelForPath(sourcePathStr);
|
|
210
252
|
return @{ @"success": @NO, @"reason": @"Extraction cancelled" };
|
|
211
253
|
}
|
|
212
254
|
const char *currentPath = archive_entry_pathname(entry);
|
|
@@ -235,10 +277,11 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
|
|
|
235
277
|
size_t size = 0;
|
|
236
278
|
la_int64_t offset = 0;
|
|
237
279
|
while ((result = archive_read_data_block(archive, &buff, &size, &offset)) == ARCHIVE_OK) {
|
|
238
|
-
if (
|
|
280
|
+
if (isPathCancelled(sourcePathStr)) {
|
|
239
281
|
archive_read_free(archive);
|
|
240
282
|
archive_write_free(disk);
|
|
241
283
|
close_reader();
|
|
284
|
+
clearCancelForPath(sourcePathStr);
|
|
242
285
|
return @{ @"success": @NO, @"reason": @"Extraction cancelled" };
|
|
243
286
|
}
|
|
244
287
|
la_ssize_t writeResult = archive_write_data_block(disk, buff, size, offset);
|
|
@@ -299,6 +342,14 @@ static NSString* ComputeFileSha256(NSString* filePath, NSError** error) {
|
|
|
299
342
|
#endif
|
|
300
343
|
}
|
|
301
344
|
|
|
345
|
+
- (NSDictionary *)extractTarZst:(NSString *)sourcePath
|
|
346
|
+
targetPath:(NSString *)targetPath
|
|
347
|
+
force:(BOOL)force
|
|
348
|
+
progress:(SherpaOnnxArchiveProgressBlock)progress
|
|
349
|
+
{
|
|
350
|
+
return [self extractTarBz2:sourcePath targetPath:targetPath force:force progress:progress];
|
|
351
|
+
}
|
|
352
|
+
|
|
302
353
|
- (NSString *)computeFileSha256:(NSString *)filePath
|
|
303
354
|
error:(NSError * _Nullable * _Nullable)error
|
|
304
355
|
{
|
|
@@ -39,6 +39,13 @@
|
|
|
39
39
|
#include <string>
|
|
40
40
|
#include <vector>
|
|
41
41
|
|
|
42
|
+
#if defined(__APPLE__)
|
|
43
|
+
#include <Foundation/Foundation.h>
|
|
44
|
+
#define TTS_DETECT_LOGI(fmt, ...) NSLog(@"[TtsModelDetect] " fmt, ##__VA_ARGS__)
|
|
45
|
+
#else
|
|
46
|
+
#define TTS_DETECT_LOGI(fmt, ...) ((void)0)
|
|
47
|
+
#endif
|
|
48
|
+
|
|
42
49
|
namespace sherpaonnx {
|
|
43
50
|
namespace {
|
|
44
51
|
|
|
@@ -55,8 +62,8 @@ TtsModelKind ParseTtsModelType(const std::string& modelType) {
|
|
|
55
62
|
}
|
|
56
63
|
|
|
57
64
|
/** Returns true if the given kind is supported by the current paths and hints (required files present).
|
|
58
|
-
* data_dir (espeak-ng-data) is required
|
|
59
|
-
* VITS
|
|
65
|
+
* data_dir (espeak-ng-data) is required for Kitten, Kokoro, and Zipvoice (Zipvoice uses MatchaTtsLexicon + espeak).
|
|
66
|
+
* VITS and Matcha use dataDir optionally in this detector; Pocket does not use it. */
|
|
60
67
|
static bool CapabilitySupportsTtsKind(
|
|
61
68
|
TtsModelKind kind,
|
|
62
69
|
bool hasVits,
|
|
@@ -132,6 +139,10 @@ TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& m
|
|
|
132
139
|
std::string tokensFile = FindFileByName(files, "tokens.txt");
|
|
133
140
|
std::vector<LexiconCandidate> lexiconCandidates = FindLexiconCandidates(files, modelDir);
|
|
134
141
|
std::string dataDirPath = FindDirectoryUnderRoot(files, modelDir, "espeak-ng-data");
|
|
142
|
+
TTS_DETECT_LOGI("DetectTtsModel: modelDir=%s espeak-ng dataDir=%s (empty=%d)",
|
|
143
|
+
modelDir.c_str(),
|
|
144
|
+
dataDirPath.empty() ? "(empty)" : dataDirPath.c_str(),
|
|
145
|
+
(int)dataDirPath.empty());
|
|
135
146
|
std::string voicesFile = FindFileByName(files, "voices.bin");
|
|
136
147
|
|
|
137
148
|
std::string acousticModel = FindOnnxByAnyToken(files, {"acoustic_model", "acoustic-model"}, std::nullopt);
|
|
@@ -55,8 +55,8 @@ static const TtsFieldRequirement kZipvoiceReqs[] = {
|
|
|
55
55
|
{"decoder", &TtsModelPaths::decoder, true},
|
|
56
56
|
{"vocoder", &TtsModelPaths::vocoder, true},
|
|
57
57
|
{"tokens", &TtsModelPaths::tokens, true},
|
|
58
|
-
{"dataDir", &TtsModelPaths::dataDir,
|
|
59
|
-
{"lexicon", &TtsModelPaths::lexicon,
|
|
58
|
+
{"dataDir", &TtsModelPaths::dataDir, true},
|
|
59
|
+
{"lexicon", &TtsModelPaths::lexicon, true},
|
|
60
60
|
};
|
|
61
61
|
|
|
62
62
|
// ============================================================
|
|
@@ -102,6 +102,8 @@ static const char* GetFieldHint(const char* fieldName) {
|
|
|
102
102
|
return "Copy espeak-ng-data into the model directory.";
|
|
103
103
|
if (std::strcmp(fieldName, "tokens") == 0)
|
|
104
104
|
return "Ensure tokens.txt is present in the model directory.";
|
|
105
|
+
if (std::strcmp(fieldName, "lexicon") == 0)
|
|
106
|
+
return "Add lexicon.txt (or lexicon-<lang>.txt) from the official sherpa-onnx Zipvoice/Matcha release; without it the native engine aborts.";
|
|
105
107
|
return nullptr;
|
|
106
108
|
}
|
|
107
109
|
|
|
@@ -103,6 +103,7 @@ OnlineSttInitResult OnlineSttWrapper::initialize(
|
|
|
103
103
|
const std::string& provider,
|
|
104
104
|
const std::string& ruleFsts,
|
|
105
105
|
const std::string& ruleFars,
|
|
106
|
+
float dither,
|
|
106
107
|
float blankPenalty,
|
|
107
108
|
bool debug,
|
|
108
109
|
// NOTE: rule*MustContainNonSilence, rule1/2MinUtteranceLength, and
|
|
@@ -138,6 +139,9 @@ OnlineSttInitResult OnlineSttWrapper::initialize(
|
|
|
138
139
|
sherpa_onnx::cxx::OnlineRecognizerConfig config;
|
|
139
140
|
config.feat_config.sample_rate = 16000;
|
|
140
141
|
config.feat_config.feature_dim = 80;
|
|
142
|
+
// Dither is not exposed on cxx::FeatureConfig in the bundled sherpa-onnx headers;
|
|
143
|
+
// Android applies it via JNI. iOS uses the library default (no dither from JS).
|
|
144
|
+
(void)dither;
|
|
141
145
|
config.decoding_method = decodingMethod.empty() ? "greedy_search" : decodingMethod;
|
|
142
146
|
config.max_active_paths = maxActivePaths;
|
|
143
147
|
config.enable_endpoint = enableEndpoint;
|
|
@@ -2,21 +2,35 @@
|
|
|
2
2
|
#define SHERPA_ONNX_TTS_WRAPPER_H
|
|
3
3
|
|
|
4
4
|
#include "sherpa-onnx-common.h"
|
|
5
|
+
#include "sherpa-onnx-model-detect.h"
|
|
5
6
|
#include <cstdint>
|
|
6
7
|
#include <functional>
|
|
7
8
|
#include <memory>
|
|
8
9
|
#include <optional>
|
|
9
10
|
#include <string>
|
|
11
|
+
#include <unordered_map>
|
|
10
12
|
#include <vector>
|
|
11
13
|
|
|
12
14
|
namespace sherpaonnx {
|
|
13
15
|
|
|
16
|
+
/** Voice cloning / zero-shot options for Zipvoice and Pocket (matches JS referenceAudio + referenceSampleRate + optional fields). */
|
|
17
|
+
struct VoiceCloneOptions {
|
|
18
|
+
std::vector<float> reference_audio;
|
|
19
|
+
int32_t reference_sample_rate = 0;
|
|
20
|
+
std::string reference_text;
|
|
21
|
+
int32_t num_steps = 5;
|
|
22
|
+
float silence_scale = 0.2f;
|
|
23
|
+
std::unordered_map<std::string, std::string> extra;
|
|
24
|
+
};
|
|
25
|
+
|
|
14
26
|
/**
|
|
15
27
|
* Result of TTS initialization.
|
|
16
28
|
*/
|
|
17
29
|
struct TtsInitializeResult {
|
|
18
30
|
bool success;
|
|
19
31
|
std::vector<DetectedModel> detectedModels; // List of detected models with type and path
|
|
32
|
+
/** When success is false, optional error message (e.g. from DetectTtsModel or OfflineTts::Create). */
|
|
33
|
+
std::string error;
|
|
20
34
|
};
|
|
21
35
|
|
|
22
36
|
/**
|
|
@@ -59,6 +73,17 @@ public:
|
|
|
59
73
|
float speed = 1.0f
|
|
60
74
|
);
|
|
61
75
|
|
|
76
|
+
/**
|
|
77
|
+
* When cloning is set (non-empty reference_audio and reference_sample_rate > 0), calls
|
|
78
|
+
* OfflineTts::Generate(text, GenerationConfig). Otherwise same as generate(text, sid, speed).
|
|
79
|
+
*/
|
|
80
|
+
AudioResult generate(
|
|
81
|
+
const std::string& text,
|
|
82
|
+
int32_t sid,
|
|
83
|
+
float speed,
|
|
84
|
+
const std::optional<VoiceCloneOptions>& cloning
|
|
85
|
+
);
|
|
86
|
+
|
|
62
87
|
bool generateStream(
|
|
63
88
|
const std::string& text,
|
|
64
89
|
int32_t sid,
|
|
@@ -66,6 +91,15 @@ public:
|
|
|
66
91
|
const TtsStreamCallback& callback
|
|
67
92
|
);
|
|
68
93
|
|
|
94
|
+
/** Pocket: streaming with reference audio. Zipvoice + cloning is not supported (match Android). */
|
|
95
|
+
bool generateStream(
|
|
96
|
+
const std::string& text,
|
|
97
|
+
int32_t sid,
|
|
98
|
+
float speed,
|
|
99
|
+
const TtsStreamCallback& callback,
|
|
100
|
+
const std::optional<VoiceCloneOptions>& cloning
|
|
101
|
+
);
|
|
102
|
+
|
|
69
103
|
static bool saveToWavFile(
|
|
70
104
|
const std::vector<float>& samples,
|
|
71
105
|
int32_t sampleRate,
|
|
@@ -78,6 +112,9 @@ public:
|
|
|
78
112
|
|
|
79
113
|
bool isInitialized() const;
|
|
80
114
|
|
|
115
|
+
/** Model kind from last successful initialize() (for voice-cloning validation). */
|
|
116
|
+
TtsModelKind getModelKind() const;
|
|
117
|
+
|
|
81
118
|
void release();
|
|
82
119
|
|
|
83
120
|
private:
|