react-native-sherpa-onnx 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +89 -21
- package/SherpaOnnx.podspec +3 -0
- package/THIRD_PARTY_LICENSES/README.md +62 -0
- package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
- package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
- package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
- package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
- package/THIRD_PARTY_LICENSES/opus.txt +44 -0
- package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
- package/THIRD_PARTY_LICENSES/shine.txt +482 -0
- package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
- package/android/build.gradle +7 -3
- package/android/prebuilt-download.gradle +344 -152
- package/android/prebuilt-versions.gradle +1 -1
- package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
- package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
- package/android/src/main/cpp/CMakeLists.txt +28 -10
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +2 -2
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +40 -10
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +99 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
- package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
- package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
- package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
- package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
- package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
- package/ios/SherpaOnnx+TTS.mm +178 -20
- package/ios/SherpaOnnx.mm +54 -0
- package/ios/SherpaOnnxAudioConvert.h +10 -0
- package/ios/SherpaOnnxAudioConvert.mm +257 -1
- package/ios/archive/sherpa-onnx-archive-helper.h +3 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +39 -6
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
- package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
- package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +8 -0
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +10 -929
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/download/activeModelOperations.js +26 -0
- package/lib/module/download/activeModelOperations.js.map +1 -0
- package/lib/module/download/background-downloader.d.js +2 -0
- package/lib/module/download/background-downloader.d.js.map +1 -0
- package/lib/module/download/bulkPurge.js +72 -0
- package/lib/module/download/bulkPurge.js.map +1 -0
- package/lib/module/download/checksumPrompt.js +19 -0
- package/lib/module/download/checksumPrompt.js.map +1 -0
- package/lib/module/download/constants.js +7 -0
- package/lib/module/download/constants.js.map +1 -0
- package/lib/module/download/downloadEvents.js +35 -0
- package/lib/module/download/downloadEvents.js.map +1 -0
- package/lib/module/download/downloadTask.js +385 -0
- package/lib/module/download/downloadTask.js.map +1 -0
- package/lib/module/download/ensureModel.js +89 -0
- package/lib/module/download/ensureModel.js.map +1 -0
- package/lib/module/download/index.js +4 -4
- package/lib/module/download/index.js.map +1 -1
- package/lib/module/download/localModels.js +151 -0
- package/lib/module/download/localModels.js.map +1 -0
- package/lib/module/download/modelExtraction.js +174 -0
- package/lib/module/download/modelExtraction.js.map +1 -0
- package/lib/module/download/paths.js +98 -0
- package/lib/module/download/paths.js.map +1 -0
- package/lib/module/download/postDownloadProcessing.js +206 -0
- package/lib/module/download/postDownloadProcessing.js.map +1 -0
- package/lib/module/download/protectedModelKeys.js +31 -0
- package/lib/module/download/protectedModelKeys.js.map +1 -0
- package/lib/module/download/registry.js +267 -0
- package/lib/module/download/registry.js.map +1 -0
- package/lib/module/download/retry.js +59 -0
- package/lib/module/download/retry.js.map +1 -0
- package/lib/module/download/types.js +17 -0
- package/lib/module/download/types.js.map +1 -0
- package/lib/module/download/validation.js +101 -5
- package/lib/module/download/validation.js.map +1 -1
- package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
- package/lib/module/extraction/extractTarBz2.js.map +1 -0
- package/lib/module/{download → extraction}/extractTarZst.js +3 -1
- package/lib/module/extraction/extractTarZst.js.map +1 -0
- package/lib/module/extraction/index.js +3 -4
- package/lib/module/extraction/index.js.map +1 -1
- package/lib/module/index.js +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/licenses.js +63 -0
- package/lib/module/licenses.js.map +1 -0
- package/lib/module/stt/index.js +16 -2
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/streaming.js +2 -0
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/stt/streamingTypes.js.map +1 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +20 -2
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/streaming.js +4 -0
- package/lib/module/tts/streaming.js.map +1 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/utils.js +16 -1
- package/lib/module/utils.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +33 -5
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +10 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
- package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
- package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
- package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
- package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
- package/lib/typescript/src/download/constants.d.ts +5 -0
- package/lib/typescript/src/download/constants.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
- package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
- package/lib/typescript/src/download/downloadTask.d.ts +20 -0
- package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
- package/lib/typescript/src/download/ensureModel.d.ts +26 -0
- package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -7
- package/lib/typescript/src/download/index.d.ts.map +1 -1
- package/lib/typescript/src/download/localModels.d.ts +15 -0
- package/lib/typescript/src/download/localModels.d.ts.map +1 -0
- package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
- package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
- package/lib/typescript/src/download/paths.d.ts +28 -0
- package/lib/typescript/src/download/paths.d.ts.map +1 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
- package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
- package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
- package/lib/typescript/src/download/registry.d.ts +14 -0
- package/lib/typescript/src/download/registry.d.ts.map +1 -0
- package/lib/typescript/src/download/retry.d.ts +15 -0
- package/lib/typescript/src/download/retry.d.ts.map +1 -0
- package/lib/typescript/src/download/types.d.ts +96 -0
- package/lib/typescript/src/download/types.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +19 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -1
- package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +1 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/licenses.d.ts +10 -0
- package/lib/typescript/src/licenses.d.ts.map +1 -0
- package/lib/typescript/src/stt/index.d.ts +4 -1
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/stt/types.d.ts +3 -1
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +3 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +6 -5
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +5 -0
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
- package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
- package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
- package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
- package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
- package/scripts/ci/update_model_license_csv.sh +765 -0
- package/scripts/setup-ios-framework.sh +14 -11
- package/scripts/update_commercial_use.js +73 -0
- package/src/NativeSherpaOnnx.ts +36 -5
- package/src/audio/index.ts +20 -0
- package/src/download/ModelDownloadManager.ts +55 -1343
- package/src/download/activeModelOperations.ts +38 -0
- package/src/download/background-downloader.d.ts +43 -0
- package/src/download/bulkPurge.ts +102 -0
- package/src/download/checksumPrompt.ts +25 -0
- package/src/download/constants.ts +5 -0
- package/src/download/downloadEvents.ts +55 -0
- package/src/download/downloadTask.ts +497 -0
- package/src/download/ensureModel.ts +124 -0
- package/src/download/index.ts +19 -4
- package/src/download/localModels.ts +234 -0
- package/src/download/modelExtraction.ts +244 -0
- package/src/download/paths.ts +134 -0
- package/src/download/postDownloadProcessing.ts +292 -0
- package/src/download/protectedModelKeys.ts +30 -0
- package/src/download/registry.ts +404 -0
- package/src/download/retry.ts +76 -0
- package/src/download/types.ts +120 -0
- package/src/download/validation.ts +114 -8
- package/src/{download → extraction}/extractTarBz2.ts +3 -1
- package/src/{download → extraction}/extractTarZst.ts +3 -1
- package/src/extraction/index.ts +3 -7
- package/src/index.tsx +1 -0
- package/src/licenses.ts +100 -0
- package/src/stt/index.ts +20 -2
- package/src/stt/streaming.ts +3 -0
- package/src/stt/streamingTypes.ts +5 -0
- package/src/stt/types.ts +3 -1
- package/src/tts/index.ts +30 -2
- package/src/tts/streaming.ts +10 -0
- package/src/tts/types.ts +6 -5
- package/src/utils.ts +22 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
- package/lib/module/download/extractTarBz2.js.map +0 -1
- package/lib/module/download/extractTarZst.js.map +0 -1
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
- package/lib/typescript/src/download/extractTarZst.d.ts.map +0 -1
- package/scripts/check-qnn-support.sh +0 -78
- /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
- /package/lib/typescript/src/{download → extraction}/extractTarZst.d.ts +0 -0
|
@@ -70,6 +70,7 @@ static sherpaonnx::OnlineSttWrapper* getOnlineSttInstanceForStream(NSString* str
|
|
|
70
70
|
NSString *provider = options.provider();
|
|
71
71
|
NSString *ruleFsts = options.ruleFsts();
|
|
72
72
|
NSString *ruleFars = options.ruleFars();
|
|
73
|
+
auto dither = options.dither();
|
|
73
74
|
auto blankPenalty = options.blankPenalty();
|
|
74
75
|
auto debug = options.debug();
|
|
75
76
|
auto rule1MustContainNonSilence = options.rule1MustContainNonSilence();
|
|
@@ -102,6 +103,7 @@ static sherpaonnx::OnlineSttWrapper* getOnlineSttInstanceForStream(NSString* str
|
|
|
102
103
|
provider != nil ? [provider UTF8String] : "cpu",
|
|
103
104
|
ruleFsts != nil ? [ruleFsts UTF8String] : "",
|
|
104
105
|
ruleFars != nil ? [ruleFars UTF8String] : "",
|
|
106
|
+
dither.has_value() ? (float)dither.value() : 0.f,
|
|
105
107
|
blankPenalty.has_value() ? (float)blankPenalty.value() : 0.f,
|
|
106
108
|
debug.has_value() && debug.value(),
|
|
107
109
|
rule1MustContainNonSilence.has_value() && rule1MustContainNonSilence.value(),
|
|
@@ -143,37 +143,10 @@ static void pcmLiveStopQueue(void) {
|
|
|
143
143
|
|
|
144
144
|
@implementation SherpaOnnx (PcmLiveStream)
|
|
145
145
|
|
|
146
|
-
|
|
146
|
+
#if __has_include(<SherpaOnnxSpec/SherpaOnnxSpec.h>)
|
|
147
|
+
- (void)startPcmLiveStream:(JS::NativeSherpaOnnx::SpecStartPcmLiveStreamOptions &)options
|
|
147
148
|
resolve:(RCTPromiseResolveBlock)resolve
|
|
148
149
|
reject:(RCTPromiseRejectBlock)reject
|
|
149
|
-
{
|
|
150
|
-
int targetRate = 16000;
|
|
151
|
-
UInt32 bufferSizeFrames = 0;
|
|
152
|
-
|
|
153
|
-
// Parse optionsArg coming from JS (fallback / non-codegen path).
|
|
154
|
-
if ([optionsArg isKindOfClass:[NSDictionary class]]) {
|
|
155
|
-
NSDictionary *dict = (NSDictionary *)optionsArg;
|
|
156
|
-
|
|
157
|
-
id sampleRateValue = dict[@"sampleRate"];
|
|
158
|
-
if ([sampleRateValue respondsToSelector:@selector(intValue)]) {
|
|
159
|
-
int v = (int)[sampleRateValue intValue];
|
|
160
|
-
if (v > 0) targetRate = v;
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
id bufferSizeValue = dict[@"bufferSizeFrames"];
|
|
164
|
-
if ([bufferSizeValue respondsToSelector:@selector(doubleValue)]) {
|
|
165
|
-
double v = [bufferSizeValue doubleValue];
|
|
166
|
-
if (v > 0) bufferSizeFrames = (UInt32)v;
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
[self _startPcmLiveStreamWithTargetRate:targetRate bufferSizeFrames:bufferSizeFrames resolve:resolve reject:reject];
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
#if __has_include(<SherpaOnnxSpec/SherpaOnnxSpec.h>)
|
|
174
|
-
- (void)startPcmLiveStreamWithOptions:(JS::NativeSherpaOnnx::SpecStartPcmLiveStreamOptions &)options
|
|
175
|
-
resolve:(RCTPromiseResolveBlock)resolve
|
|
176
|
-
reject:(RCTPromiseRejectBlock)reject
|
|
177
150
|
{
|
|
178
151
|
int targetRate = 16000;
|
|
179
152
|
if (options.sampleRate()) {
|
package/ios/SherpaOnnx+TTS.mm
CHANGED
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
#include <condition_variable>
|
|
18
18
|
#include <memory>
|
|
19
19
|
#include <mutex>
|
|
20
|
+
#include <optional>
|
|
20
21
|
#include <sstream>
|
|
21
22
|
#include <string>
|
|
22
23
|
#include <unordered_map>
|
|
@@ -55,6 +56,7 @@ static NSString *ttsModelKindToNSString(sherpaonnx::TtsModelKind kind) {
|
|
|
55
56
|
case K::kMatcha: return @"matcha";
|
|
56
57
|
case K::kKokoro: return @"kokoro";
|
|
57
58
|
case K::kKitten: return @"kitten";
|
|
59
|
+
case K::kPocket: return @"pocket";
|
|
58
60
|
case K::kZipvoice: return @"zipvoice";
|
|
59
61
|
default: return @"unknown";
|
|
60
62
|
}
|
|
@@ -73,8 +75,60 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
73
75
|
}
|
|
74
76
|
return tokens;
|
|
75
77
|
}
|
|
78
|
+
|
|
79
|
+
/** When options omit numSteps, matches Android SherpaOnnxTtsHelper / upstream GenerationConfig default. */
|
|
80
|
+
constexpr int32_t kDefaultVoiceCloneNumSteps = 5;
|
|
81
|
+
|
|
82
|
+
/**
 * Builds voice-cloning options from a JS-provided options dictionary.
 *
 * Returns std::nullopt unless `referenceAudio` is a non-empty NSArray and
 * `referenceSampleRate` is a numeric value that is still >= 1 after truncation to
 * int32_t (so NaN, values in (0, 1) and values beyond INT32_MAX are rejected).
 *
 * Dictionary entries can be NSNull or some other unexpected class; NSNull does not
 * respond to doubleValue / length, so every field is checked before it is messaged.
 * An optional field of the wrong type is treated as if it were absent.
 *
 * @param options          May contain referenceAudio, referenceSampleRate, referenceText,
 *                         numSteps, silenceScale and extra (string-to-string map).
 * @param defaultNumSteps  Stored in num_steps when `numSteps` is absent or not numeric.
 * @return Populated VoiceCloneOptions, or std::nullopt when the reference audio or its
 *         sample rate is missing or unusable.
 */
static std::optional<sherpaonnx::VoiceCloneOptions> VoiceCloneOptionsFromNSDictionary(NSDictionary *options, int32_t defaultNumSteps) {
  if (options == nil) return std::nullopt;

  id refValue = options[@"referenceAudio"];
  if (![refValue isKindOfClass:[NSArray class]]) return std::nullopt;
  NSArray *refArr = (NSArray *)refValue;
  if ([refArr count] == 0) return std::nullopt;

  id srValue = options[@"referenceSampleRate"];
  if (![srValue respondsToSelector:@selector(doubleValue)]) return std::nullopt;
  const double sampleRate = [srValue doubleValue];
  // Negated range test so NaN is rejected as well; the bounds keep the cast below defined
  // and guarantee the truncated rate is > 0.
  if (!(sampleRate >= 1.0 && sampleRate <= static_cast<double>(INT32_MAX))) return std::nullopt;

  sherpaonnx::VoiceCloneOptions vo;
  vo.reference_sample_rate = static_cast<int32_t>(sampleRate);
  vo.reference_audio.reserve([refArr count]);
  for (id elem in refArr) {
    // Non-numeric elements are stored as 0 so the sample count matches the input array.
    float v = 0.f;
    if ([elem isKindOfClass:[NSNumber class]]) {
      v = static_cast<float>([(NSNumber *)elem doubleValue]);
    }
    vo.reference_audio.push_back(v);
  }

  id rtValue = options[@"referenceText"];
  if ([rtValue isKindOfClass:[NSString class]] && [(NSString *)rtValue length] > 0) {
    // UTF8String may return NULL; constructing std::string from NULL is undefined.
    const char *rtUtf8 = [(NSString *)rtValue UTF8String];
    if (rtUtf8 != NULL) {
      vo.reference_text = std::string(rtUtf8);
    }
  }

  id stepsValue = options[@"numSteps"];
  if ([stepsValue respondsToSelector:@selector(doubleValue)]) {
    vo.num_steps = static_cast<int32_t>([stepsValue doubleValue]);
  } else {
    vo.num_steps = defaultNumSteps;
  }

  // silence_scale keeps the struct's own default when the key is absent or not numeric.
  id silenceValue = options[@"silenceScale"];
  if ([silenceValue respondsToSelector:@selector(doubleValue)]) {
    vo.silence_scale = static_cast<float>([silenceValue doubleValue]);
  }

  id extra = options[@"extra"];
  if ([extra isKindOfClass:[NSDictionary class]]) {
    NSDictionary *ex = (NSDictionary *)extra;
    for (id k in ex) {
      id v = ex[k];
      // Only string-to-string pairs are forwarded; anything else is ignored.
      if (![k isKindOfClass:[NSString class]] || ![v isKindOfClass:[NSString class]]) continue;
      const char *keyUtf8 = [(NSString *)k UTF8String];
      const char *valueUtf8 = [(NSString *)v UTF8String];
      if (keyUtf8 == NULL || valueUtf8 == NULL) continue;
      vo.extra[std::string(keyUtf8)] = std::string(valueUtf8);
    }
  }
  return vo;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Reports whether `options` carries usable reference audio.
 *
 * Parses the dictionary with VoiceCloneOptionsFromNSDictionary (the numSteps default of 1
 * is irrelevant here) and requires a non-empty sample buffer and a positive sample rate.
 */
static bool NSDictionaryHasValidReferenceAudio(NSDictionary *options) {
  const auto parsed = VoiceCloneOptionsFromNSDictionary(options, 1);
  if (!parsed.has_value()) {
    return false;
  }
  if (parsed->reference_audio.empty()) {
    return false;
  }
  return parsed->reference_sample_rate > 0;
}
|
|
77
129
|
|
|
130
|
+
} // namespace
|
|
131
|
+
|
|
78
132
|
@implementation SherpaOnnx (TTS)
|
|
79
133
|
|
|
80
134
|
- (void)initializeTts:(NSString *)instanceId
|
|
@@ -195,8 +249,10 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
195
249
|
|
|
196
250
|
resolve(resultDict);
|
|
197
251
|
} else {
|
|
198
|
-
NSString *errorMsg =
|
|
199
|
-
|
|
252
|
+
NSString *errorMsg = result.error.empty()
|
|
253
|
+
? @"Failed to initialize TTS"
|
|
254
|
+
: [NSString stringWithUTF8String:result.error.c_str()];
|
|
255
|
+
RCTLogError(@"TTS init failed: %@", errorMsg);
|
|
200
256
|
reject(@"TTS_INIT_ERROR", errorMsg, nil);
|
|
201
257
|
}
|
|
202
258
|
} @catch (NSException *exception) {
|
|
@@ -408,10 +464,37 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
408
464
|
@try {
|
|
409
465
|
std::string textStr = [text UTF8String];
|
|
410
466
|
|
|
467
|
+
using Kind = sherpaonnx::TtsModelKind;
|
|
468
|
+
Kind kind = wrapper->getModelKind();
|
|
469
|
+
bool hasRef = NSDictionaryHasValidReferenceAudio(options);
|
|
470
|
+
|
|
471
|
+
if (hasRef && kind != Kind::kZipvoice && kind != Kind::kPocket) {
|
|
472
|
+
reject(@"TTS_GENERATE_ERROR", @"Reference audio is only supported for Zipvoice and Pocket TTS.", nil);
|
|
473
|
+
return;
|
|
474
|
+
}
|
|
475
|
+
if (kind == Kind::kPocket && !hasRef) {
|
|
476
|
+
reject(@"TTS_GENERATE_ERROR", @"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options.", nil);
|
|
477
|
+
return;
|
|
478
|
+
}
|
|
479
|
+
if (hasRef && kind == Kind::kZipvoice) {
|
|
480
|
+
NSString *rt = options[@"referenceText"];
|
|
481
|
+
NSString *trimmed = rt != nil ? [rt stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]] : @"";
|
|
482
|
+
if ([trimmed length] == 0) {
|
|
483
|
+
reject(@"TTS_GENERATE_ERROR", @"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio).", nil);
|
|
484
|
+
return;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
std::optional<sherpaonnx::VoiceCloneOptions> cloneOpt;
|
|
489
|
+
if (hasRef) {
|
|
490
|
+
cloneOpt = VoiceCloneOptionsFromNSDictionary(options, kDefaultVoiceCloneNumSteps);
|
|
491
|
+
}
|
|
492
|
+
|
|
411
493
|
auto result = wrapper->generate(
|
|
412
494
|
textStr,
|
|
413
495
|
static_cast<int32_t>(sid),
|
|
414
|
-
static_cast<float>(speed)
|
|
496
|
+
static_cast<float>(speed),
|
|
497
|
+
cloneOpt
|
|
415
498
|
);
|
|
416
499
|
|
|
417
500
|
if (result.samples.empty() || result.sampleRate == 0) {
|
|
@@ -469,10 +552,37 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
469
552
|
@try {
|
|
470
553
|
std::string textStr = [text UTF8String];
|
|
471
554
|
|
|
555
|
+
using Kind = sherpaonnx::TtsModelKind;
|
|
556
|
+
Kind kind = wrapper->getModelKind();
|
|
557
|
+
bool hasRef = NSDictionaryHasValidReferenceAudio(options);
|
|
558
|
+
|
|
559
|
+
if (hasRef && kind != Kind::kZipvoice && kind != Kind::kPocket) {
|
|
560
|
+
reject(@"TTS_GENERATE_ERROR", @"Reference audio is only supported for Zipvoice and Pocket TTS.", nil);
|
|
561
|
+
return;
|
|
562
|
+
}
|
|
563
|
+
if (kind == Kind::kPocket && !hasRef) {
|
|
564
|
+
reject(@"TTS_GENERATE_ERROR", @"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options.", nil);
|
|
565
|
+
return;
|
|
566
|
+
}
|
|
567
|
+
if (hasRef && kind == Kind::kZipvoice) {
|
|
568
|
+
NSString *rt = options[@"referenceText"];
|
|
569
|
+
NSString *trimmed = rt != nil ? [rt stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]] : @"";
|
|
570
|
+
if ([trimmed length] == 0) {
|
|
571
|
+
reject(@"TTS_GENERATE_ERROR", @"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio).", nil);
|
|
572
|
+
return;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
std::optional<sherpaonnx::VoiceCloneOptions> cloneOpt;
|
|
577
|
+
if (hasRef) {
|
|
578
|
+
cloneOpt = VoiceCloneOptionsFromNSDictionary(options, kDefaultVoiceCloneNumSteps);
|
|
579
|
+
}
|
|
580
|
+
|
|
472
581
|
auto result = wrapper->generate(
|
|
473
582
|
textStr,
|
|
474
583
|
static_cast<int32_t>(sid),
|
|
475
|
-
static_cast<float>(speed)
|
|
584
|
+
static_cast<float>(speed),
|
|
585
|
+
cloneOpt
|
|
476
586
|
);
|
|
477
587
|
|
|
478
588
|
if (result.samples.empty() || result.sampleRate == 0) {
|
|
@@ -487,22 +597,32 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
487
597
|
[samplesArray addObject:@(sample)];
|
|
488
598
|
}
|
|
489
599
|
|
|
490
|
-
std::vector<std::string> tokens = SplitTtsTokens(textStr);
|
|
491
600
|
NSMutableArray *subtitlesArray = [NSMutableArray array];
|
|
492
|
-
if (!
|
|
493
|
-
double
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
601
|
+
if (hasRef && !result.samples.empty() && result.sampleRate > 0) {
|
|
602
|
+
double durationSec = static_cast<double>(result.samples.size()) / static_cast<double>(result.sampleRate);
|
|
603
|
+
NSDictionary *subtitleMap = @{
|
|
604
|
+
@"text": text,
|
|
605
|
+
@"start": @0.0,
|
|
606
|
+
@"end": @(durationSec)
|
|
607
|
+
};
|
|
608
|
+
[subtitlesArray addObject:subtitleMap];
|
|
609
|
+
} else {
|
|
610
|
+
std::vector<std::string> tokens = SplitTtsTokens(textStr);
|
|
611
|
+
if (!tokens.empty()) {
|
|
612
|
+
double totalSeconds = static_cast<double>(result.samples.size()) /
|
|
613
|
+
static_cast<double>(result.sampleRate);
|
|
614
|
+
double perToken = totalSeconds / static_cast<double>(tokens.size());
|
|
615
|
+
|
|
616
|
+
for (size_t i = 0; i < tokens.size(); ++i) {
|
|
617
|
+
double start = perToken * static_cast<double>(i);
|
|
618
|
+
double end = perToken * static_cast<double>(i + 1);
|
|
619
|
+
NSDictionary *item = @{
|
|
620
|
+
@"text": [NSString stringWithUTF8String:tokens[i].c_str()],
|
|
621
|
+
@"start": @(start),
|
|
622
|
+
@"end": @(end)
|
|
623
|
+
};
|
|
624
|
+
[subtitlesArray addObject:item];
|
|
625
|
+
}
|
|
506
626
|
}
|
|
507
627
|
}
|
|
508
628
|
|
|
@@ -556,6 +676,43 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
556
676
|
instRef->streamRunning.store(true);
|
|
557
677
|
}
|
|
558
678
|
|
|
679
|
+
using Kind = sherpaonnx::TtsModelKind;
|
|
680
|
+
Kind streamKind = instRef->wrapper->getModelKind();
|
|
681
|
+
bool streamHasRef = NSDictionaryHasValidReferenceAudio(options);
|
|
682
|
+
|
|
683
|
+
if (streamKind == Kind::kPocket && !streamHasRef) {
|
|
684
|
+
std::lock_guard<std::mutex> lock(g_tts_mutex);
|
|
685
|
+
auto it2 = g_tts_instances.find([instanceId UTF8String]);
|
|
686
|
+
if (it2 != g_tts_instances.end()) {
|
|
687
|
+
it2->second->streamRunning.store(false);
|
|
688
|
+
}
|
|
689
|
+
reject(@"TTS_STREAM_ERROR", @"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options.", nil);
|
|
690
|
+
return;
|
|
691
|
+
}
|
|
692
|
+
if (streamHasRef && streamKind == Kind::kZipvoice) {
|
|
693
|
+
std::lock_guard<std::mutex> lock(g_tts_mutex);
|
|
694
|
+
auto it2 = g_tts_instances.find([instanceId UTF8String]);
|
|
695
|
+
if (it2 != g_tts_instances.end()) {
|
|
696
|
+
it2->second->streamRunning.store(false);
|
|
697
|
+
}
|
|
698
|
+
reject(@"TTS_STREAM_ERROR", @"Streaming with reference audio not supported for Zipvoice", nil);
|
|
699
|
+
return;
|
|
700
|
+
}
|
|
701
|
+
if (streamHasRef && streamKind != Kind::kPocket) {
|
|
702
|
+
std::lock_guard<std::mutex> lock(g_tts_mutex);
|
|
703
|
+
auto it2 = g_tts_instances.find([instanceId UTF8String]);
|
|
704
|
+
if (it2 != g_tts_instances.end()) {
|
|
705
|
+
it2->second->streamRunning.store(false);
|
|
706
|
+
}
|
|
707
|
+
reject(@"TTS_STREAM_ERROR", @"Reference audio streaming is only supported for Pocket TTS.", nil);
|
|
708
|
+
return;
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
std::optional<sherpaonnx::VoiceCloneOptions> streamCloneOpt;
|
|
712
|
+
if (streamHasRef) {
|
|
713
|
+
streamCloneOpt = VoiceCloneOptionsFromNSDictionary(options, kDefaultVoiceCloneNumSteps);
|
|
714
|
+
}
|
|
715
|
+
|
|
559
716
|
std::string textStr = [text UTF8String];
|
|
560
717
|
int32_t sampleRate = instRef->wrapper->getSampleRate();
|
|
561
718
|
NSString *instanceIdCopy = [instanceId copy];
|
|
@@ -595,7 +752,8 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
595
752
|
});
|
|
596
753
|
|
|
597
754
|
return instRef->streamCancelled.load() ? 0 : 1;
|
|
598
|
-
}
|
|
755
|
+
},
|
|
756
|
+
streamCloneOpt
|
|
599
757
|
);
|
|
600
758
|
} @catch (NSException *exception) {
|
|
601
759
|
NSString *errorMsg = [NSString stringWithFormat:@"TTS streaming failed: %@", exception.reason];
|
package/ios/SherpaOnnx.mm
CHANGED
|
@@ -189,6 +189,14 @@
|
|
|
189
189
|
resolve(nil);
|
|
190
190
|
}
|
|
191
191
|
|
|
192
|
+
/**
 * Forwards `sourcePath` to +[SherpaOnnxArchiveHelper cancelExtractForPath:] and then
 * resolves the promise with nil. The promise always resolves; `reject` is never invoked.
 */
- (void)cancelExtractBySourcePath:(NSString *)sourcePath
                          resolve:(RCTPromiseResolveBlock)resolve
                           reject:(RCTPromiseRejectBlock)reject {
  [SherpaOnnxArchiveHelper cancelExtractForPath:sourcePath];

  // No result payload: the JS side only needs to know the request was delivered.
  resolve(nil);
}
|
|
199
|
+
|
|
192
200
|
- (void)computeFileSha256:(NSString *)filePath
|
|
193
201
|
resolve:(RCTPromiseResolveBlock)resolve
|
|
194
202
|
reject:(RCTPromiseRejectBlock)reject
|
|
@@ -271,6 +279,26 @@
|
|
|
271
279
|
resolve(nil);
|
|
272
280
|
}
|
|
273
281
|
|
|
282
|
+
/**
 * Decodes the audio file at `inputPath` via SherpaOnnxAudioConvert and resolves with
 * `{ samples: NSArray<NSNumber *>, sampleRate: NSNumber }`.
 *
 * A nil `targetSampleRateHz` is passed on as 0. On failure the promise is rejected with
 * code DECODE_ERROR, using the NSError's localized description when one was provided.
 */
- (void)decodeAudioFileToFloatSamples:(NSString *)inputPath
                   targetSampleRateHz:(NSNumber *)targetSampleRateHz
                              resolve:(RCTPromiseResolveBlock)resolve
                               reject:(RCTPromiseRejectBlock)reject {
  int requestedRate = 0;
  if (targetSampleRateHz != nil) {
    requestedRate = targetSampleRateHz.intValue;
  }

  NSArray<NSNumber *> *decodedSamples = nil;
  int decodedRate = 0;
  NSError *decodeError = nil;
  BOOL decoded = [SherpaOnnxAudioConvert decodeAudioFileToFloatSamples:inputPath
                                                    targetSampleRateHz:requestedRate
                                                            outSamples:&decodedSamples
                                                         outSampleRate:&decodedRate
                                                                 error:&decodeError];
  if (!decoded) {
    NSString *message = @"Failed to decode audio";
    if (decodeError) {
      message = decodeError.localizedDescription;
    }
    reject(@"DECODE_ERROR", message, decodeError);
    return;
  }

  // Dictionary literals throw on nil values, so fall back to an empty array.
  NSArray<NSNumber *> *payloadSamples = decodedSamples;
  if (payloadSamples == nil) {
    payloadSamples = @[];
  }
  resolve(@{ @"samples": payloadSamples, @"sampleRate": @(decodedRate) });
}
|
|
301
|
+
|
|
274
302
|
- (void)getAvailableProviders:(RCTPromiseResolveBlock)resolve
|
|
275
303
|
reject:(RCTPromiseRejectBlock)reject
|
|
276
304
|
{
|
|
@@ -286,4 +314,30 @@
|
|
|
286
314
|
}
|
|
287
315
|
}
|
|
288
316
|
|
|
317
|
+
/**
 * Reads a file from the main bundle's resource directory as UTF-8 text and resolves the
 * promise with its contents.
 *
 * `assetPath` must be a non-empty path relative to the bundle resources. To prevent path
 * traversal it is rejected when it contains "..", starts with "/", or contains a
 * backslash. Every failure rejects with code ASSET_READ_ERROR.
 */
- (void)readAssetFileAsUtf8:(NSString *)assetPath
                    resolve:(RCTPromiseResolveBlock)resolve
                     reject:(RCTPromiseRejectBlock)reject
{
  // A nil or non-string path would make stringByAppendingPathComponent: raise below.
  if (![assetPath isKindOfClass:[NSString class]] || assetPath.length == 0) {
    reject(@"ASSET_READ_ERROR", @"Invalid asset path: (empty)", nil);
    return;
  }

  // Path-traversal guard: no parent references, no absolute paths, no backslashes
  // (containsString: already covers a leading backslash).
  if ([assetPath containsString:@".."] ||
      [assetPath hasPrefix:@"/"] ||
      [assetPath containsString:@"\\"]) {
    reject(@"ASSET_READ_ERROR",
           [NSString stringWithFormat:@"Invalid asset path: %@", assetPath],
           nil);
    return;
  }

  NSString *resourcePath = [[NSBundle mainBundle] resourcePath];
  if (resourcePath == nil) {
    reject(@"ASSET_READ_ERROR",
           [NSString stringWithFormat:@"Failed to read asset %@: bundle resource path unavailable", assetPath],
           nil);
    return;
  }

  NSString *fullPath = [resourcePath stringByAppendingPathComponent:assetPath];
  NSError *error = nil;
  NSString *content = [NSString stringWithContentsOfFile:fullPath encoding:NSUTF8StringEncoding error:&error];

  // Cocoa convention: the nil return value signals failure, not the error pointer.
  if (content == nil) {
    NSString *reason = error.localizedDescription ?: @"unknown error";
    reject(@"ASSET_READ_ERROR",
           [NSString stringWithFormat:@"Failed to read asset %@: %@", assetPath, reason],
           error);
    return;
  }
  resolve(content);
}
|
|
342
|
+
|
|
289
343
|
@end
|
|
@@ -23,6 +23,16 @@ NS_ASSUME_NONNULL_BEGIN
|
|
|
23
23
|
outputSampleRateHz:(int)outputSampleRateHz
|
|
24
24
|
error:(NSError **)error;
|
|
25
25
|
|
|
26
|
+
/**
|
|
27
|
+
* Decode audio to mono float samples (approx. [-1, 1]) and sample rate.
|
|
28
|
+
* targetSampleRateHz <= 0 keeps the decoded stream rate.
|
|
29
|
+
*/
|
|
30
|
+
+ (BOOL)decodeAudioFileToFloatSamples:(NSString *)inputPath
|
|
31
|
+
targetSampleRateHz:(int)targetSampleRateHz
|
|
32
|
+
outSamples:(NSArray<NSNumber *> **)outSamples
|
|
33
|
+
outSampleRate:(int *)outSampleRate
|
|
34
|
+
error:(NSError **)error;
|
|
35
|
+
|
|
26
36
|
@end
|
|
27
37
|
|
|
28
38
|
NS_ASSUME_NONNULL_END
|