react-native-sherpa-onnx 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/LICENSE +1 -0
  2. package/README.md +89 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +344 -152
  15. package/android/prebuilt-versions.gradle +1 -1
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +2 -2
  21. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
  24. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +40 -10
  25. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +99 -0
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
  28. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  29. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  30. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  31. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  32. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  33. package/ios/SherpaOnnx+TTS.mm +178 -20
  34. package/ios/SherpaOnnx.mm +54 -0
  35. package/ios/SherpaOnnxAudioConvert.h +10 -0
  36. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  37. package/ios/archive/sherpa-onnx-archive-helper.h +3 -0
  38. package/ios/archive/sherpa-onnx-archive-helper.mm +39 -6
  39. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
  40. package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
  41. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  42. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  43. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  44. package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
  45. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  46. package/lib/module/audio/index.js +8 -0
  47. package/lib/module/audio/index.js.map +1 -1
  48. package/lib/module/download/ModelDownloadManager.js +10 -929
  49. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  50. package/lib/module/download/activeModelOperations.js +26 -0
  51. package/lib/module/download/activeModelOperations.js.map +1 -0
  52. package/lib/module/download/background-downloader.d.js +2 -0
  53. package/lib/module/download/background-downloader.d.js.map +1 -0
  54. package/lib/module/download/bulkPurge.js +72 -0
  55. package/lib/module/download/bulkPurge.js.map +1 -0
  56. package/lib/module/download/checksumPrompt.js +19 -0
  57. package/lib/module/download/checksumPrompt.js.map +1 -0
  58. package/lib/module/download/constants.js +7 -0
  59. package/lib/module/download/constants.js.map +1 -0
  60. package/lib/module/download/downloadEvents.js +35 -0
  61. package/lib/module/download/downloadEvents.js.map +1 -0
  62. package/lib/module/download/downloadTask.js +385 -0
  63. package/lib/module/download/downloadTask.js.map +1 -0
  64. package/lib/module/download/ensureModel.js +89 -0
  65. package/lib/module/download/ensureModel.js.map +1 -0
  66. package/lib/module/download/index.js +4 -4
  67. package/lib/module/download/index.js.map +1 -1
  68. package/lib/module/download/localModels.js +151 -0
  69. package/lib/module/download/localModels.js.map +1 -0
  70. package/lib/module/download/modelExtraction.js +174 -0
  71. package/lib/module/download/modelExtraction.js.map +1 -0
  72. package/lib/module/download/paths.js +98 -0
  73. package/lib/module/download/paths.js.map +1 -0
  74. package/lib/module/download/postDownloadProcessing.js +206 -0
  75. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  76. package/lib/module/download/protectedModelKeys.js +31 -0
  77. package/lib/module/download/protectedModelKeys.js.map +1 -0
  78. package/lib/module/download/registry.js +267 -0
  79. package/lib/module/download/registry.js.map +1 -0
  80. package/lib/module/download/retry.js +59 -0
  81. package/lib/module/download/retry.js.map +1 -0
  82. package/lib/module/download/types.js +17 -0
  83. package/lib/module/download/types.js.map +1 -0
  84. package/lib/module/download/validation.js +101 -5
  85. package/lib/module/download/validation.js.map +1 -1
  86. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  87. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  88. package/lib/module/{download → extraction}/extractTarZst.js +3 -1
  89. package/lib/module/extraction/extractTarZst.js.map +1 -0
  90. package/lib/module/extraction/index.js +3 -4
  91. package/lib/module/extraction/index.js.map +1 -1
  92. package/lib/module/index.js +1 -1
  93. package/lib/module/index.js.map +1 -1
  94. package/lib/module/licenses.js +63 -0
  95. package/lib/module/licenses.js.map +1 -0
  96. package/lib/module/stt/index.js +16 -2
  97. package/lib/module/stt/index.js.map +1 -1
  98. package/lib/module/stt/streaming.js +2 -0
  99. package/lib/module/stt/streaming.js.map +1 -1
  100. package/lib/module/stt/streamingTypes.js.map +1 -1
  101. package/lib/module/stt/types.js.map +1 -1
  102. package/lib/module/tts/index.js +20 -2
  103. package/lib/module/tts/index.js.map +1 -1
  104. package/lib/module/tts/streaming.js +4 -0
  105. package/lib/module/tts/streaming.js.map +1 -1
  106. package/lib/module/tts/types.js.map +1 -1
  107. package/lib/module/utils.js +16 -1
  108. package/lib/module/utils.js.map +1 -1
  109. package/lib/typescript/src/NativeSherpaOnnx.d.ts +33 -5
  110. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  111. package/lib/typescript/src/audio/index.d.ts +10 -0
  112. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  113. package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
  114. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  115. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  116. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  117. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  118. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  119. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  120. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  121. package/lib/typescript/src/download/constants.d.ts +5 -0
  122. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  123. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  124. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  125. package/lib/typescript/src/download/downloadTask.d.ts +20 -0
  126. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  127. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  128. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  129. package/lib/typescript/src/download/index.d.ts +7 -7
  130. package/lib/typescript/src/download/index.d.ts.map +1 -1
  131. package/lib/typescript/src/download/localModels.d.ts +15 -0
  132. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  133. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  134. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  135. package/lib/typescript/src/download/paths.d.ts +28 -0
  136. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  137. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  138. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  139. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  140. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  141. package/lib/typescript/src/download/registry.d.ts +14 -0
  142. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  143. package/lib/typescript/src/download/retry.d.ts +15 -0
  144. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  145. package/lib/typescript/src/download/types.d.ts +96 -0
  146. package/lib/typescript/src/download/types.d.ts.map +1 -0
  147. package/lib/typescript/src/download/validation.d.ts +19 -0
  148. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  149. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  150. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  151. package/lib/typescript/src/index.d.ts +1 -0
  152. package/lib/typescript/src/index.d.ts.map +1 -1
  153. package/lib/typescript/src/licenses.d.ts +10 -0
  154. package/lib/typescript/src/licenses.d.ts.map +1 -0
  155. package/lib/typescript/src/stt/index.d.ts +4 -1
  156. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  157. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  158. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  159. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  160. package/lib/typescript/src/stt/types.d.ts +3 -1
  161. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  162. package/lib/typescript/src/tts/index.d.ts +3 -1
  163. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  164. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  165. package/lib/typescript/src/tts/types.d.ts +6 -5
  166. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  167. package/lib/typescript/src/utils.d.ts +5 -0
  168. package/lib/typescript/src/utils.d.ts.map +1 -1
  169. package/package.json +6 -1
  170. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  171. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  172. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  173. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  174. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  175. package/scripts/ci/update_model_license_csv.sh +765 -0
  176. package/scripts/setup-ios-framework.sh +14 -11
  177. package/scripts/update_commercial_use.js +73 -0
  178. package/src/NativeSherpaOnnx.ts +36 -5
  179. package/src/audio/index.ts +20 -0
  180. package/src/download/ModelDownloadManager.ts +55 -1343
  181. package/src/download/activeModelOperations.ts +38 -0
  182. package/src/download/background-downloader.d.ts +43 -0
  183. package/src/download/bulkPurge.ts +102 -0
  184. package/src/download/checksumPrompt.ts +25 -0
  185. package/src/download/constants.ts +5 -0
  186. package/src/download/downloadEvents.ts +55 -0
  187. package/src/download/downloadTask.ts +497 -0
  188. package/src/download/ensureModel.ts +124 -0
  189. package/src/download/index.ts +19 -4
  190. package/src/download/localModels.ts +234 -0
  191. package/src/download/modelExtraction.ts +244 -0
  192. package/src/download/paths.ts +134 -0
  193. package/src/download/postDownloadProcessing.ts +292 -0
  194. package/src/download/protectedModelKeys.ts +30 -0
  195. package/src/download/registry.ts +404 -0
  196. package/src/download/retry.ts +76 -0
  197. package/src/download/types.ts +120 -0
  198. package/src/download/validation.ts +114 -8
  199. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  200. package/src/{download → extraction}/extractTarZst.ts +3 -1
  201. package/src/extraction/index.ts +3 -7
  202. package/src/index.tsx +1 -0
  203. package/src/licenses.ts +100 -0
  204. package/src/stt/index.ts +20 -2
  205. package/src/stt/streaming.ts +3 -0
  206. package/src/stt/streamingTypes.ts +5 -0
  207. package/src/stt/types.ts +3 -1
  208. package/src/tts/index.ts +30 -2
  209. package/src/tts/streaming.ts +10 -0
  210. package/src/tts/types.ts +6 -5
  211. package/src/utils.ts +22 -1
  212. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  213. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  214. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  215. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  216. package/lib/module/download/extractTarBz2.js.map +0 -1
  217. package/lib/module/download/extractTarZst.js.map +0 -1
  218. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  219. package/lib/typescript/src/download/extractTarZst.d.ts.map +0 -1
  220. package/scripts/check-qnn-support.sh +0 -78
  221. /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
  222. /package/lib/typescript/src/{download → extraction}/extractTarZst.d.ts +0 -0
@@ -70,6 +70,7 @@ static sherpaonnx::OnlineSttWrapper* getOnlineSttInstanceForStream(NSString* str
70
70
  NSString *provider = options.provider();
71
71
  NSString *ruleFsts = options.ruleFsts();
72
72
  NSString *ruleFars = options.ruleFars();
73
+ auto dither = options.dither();
73
74
  auto blankPenalty = options.blankPenalty();
74
75
  auto debug = options.debug();
75
76
  auto rule1MustContainNonSilence = options.rule1MustContainNonSilence();
@@ -102,6 +103,7 @@ static sherpaonnx::OnlineSttWrapper* getOnlineSttInstanceForStream(NSString* str
102
103
  provider != nil ? [provider UTF8String] : "cpu",
103
104
  ruleFsts != nil ? [ruleFsts UTF8String] : "",
104
105
  ruleFars != nil ? [ruleFars UTF8String] : "",
106
+ dither.has_value() ? (float)dither.value() : 0.f,
105
107
  blankPenalty.has_value() ? (float)blankPenalty.value() : 0.f,
106
108
  debug.has_value() && debug.value(),
107
109
  rule1MustContainNonSilence.has_value() && rule1MustContainNonSilence.value(),
@@ -143,37 +143,10 @@ static void pcmLiveStopQueue(void) {
143
143
 
144
144
  @implementation SherpaOnnx (PcmLiveStream)
145
145
 
146
- - (void)startPcmLiveStream:(id __unsafe_unretained)optionsArg
146
+ #if __has_include(<SherpaOnnxSpec/SherpaOnnxSpec.h>)
147
+ - (void)startPcmLiveStream:(JS::NativeSherpaOnnx::SpecStartPcmLiveStreamOptions &)options
147
148
  resolve:(RCTPromiseResolveBlock)resolve
148
149
  reject:(RCTPromiseRejectBlock)reject
149
- {
150
- int targetRate = 16000;
151
- UInt32 bufferSizeFrames = 0;
152
-
153
- // Parse optionsArg coming from JS (fallback / non-codegen path).
154
- if ([optionsArg isKindOfClass:[NSDictionary class]]) {
155
- NSDictionary *dict = (NSDictionary *)optionsArg;
156
-
157
- id sampleRateValue = dict[@"sampleRate"];
158
- if ([sampleRateValue respondsToSelector:@selector(intValue)]) {
159
- int v = (int)[sampleRateValue intValue];
160
- if (v > 0) targetRate = v;
161
- }
162
-
163
- id bufferSizeValue = dict[@"bufferSizeFrames"];
164
- if ([bufferSizeValue respondsToSelector:@selector(doubleValue)]) {
165
- double v = [bufferSizeValue doubleValue];
166
- if (v > 0) bufferSizeFrames = (UInt32)v;
167
- }
168
- }
169
-
170
- [self _startPcmLiveStreamWithTargetRate:targetRate bufferSizeFrames:bufferSizeFrames resolve:resolve reject:reject];
171
- }
172
-
173
- #if __has_include(<SherpaOnnxSpec/SherpaOnnxSpec.h>)
174
- - (void)startPcmLiveStreamWithOptions:(JS::NativeSherpaOnnx::SpecStartPcmLiveStreamOptions &)options
175
- resolve:(RCTPromiseResolveBlock)resolve
176
- reject:(RCTPromiseRejectBlock)reject
177
150
  {
178
151
  int targetRate = 16000;
179
152
  if (options.sampleRate()) {
@@ -17,6 +17,7 @@
17
17
  #include <condition_variable>
18
18
  #include <memory>
19
19
  #include <mutex>
20
+ #include <optional>
20
21
  #include <sstream>
21
22
  #include <string>
22
23
  #include <unordered_map>
@@ -55,6 +56,7 @@ static NSString *ttsModelKindToNSString(sherpaonnx::TtsModelKind kind) {
55
56
  case K::kMatcha: return @"matcha";
56
57
  case K::kKokoro: return @"kokoro";
57
58
  case K::kKitten: return @"kitten";
59
+ case K::kPocket: return @"pocket";
58
60
  case K::kZipvoice: return @"zipvoice";
59
61
  default: return @"unknown";
60
62
  }
@@ -73,8 +75,60 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
73
75
  }
74
76
  return tokens;
75
77
  }
78
+
79
+ /** When options omit numSteps, matches Android SherpaOnnxTtsHelper / upstream GenerationConfig default. */
80
+ constexpr int32_t kDefaultVoiceCloneNumSteps = 5;
81
+
82
+ /** Non-null optional when referenceAudio is non-empty array and referenceSampleRate > 0. */
83
+ static std::optional<sherpaonnx::VoiceCloneOptions> VoiceCloneOptionsFromNSDictionary(NSDictionary *options, int32_t defaultNumSteps) {
84
+ if (options == nil) return std::nullopt;
85
+ NSArray *refArr = options[@"referenceAudio"];
86
+ if (![refArr isKindOfClass:[NSArray class]] || [refArr count] == 0) return std::nullopt;
87
+ NSNumber *srNum = options[@"referenceSampleRate"];
88
+ if (srNum == nil || [srNum doubleValue] <= 0) return std::nullopt;
89
+
90
+ sherpaonnx::VoiceCloneOptions vo;
91
+ vo.reference_sample_rate = static_cast<int32_t>([srNum doubleValue]);
92
+ vo.reference_audio.reserve([refArr count]);
93
+ for (id elem in refArr) {
94
+ float v = 0.f;
95
+ if ([elem isKindOfClass:[NSNumber class]]) {
96
+ v = static_cast<float>([(NSNumber *)elem doubleValue]);
97
+ }
98
+ vo.reference_audio.push_back(v);
99
+ }
100
+ NSString *rt = options[@"referenceText"];
101
+ if (rt != nil && [rt length] > 0) {
102
+ vo.reference_text = std::string([rt UTF8String]);
103
+ }
104
+ if (options[@"numSteps"] != nil) {
105
+ vo.num_steps = static_cast<int32_t>([options[@"numSteps"] doubleValue]);
106
+ } else {
107
+ vo.num_steps = defaultNumSteps;
108
+ }
109
+ if (options[@"silenceScale"] != nil) {
110
+ vo.silence_scale = static_cast<float>([options[@"silenceScale"] doubleValue]);
111
+ }
112
+ id extra = options[@"extra"];
113
+ if ([extra isKindOfClass:[NSDictionary class]]) {
114
+ NSDictionary *ex = (NSDictionary *)extra;
115
+ for (NSString *k in ex) {
116
+ id v = ex[k];
117
+ if ([v isKindOfClass:[NSString class]]) {
118
+ vo.extra[std::string([k UTF8String])] = std::string([(NSString *)v UTF8String]);
119
+ }
120
+ }
121
+ }
122
+ return vo;
123
+ }
124
+
125
+ static bool NSDictionaryHasValidReferenceAudio(NSDictionary *options) {
126
+ auto o = VoiceCloneOptionsFromNSDictionary(options, 1);
127
+ return o.has_value() && !o->reference_audio.empty() && o->reference_sample_rate > 0;
76
128
  }
77
129
 
130
+ } // namespace
131
+
78
132
  @implementation SherpaOnnx (TTS)
79
133
 
80
134
  - (void)initializeTts:(NSString *)instanceId
@@ -195,8 +249,10 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
195
249
 
196
250
  resolve(resultDict);
197
251
  } else {
198
- NSString *errorMsg = @"Failed to initialize TTS";
199
- RCTLogError(@"%@", errorMsg);
252
+ NSString *errorMsg = result.error.empty()
253
+ ? @"Failed to initialize TTS"
254
+ : [NSString stringWithUTF8String:result.error.c_str()];
255
+ RCTLogError(@"TTS init failed: %@", errorMsg);
200
256
  reject(@"TTS_INIT_ERROR", errorMsg, nil);
201
257
  }
202
258
  } @catch (NSException *exception) {
@@ -408,10 +464,37 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
408
464
  @try {
409
465
  std::string textStr = [text UTF8String];
410
466
 
467
+ using Kind = sherpaonnx::TtsModelKind;
468
+ Kind kind = wrapper->getModelKind();
469
+ bool hasRef = NSDictionaryHasValidReferenceAudio(options);
470
+
471
+ if (hasRef && kind != Kind::kZipvoice && kind != Kind::kPocket) {
472
+ reject(@"TTS_GENERATE_ERROR", @"Reference audio is only supported for Zipvoice and Pocket TTS.", nil);
473
+ return;
474
+ }
475
+ if (kind == Kind::kPocket && !hasRef) {
476
+ reject(@"TTS_GENERATE_ERROR", @"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options.", nil);
477
+ return;
478
+ }
479
+ if (hasRef && kind == Kind::kZipvoice) {
480
+ NSString *rt = options[@"referenceText"];
481
+ NSString *trimmed = rt != nil ? [rt stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]] : @"";
482
+ if ([trimmed length] == 0) {
483
+ reject(@"TTS_GENERATE_ERROR", @"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio).", nil);
484
+ return;
485
+ }
486
+ }
487
+
488
+ std::optional<sherpaonnx::VoiceCloneOptions> cloneOpt;
489
+ if (hasRef) {
490
+ cloneOpt = VoiceCloneOptionsFromNSDictionary(options, kDefaultVoiceCloneNumSteps);
491
+ }
492
+
411
493
  auto result = wrapper->generate(
412
494
  textStr,
413
495
  static_cast<int32_t>(sid),
414
- static_cast<float>(speed)
496
+ static_cast<float>(speed),
497
+ cloneOpt
415
498
  );
416
499
 
417
500
  if (result.samples.empty() || result.sampleRate == 0) {
@@ -469,10 +552,37 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
469
552
  @try {
470
553
  std::string textStr = [text UTF8String];
471
554
 
555
+ using Kind = sherpaonnx::TtsModelKind;
556
+ Kind kind = wrapper->getModelKind();
557
+ bool hasRef = NSDictionaryHasValidReferenceAudio(options);
558
+
559
+ if (hasRef && kind != Kind::kZipvoice && kind != Kind::kPocket) {
560
+ reject(@"TTS_GENERATE_ERROR", @"Reference audio is only supported for Zipvoice and Pocket TTS.", nil);
561
+ return;
562
+ }
563
+ if (kind == Kind::kPocket && !hasRef) {
564
+ reject(@"TTS_GENERATE_ERROR", @"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options.", nil);
565
+ return;
566
+ }
567
+ if (hasRef && kind == Kind::kZipvoice) {
568
+ NSString *rt = options[@"referenceText"];
569
+ NSString *trimmed = rt != nil ? [rt stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]] : @"";
570
+ if ([trimmed length] == 0) {
571
+ reject(@"TTS_GENERATE_ERROR", @"Zipvoice voice cloning requires non-empty referenceText (transcript of reference audio).", nil);
572
+ return;
573
+ }
574
+ }
575
+
576
+ std::optional<sherpaonnx::VoiceCloneOptions> cloneOpt;
577
+ if (hasRef) {
578
+ cloneOpt = VoiceCloneOptionsFromNSDictionary(options, kDefaultVoiceCloneNumSteps);
579
+ }
580
+
472
581
  auto result = wrapper->generate(
473
582
  textStr,
474
583
  static_cast<int32_t>(sid),
475
- static_cast<float>(speed)
584
+ static_cast<float>(speed),
585
+ cloneOpt
476
586
  );
477
587
 
478
588
  if (result.samples.empty() || result.sampleRate == 0) {
@@ -487,22 +597,32 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
487
597
  [samplesArray addObject:@(sample)];
488
598
  }
489
599
 
490
- std::vector<std::string> tokens = SplitTtsTokens(textStr);
491
600
  NSMutableArray *subtitlesArray = [NSMutableArray array];
492
- if (!tokens.empty()) {
493
- double totalSeconds = static_cast<double>(result.samples.size()) /
494
- static_cast<double>(result.sampleRate);
495
- double perToken = totalSeconds / static_cast<double>(tokens.size());
496
-
497
- for (size_t i = 0; i < tokens.size(); ++i) {
498
- double start = perToken * static_cast<double>(i);
499
- double end = perToken * static_cast<double>(i + 1);
500
- NSDictionary *item = @{
501
- @"text": [NSString stringWithUTF8String:tokens[i].c_str()],
502
- @"start": @(start),
503
- @"end": @(end)
504
- };
505
- [subtitlesArray addObject:item];
601
+ if (hasRef && !result.samples.empty() && result.sampleRate > 0) {
602
+ double durationSec = static_cast<double>(result.samples.size()) / static_cast<double>(result.sampleRate);
603
+ NSDictionary *subtitleMap = @{
604
+ @"text": text,
605
+ @"start": @0.0,
606
+ @"end": @(durationSec)
607
+ };
608
+ [subtitlesArray addObject:subtitleMap];
609
+ } else {
610
+ std::vector<std::string> tokens = SplitTtsTokens(textStr);
611
+ if (!tokens.empty()) {
612
+ double totalSeconds = static_cast<double>(result.samples.size()) /
613
+ static_cast<double>(result.sampleRate);
614
+ double perToken = totalSeconds / static_cast<double>(tokens.size());
615
+
616
+ for (size_t i = 0; i < tokens.size(); ++i) {
617
+ double start = perToken * static_cast<double>(i);
618
+ double end = perToken * static_cast<double>(i + 1);
619
+ NSDictionary *item = @{
620
+ @"text": [NSString stringWithUTF8String:tokens[i].c_str()],
621
+ @"start": @(start),
622
+ @"end": @(end)
623
+ };
624
+ [subtitlesArray addObject:item];
625
+ }
506
626
  }
507
627
  }
508
628
 
@@ -556,6 +676,43 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
556
676
  instRef->streamRunning.store(true);
557
677
  }
558
678
 
679
+ using Kind = sherpaonnx::TtsModelKind;
680
+ Kind streamKind = instRef->wrapper->getModelKind();
681
+ bool streamHasRef = NSDictionaryHasValidReferenceAudio(options);
682
+
683
+ if (streamKind == Kind::kPocket && !streamHasRef) {
684
+ std::lock_guard<std::mutex> lock(g_tts_mutex);
685
+ auto it2 = g_tts_instances.find([instanceId UTF8String]);
686
+ if (it2 != g_tts_instances.end()) {
687
+ it2->second->streamRunning.store(false);
688
+ }
689
+ reject(@"TTS_STREAM_ERROR", @"Pocket TTS requires reference audio for voice cloning. Pass referenceAudio and referenceSampleRate (> 0) in options.", nil);
690
+ return;
691
+ }
692
+ if (streamHasRef && streamKind == Kind::kZipvoice) {
693
+ std::lock_guard<std::mutex> lock(g_tts_mutex);
694
+ auto it2 = g_tts_instances.find([instanceId UTF8String]);
695
+ if (it2 != g_tts_instances.end()) {
696
+ it2->second->streamRunning.store(false);
697
+ }
698
+ reject(@"TTS_STREAM_ERROR", @"Streaming with reference audio not supported for Zipvoice", nil);
699
+ return;
700
+ }
701
+ if (streamHasRef && streamKind != Kind::kPocket) {
702
+ std::lock_guard<std::mutex> lock(g_tts_mutex);
703
+ auto it2 = g_tts_instances.find([instanceId UTF8String]);
704
+ if (it2 != g_tts_instances.end()) {
705
+ it2->second->streamRunning.store(false);
706
+ }
707
+ reject(@"TTS_STREAM_ERROR", @"Reference audio streaming is only supported for Pocket TTS.", nil);
708
+ return;
709
+ }
710
+
711
+ std::optional<sherpaonnx::VoiceCloneOptions> streamCloneOpt;
712
+ if (streamHasRef) {
713
+ streamCloneOpt = VoiceCloneOptionsFromNSDictionary(options, kDefaultVoiceCloneNumSteps);
714
+ }
715
+
559
716
  std::string textStr = [text UTF8String];
560
717
  int32_t sampleRate = instRef->wrapper->getSampleRate();
561
718
  NSString *instanceIdCopy = [instanceId copy];
@@ -595,7 +752,8 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
595
752
  });
596
753
 
597
754
  return instRef->streamCancelled.load() ? 0 : 1;
598
- }
755
+ },
756
+ streamCloneOpt
599
757
  );
600
758
  } @catch (NSException *exception) {
601
759
  NSString *errorMsg = [NSString stringWithFormat:@"TTS streaming failed: %@", exception.reason];
package/ios/SherpaOnnx.mm CHANGED
@@ -189,6 +189,14 @@
189
189
  resolve(nil);
190
190
  }
191
191
 
192
+ - (void)cancelExtractBySourcePath:(NSString *)sourcePath
193
+ resolve:(RCTPromiseResolveBlock)resolve
194
+ reject:(RCTPromiseRejectBlock)reject
195
+ {
196
+ [SherpaOnnxArchiveHelper cancelExtractForPath:sourcePath];
197
+ resolve(nil);
198
+ }
199
+
192
200
  - (void)computeFileSha256:(NSString *)filePath
193
201
  resolve:(RCTPromiseResolveBlock)resolve
194
202
  reject:(RCTPromiseRejectBlock)reject
@@ -271,6 +279,26 @@
271
279
  resolve(nil);
272
280
  }
273
281
 
282
+ - (void)decodeAudioFileToFloatSamples:(NSString *)inputPath
283
+ targetSampleRateHz:(NSNumber *)targetSampleRateHz
284
+ resolve:(RCTPromiseResolveBlock)resolve
285
+ reject:(RCTPromiseRejectBlock)reject
286
+ {
287
+ NSArray<NSNumber *> *samples = nil;
288
+ int sr = 0;
289
+ NSError *error = nil;
290
+ int rate = targetSampleRateHz != nil ? targetSampleRateHz.intValue : 0;
291
+ if (![SherpaOnnxAudioConvert decodeAudioFileToFloatSamples:inputPath
292
+ targetSampleRateHz:rate
293
+ outSamples:&samples
294
+ outSampleRate:&sr
295
+ error:&error]) {
296
+ reject(@"DECODE_ERROR", error ? error.localizedDescription : @"Failed to decode audio", error);
297
+ return;
298
+ }
299
+ resolve(@{ @"samples": samples ?: @[], @"sampleRate": @(sr) });
300
+ }
301
+
274
302
  - (void)getAvailableProviders:(RCTPromiseResolveBlock)resolve
275
303
  reject:(RCTPromiseRejectBlock)reject
276
304
  {
@@ -286,4 +314,30 @@
286
314
  }
287
315
  }
288
316
 
317
+ - (void)readAssetFileAsUtf8:(NSString *)assetPath
318
+ resolve:(RCTPromiseResolveBlock)resolve
319
+ reject:(RCTPromiseRejectBlock)reject
320
+ {
321
+ // Validate assetPath to prevent path traversal: reject any path that
322
+ // contains "..", is absolute, or uses backslashes.
323
+ if ([assetPath containsString:@".."] ||
324
+ [assetPath hasPrefix:@"/"] ||
325
+ [assetPath hasPrefix:@"\\"] ||
326
+ [assetPath containsString:@"\\"]) {
327
+ reject(@"ASSET_READ_ERROR",
328
+ [NSString stringWithFormat:@"Invalid asset path: %@", assetPath],
329
+ nil);
330
+ return;
331
+ }
332
+ NSString *resourcePath = [[NSBundle mainBundle] resourcePath];
333
+ NSString *fullPath = [resourcePath stringByAppendingPathComponent:assetPath];
334
+ NSError *error = nil;
335
+ NSString *content = [NSString stringWithContentsOfFile:fullPath encoding:NSUTF8StringEncoding error:&error];
336
+ if (error) {
337
+ reject(@"ASSET_READ_ERROR", [NSString stringWithFormat:@"Failed to read asset %@: %@", assetPath, error.localizedDescription], error);
338
+ } else {
339
+ resolve(content);
340
+ }
341
+ }
342
+
289
343
  @end
@@ -23,6 +23,16 @@ NS_ASSUME_NONNULL_BEGIN
23
23
  outputSampleRateHz:(int)outputSampleRateHz
24
24
  error:(NSError **)error;
25
25
 
26
+ /**
27
+ * Decode audio to mono float samples (approx. [-1, 1]) and sample rate.
28
+ * targetSampleRateHz <= 0 keeps the decoded stream rate.
29
+ */
30
+ + (BOOL)decodeAudioFileToFloatSamples:(NSString *)inputPath
31
+ targetSampleRateHz:(int)targetSampleRateHz
32
+ outSamples:(NSArray<NSNumber *> **)outSamples
33
+ outSampleRate:(int *)outSampleRate
34
+ error:(NSError **)error;
35
+
26
36
  @end
27
37
 
28
38
  NS_ASSUME_NONNULL_END