react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
package/src/tts/index.ts CHANGED
@@ -1,458 +1,470 @@
1
- import { NativeEventEmitter } from 'react-native';
2
- import SherpaOnnx from '../NativeSherpaOnnx';
3
- import type {
4
- TTSInitializeOptions,
5
- TtsUpdateOptions,
6
- SynthesisOptions,
7
- GeneratedAudio,
8
- GeneratedAudioWithTimestamps,
9
- TTSModelInfo,
10
- TtsStreamChunk,
11
- TtsStreamEnd,
12
- TtsStreamError,
13
- } from './types';
14
- import type { InitializeOptions } from '../types';
15
- import { resolveModelPath } from '../utils';
16
-
17
- /**
18
- * Initialize Text-to-Speech (TTS) with model directory.
19
- *
20
- * Supports multiple model source types:
21
- * - Asset models (bundled in app)
22
- * - File system models (downloaded or user-provided)
23
- * - Auto-detection (tries asset first, then file system)
24
- *
25
- * Supported model types (auto-detected or explicit):
26
- * - VITS (includes Piper, Coqui, MeloTTS, MMS)
27
- * - Matcha (acoustic model + vocoder)
28
- * - Kokoro (multi-speaker, multi-language)
29
- * - KittenTTS (lightweight, multi-speaker)
30
- * - Zipvoice (voice cloning capable)
31
- *
32
- * @param options - TTS initialization options or model path configuration
33
- * @returns Promise resolving to result with success and detected models
34
- * @example
35
- * ```typescript
36
- * // Simple string (auto-detect)
37
- * const result = await initializeTTS('models/sherpa-onnx-vits-piper-en_US-lessac-medium');
38
- * console.log('Detected models:', result.detectedModels);
39
- *
40
- * // Asset model
41
- * const result = await initializeTTS({
42
- * modelPath: { type: 'asset', path: 'models/vits-piper-en' }
43
- * });
44
- *
45
- * // File system model with options
46
- * const result = await initializeTTS({
47
- * modelPath: { type: 'file', path: '/path/to/model' },
48
- * numThreads: 4,
49
- * debug: true
50
- * });
51
- *
52
- * // With explicit model type
53
- * const result = await initializeTTS({
54
- * modelPath: { type: 'asset', path: 'models/kokoro-en' },
55
- * modelType: 'kokoro'
56
- * });
57
- * ```
58
- */
59
- export async function initializeTTS(
60
- options: TTSInitializeOptions | InitializeOptions['modelPath']
61
- ): Promise<{
62
- success: boolean;
63
- detectedModels: Array<{ type: string; modelDir: string }>;
64
- }> {
65
- // Handle both object syntax and direct path syntax
66
- let modelPath: InitializeOptions['modelPath'];
67
- let modelType: string | undefined;
68
- let numThreads: number | undefined;
69
- let debug: boolean | undefined;
70
- let noiseScale: number | undefined;
71
- let noiseScaleW: number | undefined;
72
- let lengthScale: number | undefined;
73
-
74
- if (typeof options === 'object' && 'modelPath' in options) {
75
- modelPath = options.modelPath;
76
- modelType = options.modelType;
77
- numThreads = options.numThreads;
78
- debug = options.debug;
79
- noiseScale = options.noiseScale;
80
- noiseScaleW = options.noiseScaleW;
81
- lengthScale = options.lengthScale;
82
- } else {
83
- modelPath = options as InitializeOptions['modelPath'];
84
- modelType = undefined;
85
- numThreads = undefined;
86
- debug = undefined;
87
- noiseScale = undefined;
88
- noiseScaleW = undefined;
89
- lengthScale = undefined;
90
- }
91
-
92
- const resolvedPath = await resolveModelPath(modelPath);
93
- return SherpaOnnx.initializeTts(
94
- resolvedPath,
95
- modelType ?? 'auto',
96
- numThreads ?? 2,
97
- debug ?? false,
98
- noiseScale,
99
- noiseScaleW,
100
- lengthScale
101
- );
102
- }
103
-
104
- /**
105
- * Update TTS parameters by re-initializing with stored config.
106
- */
107
- export async function updateTtsParams(options: TtsUpdateOptions): Promise<{
108
- success: boolean;
109
- detectedModels: Array<{ type: string; modelDir: string }>;
110
- }> {
111
- const noiseArg =
112
- options.noiseScale === undefined ? Number.NaN : options.noiseScale;
113
- const noiseWArg =
114
- options.noiseScaleW === undefined ? Number.NaN : options.noiseScaleW;
115
- const lengthArg =
116
- options.lengthScale === undefined ? Number.NaN : options.lengthScale;
117
-
118
- return SherpaOnnx.updateTtsParams(noiseArg, noiseWArg, lengthArg);
119
- }
120
-
121
- /**
122
- * Generate speech from text.
123
- *
124
- * Returns raw audio samples as float array in range [-1.0, 1.0].
125
- * You can save these samples to a WAV file, stream them, or process them further.
126
- *
127
- * @param text - Text to convert to speech
128
- * @param options - Synthesis options (speaker ID, speed)
129
- * @returns Promise resolving to generated audio data
130
- * @example
131
- * ```typescript
132
- * // Basic usage
133
- * const audio = await generateSpeech('Hello, world!');
134
- * console.log(`Generated ${audio.samples.length} samples at ${audio.sampleRate} Hz`);
135
- *
136
- * // With options
137
- * const audio = await generateSpeech('Hello, world!', {
138
- * sid: 0, // Speaker ID (for multi-speaker models)
139
- * speed: 1.2 // 20% faster
140
- * });
141
- *
142
- * // Slower speech
143
- * const audio = await generateSpeech('Speak slowly', { speed: 0.8 });
144
- * ```
145
- */
146
- export async function generateSpeech(
147
- text: string,
148
- options?: SynthesisOptions
149
- ): Promise<GeneratedAudio> {
150
- return SherpaOnnx.generateTts(text, options?.sid ?? 0, options?.speed ?? 1.0);
151
- }
152
-
153
- /**
154
- * Generate speech from text and return subtitle/timestamp metadata.
155
- *
156
- * Timestamps are estimated based on the output duration when models do not
157
- * provide native timing information.
158
- */
159
- export async function generateSpeechWithTimestamps(
160
- text: string,
161
- options?: SynthesisOptions
162
- ): Promise<GeneratedAudioWithTimestamps> {
163
- return SherpaOnnx.generateTtsWithTimestamps(
164
- text,
165
- options?.sid ?? 0,
166
- options?.speed ?? 1.0
167
- );
168
- }
169
-
170
- const nativeTtsEventModule =
171
- SherpaOnnx &&
172
- typeof (SherpaOnnx as any).addListener === 'function' &&
173
- typeof (SherpaOnnx as any).removeListeners === 'function'
174
- ? (SherpaOnnx as any)
175
- : undefined;
176
-
177
- const ttsEventEmitter = new NativeEventEmitter(nativeTtsEventModule);
178
- export type TtsStreamHandlers = {
179
- onChunk?: (chunk: TtsStreamChunk) => void;
180
- onEnd?: (event: TtsStreamEnd) => void;
181
- onError?: (event: TtsStreamError) => void;
182
- };
183
-
184
- /**
185
- * Generate speech in streaming mode (emits chunk events).
186
- *
187
- * Returns an unsubscribe function to remove event listeners.
188
- */
189
- export async function generateSpeechStream(
190
- text: string,
191
- options: SynthesisOptions | undefined,
192
- handlers: TtsStreamHandlers
193
- ): Promise<() => void> {
194
- const subscriptions = [
195
- ttsEventEmitter.addListener('ttsStreamChunk', (event) => {
196
- handlers.onChunk?.(event as TtsStreamChunk);
197
- }),
198
- ttsEventEmitter.addListener('ttsStreamEnd', (event) => {
199
- handlers.onEnd?.(event as TtsStreamEnd);
200
- }),
201
- ttsEventEmitter.addListener('ttsStreamError', (event) => {
202
- handlers.onError?.(event as TtsStreamError);
203
- }),
204
- ];
205
-
206
- try {
207
- await SherpaOnnx.generateTtsStream(
208
- text,
209
- options?.sid ?? 0,
210
- options?.speed ?? 1.0
211
- );
212
- } catch (error) {
213
- // Clean up listeners if native call fails
214
- subscriptions.forEach((sub) => sub.remove());
215
- throw error;
216
- }
217
-
218
- return () => {
219
- subscriptions.forEach((sub) => sub.remove());
220
- };
221
- }
222
-
223
- /**
224
- * Cancel ongoing streaming TTS generation.
225
- */
226
- export function cancelSpeechStream(): Promise<void> {
227
- return SherpaOnnx.cancelTtsStream();
228
- }
229
-
230
- /**
231
- * Start PCM playback for streaming TTS.
232
- */
233
- export function startTtsPcmPlayer(
234
- sampleRate: number,
235
- channels: number
236
- ): Promise<void> {
237
- return SherpaOnnx.startTtsPcmPlayer(sampleRate, channels);
238
- }
239
-
240
- /**
241
- * Write PCM samples to the streaming TTS player.
242
- */
243
- export function writeTtsPcmChunk(samples: number[]): Promise<void> {
244
- return SherpaOnnx.writeTtsPcmChunk(samples);
245
- }
246
-
247
- /**
248
- * Stop PCM playback for streaming TTS.
249
- */
250
- export function stopTtsPcmPlayer(): Promise<void> {
251
- return SherpaOnnx.stopTtsPcmPlayer();
252
- }
253
-
254
- /**
255
- * Get TTS model information.
256
- *
257
- * Returns the sample rate and number of available speakers/voices.
258
- * Call this after initialization to check model capabilities.
259
- *
260
- * @returns Promise resolving to model information
261
- * @example
262
- * ```typescript
263
- * await initializeTTS('models/kokoro-en');
264
- * const info = await getModelInfo();
265
- *
266
- * console.log(`Sample rate: ${info.sampleRate} Hz`);
267
- * console.log(`Available speakers: ${info.numSpeakers}`);
268
- *
269
- * if (info.numSpeakers > 1) {
270
- * // Multi-speaker model, can use different voices
271
- * const audio = await generateSpeech('Hello', { sid: 1 });
272
- * }
273
- * ```
274
- */
275
- export async function getModelInfo(): Promise<TTSModelInfo> {
276
- const [sampleRate, numSpeakers] = await Promise.all([
277
- SherpaOnnx.getTtsSampleRate(),
278
- SherpaOnnx.getTtsNumSpeakers(),
279
- ]);
280
-
281
- return {
282
- sampleRate,
283
- numSpeakers,
284
- };
285
- }
286
-
287
- /**
288
- * Get the sample rate of the initialized TTS model.
289
- *
290
- * @returns Promise resolving to sample rate in Hz
291
- * @example
292
- * ```typescript
293
- * const sampleRate = await getSampleRate();
294
- * console.log(`Model outputs audio at ${sampleRate} Hz`);
295
- * ```
296
- */
297
- export function getSampleRate(): Promise<number> {
298
- return SherpaOnnx.getTtsSampleRate();
299
- }
300
-
301
- /**
302
- * Get the number of speakers/voices available in the model.
303
- *
304
- * @returns Promise resolving to number of speakers
305
- * - 0 or 1: Single-speaker model
306
- * - >1: Multi-speaker model
307
- * @example
308
- * ```typescript
309
- * const numSpeakers = await getNumSpeakers();
310
- *
311
- * if (numSpeakers > 1) {
312
- * console.log(`Model has ${numSpeakers} different voices`);
313
- * // Generate with different voices
314
- * for (let i = 0; i < numSpeakers; i++) {
315
- * const audio = await generateSpeech('Hello', { sid: i });
316
- * // ... use audio
317
- * }
318
- * }
319
- * ```
320
- */
321
- export function getNumSpeakers(): Promise<number> {
322
- return SherpaOnnx.getTtsNumSpeakers();
323
- }
324
-
325
- /**
326
- * Release TTS resources.
327
- *
328
- * Call this when you're done using TTS to free up memory.
329
- * After calling this, you must call `initializeTTS()` again before
330
- * using TTS functions.
331
- *
332
- * @example
333
- * ```typescript
334
- * await initializeTTS('models/vits-piper-en');
335
- * const audio = await generateSpeech('Hello');
336
- * // ... use audio
337
- * await unloadTTS(); // Free resources
338
- * ```
339
- */
340
- export function unloadTTS(): Promise<void> {
341
- return SherpaOnnx.unloadTts();
342
- }
343
-
344
- /**
345
- * Save generated TTS audio to a WAV file.
346
- *
347
- * @param audio - Generated audio from generateSpeech()
348
- * @param filePath - Absolute path where to save the WAV file
349
- * @returns Promise resolving to the file path where audio was saved
350
- * @example
351
- * ```typescript
352
- * import { Platform } from 'react-native';
353
- * import RNFS from 'react-native-fs';
354
- *
355
- * const audio = await generateSpeech('Hello, world!');
356
- *
357
- * // Save to documents directory
358
- * const documentsPath = Platform.OS === 'ios'
359
- * ? RNFS.DocumentDirectoryPath
360
- * : RNFS.ExternalDirectoryPath;
361
- * const filePath = `${documentsPath}/speech_${Date.now()}.wav`;
362
- *
363
- * const savedPath = await saveAudioToFile(audio, filePath);
364
- * console.log('Audio saved to:', savedPath);
365
- * ```
366
- */
367
- export function saveAudioToFile(
368
- audio: GeneratedAudio,
369
- filePath: string
370
- ): Promise<string> {
371
- return SherpaOnnx.saveTtsAudioToFile(
372
- audio.samples,
373
- audio.sampleRate,
374
- filePath
375
- );
376
- }
377
-
378
- /**
379
- * Save generated TTS audio to a WAV file via Android SAF content URI.
380
- *
381
- * @param audio - Generated audio from generateSpeech()
382
- * @param directoryUri - Directory content URI from SAF
383
- * @param filename - Desired file name
384
- * @returns Promise resolving to content URI of the saved file
385
- */
386
- export function saveAudioToContentUri(
387
- audio: GeneratedAudio,
388
- directoryUri: string,
389
- filename: string
390
- ): Promise<string> {
391
- return SherpaOnnx.saveTtsAudioToContentUri(
392
- audio.samples,
393
- audio.sampleRate,
394
- directoryUri,
395
- filename
396
- );
397
- }
398
-
399
- /**
400
- * Save a text file via Android SAF content URI.
401
- *
402
- * @param text - Text content to write
403
- * @param directoryUri - Directory content URI from SAF
404
- * @param filename - Desired file name
405
- * @param mimeType - MIME type (default: text/plain)
406
- * @returns Promise resolving to content URI of the saved file
407
- */
408
- export function saveTextToContentUri(
409
- text: string,
410
- directoryUri: string,
411
- filename: string,
412
- mimeType = 'text/plain'
413
- ): Promise<string> {
414
- return SherpaOnnx.saveTtsTextToContentUri(
415
- text,
416
- directoryUri,
417
- filename,
418
- mimeType
419
- );
420
- }
421
-
422
- /**
423
- * Copy a SAF content URI to a cache file for local playback (Android only).
424
- *
425
- * @param fileUri - Content URI of the saved WAV file
426
- * @param filename - Desired cache filename
427
- * @returns Promise resolving to absolute path of the cached file
428
- */
429
- export function copyContentUriToCache(
430
- fileUri: string,
431
- filename: string
432
- ): Promise<string> {
433
- return SherpaOnnx.copyTtsContentUriToCache(fileUri, filename);
434
- }
435
-
436
- /**
437
- * Share a TTS audio file (file path or content URI).
438
- *
439
- * @param fileUri - File path or content URI
440
- * @param mimeType - MIME type (default: audio/wav)
441
- */
442
- export function shareAudioFile(
443
- fileUri: string,
444
- mimeType = 'audio/wav'
445
- ): Promise<void> {
446
- return SherpaOnnx.shareTtsAudio(fileUri, mimeType);
447
- }
448
-
449
- // Export types
450
- export type {
451
- TTSInitializeOptions,
452
- TTSModelType,
453
- SynthesisOptions,
454
- GeneratedAudio,
455
- GeneratedAudioWithTimestamps,
456
- TtsSubtitleItem,
457
- TTSModelInfo,
458
- } from './types';
1
+ import { DeviceEventEmitter } from 'react-native';
2
+ import SherpaOnnx from '../NativeSherpaOnnx';
3
+ import type {
4
+ TTSInitializeOptions,
5
+ TTSModelType,
6
+ TtsModelOptions,
7
+ TtsUpdateOptions,
8
+ TtsGenerationOptions,
9
+ GeneratedAudio,
10
+ GeneratedAudioWithTimestamps,
11
+ TTSModelInfo,
12
+ TtsEngine,
13
+ TtsStreamChunk,
14
+ TtsStreamEnd,
15
+ TtsStreamError,
16
+ TtsStreamHandlers,
17
+ } from './types';
18
+ import type { ModelPathConfig } from '../types';
19
+ import { resolveModelPath } from '../utils';
20
+
21
+ let ttsInstanceCounter = 0;
22
+
23
+ /**
24
+ * Flatten model-specific options for the given model type to native init/update params.
25
+ * When modelType is 'auto' or missing, returns undefined for all (native uses defaults).
26
+ */
27
+ function flattenTtsModelOptionsForNative(
28
+ modelType: TTSModelType | undefined,
29
+ modelOptions: TtsModelOptions | undefined
30
+ ): {
31
+ noiseScale: number | undefined;
32
+ noiseScaleW: number | undefined;
33
+ lengthScale: number | undefined;
34
+ } {
35
+ if (
36
+ !modelOptions ||
37
+ !modelType ||
38
+ modelType === 'auto' ||
39
+ modelType === 'zipvoice' // Zipvoice does not use noise/length scale; native uses its own defaults
40
+ )
41
+ return {
42
+ noiseScale: undefined,
43
+ noiseScaleW: undefined,
44
+ lengthScale: undefined,
45
+ };
46
+ const block =
47
+ modelType === 'vits'
48
+ ? modelOptions.vits
49
+ : modelType === 'matcha'
50
+ ? modelOptions.matcha
51
+ : modelType === 'kokoro'
52
+ ? modelOptions.kokoro
53
+ : modelType === 'kitten'
54
+ ? modelOptions.kitten
55
+ : modelType === 'pocket'
56
+ ? modelOptions.pocket
57
+ : undefined;
58
+ if (!block)
59
+ return {
60
+ noiseScale: undefined,
61
+ noiseScaleW: undefined,
62
+ lengthScale: undefined,
63
+ };
64
+ const out: {
65
+ noiseScale: number | undefined;
66
+ noiseScaleW: number | undefined;
67
+ lengthScale: number | undefined;
68
+ } = {
69
+ noiseScale: undefined,
70
+ noiseScaleW: undefined,
71
+ lengthScale: undefined,
72
+ };
73
+ const n = block as {
74
+ noiseScale?: number;
75
+ noiseScaleW?: number;
76
+ lengthScale?: number;
77
+ };
78
+ if (n.noiseScale !== undefined && typeof n.noiseScale === 'number')
79
+ out.noiseScale = n.noiseScale;
80
+ if (n.noiseScaleW !== undefined && typeof n.noiseScaleW === 'number')
81
+ out.noiseScaleW = n.noiseScaleW;
82
+ if (n.lengthScale !== undefined && typeof n.lengthScale === 'number')
83
+ out.lengthScale = n.lengthScale;
84
+ return out;
85
+ }
86
+
87
+ /**
88
+ * Detect TTS model type and structure without initializing the engine.
89
+ * Uses the same native file-based detection as createTTS. Stateless; no instance required.
90
+ *
91
+ * @param modelPath - Model path configuration (asset, file, or auto)
92
+ * @param options - Optional modelType (default: 'auto')
93
+ * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
94
+ * @example
95
+ * ```typescript
96
+ * const result = await detectTtsModel({ type: 'asset', path: 'models/vits-piper-en' });
97
+ * if (result.success) console.log('Detected type:', result.modelType, result.detectedModels);
98
+ * ```
99
+ */
100
+ export async function detectTtsModel(
101
+ modelPath: ModelPathConfig,
102
+ options?: { modelType?: TTSModelType }
103
+ ): Promise<{
104
+ success: boolean;
105
+ detectedModels: Array<{ type: string; modelDir: string }>;
106
+ modelType?: string;
107
+ }> {
108
+ const resolvedPath = await resolveModelPath(modelPath);
109
+ return SherpaOnnx.detectTtsModel(resolvedPath, options?.modelType);
110
+ }
111
+
112
+ /**
113
+ * Convert TtsGenerationOptions to a flat object for the native bridge.
114
+ * Flattens referenceAudio { samples, sampleRate } to referenceAudio array + referenceSampleRate.
115
+ */
116
+ function toNativeTtsOptions(
117
+ options?: TtsGenerationOptions
118
+ ): Record<string, unknown> {
119
+ if (options == null) return {};
120
+ const out: Record<string, unknown> = {};
121
+ if (options.sid !== undefined) out.sid = options.sid;
122
+ if (options.speed !== undefined) out.speed = options.speed;
123
+ if (options.silenceScale !== undefined)
124
+ out.silenceScale = options.silenceScale;
125
+ if (options.referenceAudio != null) {
126
+ out.referenceAudio = options.referenceAudio.samples;
127
+ out.referenceSampleRate = options.referenceAudio.sampleRate;
128
+ }
129
+ if (options.referenceText !== undefined)
130
+ out.referenceText = options.referenceText;
131
+ if (options.numSteps !== undefined) out.numSteps = options.numSteps;
132
+ if (options.extra != null && Object.keys(options.extra).length > 0)
133
+ out.extra = options.extra;
134
+ return out;
135
+ }
136
+
137
+ // TTS stream events are sent from native via sendEventWithName; use DeviceEventEmitter
138
+ // so we don't need NativeEventEmitter (which expects addListener/removeListeners on the module).
139
/**
 * Create a TTS engine backed by its own native instance.
 *
 * `options` may be a full initialization object (recognised by its
 * `modelPath` key) or a bare model path configuration, in which case every
 * other setting is left unset. Unset settings fall back to `'auto'` for the
 * model type, `2` threads and `debug = false` when passed to native.
 *
 * Call `destroy()` on the returned engine when done to free native resources;
 * every other method rejects once the engine has been destroyed.
 *
 * @param options - TTS initialization options or model path configuration
 * @returns Promise resolving to a TtsEngine instance
 * @throws Error when native initialization reports `success: false`; the
 *   message embeds the JSON-serialised list of detected models.
 * @example
 * ```typescript
 * const tts = await createTTS({
 *   modelPath: { type: 'asset', path: 'models/vits-piper-en' },
 *   modelType: 'vits',
 *   modelOptions: { vits: { noiseScale: 0.667 } },
 * });
 * const audio = await tts.generateSpeech('Hello world');
 * await tts.destroy();
 * ```
 */
export async function createTTS(
  options: TTSInitializeOptions | ModelPathConfig
): Promise<TtsEngine> {
  const instanceId = `tts_${++ttsInstanceCounter}`;

  // A bare ModelPathConfig carries no `modelPath` key: use it as the path and
  // treat all remaining settings as unset.
  const modelPath: ModelPathConfig =
    'modelPath' in options ? options.modelPath : options;
  const settings: Partial<TTSInitializeOptions> =
    'modelPath' in options ? options : {};
  const {
    modelType,
    provider,
    numThreads,
    debug,
    modelOptions,
    ruleFsts,
    ruleFars,
    maxNumSentences,
    silenceScale,
  } = settings;

  // Flatten first, then resolve the path (same order as the native call needs).
  const flat = flattenTtsModelOptionsForNative(modelType, modelOptions);
  const resolvedPath = await resolveModelPath(modelPath);

  const initResult = await SherpaOnnx.initializeTts(
    instanceId,
    resolvedPath,
    modelType ?? 'auto',
    numThreads ?? 2,
    debug ?? false,
    flat.noiseScale,
    flat.noiseScaleW,
    flat.lengthScale,
    ruleFsts,
    ruleFars,
    maxNumSentences,
    silenceScale,
    provider
  );

  if (!initResult.success) {
    const detected = JSON.stringify(initResult.detectedModels ?? []);
    throw new Error(`TTS initialization failed: ${detected}`);
  }

  // Model type later used by updateParams when the caller does not name one:
  // the explicit (non-'auto') type if given, else the first detected model.
  let effectiveModelType: TTSModelType | undefined;
  if (modelType && modelType !== 'auto') {
    effectiveModelType = modelType;
  } else {
    effectiveModelType = initResult.detectedModels?.[0]?.type as TTSModelType;
  }

  let destroyed = false;

  /** Throws when the engine has already been destroyed. */
  const assertAlive = (): void => {
    if (!destroyed) return;
    throw new Error(
      `TTS instance ${instanceId} has been destroyed; cannot call methods on it.`
    );
  };

  /** Maps an unset value to NaN, which is what updateTtsParams receives for "not provided". */
  const orNaN = (value: number | undefined): number =>
    value === undefined ? Number.NaN : value;

  /**
   * Subscribes to a device event and forwards payloads to `deliver`, skipping
   * payloads that carry an `instanceId` belonging to another engine.
   */
  function listen<T>(eventName: string, deliver: (payload: T) => void) {
    return DeviceEventEmitter.addListener(eventName, (event: unknown) => {
      const owner = (event as { instanceId?: unknown }).instanceId;
      if (owner != null && owner !== instanceId) return;
      deliver(event as T);
    });
  }

  const ttsEngine: TtsEngine = {
    get instanceId() {
      return instanceId;
    },

    async generateSpeech(
      text: string,
      generationOptions?: TtsGenerationOptions
    ): Promise<GeneratedAudio> {
      assertAlive();
      return SherpaOnnx.generateTts(
        instanceId,
        text,
        toNativeTtsOptions(generationOptions)
      );
    },

    async generateSpeechWithTimestamps(
      text: string,
      generationOptions?: TtsGenerationOptions
    ): Promise<GeneratedAudioWithTimestamps> {
      assertAlive();
      return SherpaOnnx.generateTtsWithTimestamps(
        instanceId,
        text,
        toNativeTtsOptions(generationOptions)
      );
    },

    async generateSpeechStream(
      text: string,
      generationOptions: TtsGenerationOptions | undefined,
      handlers: TtsStreamHandlers
    ): Promise<() => void> {
      assertAlive();

      // Handlers are looked up on each event, so they are read lazily.
      const subscriptions = [
        listen<TtsStreamChunk>('ttsStreamChunk', (chunk) => {
          handlers.onChunk?.(chunk);
        }),
        listen<TtsStreamEnd>('ttsStreamEnd', (end) => {
          handlers.onEnd?.(end);
        }),
        listen<TtsStreamError>('ttsStreamError', (failure) => {
          handlers.onError?.(failure);
        }),
      ];

      const detachAll = (): void => {
        for (const subscription of subscriptions) {
          subscription.remove();
        }
      };

      try {
        await SherpaOnnx.generateTtsStream(
          instanceId,
          text,
          toNativeTtsOptions(generationOptions)
        );
      } catch (error) {
        // Starting the stream failed: drop the listeners before propagating.
        detachAll();
        throw error;
      }

      // Caller invokes this to stop receiving stream events.
      return detachAll;
    },

    async cancelSpeechStream(): Promise<void> {
      assertAlive();
      return SherpaOnnx.cancelTtsStream(instanceId);
    },

    async startPcmPlayer(sampleRate: number, channels: number): Promise<void> {
      assertAlive();
      return SherpaOnnx.startTtsPcmPlayer(instanceId, sampleRate, channels);
    },

    async writePcmChunk(samples: number[]): Promise<void> {
      assertAlive();
      return SherpaOnnx.writeTtsPcmChunk(instanceId, samples);
    },

    async stopPcmPlayer(): Promise<void> {
      assertAlive();
      return SherpaOnnx.stopTtsPcmPlayer(instanceId);
    },

    async updateParams(update: TtsUpdateOptions): Promise<{
      success: boolean;
      detectedModels: Array<{ type: string; modelDir: string }>;
    }> {
      assertAlive();
      const requestedType = update.modelType;
      const typeForUpdate =
        requestedType && requestedType !== 'auto'
          ? requestedType
          : effectiveModelType;
      const { noiseScale, noiseScaleW, lengthScale } =
        flattenTtsModelOptionsForNative(typeForUpdate, update.modelOptions);
      return SherpaOnnx.updateTtsParams(
        instanceId,
        orNaN(noiseScale),
        orNaN(noiseScaleW),
        orNaN(lengthScale)
      );
    },

    async getModelInfo(): Promise<TTSModelInfo> {
      assertAlive();
      // Both queries are issued before either is awaited.
      const pending = [
        SherpaOnnx.getTtsSampleRate(instanceId),
        SherpaOnnx.getTtsNumSpeakers(instanceId),
      ] as const;
      const [sampleRate, numSpeakers] = await Promise.all(pending);
      return { sampleRate, numSpeakers };
    },

    async getSampleRate(): Promise<number> {
      assertAlive();
      return SherpaOnnx.getTtsSampleRate(instanceId);
    },

    async getNumSpeakers(): Promise<number> {
      assertAlive();
      return SherpaOnnx.getTtsNumSpeakers(instanceId);
    },

    async destroy(): Promise<void> {
      // Repeat calls are no-ops; the flag is set before the native unload starts.
      if (!destroyed) {
        destroyed = true;
        await SherpaOnnx.unloadTts(instanceId);
      }
    },
  };

  return ttsEngine;
}
378
+
379
+ // ========== Module-level utilities (stateless, no instance required) ==========
380
+
381
/**
 * Save generated TTS audio to a WAV file.
 *
 * @param audio - Generated audio; its `samples` and `sampleRate` are forwarded to native.
 * @param filePath - Destination path, passed to the native module unchanged.
 * @returns The promise produced by the native `saveTtsAudioToFile` call.
 */
export function saveAudioToFile(
  audio: GeneratedAudio,
  filePath: string
): Promise<string> {
  const { samples, sampleRate } = audio;
  return SherpaOnnx.saveTtsAudioToFile(samples, sampleRate, filePath);
}
394
+
395
/**
 * Save generated TTS audio to a WAV file via Android SAF content URI.
 *
 * @param audio - Generated audio; its `samples` and `sampleRate` are forwarded to native.
 * @param directoryUri - Content URI of the target directory, passed through unchanged.
 * @param filename - Name of the file to create, passed through unchanged.
 * @returns The promise produced by the native `saveTtsAudioToContentUri` call.
 */
export function saveAudioToContentUri(
  audio: GeneratedAudio,
  directoryUri: string,
  filename: string
): Promise<string> {
  const { samples, sampleRate } = audio;
  return SherpaOnnx.saveTtsAudioToContentUri(
    samples,
    sampleRate,
    directoryUri,
    filename
  );
}
410
+
411
/**
 * Save a text file via Android SAF content URI.
 *
 * @param text - Text content to write.
 * @param directoryUri - Content URI of the target directory.
 * @param filename - Name of the file to create.
 * @param mimeType - MIME type forwarded to native; defaults to `'text/plain'`.
 * @returns The promise produced by the native `saveTtsTextToContentUri` call.
 */
export function saveTextToContentUri(
  text: string,
  directoryUri: string,
  filename: string,
  mimeType = 'text/plain'
): Promise<string> {
  const pendingSave = SherpaOnnx.saveTtsTextToContentUri(
    text,
    directoryUri,
    filename,
    mimeType
  );
  return pendingSave;
}
427
+
428
/**
 * Copy a SAF content URI to a cache file for local playback (Android only).
 *
 * @param fileUri - Content URI of the source file.
 * @param filename - Name to use for the cached copy.
 * @returns The promise produced by the native `copyTtsContentUriToCache` call.
 */
export function copyContentUriToCache(
  fileUri: string,
  filename: string
): Promise<string> {
  const pendingCopy = SherpaOnnx.copyTtsContentUriToCache(fileUri, filename);
  return pendingCopy;
}
437
+
438
/**
 * Share a TTS audio file (file path or content URI).
 *
 * @param fileUri - File path or content URI of the audio to share.
 * @param mimeType - MIME type forwarded to native; defaults to `'audio/wav'`.
 * @returns The promise produced by the native `shareTtsAudio` call.
 */
export function shareAudioFile(
  fileUri: string,
  mimeType = 'audio/wav'
): Promise<void> {
  const pendingShare = SherpaOnnx.shareTtsAudio(fileUri, mimeType);
  return pendingShare;
}
447
+
448
+ // Export types and runtime type list
449
+ export type {
450
+ TTSInitializeOptions,
451
+ TTSModelType,
452
+ TtsModelOptions,
453
+ TtsVitsModelOptions,
454
+ TtsMatchaModelOptions,
455
+ TtsKokoroModelOptions,
456
+ TtsKittenModelOptions,
457
+ TtsPocketModelOptions,
458
+ TtsUpdateOptions,
459
+ TtsGenerationOptions,
460
+ GeneratedAudio,
461
+ GeneratedAudioWithTimestamps,
462
+ TtsSubtitleItem,
463
+ TTSModelInfo,
464
+ TtsEngine,
465
+ TtsStreamHandlers,
466
+ TtsStreamChunk,
467
+ TtsStreamEnd,
468
+ TtsStreamError,
469
+ } from './types';
470
+ export { TTS_MODEL_TYPES } from './types';