react-native-sherpa-onnx 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +21 -7
  2. package/SherpaOnnx.podspec +1 -1
  3. package/android/build.gradle +35 -26
  4. package/android/prebuilt-download.gradle +27 -14
  5. package/android/src/main/cpp/CMakeLists.txt +51 -17
  6. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +14 -0
  7. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +16 -0
  8. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +3 -0
  9. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +19 -2
  10. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +2 -1
  11. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +1 -0
  12. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +114 -8
  13. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +535 -0
  14. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +10 -10
  15. package/ios/SherpaOnnx+OnlineSTT.mm +365 -0
  16. package/ios/SherpaOnnx+TTS.mm +35 -9
  17. package/ios/SherpaOnnx.mm +6 -0
  18. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +3 -0
  19. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +16 -0
  20. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +19 -2
  21. package/ios/model_detect/sherpa-onnx-model-detect.h +2 -1
  22. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +85 -0
  23. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +270 -0
  24. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  25. package/lib/module/index.js +2 -2
  26. package/lib/module/stt/index.js +4 -0
  27. package/lib/module/stt/index.js.map +1 -1
  28. package/lib/module/stt/streaming.js +257 -0
  29. package/lib/module/stt/streaming.js.map +1 -0
  30. package/lib/module/stt/streamingTypes.js +38 -0
  31. package/lib/module/stt/streamingTypes.js.map +1 -0
  32. package/lib/module/tts/index.js +4 -43
  33. package/lib/module/tts/index.js.map +1 -1
  34. package/lib/module/tts/streaming.js +220 -0
  35. package/lib/module/tts/streaming.js.map +1 -0
  36. package/lib/module/tts/streamingTypes.js +4 -0
  37. package/lib/module/tts/streamingTypes.js.map +1 -0
  38. package/lib/module/tts/types.js +8 -1
  39. package/lib/module/tts/types.js.map +1 -1
  40. package/lib/typescript/src/NativeSherpaOnnx.d.ts +66 -1
  41. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  42. package/lib/typescript/src/stt/index.d.ts +3 -0
  43. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  44. package/lib/typescript/src/stt/streaming.d.ts +42 -0
  45. package/lib/typescript/src/stt/streaming.d.ts.map +1 -0
  46. package/lib/typescript/src/stt/streamingTypes.d.ts +122 -0
  47. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -0
  48. package/lib/typescript/src/tts/index.d.ts +3 -1
  49. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  50. package/lib/typescript/src/tts/streaming.d.ts +24 -0
  51. package/lib/typescript/src/tts/streaming.d.ts.map +1 -0
  52. package/lib/typescript/src/tts/streamingTypes.d.ts +27 -0
  53. package/lib/typescript/src/tts/streamingTypes.d.ts.map +1 -0
  54. package/lib/typescript/src/tts/types.d.ts +19 -6
  55. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  56. package/package.json +1 -2
  57. package/src/NativeSherpaOnnx.ts +95 -0
  58. package/src/index.tsx +2 -2
  59. package/src/stt/index.ts +17 -0
  60. package/src/stt/streaming.ts +361 -0
  61. package/src/stt/streamingTypes.ts +151 -0
  62. package/src/tts/index.ts +6 -66
  63. package/src/tts/streaming.ts +336 -0
  64. package/src/tts/streamingTypes.ts +54 -0
  65. package/src/tts/types.ts +20 -10
  66. package/android/codegen.gradle +0 -57
@@ -0,0 +1,361 @@
1
+ import SherpaOnnx from '../NativeSherpaOnnx';
2
+ import { resolveModelPath } from '../utils';
3
+ import type {
4
+ OnlineSTTModelType,
5
+ StreamingSttEngine,
6
+ StreamingSttInitOptions,
7
+ StreamingSttResult,
8
+ SttStream,
9
+ } from './streamingTypes';
10
+
11
+ let streamingSttInstanceCounter = 0;
12
+
13
/**
 * Map a detected STT model type (the `modelType` reported by detectSttModel)
 * to the online (streaming) model type used when initializing streaming STT.
 *
 * `zipformer_ctc` and the generic `ctc` both map to `zipformer2_ctc`. The
 * online name `zipformer2_ctc` itself is passed through unchanged, so every
 * type listed in the error message below is accepted and the mapping is
 * idempotent.
 *
 * @param detectedType - Detected model type; `undefined` is treated as `''`.
 * @returns The matching online model type.
 * @throws Error if the detected type has no streaming support.
 */
export function mapDetectedToOnlineType(
  detectedType: string | undefined
): OnlineSTTModelType {
  const t = detectedType ?? '';
  switch (t) {
    case 'transducer':
      return 'transducer';
    case 'paraformer':
      return 'paraformer';
    case 'nemo_ctc':
      return 'nemo_ctc';
    case 'tone_ctc':
      return 'tone_ctc';
    // Offline/generic CTC names and the online name all resolve to the same
    // online type.
    case 'zipformer_ctc':
    case 'ctc':
    case 'zipformer2_ctc':
      return 'zipformer2_ctc';
    default:
      throw new Error(
        `Model type "${t}" is not supported for streaming STT. Use createSTT() for offline recognition, or pass a supported modelType: transducer, paraformer, zipformer2_ctc, nemo_ctc, tone_ctc.`
      );
  }
}
39
+
40
/**
 * Non-throwing variant of mapDetectedToOnlineType().
 *
 * Handy for checking up front whether a detected model can be used with
 * createStreamingSTT() (e.g. for live transcription).
 *
 * @param detectedType - Detected STT model type (from detectSttModel).
 * @returns The online (streaming) model type, or `null` when the mapping throws
 *          because streaming is not supported for that type.
 */
export function getOnlineTypeOrNull(
  detectedType: string | undefined
): OnlineSTTModelType | null {
  let onlineType: OnlineSTTModelType | null = null;
  try {
    onlineType = mapDetectedToOnlineType(detectedType);
  } catch {
    // Unsupported type: keep the null fallback.
  }
  return onlineType;
}
53
+ let sttStreamCounter = 0;
54
+
55
/**
 * Coerce a raw native result into a well-formed StreamingSttResult.
 *
 * A non-string `text` becomes `''`; `tokens` / `timestamps` that are not
 * arrays become `[]`. Arrays are passed through by reference and their
 * elements are not inspected.
 */
function normalizeStreamingResult(raw: {
  text?: string;
  tokens?: string[] | unknown;
  timestamps?: number[] | unknown;
}): StreamingSttResult {
  const { text, tokens, timestamps } = raw;

  const safeText = typeof text === 'string' ? text : '';
  const safeTokens = Array.isArray(tokens) ? (tokens as string[]) : [];
  const safeTimestamps = Array.isArray(timestamps)
    ? (timestamps as number[])
    : [];

  return { text: safeText, tokens: safeTokens, timestamps: safeTimestamps };
}
68
+
69
/**
 * Turn StreamingSttInitOptions into the flat parameter bag used for native
 * online STT initialization.
 *
 * The nested endpointConfig (rule1, rule2, rule3) is spread out into nine
 * `ruleN…` fields. `enableEndpoint`, `decodingMethod` and `maxActivePaths`
 * get their defaults here; every other optional field is copied as-is and may
 * therefore be `undefined`. `modelDir` is returned empty and must be filled in
 * by the caller once the model path has been resolved.
 */
function flattenInitOptionsForNative(options: StreamingSttInitOptions): {
  modelDir: string;
  modelType: string;
  enableEndpoint: boolean;
  decodingMethod: string;
  maxActivePaths: number;
  hotwordsFile?: string;
  hotwordsScore?: number;
  numThreads?: number;
  provider?: string;
  ruleFsts?: string;
  ruleFars?: string;
  blankPenalty?: number;
  debug?: boolean;
  rule1MustContainNonSilence?: boolean;
  rule1MinTrailingSilence?: number;
  rule1MinUtteranceLength?: number;
  rule2MustContainNonSilence?: boolean;
  rule2MinTrailingSilence?: number;
  rule2MinUtteranceLength?: number;
  rule3MustContainNonSilence?: boolean;
  rule3MinTrailingSilence?: number;
  rule3MinUtteranceLength?: number;
} {
  const endpoint = options.endpointConfig;
  const first = endpoint?.rule1;
  const second = endpoint?.rule2;
  const third = endpoint?.rule3;

  // Always-present settings, with defaults applied.
  const core = {
    modelDir: '', // filled by caller after resolveModelPath
    modelType: options.modelType,
    enableEndpoint: options.enableEndpoint ?? true,
    decodingMethod: options.decodingMethod ?? 'greedy_search',
    maxActivePaths: options.maxActivePaths ?? 4,
  };

  // Optional settings, forwarded untouched (possibly undefined).
  const tuning = {
    hotwordsFile: options.hotwordsFile,
    hotwordsScore: options.hotwordsScore,
    numThreads: options.numThreads,
    provider: options.provider,
    ruleFsts: options.ruleFsts,
    ruleFars: options.ruleFars,
    blankPenalty: options.blankPenalty,
    debug: options.debug,
  };

  // Endpoint rules, three fields per rule.
  const endpointRules = {
    rule1MustContainNonSilence: first?.mustContainNonSilence,
    rule1MinTrailingSilence: first?.minTrailingSilence,
    rule1MinUtteranceLength: first?.minUtteranceLength,
    rule2MustContainNonSilence: second?.mustContainNonSilence,
    rule2MinTrailingSilence: second?.minTrailingSilence,
    rule2MinUtteranceLength: second?.minUtteranceLength,
    rule3MustContainNonSilence: third?.mustContainNonSilence,
    rule3MinTrailingSilence: third?.minTrailingSilence,
    rule3MinUtteranceLength: third?.minUtteranceLength,
  };

  return { ...core, ...tuning, ...endpointRules };
}
123
+
124
/** Options object accepted by the native initializeOnlineSttWithOptions call. */
type NativeOnlineSttOptions = Parameters<
  typeof SherpaOnnx.initializeOnlineSttWithOptions
>[1];

/** Peak level that input normalization scales a chunk towards. */
const INPUT_NORMALIZATION_TARGET_PEAK = 0.8;

/** Upper bound on the gain input normalization applies to a quiet chunk. */
const INPUT_NORMALIZATION_MAX_GAIN = 80;

/** Return a shallow copy of `value` without the keys whose value is `undefined`. */
function omitUndefined<T extends object>(value: T): T {
  const defined = {} as T;
  for (const key of Object.keys(value) as Array<keyof T>) {
    const entry = value[key];
    if (entry !== undefined) defined[key] = entry;
  }
  return defined;
}

/**
 * Scale a chunk so its peak approaches INPUT_NORMALIZATION_TARGET_PEAK, with the
 * gain capped at INPUT_NORMALIZATION_MAX_GAIN and the output clamped to [-1, 1].
 * The input array is not modified; an empty chunk is returned unchanged.
 */
function normalizeInputLevel(samples: number[]): number[] {
  if (samples.length === 0) return samples;

  // Floor the peak so an all-zero chunk cannot cause a division by zero.
  let peak = 1e-10;
  for (let i = 0; i < samples.length; i++) {
    const magnitude = Math.abs(samples[i]!);
    if (magnitude > peak) peak = magnitude;
  }

  // For peaks below 0.01 the quotient exceeds the cap, so the cap applies.
  const gain = Math.min(
    INPUT_NORMALIZATION_MAX_GAIN,
    INPUT_NORMALIZATION_TARGET_PEAK / peak
  );

  const scaled = new Array<number>(samples.length);
  for (let i = 0; i < samples.length; i++) {
    const v = samples[i]! * gain;
    scaled[i] = v < -1 ? -1 : v > 1 ? 1 : v;
  }
  return scaled;
}

/**
 * Create a streaming (online) STT engine. Use this for real-time recognition with
 * partial results and endpoint detection. Call destroy() when done.
 *
 * The model path is resolved first. When `modelType` is `'auto'` (or missing at
 * runtime) the type is detected with detectSttModel and mapped to an online
 * type; otherwise the given type is used as-is. The native recognizer is then
 * initialized under a freshly generated instance id.
 *
 * @param options - Streaming STT init options (modelPath and modelType; use
 *   modelType 'auto' to detect the type from the model directory)
 * @returns Promise resolving to a StreamingSttEngine
 * @throws Error if auto-detection fails, if the detected type has no streaming
 *   support, or if native initialization reports failure
 * @example
 * ```typescript
 * // With explicit model type
 * const engine = await createStreamingSTT({
 *   modelPath: { type: 'asset', path: 'models/streaming-zipformer-en' },
 *   modelType: 'transducer',
 * });
 * const stream = await engine.createStream();
 * await stream.acceptWaveform(samples, 16000);
 * if (await stream.isReady()) {
 *   await stream.decode();
 *   const result = await stream.getResult();
 *   console.log(result.text);
 * }
 * await stream.release();
 * await engine.destroy();
 * ```
 * @example
 * ```typescript
 * // With auto-detection
 * const autoEngine = await createStreamingSTT({
 *   modelPath: { type: 'asset', path: 'models/sherpa-onnx-streaming-t-one-russian-2025-09-08' },
 *   modelType: 'auto',
 * });
 * ```
 */
export async function createStreamingSTT(
  options: StreamingSttInitOptions
): Promise<StreamingSttEngine> {
  const instanceId = `streaming_stt_${++streamingSttInstanceCounter}`;
  const resolvedPath = await resolveModelPath(options.modelPath);

  let effectiveModelType: OnlineSTTModelType;
  if (options.modelType === 'auto' || options.modelType === undefined) {
    const detectResult = await SherpaOnnx.detectSttModel(
      resolvedPath,
      undefined,
      undefined
    );
    if (!detectResult.success) {
      const errMsg =
        'error' in detectResult &&
        typeof (detectResult as { error?: string }).error === 'string'
          ? (detectResult as { error: string }).error
          : 'Unknown error';
      throw new Error(
        `Streaming STT auto-detection failed for ${resolvedPath}. ${errMsg}`
      );
    }
    effectiveModelType = mapDetectedToOnlineType(detectResult.modelType);
  } else {
    effectiveModelType = options.modelType;
  }

  // Send only defined values (no undefined) to avoid an iOS TurboModule
  // marshalling crash when options contain undefined.
  const nativeOptions: NativeOnlineSttOptions = omitUndefined({
    ...flattenInitOptionsForNative({
      ...options,
      modelType: effectiveModelType,
    }),
    modelDir: resolvedPath,
  });

  const result = await SherpaOnnx.initializeOnlineSttWithOptions(
    instanceId,
    nativeOptions
  );

  if (!result.success) {
    throw new Error(`Streaming STT initialization failed for ${instanceId}`);
  }

  // Normalization is on unless explicitly disabled with `false`.
  const enableInputNormalization = options.enableInputNormalization !== false;
  let destroyed = false;

  const guard = () => {
    if (destroyed) {
      throw new Error(
        `Streaming STT engine ${instanceId} has been destroyed; cannot call methods on it.`
      );
    }
  };

  const engine: StreamingSttEngine = {
    get instanceId() {
      return instanceId;
    },

    async createStream(hotwords?: string): Promise<SttStream> {
      guard();
      const streamId = `stt_stream_${++sttStreamCounter}`;
      await SherpaOnnx.createSttStream(instanceId, streamId, hotwords);

      let released = false;
      const streamGuard = () => {
        if (destroyed) {
          throw new Error(
            `Streaming STT engine ${instanceId} has been destroyed.`
          );
        }
        if (released) {
          throw new Error(
            `Stream ${streamId} has been released; cannot call methods on it.`
          );
        }
      };

      const stream: SttStream = {
        get streamId() {
          return streamId;
        },

        async acceptWaveform(
          samples: number[],
          sampleRate: number
        ): Promise<void> {
          streamGuard();
          await SherpaOnnx.acceptSttWaveform(streamId, samples, sampleRate);
        },

        async inputFinished(): Promise<void> {
          streamGuard();
          await SherpaOnnx.sttStreamInputFinished(streamId);
        },

        async decode(): Promise<void> {
          streamGuard();
          await SherpaOnnx.decodeSttStream(streamId);
        },

        async isReady(): Promise<boolean> {
          streamGuard();
          return SherpaOnnx.isSttStreamReady(streamId);
        },

        async getResult(): Promise<StreamingSttResult> {
          streamGuard();
          const raw = await SherpaOnnx.getSttStreamResult(streamId);
          return normalizeStreamingResult(raw);
        },

        async isEndpoint(): Promise<boolean> {
          streamGuard();
          return SherpaOnnx.isSttStreamEndpoint(streamId);
        },

        async reset(): Promise<void> {
          streamGuard();
          await SherpaOnnx.resetSttStream(streamId);
        },

        async release(): Promise<void> {
          if (released) return;
          // Flag first so other calls are rejected while the release is in
          // flight; roll back on failure so the caller can retry instead of
          // being left with a stream that can no longer be released.
          released = true;
          try {
            await SherpaOnnx.releaseSttStream(streamId);
          } catch (error) {
            released = false;
            throw error;
          }
        },

        async processAudioChunk(
          samples: number[],
          sampleRate: number
        ): Promise<{ result: StreamingSttResult; isEndpoint: boolean }> {
          streamGuard();
          const toSend = enableInputNormalization
            ? normalizeInputLevel(samples)
            : samples;
          const raw = await SherpaOnnx.processSttAudioChunk(
            streamId,
            toSend,
            sampleRate
          );
          return {
            result: normalizeStreamingResult(raw),
            isEndpoint: Boolean(raw.isEndpoint),
          };
        },
      };

      return stream;
    },

    async destroy(): Promise<void> {
      if (destroyed) return;
      // Same pattern as release(): block further use immediately, but allow
      // a retry when the native unload fails.
      destroyed = true;
      try {
        await SherpaOnnx.unloadOnlineStt(instanceId);
      } catch (error) {
        destroyed = false;
        throw error;
      }
    },
  };

  return engine;
}
@@ -0,0 +1,151 @@
1
+ import type { ModelPathConfig } from '../types';
2
+
3
/**
 * Runtime list of supported online (streaming) STT model types.
 * These models use OnlineRecognizer + OnlineStream in sherpa-onnx.
 * Must match the native OnlineRecognizer model config (transducer, paraformer, zipformer2_ctc, nemo_ctc, tone_ctc).
 *
 * This tuple is the single source of truth: OnlineSTTModelType is derived
 * from it, so the runtime list and the type cannot drift apart.
 */
export const ONLINE_STT_MODEL_TYPES = [
  'transducer',
  'paraformer',
  'zipformer2_ctc',
  'nemo_ctc',
  'tone_ctc',
] as const;

/**
 * Online (streaming) STT model types: the union of the entries of
 * ONLINE_STT_MODEL_TYPES.
 */
export type OnlineSTTModelType = (typeof ONLINE_STT_MODEL_TYPES)[number];
23
+
24
/**
 * One end-of-utterance rule for streaming recognition
 * (counterpart of the Kotlin EndpointRule).
 */
export interface EndpointRule {
  /**
   * When true, the rule can only match a segment that contains non-silence.
   */
  mustContainNonSilence: boolean;

  /**
   * Minimum trailing silence, in seconds.
   */
  minTrailingSilence: number;

  /**
   * Minimum utterance length, in seconds. EndpointConfig's rule3 uses this
   * field to cap utterance length (e.g. 20s).
   */
  minUtteranceLength: number;
}
36
+
37
/**
 * Endpoint detection settings (counterpart of the Kotlin EndpointConfig).
 * Holds up to three rules; the first rule that matches ends the utterance.
 */
export interface EndpointConfig {
  /**
   * Rule 1, e.g. 2.4s of trailing silence with no speech required.
   */
  rule1?: EndpointRule;

  /**
   * Rule 2, e.g. 1.4s of trailing silence with speech required.
   */
  rule2?: EndpointRule;

  /**
   * Rule 3, e.g. a maximum utterance length of 20s.
   */
  rule3?: EndpointRule;
}
49
+
50
/**
 * Settings accepted by createStreamingSTT() when setting up the streaming
 * (online) STT engine.
 */
export interface StreamingSttInitOptions {
  /**
   * Where the model lives (asset, file, or auto).
   */
  modelPath: ModelPathConfig;

  /**
   * Online model type. With 'auto' the type is detected from the model
   * directory (detectSttModel is called and its result mapped to an online type).
   */
  modelType: OnlineSTTModelType | 'auto';

  /**
   * Turn endpoint detection on or off. Default: true.
   */
  enableEndpoint?: boolean;

  /**
   * Endpoint rules. Defaults match Kotlin
   * (rule1: 2.4s silence, rule2: 1.4s + speech, rule3: 20s max).
   */
  endpointConfig?: EndpointConfig;

  /**
   * Decoding method. Default: "greedy_search".
   */
  decodingMethod?: 'greedy_search' | 'modified_beam_search';

  /**
   * Maximum number of active paths for beam search. Default: 4.
   */
  maxActivePaths?: number;

  /**
   * Path to a hotwords file (transducer/nemo_transducer).
   */
  hotwordsFile?: string;

  /**
   * Hotwords score. Default: 1.5.
   */
  hotwordsScore?: number;

  /**
   * Number of inference threads. Default: 1.
   */
  numThreads?: number;

  /**
   * Execution provider (e.g. "cpu").
   */
  provider?: string;

  /**
   * Path(s) to rule FSTs for ITN.
   */
  ruleFsts?: string;

  /**
   * Path(s) to rule FARs for ITN.
   */
  ruleFars?: string;

  /**
   * Blank penalty.
   */
  blankPenalty?: number;

  /**
   * Turn on debug logging. Default: false.
   */
  debug?: boolean;

  /**
   * Adaptive input normalization for the chunks passed to processAudioChunk().
   * When true (default), each chunk is scaled so its peak is ~0.8, which helps
   * with varying device levels (e.g. quiet mics on iOS). Set to false when the
   * audio is already in the expected range [-1, 1] and should be passed
   * through unchanged.
   */
  enableInputNormalization?: boolean;
}
89
+
90
/**
 * Recognition result produced by streaming STT, either partial or final
 * (counterpart of the Kotlin OnlineRecognizerResult).
 */
export interface StreamingSttResult {
  /** Recognized text. */
  text: string;
  /** Recognized tokens. */
  tokens: string[];
  /** Timestamps reported with the result. */
  timestamps: number[];
}
98
+
99
/**
 * A single streaming STT stream, obtained from StreamingSttEngine.createStream().
 * Typical use: feed audio with acceptWaveform, then decode / getResult / isEndpoint.
 */
export interface SttStream {
  readonly streamId: string;

  /**
   * Feed PCM samples (float in [-1, 1]) to the stream.
   */
  acceptWaveform(samples: number[], sampleRate: number): Promise<void>;

  /**
   * Signal that no more audio will be fed.
   */
  inputFinished(): Promise<void>;

  /**
   * Decode the audio accumulated so far (call when isReady() is true).
   */
  decode(): Promise<void>;

  /**
   * Resolves to true when there is enough audio to decode.
   */
  isReady(): Promise<boolean>;

  /**
   * Current partial or final result. Call after decode().
   */
  getResult(): Promise<StreamingSttResult>;

  /**
   * Resolves to true when an endpoint (end of utterance) was detected.
   */
  isEndpoint(): Promise<boolean>;

  /**
   * Reset the stream state so the stream can be reused.
   */
  reset(): Promise<void>;

  /**
   * Release the native stream; the stream must not be used afterwards.
   */
  release(): Promise<void>;

  /**
   * Convenience call: feed audio, auto-decode while ready, and return the
   * result together with the endpoint status.
   * Reduces bridge round-trips from 5 to 1 per chunk.
   */
  processAudioChunk(
    samples: number[],
    sampleRate: number
  ): Promise<{ result: StreamingSttResult; isEndpoint: boolean }>;
}
139
+
140
/**
 * Streaming STT engine (OnlineRecognizer), obtained from createStreamingSTT().
 */
export interface StreamingSttEngine {
  readonly instanceId: string;

  /**
   * Create a new stream for this recognizer.
   * A hotwords string may optionally be supplied.
   */
  createStream(hotwords?: string): Promise<SttStream>;

  /**
   * Release the native recognizer and all streams.
   */
  destroy(): Promise<void>;
}
package/src/tts/index.ts CHANGED
@@ -1,4 +1,3 @@
1
- import { DeviceEventEmitter } from 'react-native';
2
1
  import SherpaOnnx from '../NativeSherpaOnnx';
3
2
  import type {
4
3
  TTSInitializeOptions,
@@ -10,10 +9,6 @@ import type {
10
9
  GeneratedAudioWithTimestamps,
11
10
  TTSModelInfo,
12
11
  TtsEngine,
13
- TtsStreamChunk,
14
- TtsStreamEnd,
15
- TtsStreamError,
16
- TtsStreamHandlers,
17
12
  } from './types';
18
13
  import type { ModelPathConfig } from '../types';
19
14
  import { resolveModelPath } from '../utils';
@@ -135,7 +130,7 @@ function toNativeTtsOptions(
135
130
  }
136
131
 
137
132
  // TTS stream events are sent from native via sendEventWithName; use DeviceEventEmitter
138
- // so we don't need NativeEventEmitter (which expects addListener/removeListeners on the module).
133
+
139
134
  /**
140
135
  * Create a TTS engine instance. Call destroy() on the returned engine when done to free native resources.
141
136
  *
@@ -260,66 +255,6 @@ export async function createTTS(
260
255
  );
261
256
  },
262
257
 
263
- async generateSpeechStream(
264
- text: string,
265
- opts: TtsGenerationOptions | undefined,
266
- handlers: TtsStreamHandlers
267
- ): Promise<() => void> {
268
- guard();
269
- const subscriptions = [
270
- DeviceEventEmitter.addListener('ttsStreamChunk', (event: unknown) => {
271
- const e = event as TtsStreamChunk;
272
- if (e.instanceId != null && e.instanceId !== instanceId) return;
273
- handlers.onChunk?.(e);
274
- }),
275
- DeviceEventEmitter.addListener('ttsStreamEnd', (event: unknown) => {
276
- const e = event as TtsStreamEnd;
277
- if (e.instanceId != null && e.instanceId !== instanceId) return;
278
- handlers.onEnd?.(e);
279
- }),
280
- DeviceEventEmitter.addListener('ttsStreamError', (event: unknown) => {
281
- const e = event as TtsStreamError;
282
- if (e.instanceId != null && e.instanceId !== instanceId) return;
283
- handlers.onError?.(e);
284
- }),
285
- ];
286
-
287
- try {
288
- await SherpaOnnx.generateTtsStream(
289
- instanceId,
290
- text,
291
- toNativeTtsOptions(opts)
292
- );
293
- } catch (error) {
294
- subscriptions.forEach((sub) => sub.remove());
295
- throw error;
296
- }
297
-
298
- return () => {
299
- subscriptions.forEach((sub) => sub.remove());
300
- };
301
- },
302
-
303
- async cancelSpeechStream(): Promise<void> {
304
- guard();
305
- return SherpaOnnx.cancelTtsStream(instanceId);
306
- },
307
-
308
- async startPcmPlayer(sampleRate: number, channels: number): Promise<void> {
309
- guard();
310
- return SherpaOnnx.startTtsPcmPlayer(instanceId, sampleRate, channels);
311
- },
312
-
313
- async writePcmChunk(samples: number[]): Promise<void> {
314
- guard();
315
- return SherpaOnnx.writeTtsPcmChunk(instanceId, samples);
316
- },
317
-
318
- async stopPcmPlayer(): Promise<void> {
319
- guard();
320
- return SherpaOnnx.stopTtsPcmPlayer(instanceId);
321
- },
322
-
323
258
  async updateParams(opts: TtsUpdateOptions): Promise<{
324
259
  success: boolean;
325
260
  detectedModels: Array<{ type: string; modelDir: string }>;
@@ -445,6 +380,10 @@ export function shareAudioFile(
445
380
  return SherpaOnnx.shareTtsAudio(fileUri, mimeType);
446
381
  }
447
382
 
383
+ // Streaming TTS (separate engine; use createStreamingTTS for chunk callbacks and PCM playback)
384
+ export { createStreamingTTS } from './streaming';
385
+ export type { StreamingTtsEngine } from './streamingTypes';
386
+
448
387
  // Export types and runtime type list
449
388
  export type {
450
389
  TTSInitializeOptions,
@@ -462,6 +401,7 @@ export type {
462
401
  TtsSubtitleItem,
463
402
  TTSModelInfo,
464
403
  TtsEngine,
404
+ TtsStreamController,
465
405
  TtsStreamHandlers,
466
406
  TtsStreamChunk,
467
407
  TtsStreamEnd,