react-native-sherpa-onnx 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -77
- package/SherpaOnnx.podspec +79 -45
- package/android/build.gradle +8 -2
- package/android/prebuilt-download.gradle +70 -16
- package/android/prebuilt-versions.gradle +14 -6
- package/android/src/main/cpp/CMakeLists.txt +2 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
- package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
- package/ios/SherpaOnnx+STT.mm +2 -0
- package/ios/SherpaOnnx+TTS.mm +17 -0
- package/ios/SherpaOnnx.mm +27 -3
- package/ios/SherpaOnnxAudioConvert.h +28 -0
- package/ios/SherpaOnnxAudioConvert.mm +698 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
- package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +55 -1
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +14 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/index.js +10 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/stt/streaming.js +6 -3
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/tts/index.js +13 -1
- package/lib/module/tts/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +20 -1
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +10 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +12 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/check-model-csvs.sh +72 -0
- package/scripts/setup-ios-framework.sh +272 -191
- package/src/NativeSherpaOnnx.ts +37 -3
- package/src/audio/index.ts +84 -1
- package/src/download/ModelDownloadManager.ts +19 -0
- package/src/index.tsx +15 -0
- package/src/stt/streaming.ts +10 -5
- package/src/stt/streamingTypes.ts +1 -1
- package/src/tts/index.ts +25 -1
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/ios/scripts/patch-libarchive-includes.sh +0 -61
- package/ios/scripts/setup-ios-libarchive.sh +0 -98
package/src/NativeSherpaOnnx.ts
CHANGED
|
@@ -21,7 +21,7 @@ export interface Spec extends TurboModule {
|
|
|
21
21
|
* @param instanceId - Unique ID for this engine instance (from createSTT)
|
|
22
22
|
* @param modelDir - Absolute path to model directory
|
|
23
23
|
* @param preferInt8 - Optional: true = prefer int8 models, false = prefer regular models, undefined = try int8 first (default)
|
|
24
|
-
* @param modelType - Optional: explicit model type ('transducer', 'nemo_transducer', 'paraformer', 'nemo_ctc', 'wenet_ctc', 'sense_voice', 'zipformer_ctc', 'whisper', 'funasr_nano', 'fire_red_asr', 'moonshine', 'dolphin', 'canary', 'omnilingual', 'medasr', 'telespeech_ctc', 'auto'), undefined = auto (default)
|
|
24
|
+
* @param modelType - Optional: explicit model type ('transducer', 'nemo_transducer', 'paraformer', 'nemo_ctc', 'wenet_ctc', 'sense_voice', 'zipformer_ctc', 'whisper', 'funasr_nano', 'fire_red_asr', 'moonshine', 'moonshine_v2', 'dolphin', 'canary', 'omnilingual', 'medasr', 'telespeech_ctc', 'auto'), undefined = auto (default)
|
|
25
25
|
* @param debug - Optional: enable debug logging in native layer and sherpa-onnx (default: false)
|
|
26
26
|
* @param hotwordsFile - Optional: path to hotwords file (OfflineRecognizerConfig)
|
|
27
27
|
* @param hotwordsScore - Optional: hotwords score (defaults to 1.5 in the Kotlin layer)
|
|
@@ -65,7 +65,7 @@ export interface Spec extends TurboModule {
|
|
|
65
65
|
* @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
|
|
66
66
|
* @param preferInt8 - Optional: true = prefer int8, false = prefer regular, undefined = try int8 first
|
|
67
67
|
* @param modelType - Optional: explicit type or 'auto' (default)
|
|
68
|
-
* @returns Object with success, detectedModels (array of { type, modelDir }),
|
|
68
|
+
* @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), and optionally isHardwareSpecificUnsupported (true when the model is for unsupported hardware e.g. RK35xx, Ascend)
|
|
69
69
|
*/
|
|
70
70
|
detectSttModel(
|
|
71
71
|
modelDir: string,
|
|
@@ -73,6 +73,8 @@ export interface Spec extends TurboModule {
|
|
|
73
73
|
modelType?: string
|
|
74
74
|
): Promise<{
|
|
75
75
|
success: boolean;
|
|
76
|
+
/** True when detection failed because the model targets unsupported hardware (RK35xx, Ascend, CANN). Use to show a specific message or block init. */
|
|
77
|
+
isHardwareSpecificUnsupported?: boolean;
|
|
76
78
|
detectedModels: Array<{ type: string; modelDir: string }>;
|
|
77
79
|
modelType?: string;
|
|
78
80
|
}>;
|
|
@@ -211,6 +213,20 @@ export interface Spec extends TurboModule {
|
|
|
211
213
|
isEndpoint: boolean;
|
|
212
214
|
}>;
|
|
213
215
|
|
|
216
|
+
/**
|
|
217
|
+
* Start native PCM live capture. Microphone audio is captured and resampled to the requested
|
|
218
|
+
* sampleRate; chunks are emitted via the "pcmLiveStreamData" event (base64 Int16 PCM).
|
|
219
|
+
* The app must declare RECORD_AUDIO (Android) and NSMicrophoneUsageDescription (iOS), and microphone permission must be granted before calling.
|
|
220
|
+
*/
|
|
221
|
+
startPcmLiveStream(options: {
|
|
222
|
+
sampleRate: number;
|
|
223
|
+
channelCount?: number;
|
|
224
|
+
bufferSizeFrames?: number;
|
|
225
|
+
}): Promise<void>;
|
|
226
|
+
|
|
227
|
+
/** Stop native PCM live capture. */
|
|
228
|
+
stopPcmLiveStream(): Promise<void>;
|
|
229
|
+
|
|
214
230
|
// ==================== TTS Methods ====================
|
|
215
231
|
|
|
216
232
|
/**
|
|
@@ -254,9 +270,10 @@ export interface Spec extends TurboModule {
|
|
|
254
270
|
/**
|
|
255
271
|
* Detect TTS model type and structure without initializing the engine.
|
|
256
272
|
* Uses the same native file-based detection as initializeTts.
|
|
273
|
+
* For Kokoro/Kitten multi-language models, also returns lexiconLanguageCandidates (e.g. ["default"], ["us-en", "gb-en", "zh"]) from detected lexicon.txt / lexicon-*.txt files.
|
|
257
274
|
* @param modelDir - Absolute path to model directory (use resolveModelPath first for asset/file paths)
|
|
258
275
|
* @param modelType - Optional: explicit type or 'auto' (default)
|
|
259
|
-
* @returns Object with success, detectedModels (array of { type, modelDir }),
|
|
276
|
+
* @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), and optionally lexiconLanguageCandidates (language ids for multi-lang Kokoro/Kitten)
|
|
260
277
|
*/
|
|
261
278
|
detectTtsModel(
|
|
262
279
|
modelDir: string,
|
|
@@ -265,6 +282,8 @@ export interface Spec extends TurboModule {
|
|
|
265
282
|
success: boolean;
|
|
266
283
|
detectedModels: Array<{ type: string; modelDir: string }>;
|
|
267
284
|
modelType?: string;
|
|
285
|
+
/** Language ids from detected lexicon files (e.g. "default" for lexicon.txt, "us-en", "zh" from lexicon-us-en.txt, lexicon-zh.txt). Present for Kokoro/Kitten when one or more lexicon files are found; use for language selection UI. */
|
|
286
|
+
lexiconLanguageCandidates?: string[];
|
|
268
287
|
}>;
|
|
269
288
|
|
|
270
289
|
/**
|
|
@@ -431,6 +450,19 @@ export interface Spec extends TurboModule {
|
|
|
431
450
|
mimeType: string
|
|
432
451
|
): Promise<string>;
|
|
433
452
|
|
|
453
|
+
/**
|
|
454
|
+
* Copy a local file into a document under a SAF directory URI (format-agnostic; Android only).
|
|
455
|
+
* @param filePath - Path of the local file to copy
|
|
456
|
+
* @param filename - Name for the document created under directoryUri
|
|
457
|
+
* @returns String identifying the copied document (presumably its content URI; confirm against the native implementation)
|
|
458
|
+
*/
|
|
459
|
+
copyFileToContentUri(
|
|
460
|
+
filePath: string,
|
|
461
|
+
directoryUri: string,
|
|
462
|
+
filename: string,
|
|
463
|
+
mimeType: string
|
|
464
|
+
): Promise<string>;
|
|
465
|
+
|
|
434
466
|
/**
|
|
435
467
|
* Copy a SAF content URI to a cache file for local playback.
|
|
436
468
|
* @param fileUri - Content URI of the saved WAV file
|
|
@@ -561,6 +593,8 @@ export interface Spec extends TurboModule {
|
|
|
561
593
|
* All get*Support methods return this shape. Optional modelBase64: if omitted, SDK uses embedded test model for canInit.
|
|
562
594
|
*/
|
|
563
595
|
getQnnSupport(modelBase64?: string): Promise<AccelerationSupport>;
|
|
596
|
+
/** Device SoC model string (e.g. SM8850 on Android 12+). Null if not available. isSupported: true when SoC is SM8xxx (supported for QNN). */
|
|
597
|
+
getDeviceQnnSoc(): Promise<{ soc: string | null; isSupported: boolean }>;
|
|
564
598
|
getNnapiSupport(modelBase64?: string): Promise<AccelerationSupport>;
|
|
565
599
|
getXnnpackSupport(modelBase64?: string): Promise<AccelerationSupport>;
|
|
566
600
|
getCoreMlSupport(modelBase64?: string): Promise<AccelerationSupport>;
|
package/src/audio/index.ts
CHANGED
|
@@ -1,9 +1,92 @@
|
|
|
1
|
+
import { Buffer } from 'buffer';
|
|
2
|
+
import { DeviceEventEmitter } from 'react-native';
|
|
1
3
|
import SherpaOnnx from '../NativeSherpaOnnx';
|
|
2
4
|
|
|
3
5
|
/**
|
|
4
|
-
*
|
|
6
|
+
* Decode base64-encoded Int16 PCM to float array in [-1, 1].
|
|
7
|
+
* Allocates one Float32Array per call, sized up front, to limit GC pressure on the live-mic hot path.
|
|
8
|
+
*/
|
|
9
|
+
function base64PcmToFloatArray(base64: string): Float32Array {
|
|
10
|
+
const bytes = Buffer.from(base64, 'base64');
|
|
11
|
+
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
12
|
+
const len = bytes.byteLength / 2;
|
|
13
|
+
const out = new Float32Array(len);
|
|
14
|
+
for (let i = 0; i < len; i++) {
|
|
15
|
+
out[i] = view.getInt16(i * 2, true) / 32768;
|
|
16
|
+
}
|
|
17
|
+
return out;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export type PcmLiveStreamOptions = {
|
|
21
|
+
sampleRate?: number;
|
|
22
|
+
channelCount?: number;
|
|
23
|
+
bufferSizeFrames?: number;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
export type PcmLiveStreamHandle = {
|
|
27
|
+
start: () => Promise<void>;
|
|
28
|
+
stop: () => Promise<void>;
|
|
29
|
+
onData: (
|
|
30
|
+
callback: (samples: Float32Array, sampleRate: number) => void
|
|
31
|
+
) => () => void;
|
|
32
|
+
onError: (callback: (message: string) => void) => () => void;
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Create a PCM live stream from the device microphone. Native capture and resampling ensure
|
|
37
|
+
* PCM is always delivered at the requested sampleRate (e.g. 16000 for STT). The app must have
|
|
38
|
+
* RECORD_AUDIO (Android) and NSMicrophoneUsageDescription (iOS) declared, and microphone permission granted, before start().
|
|
39
|
+
*/
|
|
40
|
+
export function createPcmLiveStream(
|
|
41
|
+
options?: PcmLiveStreamOptions
|
|
42
|
+
): PcmLiveStreamHandle {
|
|
43
|
+
const sampleRate = options?.sampleRate ?? 16000;
|
|
44
|
+
const channelCount = options?.channelCount ?? 1;
|
|
45
|
+
const bufferSizeFrames = options?.bufferSizeFrames ?? 0;
|
|
46
|
+
|
|
47
|
+
return {
|
|
48
|
+
start: () =>
|
|
49
|
+
SherpaOnnx.startPcmLiveStream({
|
|
50
|
+
sampleRate,
|
|
51
|
+
channelCount,
|
|
52
|
+
bufferSizeFrames,
|
|
53
|
+
}),
|
|
54
|
+
|
|
55
|
+
stop: () => SherpaOnnx.stopPcmLiveStream(),
|
|
56
|
+
|
|
57
|
+
onData: (callback: (samples: Float32Array, sampleRate: number) => void) => {
|
|
58
|
+
const sub = DeviceEventEmitter.addListener(
|
|
59
|
+
'pcmLiveStreamData',
|
|
60
|
+
(event: { base64Pcm?: string; sampleRate?: number }) => {
|
|
61
|
+
const base64 = event?.base64Pcm ?? '';
|
|
62
|
+
const sr = event?.sampleRate ?? sampleRate;
|
|
63
|
+
if (base64) {
|
|
64
|
+
const samples = base64PcmToFloatArray(base64);
|
|
65
|
+
callback(samples, sr);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
);
|
|
69
|
+
return () => sub.remove();
|
|
70
|
+
},
|
|
71
|
+
|
|
72
|
+
onError: (callback: (message: string) => void) => {
|
|
73
|
+
const sub = DeviceEventEmitter.addListener(
|
|
74
|
+
'pcmLiveStreamError',
|
|
75
|
+
(event: { message?: string }) => {
|
|
76
|
+
callback(event?.message ?? 'Unknown error');
|
|
77
|
+
}
|
|
78
|
+
);
|
|
79
|
+
return () => sub.remove();
|
|
80
|
+
},
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Convert any supported audio file to a requested format (e.g. "mp3", "flac", "wav", "m4a", "opus", "webm").
|
|
5
86
|
* On Android this requires FFmpeg prebuilts. WAV output is always 16 kHz mono (sherpa-onnx).
|
|
6
87
|
* For MP3, optional outputSampleRateHz: 32000, 44100, or 48000; 0/undefined = 44100.
|
|
88
|
+
* For Opus, optional outputSampleRateHz: 8000, 12000, 16000, 24000, or 48000.
|
|
89
|
+
* For M4A/AAC, standard bitrates apply.
|
|
7
90
|
* Resolves on success, rejects with an error message on failure.
|
|
8
91
|
*/
|
|
9
92
|
export function convertAudioToFormat(
|
|
@@ -33,6 +33,7 @@ export enum ModelCategory {
|
|
|
33
33
|
Diarization = 'diarization',
|
|
34
34
|
Enhancement = 'enhancement',
|
|
35
35
|
Separation = 'separation',
|
|
36
|
+
Qnn = 'qnn',
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
/** TTS model type for meta; 'unknown' when id could not be classified. */
|
|
@@ -233,6 +234,11 @@ const CATEGORY_CONFIG: Record<
|
|
|
233
234
|
cacheFile: 'separation-models.json',
|
|
234
235
|
baseDir: `${DocumentDirectoryPath}/sherpa-onnx/models/separation`,
|
|
235
236
|
},
|
|
237
|
+
[ModelCategory.Qnn]: {
|
|
238
|
+
tag: 'asr-models-qnn-binary',
|
|
239
|
+
cacheFile: 'qnn-models.json',
|
|
240
|
+
baseDir: `${DocumentDirectoryPath}/sherpa-onnx/models/qnn`,
|
|
241
|
+
},
|
|
236
242
|
};
|
|
237
243
|
|
|
238
244
|
function getCacheDir(): string {
|
|
@@ -358,6 +364,11 @@ async function retryWithBackoff<T>(
|
|
|
358
364
|
async function fetchChecksumsFromRelease(
|
|
359
365
|
category: ModelCategory
|
|
360
366
|
): Promise<Map<string, string>> {
|
|
367
|
+
// asr-models-qnn-binary has no checksum.txt; use GitHub API digest only (set in toGenericModelMeta).
|
|
368
|
+
if (category === ModelCategory.Qnn) {
|
|
369
|
+
return new Map<string, string>();
|
|
370
|
+
}
|
|
371
|
+
|
|
361
372
|
// Return cached if available
|
|
362
373
|
if (checksumCacheByCategory[category]) {
|
|
363
374
|
return checksumCacheByCategory[category]!;
|
|
@@ -490,6 +501,14 @@ function isAssetSupportedForCategory(
|
|
|
490
501
|
return ext === 'onnx';
|
|
491
502
|
case ModelCategory.Separation:
|
|
492
503
|
return ext === 'tar.bz2' || ext === 'onnx';
|
|
504
|
+
case ModelCategory.Qnn:
|
|
505
|
+
// asr-models-qnn-binary: e.g. sherpa-onnx-qnn-SM8850-binary-5-seconds-zipformer-ctc-zh-*.tar.bz2
|
|
506
|
+
return (
|
|
507
|
+
ext === 'tar.bz2' &&
|
|
508
|
+
lower.includes('sherpa-onnx-qnn') &&
|
|
509
|
+
lower.includes('binary') &&
|
|
510
|
+
lower.includes('seconds')
|
|
511
|
+
);
|
|
493
512
|
default:
|
|
494
513
|
return false;
|
|
495
514
|
}
|
package/src/index.tsx
CHANGED
|
@@ -16,6 +16,8 @@ export {
|
|
|
16
16
|
resolveModelPath,
|
|
17
17
|
} from './utils';
|
|
18
18
|
|
|
19
|
+
export { copyFileToContentUri } from './tts';
|
|
20
|
+
|
|
19
21
|
// Note: Feature-specific exports are available via subpath imports:
|
|
20
22
|
// - import { createSTT, createStreamingSTT, ... } from 'react-native-sherpa-onnx/stt'
|
|
21
23
|
// - import { createTTS, ... } from 'react-native-sherpa-onnx/tts'
|
|
@@ -41,6 +43,19 @@ export function getQnnSupport(
|
|
|
41
43
|
return SherpaOnnx.getQnnSupport(modelBase64);
|
|
42
44
|
}
|
|
43
45
|
|
|
46
|
+
/**
|
|
47
|
+
* Device SoC result: soc is always the device SoC string when available (Android 12+); on iOS or when unavailable, soc is null.
|
|
48
|
+
* isSupported is true when the SoC is SM8xxx (supported for QNN models). Use soc for the label; use isSupported to decide whether to auto-select in the download manager.
|
|
49
|
+
*/
|
|
50
|
+
export type DeviceQnnSocResult = {
|
|
51
|
+
soc: string | null;
|
|
52
|
+
isSupported: boolean;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
export function getDeviceQnnSoc(): Promise<DeviceQnnSocResult> {
|
|
56
|
+
return SherpaOnnx.getDeviceQnnSoc();
|
|
57
|
+
}
|
|
58
|
+
|
|
44
59
|
/**
|
|
45
60
|
* Return the list of available ONNX Runtime execution providers
|
|
46
61
|
* (e.g. "CPU", "NNAPI", "QNN", "XNNPACK").
|
package/src/stt/streaming.ts
CHANGED
|
@@ -317,11 +317,11 @@ export async function createStreamingSTT(
|
|
|
317
317
|
},
|
|
318
318
|
|
|
319
319
|
async processAudioChunk(
|
|
320
|
-
samples: number[],
|
|
320
|
+
samples: number[] | Float32Array,
|
|
321
321
|
sampleRate: number
|
|
322
322
|
): Promise<{ result: StreamingSttResult; isEndpoint: boolean }> {
|
|
323
323
|
streamGuard();
|
|
324
|
-
let toSend: number[] = samples;
|
|
324
|
+
let toSend: number[] | Float32Array = samples;
|
|
325
325
|
if (enableInputNormalization && samples.length > 0) {
|
|
326
326
|
let maxAbs = 1e-10;
|
|
327
327
|
for (let i = 0; i < samples.length; i++) {
|
|
@@ -329,15 +329,20 @@ export async function createStreamingSTT(
|
|
|
329
329
|
if (abs > maxAbs) maxAbs = abs;
|
|
330
330
|
}
|
|
331
331
|
const scale = maxAbs < 0.01 ? 80 : Math.min(80, 0.8 / maxAbs);
|
|
332
|
-
|
|
332
|
+
const normalized = new Float32Array(samples.length);
|
|
333
333
|
for (let i = 0; i < samples.length; i++) {
|
|
334
334
|
const v = samples[i]! * scale;
|
|
335
|
-
|
|
335
|
+
normalized[i] = v < -1 ? -1 : v > 1 ? 1 : v;
|
|
336
336
|
}
|
|
337
|
+
toSend = normalized;
|
|
337
338
|
}
|
|
339
|
+
// Bridge expects a plain array; Float32Array may not serialize as ReadableArray on all platforms.
|
|
340
|
+
const samplesArray = Array.isArray(toSend)
|
|
341
|
+
? toSend
|
|
342
|
+
: Array.from(toSend);
|
|
338
343
|
const raw = await SherpaOnnx.processSttAudioChunk(
|
|
339
344
|
streamId,
|
|
340
|
-
|
|
345
|
+
samplesArray,
|
|
341
346
|
sampleRate
|
|
342
347
|
);
|
|
343
348
|
return {
|
|
@@ -132,7 +132,7 @@ export interface SttStream {
|
|
|
132
132
|
* Reduces bridge round-trips from 5 to 1 per chunk.
|
|
133
133
|
*/
|
|
134
134
|
processAudioChunk(
|
|
135
|
-
samples: number[],
|
|
135
|
+
samples: number[] | Float32Array,
|
|
136
136
|
sampleRate: number
|
|
137
137
|
): Promise<{ result: StreamingSttResult; isEndpoint: boolean }>;
|
|
138
138
|
}
|
package/src/tts/index.ts
CHANGED
|
@@ -82,14 +82,18 @@ function flattenTtsModelOptionsForNative(
|
|
|
82
82
|
/**
|
|
83
83
|
* Detect TTS model type and structure without initializing the engine.
|
|
84
84
|
* Uses the same native file-based detection as createTTS. Stateless; no instance required.
|
|
85
|
+
* For Kokoro/Kitten multi-language models, the result includes lexiconLanguageCandidates (e.g. ["default"] or ["us-en", "gb-en", "zh"]) derived from lexicon.txt and lexicon-*.txt; use these for a language selection dropdown (language change requires re-initialization).
|
|
85
86
|
*
|
|
86
87
|
* @param modelPath - Model path configuration (asset, file, or auto)
|
|
87
88
|
* @param options - Optional modelType (default: 'auto')
|
|
88
|
-
* @returns Object with success, detectedModels (array of { type, modelDir }),
|
|
89
|
+
* @returns Object with success, detectedModels (array of { type, modelDir }), modelType (primary detected type), and optionally lexiconLanguageCandidates (language ids for multi-lang Kokoro/Kitten)
|
|
89
90
|
* @example
|
|
90
91
|
* ```typescript
|
|
91
92
|
* const result = await detectTtsModel({ type: 'asset', path: 'models/vits-piper-en' });
|
|
92
93
|
* if (result.success) console.log('Detected type:', result.modelType, result.detectedModels);
|
|
94
|
+
* if (result.lexiconLanguageCandidates?.length) {
|
|
95
|
+
* // Kokoro/Kitten multi-lang: show language dropdown (e.g. "us-en", "zh")
|
|
96
|
+
* }
|
|
93
97
|
* ```
|
|
94
98
|
*/
|
|
95
99
|
export async function detectTtsModel(
|
|
@@ -99,6 +103,8 @@ export async function detectTtsModel(
|
|
|
99
103
|
success: boolean;
|
|
100
104
|
detectedModels: Array<{ type: string; modelDir: string }>;
|
|
101
105
|
modelType?: string;
|
|
106
|
+
/** Language ids from detected lexicon files ("default" for lexicon.txt, or e.g. "us-en", "zh" from lexicon-us-en.txt, lexicon-zh.txt). Present for Kokoro/Kitten; use for language selection UI. */
|
|
107
|
+
lexiconLanguageCandidates?: string[];
|
|
102
108
|
}> {
|
|
103
109
|
const resolvedPath = await resolveModelPath(modelPath);
|
|
104
110
|
return SherpaOnnx.detectTtsModel(resolvedPath, options?.modelType);
|
|
@@ -360,6 +366,24 @@ export function saveTextToContentUri(
|
|
|
360
366
|
);
|
|
361
367
|
}
|
|
362
368
|
|
|
369
|
+
/**
|
|
370
|
+
* Copy a local file into a document under a SAF directory URI (format-agnostic; Android only).
|
|
371
|
+
* Use for saving converted audio (e.g. MP3, FLAC) to a content URI.
|
|
372
|
+
*/
|
|
373
|
+
export function copyFileToContentUri(
|
|
374
|
+
filePath: string,
|
|
375
|
+
directoryUri: string,
|
|
376
|
+
filename: string,
|
|
377
|
+
mimeType: string
|
|
378
|
+
): Promise<string> {
|
|
379
|
+
return SherpaOnnx.copyFileToContentUri(
|
|
380
|
+
filePath,
|
|
381
|
+
directoryUri,
|
|
382
|
+
filename,
|
|
383
|
+
mimeType
|
|
384
|
+
);
|
|
385
|
+
}
|
|
386
|
+
|
|
363
387
|
/**
|
|
364
388
|
* Copy a SAF content URI to a cache file for local playback (Android only).
|
|
365
389
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
ffmpeg-android-v8.0.1
|
|
1
|
+
ffmpeg-android-v8.0.1-1
|
|
@@ -1 +1 @@
|
|
|
1
|
-
libarchive-android-v3.8.5
|
|
1
|
+
libarchive-android-v3.8.5-1
|
|
@@ -1 +1 @@
|
|
|
1
|
-
libarchive-ios-v3.8.5
|
|
1
|
+
libarchive-ios-v3.8.5-1
|
|
@@ -1 +1 @@
|
|
|
1
|
-
sherpa-onnx-android-v1.12.
|
|
1
|
+
sherpa-onnx-android-v1.12.28
|
|
@@ -1 +1 @@
|
|
|
1
|
-
framework-v1.12.
|
|
1
|
+
framework-v1.12.28
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# Copy libarchive .c files to ios/patched_libarchive and insert <stdio.h> and <unistd.h>
|
|
3
|
-
# after #include "archive_platform.h" so they compile without modifying the submodule.
|
|
4
|
-
# Called from SherpaOnnx.podspec during evaluation.
|
|
5
|
-
# Requires: libarchive source dir (same as used for HEADER_SEARCH_PATHS: third_party or ios/Downloads/libarchive).
|
|
6
|
-
|
|
7
|
-
set -e
|
|
8
|
-
|
|
9
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
10
|
-
SDK_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
|
11
|
-
LIBARCHIVE_SRC="${1:-}"
|
|
12
|
-
PATCHED_DIR="$SDK_ROOT/ios/patched_libarchive"
|
|
13
|
-
|
|
14
|
-
if [ -z "$LIBARCHIVE_SRC" ]; then
|
|
15
|
-
echo "Error: libarchive source dir not set. Usage: $0 <libarchive_source_dir>" >&2
|
|
16
|
-
echo "Use the same path as libarchive_dir in the pod (e.g. third_party/libarchive_prebuilt/libarchive-ios-layout or ios/Downloads/libarchive)." >&2
|
|
17
|
-
exit 1
|
|
18
|
-
fi
|
|
19
|
-
|
|
20
|
-
if [ ! -d "$LIBARCHIVE_SRC" ]; then
|
|
21
|
-
echo "Error: libarchive source dir not found: $LIBARCHIVE_SRC" >&2
|
|
22
|
-
echo "Run third_party/libarchive_prebuilt/build_libarchive_ios.sh (repo) or ios/scripts/setup-ios-libarchive.sh (npm)." >&2
|
|
23
|
-
exit 1
|
|
24
|
-
fi
|
|
25
|
-
|
|
26
|
-
if [ ! -f "$LIBARCHIVE_SRC/archive_platform.h" ]; then
|
|
27
|
-
echo "Error: $LIBARCHIVE_SRC does not look like libarchive (archive_platform.h missing)." >&2
|
|
28
|
-
exit 1
|
|
29
|
-
fi
|
|
30
|
-
|
|
31
|
-
mkdir -p "$PATCHED_DIR"
|
|
32
|
-
count=0
|
|
33
|
-
|
|
34
|
-
# Same exclude as podspec: no test, windows, linux, sunos, freebsd
|
|
35
|
-
for f in "$LIBARCHIVE_SRC"/*.c; do
|
|
36
|
-
[ -f "$f" ] || continue
|
|
37
|
-
base=$(basename "$f" .c)
|
|
38
|
-
[[ "$(basename "$f")" =~ ^test\. ]] && continue
|
|
39
|
-
[[ "$base" == *windows* ]] && continue
|
|
40
|
-
[[ "$base" == *linux* ]] && continue
|
|
41
|
-
[[ "$base" == *sunos* ]] && continue
|
|
42
|
-
[[ "$base" == *freebsd* ]] && continue
|
|
43
|
-
|
|
44
|
-
dest="$PATCHED_DIR/$(basename "$f")"
|
|
45
|
-
# Insert #include <stdio.h> and #include <unistd.h> after first #include "archive_platform.h"
|
|
46
|
-
inserted=0
|
|
47
|
-
while IFS= read -r line; do
|
|
48
|
-
echo "$line"
|
|
49
|
-
if [ "$inserted" -eq 0 ] && echo "$line" | grep -q '^#include "archive_platform.h"'; then
|
|
50
|
-
echo '#include <stdio.h>'
|
|
51
|
-
echo '#include <unistd.h>'
|
|
52
|
-
inserted=1
|
|
53
|
-
fi
|
|
54
|
-
done < "$f" > "$dest"
|
|
55
|
-
count=$((count + 1))
|
|
56
|
-
done
|
|
57
|
-
|
|
58
|
-
if [ "$count" -eq 0 ]; then
|
|
59
|
-
echo "Error: No libarchive .c files were copied to $PATCHED_DIR. Check excludes and source dir." >&2
|
|
60
|
-
exit 1
|
|
61
|
-
fi
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# Download libarchive iOS sources from GitHub Releases and extract to ios/Downloads/libarchive.
|
|
3
|
-
# Always downloads (no skip when prebuilts exist). Call from Podfile pre_install so that
|
|
4
|
-
# ios/Downloads/libarchive exists before the podspec is evaluated (required when using the SDK from npm).
|
|
5
|
-
#
|
|
6
|
-
# Usage: run from repo root or from Podfile pre_install, e.g.:
|
|
7
|
-
# system("bash", "#{sdk_path}/ios/scripts/setup-ios-libarchive.sh")
|
|
8
|
-
# Or: ./ios/scripts/setup-ios-libarchive.sh
|
|
9
|
-
|
|
10
|
-
set -e
|
|
11
|
-
|
|
12
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
13
|
-
# SDK root = parent of ios/
|
|
14
|
-
SDK_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
|
15
|
-
DOWNLOADS_DIR="$SDK_ROOT/ios/Downloads"
|
|
16
|
-
LIBARCHIVE_DIR="$DOWNLOADS_DIR/libarchive"
|
|
17
|
-
TAG_FILE="$SDK_ROOT/third_party/libarchive_prebuilt/IOS_RELEASE_TAG"
|
|
18
|
-
|
|
19
|
-
# Resolve release tag: env LIBARCHIVE_IOS_RELEASE_TAG, or IOS_RELEASE_TAG file (single source of truth; committed and included in npm package).
|
|
20
|
-
RELEASE_TAG="${LIBARCHIVE_IOS_RELEASE_TAG:-}"
|
|
21
|
-
if [ -z "$RELEASE_TAG" ] && [ -f "$TAG_FILE" ]; then
|
|
22
|
-
RELEASE_TAG=$(grep -v '^#' "$TAG_FILE" | grep -v '^[[:space:]]*$' | head -1 | tr -d '\r\n')
|
|
23
|
-
fi
|
|
24
|
-
if [ -z "$RELEASE_TAG" ]; then
|
|
25
|
-
echo "Error: IOS_RELEASE_TAG not found at $TAG_FILE. Reinstall the package or run from repo." >&2
|
|
26
|
-
exit 1
|
|
27
|
-
fi
|
|
28
|
-
|
|
29
|
-
# Skip download if already present (podspec can be evaluated multiple times during pod install).
|
|
30
|
-
if [ -d "$LIBARCHIVE_DIR" ] && [ -f "$LIBARCHIVE_DIR/archive.h" ] && [ -n "$(find "$LIBARCHIVE_DIR" -maxdepth 1 -name '*.c' -print -quit 2>/dev/null)" ]; then
|
|
31
|
-
exit 0
|
|
32
|
-
fi
|
|
33
|
-
|
|
34
|
-
AUTH_ARGS=()
|
|
35
|
-
if [ -n "$GITHUB_TOKEN" ]; then
|
|
36
|
-
AUTH_ARGS+=(-H "Authorization: Bearer $GITHUB_TOKEN")
|
|
37
|
-
fi
|
|
38
|
-
|
|
39
|
-
echo "Downloading libarchive iOS sources from release $RELEASE_TAG..."
|
|
40
|
-
|
|
41
|
-
release_json=$(curl -s "${AUTH_ARGS[@]}" -H "Accept: application/vnd.github+json" \
|
|
42
|
-
"https://api.github.com/repos/XDcobra/react-native-sherpa-onnx/releases/tags/$RELEASE_TAG" 2>/dev/null || true)
|
|
43
|
-
if [ -z "$release_json" ] || ! echo "$release_json" | grep -q '"assets"'; then
|
|
44
|
-
echo "Error: Could not fetch release $RELEASE_TAG or no assets (rate limit?)." >&2
|
|
45
|
-
exit 1
|
|
46
|
-
fi
|
|
47
|
-
|
|
48
|
-
download_url=""
|
|
49
|
-
if command -v jq &>/dev/null; then
|
|
50
|
-
download_url=$(echo "$release_json" | jq -r '.assets[] | select(.name == "libarchive-ios-sources.zip") | .browser_download_url' | head -1)
|
|
51
|
-
else
|
|
52
|
-
download_url=$(echo "$release_json" | grep -o '"browser_download_url": "[^"]*libarchive-ios-sources.zip[^"]*"' | head -1 | sed 's/.*: "//;s/"$//')
|
|
53
|
-
fi
|
|
54
|
-
if [ -z "$download_url" ]; then
|
|
55
|
-
echo "Error: Asset libarchive-ios-sources.zip not found in release $RELEASE_TAG" >&2
|
|
56
|
-
exit 1
|
|
57
|
-
fi
|
|
58
|
-
|
|
59
|
-
mkdir -p "$DOWNLOADS_DIR"
|
|
60
|
-
zip_path="$DOWNLOADS_DIR/libarchive-ios-sources.zip"
|
|
61
|
-
if ! curl -L -f "${AUTH_ARGS[@]}" -o "$zip_path" "$download_url"; then
|
|
62
|
-
rm -f "$zip_path"
|
|
63
|
-
exit 1
|
|
64
|
-
fi
|
|
65
|
-
if ! file "$zip_path" 2>/dev/null | grep -q "Zip archive"; then
|
|
66
|
-
echo "Error: Downloaded file is not a valid zip" >&2
|
|
67
|
-
rm -f "$zip_path"
|
|
68
|
-
exit 1
|
|
69
|
-
fi
|
|
70
|
-
|
|
71
|
-
rm -rf "$LIBARCHIVE_DIR"
|
|
72
|
-
mkdir -p "$LIBARCHIVE_DIR"
|
|
73
|
-
unzip -q -o "$zip_path" -d "$LIBARCHIVE_DIR"
|
|
74
|
-
rm -f "$zip_path"
|
|
75
|
-
|
|
76
|
-
# If the zip had a single top-level dir (e.g. libarchive-ios-sources), flatten so
|
|
77
|
-
# archive.h and archive_xxhash.h are directly in LIBARCHIVE_DIR (podspec HEADER_SEARCH_PATHS expects that).
|
|
78
|
-
subdirs=("$LIBARCHIVE_DIR"/*/)
|
|
79
|
-
if [ -d "${subdirs[0]}" ] && [ "${#subdirs[@]}" -eq 1 ] && [ ! -f "$LIBARCHIVE_DIR/archive.h" ]; then
|
|
80
|
-
subdir="${subdirs[0]}"
|
|
81
|
-
echo "Flattening single top-level directory: $(basename "$subdir")"
|
|
82
|
-
shopt -s dotglob
|
|
83
|
-
mv "$subdir"* "$LIBARCHIVE_DIR/"
|
|
84
|
-
shopt -u dotglob
|
|
85
|
-
rmdir "$subdir" 2>/dev/null || true
|
|
86
|
-
fi
|
|
87
|
-
|
|
88
|
-
# Ensure required headers exist (e.g. archive_xxhash.h for LZ4 support)
|
|
89
|
-
if [ ! -f "$LIBARCHIVE_DIR/archive.h" ]; then
|
|
90
|
-
echo "Error: $LIBARCHIVE_DIR/archive.h missing after extract. Zip layout may be unexpected." >&2
|
|
91
|
-
exit 1
|
|
92
|
-
fi
|
|
93
|
-
if [ ! -f "$LIBARCHIVE_DIR/archive_xxhash.h" ]; then
|
|
94
|
-
echo "Error: $LIBARCHIVE_DIR/archive_xxhash.h missing. Re-publish libarchive iOS release (build_libarchive_ios.sh copies all *.h)." >&2
|
|
95
|
-
exit 1
|
|
96
|
-
fi
|
|
97
|
-
|
|
98
|
-
echo "Libarchive iOS sources extracted to $LIBARCHIVE_DIR"
|