react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
|
@@ -1,251 +1,65 @@
|
|
|
1
|
-
import type { TTSInitializeOptions,
|
|
2
|
-
import type {
|
|
1
|
+
import type { TTSInitializeOptions, TTSModelType, GeneratedAudio, TtsEngine } from './types';
|
|
2
|
+
import type { ModelPathConfig } from '../types';
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
4
|
+
* Detect TTS model type and structure without initializing the engine.
|
|
5
|
+
* Uses the same native file-based detection as createTTS. Stateless; no instance required.
|
|
5
6
|
*
|
|
6
|
-
*
|
|
7
|
-
* -
|
|
8
|
-
*
|
|
9
|
-
* - Auto-detection (tries asset first, then file system)
|
|
10
|
-
*
|
|
11
|
-
* Supported model types (auto-detected or explicit):
|
|
12
|
-
* - VITS (includes Piper, Coqui, MeloTTS, MMS)
|
|
13
|
-
* - Matcha (acoustic model + vocoder)
|
|
14
|
-
* - Kokoro (multi-speaker, multi-language)
|
|
15
|
-
* - KittenTTS (lightweight, multi-speaker)
|
|
16
|
-
* - Zipvoice (voice cloning capable)
|
|
17
|
-
*
|
|
18
|
-
* @param options - TTS initialization options or model path configuration
|
|
19
|
-
* @returns Promise resolving to result with success and detected models
|
|
7
|
+
* @param modelPath - Model path configuration (asset, file, or auto)
|
|
8
|
+
* @param options - Optional modelType (default: 'auto')
|
|
9
|
+
* @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
|
|
20
10
|
* @example
|
|
21
11
|
* ```typescript
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* console.log('Detected models:', result.detectedModels);
|
|
25
|
-
*
|
|
26
|
-
* // Asset model
|
|
27
|
-
* const result = await initializeTTS({
|
|
28
|
-
* modelPath: { type: 'asset', path: 'models/vits-piper-en' }
|
|
29
|
-
* });
|
|
30
|
-
*
|
|
31
|
-
* // File system model with options
|
|
32
|
-
* const result = await initializeTTS({
|
|
33
|
-
* modelPath: { type: 'file', path: '/path/to/model' },
|
|
34
|
-
* numThreads: 4,
|
|
35
|
-
* debug: true
|
|
36
|
-
* });
|
|
37
|
-
*
|
|
38
|
-
* // With explicit model type
|
|
39
|
-
* const result = await initializeTTS({
|
|
40
|
-
* modelPath: { type: 'asset', path: 'models/kokoro-en' },
|
|
41
|
-
* modelType: 'kokoro'
|
|
42
|
-
* });
|
|
12
|
+
* const result = await detectTtsModel({ type: 'asset', path: 'models/vits-piper-en' });
|
|
13
|
+
* if (result.success) console.log('Detected type:', result.modelType, result.detectedModels);
|
|
43
14
|
* ```
|
|
44
15
|
*/
|
|
45
|
-
export declare function
|
|
16
|
+
export declare function detectTtsModel(modelPath: ModelPathConfig, options?: {
|
|
17
|
+
modelType?: TTSModelType;
|
|
18
|
+
}): Promise<{
|
|
46
19
|
success: boolean;
|
|
47
20
|
detectedModels: Array<{
|
|
48
21
|
type: string;
|
|
49
22
|
modelDir: string;
|
|
50
23
|
}>;
|
|
24
|
+
modelType?: string;
|
|
51
25
|
}>;
|
|
52
26
|
/**
|
|
53
|
-
*
|
|
54
|
-
*/
|
|
55
|
-
export declare function updateTtsParams(options: TtsUpdateOptions): Promise<{
|
|
56
|
-
success: boolean;
|
|
57
|
-
detectedModels: Array<{
|
|
58
|
-
type: string;
|
|
59
|
-
modelDir: string;
|
|
60
|
-
}>;
|
|
61
|
-
}>;
|
|
62
|
-
/**
|
|
63
|
-
* Generate speech from text.
|
|
27
|
+
* Create a TTS engine instance. Call destroy() on the returned engine when done to free native resources.
|
|
64
28
|
*
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
* @param text - Text to convert to speech
|
|
69
|
-
* @param options - Synthesis options (speaker ID, speed)
|
|
70
|
-
* @returns Promise resolving to generated audio data
|
|
29
|
+
* @param options - TTS initialization options or model path configuration
|
|
30
|
+
* @returns Promise resolving to a TtsEngine instance
|
|
71
31
|
* @example
|
|
72
32
|
* ```typescript
|
|
73
|
-
*
|
|
74
|
-
*
|
|
75
|
-
*
|
|
76
|
-
*
|
|
77
|
-
* // With options
|
|
78
|
-
* const audio = await generateSpeech('Hello, world!', {
|
|
79
|
-
* sid: 0, // Speaker ID (for multi-speaker models)
|
|
80
|
-
* speed: 1.2 // 20% faster
|
|
33
|
+
* const tts = await createTTS({
|
|
34
|
+
* modelPath: { type: 'asset', path: 'models/vits-piper-en' },
|
|
35
|
+
* modelType: 'vits',
|
|
36
|
+
* modelOptions: { vits: { noiseScale: 0.667 } },
|
|
81
37
|
* });
|
|
82
|
-
*
|
|
83
|
-
*
|
|
84
|
-
* const audio = await generateSpeech('Speak slowly', { speed: 0.8 });
|
|
38
|
+
* const audio = await tts.generateSpeech('Hello world');
|
|
39
|
+
* await tts.destroy();
|
|
85
40
|
* ```
|
|
86
41
|
*/
|
|
87
|
-
export declare function
|
|
88
|
-
/**
|
|
89
|
-
* Generate speech from text and return subtitle/timestamp metadata.
|
|
90
|
-
*
|
|
91
|
-
* Timestamps are estimated based on the output duration when models do not
|
|
92
|
-
* provide native timing information.
|
|
93
|
-
*/
|
|
94
|
-
export declare function generateSpeechWithTimestamps(text: string, options?: SynthesisOptions): Promise<GeneratedAudioWithTimestamps>;
|
|
95
|
-
export type TtsStreamHandlers = {
|
|
96
|
-
onChunk?: (chunk: TtsStreamChunk) => void;
|
|
97
|
-
onEnd?: (event: TtsStreamEnd) => void;
|
|
98
|
-
onError?: (event: TtsStreamError) => void;
|
|
99
|
-
};
|
|
100
|
-
/**
|
|
101
|
-
* Generate speech in streaming mode (emits chunk events).
|
|
102
|
-
*
|
|
103
|
-
* Returns an unsubscribe function to remove event listeners.
|
|
104
|
-
*/
|
|
105
|
-
export declare function generateSpeechStream(text: string, options: SynthesisOptions | undefined, handlers: TtsStreamHandlers): Promise<() => void>;
|
|
106
|
-
/**
|
|
107
|
-
* Cancel ongoing streaming TTS generation.
|
|
108
|
-
*/
|
|
109
|
-
export declare function cancelSpeechStream(): Promise<void>;
|
|
110
|
-
/**
|
|
111
|
-
* Start PCM playback for streaming TTS.
|
|
112
|
-
*/
|
|
113
|
-
export declare function startTtsPcmPlayer(sampleRate: number, channels: number): Promise<void>;
|
|
114
|
-
/**
|
|
115
|
-
* Write PCM samples to the streaming TTS player.
|
|
116
|
-
*/
|
|
117
|
-
export declare function writeTtsPcmChunk(samples: number[]): Promise<void>;
|
|
118
|
-
/**
|
|
119
|
-
* Stop PCM playback for streaming TTS.
|
|
120
|
-
*/
|
|
121
|
-
export declare function stopTtsPcmPlayer(): Promise<void>;
|
|
122
|
-
/**
|
|
123
|
-
* Get TTS model information.
|
|
124
|
-
*
|
|
125
|
-
* Returns the sample rate and number of available speakers/voices.
|
|
126
|
-
* Call this after initialization to check model capabilities.
|
|
127
|
-
*
|
|
128
|
-
* @returns Promise resolving to model information
|
|
129
|
-
* @example
|
|
130
|
-
* ```typescript
|
|
131
|
-
* await initializeTTS('models/kokoro-en');
|
|
132
|
-
* const info = await getModelInfo();
|
|
133
|
-
*
|
|
134
|
-
* console.log(`Sample rate: ${info.sampleRate} Hz`);
|
|
135
|
-
* console.log(`Available speakers: ${info.numSpeakers}`);
|
|
136
|
-
*
|
|
137
|
-
* if (info.numSpeakers > 1) {
|
|
138
|
-
* // Multi-speaker model, can use different voices
|
|
139
|
-
* const audio = await generateSpeech('Hello', { sid: 1 });
|
|
140
|
-
* }
|
|
141
|
-
* ```
|
|
142
|
-
*/
|
|
143
|
-
export declare function getModelInfo(): Promise<TTSModelInfo>;
|
|
144
|
-
/**
|
|
145
|
-
* Get the sample rate of the initialized TTS model.
|
|
146
|
-
*
|
|
147
|
-
* @returns Promise resolving to sample rate in Hz
|
|
148
|
-
* @example
|
|
149
|
-
* ```typescript
|
|
150
|
-
* const sampleRate = await getSampleRate();
|
|
151
|
-
* console.log(`Model outputs audio at ${sampleRate} Hz`);
|
|
152
|
-
* ```
|
|
153
|
-
*/
|
|
154
|
-
export declare function getSampleRate(): Promise<number>;
|
|
155
|
-
/**
|
|
156
|
-
* Get the number of speakers/voices available in the model.
|
|
157
|
-
*
|
|
158
|
-
* @returns Promise resolving to number of speakers
|
|
159
|
-
* - 0 or 1: Single-speaker model
|
|
160
|
-
* - >1: Multi-speaker model
|
|
161
|
-
* @example
|
|
162
|
-
* ```typescript
|
|
163
|
-
* const numSpeakers = await getNumSpeakers();
|
|
164
|
-
*
|
|
165
|
-
* if (numSpeakers > 1) {
|
|
166
|
-
* console.log(`Model has ${numSpeakers} different voices`);
|
|
167
|
-
* // Generate with different voices
|
|
168
|
-
* for (let i = 0; i < numSpeakers; i++) {
|
|
169
|
-
* const audio = await generateSpeech('Hello', { sid: i });
|
|
170
|
-
* // ... use audio
|
|
171
|
-
* }
|
|
172
|
-
* }
|
|
173
|
-
* ```
|
|
174
|
-
*/
|
|
175
|
-
export declare function getNumSpeakers(): Promise<number>;
|
|
176
|
-
/**
|
|
177
|
-
* Release TTS resources.
|
|
178
|
-
*
|
|
179
|
-
* Call this when you're done using TTS to free up memory.
|
|
180
|
-
* After calling this, you must call `initializeTTS()` again before
|
|
181
|
-
* using TTS functions.
|
|
182
|
-
*
|
|
183
|
-
* @example
|
|
184
|
-
* ```typescript
|
|
185
|
-
* await initializeTTS('models/vits-piper-en');
|
|
186
|
-
* const audio = await generateSpeech('Hello');
|
|
187
|
-
* // ... use audio
|
|
188
|
-
* await unloadTTS(); // Free resources
|
|
189
|
-
* ```
|
|
190
|
-
*/
|
|
191
|
-
export declare function unloadTTS(): Promise<void>;
|
|
42
|
+
export declare function createTTS(options: TTSInitializeOptions | ModelPathConfig): Promise<TtsEngine>;
|
|
192
43
|
/**
|
|
193
44
|
* Save generated TTS audio to a WAV file.
|
|
194
|
-
*
|
|
195
|
-
* @param audio - Generated audio from generateSpeech()
|
|
196
|
-
* @param filePath - Absolute path where to save the WAV file
|
|
197
|
-
* @returns Promise resolving to the file path where audio was saved
|
|
198
|
-
* @example
|
|
199
|
-
* ```typescript
|
|
200
|
-
* import { Platform } from 'react-native';
|
|
201
|
-
* import RNFS from 'react-native-fs';
|
|
202
|
-
*
|
|
203
|
-
* const audio = await generateSpeech('Hello, world!');
|
|
204
|
-
*
|
|
205
|
-
* // Save to documents directory
|
|
206
|
-
* const documentsPath = Platform.OS === 'ios'
|
|
207
|
-
* ? RNFS.DocumentDirectoryPath
|
|
208
|
-
* : RNFS.ExternalDirectoryPath;
|
|
209
|
-
* const filePath = `${documentsPath}/speech_${Date.now()}.wav`;
|
|
210
|
-
*
|
|
211
|
-
* const savedPath = await saveAudioToFile(audio, filePath);
|
|
212
|
-
* console.log('Audio saved to:', savedPath);
|
|
213
|
-
* ```
|
|
214
45
|
*/
|
|
215
46
|
export declare function saveAudioToFile(audio: GeneratedAudio, filePath: string): Promise<string>;
|
|
216
47
|
/**
|
|
217
48
|
* Save generated TTS audio to a WAV file via Android SAF content URI.
|
|
218
|
-
*
|
|
219
|
-
* @param audio - Generated audio from generateSpeech()
|
|
220
|
-
* @param directoryUri - Directory content URI from SAF
|
|
221
|
-
* @param filename - Desired file name
|
|
222
|
-
* @returns Promise resolving to content URI of the saved file
|
|
223
49
|
*/
|
|
224
50
|
export declare function saveAudioToContentUri(audio: GeneratedAudio, directoryUri: string, filename: string): Promise<string>;
|
|
225
51
|
/**
|
|
226
52
|
* Save a text file via Android SAF content URI.
|
|
227
|
-
*
|
|
228
|
-
* @param text - Text content to write
|
|
229
|
-
* @param directoryUri - Directory content URI from SAF
|
|
230
|
-
* @param filename - Desired file name
|
|
231
|
-
* @param mimeType - MIME type (default: text/plain)
|
|
232
|
-
* @returns Promise resolving to content URI of the saved file
|
|
233
53
|
*/
|
|
234
54
|
export declare function saveTextToContentUri(text: string, directoryUri: string, filename: string, mimeType?: string): Promise<string>;
|
|
235
55
|
/**
|
|
236
56
|
* Copy a SAF content URI to a cache file for local playback (Android only).
|
|
237
|
-
*
|
|
238
|
-
* @param fileUri - Content URI of the saved WAV file
|
|
239
|
-
* @param filename - Desired cache filename
|
|
240
|
-
* @returns Promise resolving to absolute path of the cached file
|
|
241
57
|
*/
|
|
242
58
|
export declare function copyContentUriToCache(fileUri: string, filename: string): Promise<string>;
|
|
243
59
|
/**
|
|
244
60
|
* Share a TTS audio file (file path or content URI).
|
|
245
|
-
*
|
|
246
|
-
* @param fileUri - File path or content URI
|
|
247
|
-
* @param mimeType - MIME type (default: audio/wav)
|
|
248
61
|
*/
|
|
249
62
|
export declare function shareAudioFile(fileUri: string, mimeType?: string): Promise<void>;
|
|
250
|
-
export type { TTSInitializeOptions, TTSModelType,
|
|
63
|
+
export type { TTSInitializeOptions, TTSModelType, TtsModelOptions, TtsVitsModelOptions, TtsMatchaModelOptions, TtsKokoroModelOptions, TtsKittenModelOptions, TtsPocketModelOptions, TtsUpdateOptions, TtsGenerationOptions, GeneratedAudio, GeneratedAudioWithTimestamps, TtsSubtitleItem, TTSModelInfo, TtsEngine, TtsStreamHandlers, TtsStreamChunk, TtsStreamEnd, TtsStreamError, } from './types';
|
|
64
|
+
export { TTS_MODEL_TYPES } from './types';
|
|
251
65
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/tts/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,oBAAoB,EACpB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/tts/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,oBAAoB,EACpB,YAAY,EAIZ,cAAc,EAGd,SAAS,EAKV,MAAM,SAAS,CAAC;AACjB,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAqEhD;;;;;;;;;;;;GAYG;AACH,wBAAsB,cAAc,CAClC,SAAS,EAAE,eAAe,EAC1B,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,YAAY,CAAA;CAAE,GACrC,OAAO,CAAC;IACT,OAAO,EAAE,OAAO,CAAC;IACjB,cAAc,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,CAAC,CAGD;AA6BD;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,SAAS,CAC7B,OAAO,EAAE,oBAAoB,GAAG,eAAe,GAC9C,OAAO,CAAC,SAAS,CAAC,CA4NpB;AAID;;GAEG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC,CAMjB;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,cAAc,EACrB,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC,CAOjB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAChB,QAAQ,SAAe,GACtB,OAAO,CAAC,MAAM,CAAC,CAOjB;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC,CAEjB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC5B,OAAO,EAAE,MAAM,EACf,QAAQ,SAAc,GACrB,OAAO,CAAC,IAAI,CAAC,CAEf;AAGD,YAAY,EACV,oBAAoB,EACpB,YAAY,EACZ,eAAe,EACf,mBAAmB,EACnB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,gBAAgB,EAChB,oBAAoB,EACpB,cAAc,EACd,4BAA4B,EAC5B,eAAe,EACf,YAAY,EACZ,SAAS,EACT,iBAAiB,EACjB,cAAc,EACd,YAAY,EACZ,cAAc,GACf,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC"}
|
|
@@ -6,10 +6,53 @@ import type { ModelPathConfig } from '../types';
|
|
|
6
6
|
* - 'matcha': Matcha models (acoustic model + vocoder)
|
|
7
7
|
* - 'kokoro': Kokoro models (multi-speaker, multi-language)
|
|
8
8
|
* - 'kitten': KittenTTS models (lightweight, multi-speaker)
|
|
9
|
+
* - 'pocket': Pocket TTS models
|
|
9
10
|
* - 'zipvoice': Zipvoice models (voice cloning capable)
|
|
10
11
|
* - 'auto': Auto-detect model type based on files present (default)
|
|
11
12
|
*/
|
|
12
|
-
export type TTSModelType = 'vits' | 'matcha' | 'kokoro' | 'kitten' | 'zipvoice' | 'auto';
|
|
13
|
+
export type TTSModelType = 'vits' | 'matcha' | 'kokoro' | 'kitten' | 'pocket' | 'zipvoice' | 'auto';
|
|
14
|
+
/** Runtime list of supported TTS model types. */
|
|
15
|
+
export declare const TTS_MODEL_TYPES: readonly TTSModelType[];
|
|
16
|
+
/** Options for VITS models. Applied only when modelType is 'vits'. Kotlin OfflineTtsVitsModelConfig. */
|
|
17
|
+
export interface TtsVitsModelOptions {
|
|
18
|
+
/** Noise scale. If omitted, model default (or model.json) is used. */
|
|
19
|
+
noiseScale?: number;
|
|
20
|
+
/** Noise scale W. If omitted, model default is used. */
|
|
21
|
+
noiseScaleW?: number;
|
|
22
|
+
/** Length scale. If omitted, model default is used. */
|
|
23
|
+
lengthScale?: number;
|
|
24
|
+
}
|
|
25
|
+
/** Options for Matcha models. Applied only when modelType is 'matcha'. Kotlin OfflineTtsMatchaModelConfig. */
|
|
26
|
+
export interface TtsMatchaModelOptions {
|
|
27
|
+
/** Noise scale. If omitted, model default is used. */
|
|
28
|
+
noiseScale?: number;
|
|
29
|
+
/** Length scale. If omitted, model default is used. */
|
|
30
|
+
lengthScale?: number;
|
|
31
|
+
}
|
|
32
|
+
/** Options for Kokoro models. Applied only when modelType is 'kokoro'. Kotlin OfflineTtsKokoroModelConfig. */
|
|
33
|
+
export interface TtsKokoroModelOptions {
|
|
34
|
+
/** Length scale. If omitted, model default is used. */
|
|
35
|
+
lengthScale?: number;
|
|
36
|
+
}
|
|
37
|
+
/** Options for KittenTTS models. Applied only when modelType is 'kitten'. Kotlin OfflineTtsKittenModelConfig. */
|
|
38
|
+
export interface TtsKittenModelOptions {
|
|
39
|
+
/** Length scale. If omitted, model default is used. */
|
|
40
|
+
lengthScale?: number;
|
|
41
|
+
}
|
|
42
|
+
/** Options for Pocket TTS models. Applied only when modelType is 'pocket'. Kotlin has no init-time model config for pocket; reserved for future use. */
|
|
43
|
+
export interface TtsPocketModelOptions {
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Model-specific TTS options. Only the block for the actually loaded model type is applied;
|
|
47
|
+
* others are ignored (e.g. vits options have no effect when a kokoro model is loaded).
|
|
48
|
+
*/
|
|
49
|
+
export interface TtsModelOptions {
|
|
50
|
+
vits?: TtsVitsModelOptions;
|
|
51
|
+
matcha?: TtsMatchaModelOptions;
|
|
52
|
+
kokoro?: TtsKokoroModelOptions;
|
|
53
|
+
kitten?: TtsKittenModelOptions;
|
|
54
|
+
pocket?: TtsPocketModelOptions;
|
|
55
|
+
}
|
|
13
56
|
/**
|
|
14
57
|
* Configuration for TTS initialization.
|
|
15
58
|
*/
|
|
@@ -18,7 +61,7 @@ export interface TTSInitializeOptions {
|
|
|
18
61
|
* Path to the model directory.
|
|
19
62
|
* Can be an asset path, file system path, or auto-detection path.
|
|
20
63
|
*/
|
|
21
|
-
modelPath: ModelPathConfig
|
|
64
|
+
modelPath: ModelPathConfig;
|
|
22
65
|
/**
|
|
23
66
|
* Model type to use.
|
|
24
67
|
* If not specified or 'auto', the model type will be auto-detected
|
|
@@ -27,6 +70,13 @@ export interface TTSInitializeOptions {
|
|
|
27
70
|
* @default 'auto'
|
|
28
71
|
*/
|
|
29
72
|
modelType?: TTSModelType;
|
|
73
|
+
/**
|
|
74
|
+
* Execution provider (e.g. `'cpu'`, `'coreml'`, `'xnnpack'`, `'nnapi'`, `'qnn'`).
|
|
75
|
+
* Use getCoreMlSupport(), getXnnpackSupport(), etc. to check availability. See execution-providers.md.
|
|
76
|
+
*
|
|
77
|
+
* @default 'cpu'
|
|
78
|
+
*/
|
|
79
|
+
provider?: string;
|
|
30
80
|
/**
|
|
31
81
|
* Number of threads to use for inference.
|
|
32
82
|
* More threads = faster processing but more CPU usage.
|
|
@@ -41,45 +91,51 @@ export interface TTSInitializeOptions {
|
|
|
41
91
|
*/
|
|
42
92
|
debug?: boolean;
|
|
43
93
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
* If omitted, the model default (or model.json) is used.
|
|
94
|
+
* Model-specific options. Only options for the loaded model type are applied.
|
|
95
|
+
* E.g. when modelType is 'vits', only modelOptions.vits is used.
|
|
47
96
|
*/
|
|
48
|
-
|
|
97
|
+
modelOptions?: TtsModelOptions;
|
|
49
98
|
/**
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
* If omitted, the model default (or model.json) is used.
|
|
99
|
+
* Path(s) to rule FSTs for TTS (OfflineTtsConfig.ruleFsts).
|
|
100
|
+
* Used for text normalization / ITN.
|
|
53
101
|
*/
|
|
54
|
-
|
|
102
|
+
ruleFsts?: string;
|
|
55
103
|
/**
|
|
56
|
-
*
|
|
57
|
-
*
|
|
58
|
-
* If omitted, the model default (or model.json) is used.
|
|
104
|
+
* Path(s) to rule FARs for TTS (OfflineTtsConfig.ruleFars).
|
|
105
|
+
* Used for text normalization / ITN.
|
|
59
106
|
*/
|
|
60
|
-
|
|
107
|
+
ruleFars?: string;
|
|
108
|
+
/**
|
|
109
|
+
* Max number of sentences per streaming callback (OfflineTtsConfig.maxNumSentences).
|
|
110
|
+
* Default: 1.
|
|
111
|
+
*/
|
|
112
|
+
maxNumSentences?: number;
|
|
113
|
+
/**
|
|
114
|
+
* Silence scale on config level (OfflineTtsConfig.silenceScale).
|
|
115
|
+
* Default: 0.2.
|
|
116
|
+
*/
|
|
117
|
+
silenceScale?: number;
|
|
61
118
|
}
|
|
62
119
|
/**
|
|
63
|
-
* Options for updating TTS model parameters.
|
|
120
|
+
* Options for updating TTS model parameters at runtime.
|
|
121
|
+
* Only the block for the given modelType is applied; flattened to native noiseScale / noiseScaleW / lengthScale.
|
|
64
122
|
*/
|
|
65
123
|
export interface TtsUpdateOptions {
|
|
66
124
|
/**
|
|
67
|
-
*
|
|
68
|
-
|
|
69
|
-
noiseScale?: number | null;
|
|
70
|
-
/**
|
|
71
|
-
* Noise scale W for VITS models.
|
|
125
|
+
* Model type currently loaded. When omitted or 'auto', the SDK uses the model type from the last
|
|
126
|
+
* successful initializeTTS(). After unloadTTS(), pass modelType explicitly until init is called again.
|
|
72
127
|
*/
|
|
73
|
-
|
|
128
|
+
modelType?: TTSModelType;
|
|
74
129
|
/**
|
|
75
|
-
*
|
|
130
|
+
* Model-specific options. Only the block for the effective model type is used (e.g. modelOptions.vits when type is 'vits').
|
|
76
131
|
*/
|
|
77
|
-
|
|
132
|
+
modelOptions?: TtsModelOptions;
|
|
78
133
|
}
|
|
79
134
|
/**
|
|
80
|
-
* Options for
|
|
135
|
+
* Options for TTS generation. Maps to Kotlin GenerationConfig when reference
|
|
136
|
+
* audio or advanced options are used; otherwise simple sid/speed are used.
|
|
81
137
|
*/
|
|
82
|
-
export interface
|
|
138
|
+
export interface TtsGenerationOptions {
|
|
83
139
|
/**
|
|
84
140
|
* Speaker ID for multi-speaker models.
|
|
85
141
|
* For single-speaker models, this is ignored.
|
|
@@ -99,6 +155,34 @@ export interface SynthesisOptions {
|
|
|
99
155
|
* @default 1.0
|
|
100
156
|
*/
|
|
101
157
|
speed?: number;
|
|
158
|
+
/**
|
|
159
|
+
* Silence scale (Kotlin GenerationConfig.silenceScale). Used at generate time.
|
|
160
|
+
*/
|
|
161
|
+
silenceScale?: number;
|
|
162
|
+
/**
|
|
163
|
+
* Reference audio for voice cloning (Kotlin GenerationConfig).
|
|
164
|
+
* In the Kotlin/RN stack, only Pocket TTS uses this; other model types (vits, matcha, kokoro, kitten) ignore it.
|
|
165
|
+
* Mono float samples in [-1, 1] and sample rate in Hz.
|
|
166
|
+
*/
|
|
167
|
+
referenceAudio?: {
|
|
168
|
+
samples: number[];
|
|
169
|
+
sampleRate: number;
|
|
170
|
+
};
|
|
171
|
+
/**
|
|
172
|
+
* Transcript text of the reference audio (Kotlin GenerationConfig.referenceText).
|
|
173
|
+
* Required for Pocket TTS when referenceAudio is provided; ignored by other model types.
|
|
174
|
+
*/
|
|
175
|
+
referenceText?: string;
|
|
176
|
+
/**
|
|
177
|
+
* Number of steps, e.g. flow-matching steps (Kotlin GenerationConfig.numSteps).
|
|
178
|
+
* Used by models such as Pocket.
|
|
179
|
+
*/
|
|
180
|
+
numSteps?: number;
|
|
181
|
+
/**
|
|
182
|
+
* Extra options as key-value pairs (Kotlin GenerationConfig.extra).
|
|
183
|
+
* Model-specific (e.g. temperature, chunk_size for Pocket).
|
|
184
|
+
*/
|
|
185
|
+
extra?: Record<string, string>;
|
|
102
186
|
}
|
|
103
187
|
/**
|
|
104
188
|
* Generated audio data from TTS synthesis.
|
|
@@ -153,6 +237,8 @@ export interface GeneratedAudioWithTimestamps extends GeneratedAudio {
|
|
|
153
237
|
* Streaming chunk event payload for TTS generation.
|
|
154
238
|
*/
|
|
155
239
|
export interface TtsStreamChunk {
|
|
240
|
+
/** Instance ID (set by native for multi-instance routing). */
|
|
241
|
+
instanceId?: string;
|
|
156
242
|
samples: number[];
|
|
157
243
|
sampleRate: number;
|
|
158
244
|
progress: number;
|
|
@@ -162,14 +248,51 @@ export interface TtsStreamChunk {
|
|
|
162
248
|
* Streaming end event payload.
|
|
163
249
|
*/
|
|
164
250
|
export interface TtsStreamEnd {
|
|
251
|
+
/** Instance ID (set by native for multi-instance routing). */
|
|
252
|
+
instanceId?: string;
|
|
165
253
|
cancelled: boolean;
|
|
166
254
|
}
|
|
167
255
|
/**
|
|
168
256
|
* Streaming error event payload.
|
|
169
257
|
*/
|
|
170
258
|
export interface TtsStreamError {
|
|
259
|
+
/** Instance ID (set by native for multi-instance routing). */
|
|
260
|
+
instanceId?: string;
|
|
171
261
|
message: string;
|
|
172
262
|
}
|
|
263
|
+
/**
|
|
264
|
+
* Handlers for TTS streaming generation (chunk, end, error).
|
|
265
|
+
*/
|
|
266
|
+
export interface TtsStreamHandlers {
|
|
267
|
+
onChunk?: (chunk: TtsStreamChunk) => void;
|
|
268
|
+
onEnd?: (event: TtsStreamEnd) => void;
|
|
269
|
+
onError?: (event: TtsStreamError) => void;
|
|
270
|
+
}
|
|
271
|
+
/**
|
|
272
|
+
* Instance-based TTS engine returned by createTTS().
|
|
273
|
+
* Call destroy() when done to free native resources.
|
|
274
|
+
*/
|
|
275
|
+
export interface TtsEngine {
|
|
276
|
+
readonly instanceId: string;
|
|
277
|
+
generateSpeech(text: string, options?: TtsGenerationOptions): Promise<GeneratedAudio>;
|
|
278
|
+
generateSpeechWithTimestamps(text: string, options?: TtsGenerationOptions): Promise<GeneratedAudioWithTimestamps>;
|
|
279
|
+
generateSpeechStream(text: string, options: TtsGenerationOptions | undefined, handlers: TtsStreamHandlers): Promise<() => void>;
|
|
280
|
+
cancelSpeechStream(): Promise<void>;
|
|
281
|
+
startPcmPlayer(sampleRate: number, channels: number): Promise<void>;
|
|
282
|
+
writePcmChunk(samples: number[]): Promise<void>;
|
|
283
|
+
stopPcmPlayer(): Promise<void>;
|
|
284
|
+
updateParams(options: TtsUpdateOptions): Promise<{
|
|
285
|
+
success: boolean;
|
|
286
|
+
detectedModels: Array<{
|
|
287
|
+
type: string;
|
|
288
|
+
modelDir: string;
|
|
289
|
+
}>;
|
|
290
|
+
}>;
|
|
291
|
+
getModelInfo(): Promise<TTSModelInfo>;
|
|
292
|
+
getSampleRate(): Promise<number>;
|
|
293
|
+
getNumSpeakers(): Promise<number>;
|
|
294
|
+
destroy(): Promise<void>;
|
|
295
|
+
}
|
|
173
296
|
/**
|
|
174
297
|
* Information about TTS model capabilities.
|
|
175
298
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/tts/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAEhD
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/tts/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAEhD;;;;;;;;;;GAUG;AACH,MAAM,MAAM,YAAY,GACpB,MAAM,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,UAAU,GACV,MAAM,CAAC;AAEX,iDAAiD;AACjD,eAAO,MAAM,eAAe,EAAE,SAAS,YAAY,EAQzC,CAAC;AAIX,wGAAwG;AACxG,MAAM,WAAW,mBAAmB;IAClC,sEAAsE;IACtE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,uDAAuD;IACvD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,8GAA8G;AAC9G,MAAM,WAAW,qBAAqB;IACpC,sDAAsD;IACtD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,uDAAuD;IACvD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,8GAA8G;AAC9G,MAAM,WAAW,qBAAqB;IACpC,uDAAuD;IACvD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,iHAAiH;AACjH,MAAM,WAAW,qBAAqB;IACpC,uDAAuD;IACvD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,wJAAwJ;AACxJ,MAAM,WAAW,qBAAqB;CAErC;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,CAAC,EAAE,mBAAmB,CAAC;IAC3B,MAAM,CAAC,EAAE,qBAAqB,CAAC;IAC/B,MAAM,CAAC,EAAE,qBAAqB,CAAC;IAC/B,MAAM,CAAC,EAAE,qBAAqB,CAAC;IAC/B,MAAM,CAAC,EAAE,qBAAqB,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;OAGG;IACH,SAAS,EAAE,eAAe,CAAC;IAE3B;;;;;;OAMG;IACH,SAAS,CAAC,EAAE,YAAY,CAAC;IAEzB;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;IAEhB;;;OAGG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;OAGG;IACH,SAAS,CAAC,EAAE,YAAY,CAAC;IAEzB;;OAEG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;CAChC;AAED;;;GAGG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;;;;;OAOG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb;;;;;;;;OAQG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;OAIG;IACH,cAAc,CAAC,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAE3D;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAChC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,OAAO,EAAE,MAAM,EAAE,CAAC;IAElB;;;OAGG;IACH,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,GAAG,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,MAAM,WAAW,4BAA6B,SAAQ,cAAc;IAClE;;OAEG;IACH,SAAS,EAAE,eAAe,EAAE,CAAC;IAE7B;;OAEG;IACH,SAAS,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8DAA8D;IAC9D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8DAA8D;IAC9D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8DAA8D;IAC9D,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,cAAc,KAAK,IAAI,CAAC;IAC1C,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,YAAY,KAAK,IAAI,CAAC;IACtC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,cAAc,KAAK,IAAI,CAAC;CAC3C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,cAAc,CACZ,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,cAAc,CAAC,CAAC;IAC3B,4BAA4B,CAC1B,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,4BAA4B,CAAC,CAAC;IACzC,oBAAoB,CAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,QAAQ,EAAE,iBAAiB,GAC1B,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;IACvB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,cAAc,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChD,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/B,YAAY,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC;QAC/C,OAAO,EAAE,OAAO,CAAC;QACjB,cAAc,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KAC3D,CAAC,CAAC;IACH,YAAY,IAAI,OAAO,CAAC,YAAY,CAAC,CAAC;IACtC,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IACjC,cAAc,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAClC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;CACrB"}
|
|
@@ -24,36 +24,4 @@ export type ModelPathConfig = {
|
|
|
24
24
|
type: 'auto';
|
|
25
25
|
path: string;
|
|
26
26
|
};
|
|
27
|
-
/**
|
|
28
|
-
* Model type for explicit model detection
|
|
29
|
-
*/
|
|
30
|
-
export type ModelType = 'transducer' | 'paraformer' | 'nemo_ctc' | 'whisper' | 'wenet_ctc' | 'sense_voice' | 'funasr_nano' | 'auto';
|
|
31
|
-
/**
|
|
32
|
-
* Model initialization options
|
|
33
|
-
*/
|
|
34
|
-
export interface InitializeOptions {
|
|
35
|
-
/**
|
|
36
|
-
* Model directory path configuration
|
|
37
|
-
*/
|
|
38
|
-
modelPath: ModelPathConfig | string;
|
|
39
|
-
/**
|
|
40
|
-
* Model quantization preference
|
|
41
|
-
* - true: Prefer int8 quantized models (model.int8.onnx) - smaller, faster
|
|
42
|
-
* - false: Prefer regular models (model.onnx) - higher accuracy
|
|
43
|
-
* - undefined: Try int8 first, then fall back to regular (default behavior)
|
|
44
|
-
*/
|
|
45
|
-
preferInt8?: boolean;
|
|
46
|
-
/**
|
|
47
|
-
* Explicit model type specification
|
|
48
|
-
* - 'transducer': Force detection as Zipformer/Transducer model
|
|
49
|
-
* - 'paraformer': Force detection as Paraformer model
|
|
50
|
-
* - 'nemo_ctc': Force detection as NeMo CTC model
|
|
51
|
-
* - 'whisper': Force detection as Whisper model
|
|
52
|
-
* - 'wenet_ctc': Force detection as WeNet CTC model
|
|
53
|
-
* - 'sense_voice': Force detection as SenseVoice model
|
|
54
|
-
* - 'funasr_nano': Force detection as FunASR Nano model
|
|
55
|
-
* - 'auto': Automatic detection based on files (default)
|
|
56
|
-
*/
|
|
57
|
-
modelType?: ModelType;
|
|
58
|
-
}
|
|
59
27
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB;IACE;;;;OAIG;IACH,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,CAAC
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB;IACE;;;;OAIG;IACH,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,CAAC"}
|