react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
package/src/stt/index.ts
CHANGED
|
@@ -1,89 +1,249 @@
|
|
|
1
|
-
import SherpaOnnx from '../NativeSherpaOnnx';
|
|
2
|
-
import type {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
*
|
|
39
|
-
*
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
73
|
-
*
|
|
74
|
-
*
|
|
75
|
-
* ```
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
*
|
|
83
|
-
*/
|
|
84
|
-
export function
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
1
|
+
import SherpaOnnx from '../NativeSherpaOnnx';
|
|
2
|
+
import type {
|
|
3
|
+
STTInitializeOptions,
|
|
4
|
+
STTModelType,
|
|
5
|
+
SttEngine,
|
|
6
|
+
SttModelOptions,
|
|
7
|
+
SttRecognitionResult,
|
|
8
|
+
SttRuntimeConfig,
|
|
9
|
+
} from './types';
|
|
10
|
+
import type { ModelPathConfig } from '../types';
|
|
11
|
+
import { resolveModelPath } from '../utils';
|
|
12
|
+
|
|
13
|
+
let sttInstanceCounter = 0;
|
|
14
|
+
|
|
15
|
+
/**
 * Normalize a raw recognition payload into a fully populated SttRecognitionResult.
 *
 * Each field is checked at runtime:
 * - `text`, `lang`, `emotion`, `event`: kept when they are strings, otherwise ''.
 * - `tokens`, `timestamps`, `durations`: kept when they are arrays, otherwise [].
 *   Array elements are not inspected individually.
 *
 * A null or undefined `raw` is treated like an empty object, so the caller
 * receives the all-empty result instead of a TypeError on property access.
 *
 * @param raw - Payload to normalize; may be null or undefined.
 * @returns A result object in which every field is present.
 */
function normalizeSttResult(
  raw:
    | {
        text?: string;
        tokens?: string[] | unknown;
        timestamps?: number[] | unknown;
        lang?: string;
        emotion?: string;
        event?: string;
        durations?: number[] | unknown;
      }
    | null
    | undefined
): SttRecognitionResult {
  // Defensive: fall back to an empty payload so the field checks below still run.
  const source: NonNullable<typeof raw> = raw ?? {};

  const asString = (value: unknown): string =>
    typeof value === 'string' ? value : '';
  // Only the container is validated; the element type is taken on trust.
  const asArray = <T>(value: unknown): T[] =>
    Array.isArray(value) ? (value as T[]) : [];

  return {
    text: asString(source.text),
    tokens: asArray<string>(source.tokens),
    timestamps: asArray<number>(source.timestamps),
    lang: asString(source.lang),
    emotion: asString(source.emotion),
    event: asString(source.event),
    durations: asArray<number>(source.durations),
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Report which STT model type(s) a model location contains.
 *
 * The model path is first resolved with resolveModelPath, then handed to the
 * native detectSttModel call together with the optional hints. No engine
 * instance id is involved in this call.
 *
 * @param modelPath - Model path configuration (asset, file, or auto)
 * @param options - Optional preferInt8 and modelType hints; both are forwarded as undefined when omitted
 * @returns Object with success, detectedModels (array of { type, modelDir }), and modelType (primary detected type)
 * @example
 * ```typescript
 * const path = { type: 'asset' as const, path: 'models/sherpa-onnx-whisper-tiny-en' };
 * const result = await detectSttModel(path);
 * if (result.success && result.detectedModels.length > 0) {
 *   console.log('Detected type:', result.modelType, result.detectedModels);
 * }
 * ```
 */
export async function detectSttModel(
  modelPath: ModelPathConfig,
  options?: { preferInt8?: boolean; modelType?: STTModelType }
): Promise<{
  success: boolean;
  detectedModels: Array<{ type: string; modelDir: string }>;
  modelType?: string;
}> {
  const { preferInt8, modelType } = options ?? {};
  const modelDir = await resolveModelPath(modelPath);
  return SherpaOnnx.detectSttModel(modelDir, preferInt8, modelType);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Initialize a native STT recognizer and wrap it in an SttEngine.
 *
 * `options` may be a full STTInitializeOptions object or a bare
 * ModelPathConfig; in the latter case every tuning option is forwarded to the
 * native side as undefined. Each call is assigned its own instance id of the
 * form `stt_<n>`, which the returned engine uses for all native calls.
 *
 * Call destroy() on the returned engine when done to free native resources.
 * After destroy(), transcribeFile, transcribeSamples and setConfig reject with
 * an error, and further destroy() calls do nothing.
 *
 * @param options - STT initialization options or model path configuration
 * @returns Promise resolving to an SttEngine instance
 * @throws Error when native initialization reports `success === false`; the
 *   message contains the JSON-encoded `detectedModels` list.
 * @example
 * ```typescript
 * const stt = await createSTT({
 *   modelPath: { type: 'asset', path: 'models/whisper-tiny' },
 * });
 * const result = await stt.transcribeFile('/path/to/audio.wav');
 * console.log(result.text);
 * await stt.destroy();
 * ```
 */
export async function createSTT(
  options: STTInitializeOptions | ModelPathConfig
): Promise<SttEngine> {
  sttInstanceCounter += 1;
  const instanceId = `stt_${sttInstanceCounter}`;

  // A bare ModelPathConfig is handled as options with only `modelPath` set,
  // which leaves every other setting undefined.
  const init: STTInitializeOptions =
    'modelPath' in options ? options : { modelPath: options };

  const resolvedPath = await resolveModelPath(init.modelPath);

  const initResult = await SherpaOnnx.initializeStt(
    instanceId,
    resolvedPath,
    init.preferInt8,
    init.modelType,
    init.debug,
    init.hotwordsFile,
    init.hotwordsScore,
    init.numThreads,
    init.provider,
    init.ruleFsts,
    init.ruleFars,
    init.dither,
    init.modelOptions,
    init.modelingUnit,
    init.bpeVocab
  );

  if (!initResult.success) {
    const detected = JSON.stringify(initResult.detectedModels ?? []);
    throw new Error(`STT initialization failed: ${detected}`);
  }

  let destroyed = false;

  // Throws once destroy() has been called, so no further native calls are made.
  const assertAlive = (): void => {
    if (!destroyed) return;
    throw new Error(
      `STT instance ${instanceId} has been destroyed; cannot call methods on it.`
    );
  };

  // Runtime settings forwarded by setConfig, in the order they are written to the map.
  const runtimeConfigKeys = [
    'decodingMethod',
    'maxActivePaths',
    'hotwordsFile',
    'hotwordsScore',
    'blankPenalty',
    'ruleFsts',
    'ruleFars',
  ] as const;

  return {
    get instanceId() {
      return instanceId;
    },

    async transcribeFile(filePath: string): Promise<SttRecognitionResult> {
      assertAlive();
      return normalizeSttResult(
        await SherpaOnnx.transcribeFile(instanceId, filePath)
      );
    },

    async transcribeSamples(
      samples: number[],
      sampleRate: number
    ): Promise<SttRecognitionResult> {
      assertAlive();
      return normalizeSttResult(
        await SherpaOnnx.transcribeSamples(instanceId, samples, sampleRate)
      );
    },

    async setConfig(config: SttRuntimeConfig): Promise<void> {
      assertAlive();
      // Only settings that are neither null nor undefined are sent.
      const payload: Record<string, string | number> = {};
      for (const key of runtimeConfigKeys) {
        const value = config[key];
        if (value != null) payload[key] = value;
      }
      return SherpaOnnx.setSttConfig(instanceId, payload);
    },

    async destroy(): Promise<void> {
      if (!destroyed) {
        // Flag first so calls issued while unloading are rejected by assertAlive.
        destroyed = true;
        await SherpaOnnx.unloadStt(instanceId);
      }
    },
  };
}
|
|
221
|
+
|
|
222
|
+
// Export types and runtime type list
|
|
223
|
+
export type {
|
|
224
|
+
STTInitializeOptions,
|
|
225
|
+
STTModelType,
|
|
226
|
+
SttModelOptions,
|
|
227
|
+
SttRecognitionResult,
|
|
228
|
+
SttRuntimeConfig,
|
|
229
|
+
SttEngine,
|
|
230
|
+
SttInitResult,
|
|
231
|
+
} from './types';
|
|
232
|
+
export {
|
|
233
|
+
STT_MODEL_TYPES,
|
|
234
|
+
STT_HOTWORDS_MODEL_TYPES,
|
|
235
|
+
sttSupportsHotwords,
|
|
236
|
+
} from './types';
|
|
237
|
+
export {
|
|
238
|
+
getWhisperLanguages,
|
|
239
|
+
WHISPER_LANGUAGES,
|
|
240
|
+
getSenseVoiceLanguages,
|
|
241
|
+
SENSEVOICE_LANGUAGES,
|
|
242
|
+
getCanaryLanguages,
|
|
243
|
+
CANARY_LANGUAGES,
|
|
244
|
+
getFunasrNanoLanguages,
|
|
245
|
+
FUNASR_NANO_LANGUAGES,
|
|
246
|
+
getFunasrMltNanoLanguages,
|
|
247
|
+
FUNASR_MLT_NANO_LANGUAGES,
|
|
248
|
+
} from './sttModelLanguages';
|
|
249
|
+
export type { SttModelLanguage, WhisperLanguage } from './sttModelLanguages';
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STT model language codes and display names.
|
|
3
|
+
* Per-model lists for Whisper, SenseVoice, and others. Use these for language-hint
|
|
4
|
+
* dropdowns so users only pick valid codes (invalid codes can crash the app, e.g. Whisper).
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** One selectable language for an STT model: the value to send plus an English label. */
export interface SttModelLanguage {
  /**
   * Value to pass as language (e.g. "en" for Whisper, "中文" for FunASR Nano).
   * Use as modelOptions.<model>.language (or srcLang/tgtLang where applicable).
   */
  id: string;
  /** Display name in English (e.g. "english", "chinese"). */
  name: string;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Alias of SttModelLanguage.
 *
 * @deprecated Use SttModelLanguage. Kept for backward compatibility.
 */
export type WhisperLanguage = SttModelLanguage;
|
|
19
|
+
|
|
20
|
+
// ========== Whisper ==========
|
|
21
|
+
// https://github.com/ggml-org/whisper.cpp/blob/d682e150908e10caa4c15883c633d7902d385237/src/whisper.cpp#L248
|
|
22
|
+
|
|
23
|
+
/**
 * Ordered list of all Whisper-supported language codes and names.
 *
 * `id` is the language code and `name` is the lowercase English name. Most
 * codes are two letters; 'haw' (hawaiian) and 'yue' (cantonese) are three.
 */
export const WHISPER_LANGUAGES: readonly SttModelLanguage[] = [
  { id: 'en', name: 'english' },
  { id: 'zh', name: 'chinese' },
  { id: 'de', name: 'german' },
  { id: 'es', name: 'spanish' },
  { id: 'ru', name: 'russian' },
  { id: 'ko', name: 'korean' },
  { id: 'fr', name: 'french' },
  { id: 'ja', name: 'japanese' },
  { id: 'pt', name: 'portuguese' },
  { id: 'tr', name: 'turkish' },
  { id: 'pl', name: 'polish' },
  { id: 'ca', name: 'catalan' },
  { id: 'nl', name: 'dutch' },
  { id: 'ar', name: 'arabic' },
  { id: 'sv', name: 'swedish' },
  { id: 'it', name: 'italian' },
  { id: 'id', name: 'indonesian' },
  { id: 'hi', name: 'hindi' },
  { id: 'fi', name: 'finnish' },
  { id: 'vi', name: 'vietnamese' },
  { id: 'he', name: 'hebrew' },
  { id: 'uk', name: 'ukrainian' },
  { id: 'el', name: 'greek' },
  { id: 'ms', name: 'malay' },
  { id: 'cs', name: 'czech' },
  { id: 'ro', name: 'romanian' },
  { id: 'da', name: 'danish' },
  { id: 'hu', name: 'hungarian' },
  { id: 'ta', name: 'tamil' },
  { id: 'no', name: 'norwegian' },
  { id: 'th', name: 'thai' },
  { id: 'ur', name: 'urdu' },
  { id: 'hr', name: 'croatian' },
  { id: 'bg', name: 'bulgarian' },
  { id: 'lt', name: 'lithuanian' },
  { id: 'la', name: 'latin' },
  { id: 'mi', name: 'maori' },
  { id: 'ml', name: 'malayalam' },
  { id: 'cy', name: 'welsh' },
  { id: 'sk', name: 'slovak' },
  { id: 'te', name: 'telugu' },
  { id: 'fa', name: 'persian' },
  { id: 'lv', name: 'latvian' },
  { id: 'bn', name: 'bengali' },
  { id: 'sr', name: 'serbian' },
  { id: 'az', name: 'azerbaijani' },
  { id: 'sl', name: 'slovenian' },
  { id: 'kn', name: 'kannada' },
  { id: 'et', name: 'estonian' },
  { id: 'mk', name: 'macedonian' },
  { id: 'br', name: 'breton' },
  { id: 'eu', name: 'basque' },
  { id: 'is', name: 'icelandic' },
  { id: 'hy', name: 'armenian' },
  { id: 'ne', name: 'nepali' },
  { id: 'mn', name: 'mongolian' },
  { id: 'bs', name: 'bosnian' },
  { id: 'kk', name: 'kazakh' },
  { id: 'sq', name: 'albanian' },
  { id: 'sw', name: 'swahili' },
  { id: 'gl', name: 'galician' },
  { id: 'mr', name: 'marathi' },
  { id: 'pa', name: 'punjabi' },
  { id: 'si', name: 'sinhala' },
  { id: 'km', name: 'khmer' },
  { id: 'sn', name: 'shona' },
  { id: 'yo', name: 'yoruba' },
  { id: 'so', name: 'somali' },
  { id: 'af', name: 'afrikaans' },
  { id: 'oc', name: 'occitan' },
  { id: 'ka', name: 'georgian' },
  { id: 'be', name: 'belarusian' },
  { id: 'tg', name: 'tajik' },
  { id: 'sd', name: 'sindhi' },
  { id: 'gu', name: 'gujarati' },
  { id: 'am', name: 'amharic' },
  { id: 'yi', name: 'yiddish' },
  { id: 'lo', name: 'lao' },
  { id: 'uz', name: 'uzbek' },
  { id: 'fo', name: 'faroese' },
  { id: 'ht', name: 'haitian creole' },
  { id: 'ps', name: 'pashto' },
  { id: 'tk', name: 'turkmen' },
  { id: 'nn', name: 'nynorsk' },
  { id: 'mt', name: 'maltese' },
  { id: 'sa', name: 'sanskrit' },
  { id: 'lb', name: 'luxembourgish' },
  { id: 'my', name: 'myanmar' },
  { id: 'bo', name: 'tibetan' },
  { id: 'tl', name: 'tagalog' },
  { id: 'mg', name: 'malagasy' },
  { id: 'as', name: 'assamese' },
  { id: 'tt', name: 'tatar' },
  { id: 'haw', name: 'hawaiian' },
  { id: 'ln', name: 'lingala' },
  { id: 'ha', name: 'hausa' },
  { id: 'ba', name: 'bashkir' },
  { id: 'jw', name: 'javanese' },
  { id: 'su', name: 'sundanese' },
  { id: 'yue', name: 'cantonese' },
] as const;
|
|
126
|
+
|
|
127
|
+
/**
 * Returns the list of Whisper-supported language codes and display names.
 * Use for building a language-hint dropdown so users only pick valid codes (invalid codes can crash the app).
 *
 * @returns The shared WHISPER_LANGUAGES array itself (not a copy).
 */
export function getWhisperLanguages(): readonly SttModelLanguage[] {
  return WHISPER_LANGUAGES;
}
|
|
134
|
+
|
|
135
|
+
// ========== SenseVoice ==========
|
|
136
|
+
// https://github.com/FunAudioLLM/SenseVoice/blob/1a90d46cb933ef9e213b7d90292b9301b3e20f40/api.py#L22
|
|
137
|
+
|
|
138
|
+
/**
 * Ordered list of SenseVoice-supported language codes and names.
 *
 * The first entry, 'auto', is listed alongside the concrete language codes.
 */
export const SENSEVOICE_LANGUAGES: readonly SttModelLanguage[] = [
  { id: 'auto', name: 'auto' },
  { id: 'zh', name: 'chinese' },
  { id: 'en', name: 'english' },
  { id: 'yue', name: 'cantonese' },
  { id: 'ja', name: 'japanese' },
  { id: 'ko', name: 'korean' },
] as const;
|
|
147
|
+
|
|
148
|
+
/**
 * Returns the list of SenseVoice-supported language codes and display names.
 * Use for modelOptions.senseVoice.language so users only pick valid codes.
 *
 * @returns The shared SENSEVOICE_LANGUAGES array itself (not a copy).
 */
export function getSenseVoiceLanguages(): readonly SttModelLanguage[] {
  return SENSEVOICE_LANGUAGES;
}
|
|
155
|
+
|
|
156
|
+
// ========== Canary ==========
|
|
157
|
+
// Used for modelOptions.canary.srcLang and modelOptions.canary.tgtLang.
|
|
158
|
+
// sherpa-onnx canary only supports 4 languages as it is the 180m model. The 1b model supports 25 languages.
|
|
159
|
+
// https://build.nvidia.com/nvidia/canary-1b-asr/modelcard
|
|
160
|
+
|
|
161
|
+
/**
 * Canary: en, es, de, fr.
 *
 * `id` is the language code and `name` is the lowercase English name.
 */
export const CANARY_LANGUAGES: readonly SttModelLanguage[] = [
  { id: 'en', name: 'english' },
  { id: 'es', name: 'spanish' },
  { id: 'de', name: 'german' },
  { id: 'fr', name: 'french' },
] as const;
|
|
168
|
+
|
|
169
|
+
/**
 * Returns the list of Canary-supported language codes and display names.
 * Use for modelOptions.canary.srcLang and modelOptions.canary.tgtLang.
 *
 * @returns The shared CANARY_LANGUAGES array itself (not a copy).
 */
export function getCanaryLanguages(): readonly SttModelLanguage[] {
  return CANARY_LANGUAGES;
}
|
|
176
|
+
|
|
177
|
+
// ========== FunASR Nano ==========
|
|
178
|
+
// Ids are the values passed to model.generate(..., language="中文"). Names are English display names.
|
|
179
|
+
// https://github.com/FunAudioLLM/Fun-ASR/blob/7dfdb6639e2ba861d3311a8d8c0e3578a8d24122/README.md?plain=1#L99
|
|
180
|
+
|
|
181
|
+
/**
 * Fun-ASR-Nano-2512: Chinese, English, Japanese.
 *
 * `id` is the Chinese-language name of the language; `name` is the lowercase
 * English display name.
 */
export const FUNASR_NANO_LANGUAGES: readonly SttModelLanguage[] = [
  { id: '中文', name: 'chinese' },
  { id: '英文', name: 'english' },
  { id: '日文', name: 'japanese' },
] as const;
|
|
187
|
+
|
|
188
|
+
/**
 * Fun-ASR-MLT-Nano-2512: multilingual list.
 *
 * `id` is the Chinese-language name of the language; `name` is the lowercase
 * English display name.
 */
export const FUNASR_MLT_NANO_LANGUAGES: readonly SttModelLanguage[] = [
  { id: '中文', name: 'chinese' },
  { id: '英文', name: 'english' },
  { id: '粤语', name: 'cantonese' },
  { id: '日文', name: 'japanese' },
  { id: '韩文', name: 'korean' },
  { id: '越南语', name: 'vietnamese' },
  { id: '印尼语', name: 'indonesian' },
  { id: '泰语', name: 'thai' },
  { id: '马来语', name: 'malay' },
  { id: '菲律宾语', name: 'filipino' },
  { id: '阿拉伯语', name: 'arabic' },
  { id: '印地语', name: 'hindi' },
  { id: '保加利亚语', name: 'bulgarian' },
  { id: '克罗地亚语', name: 'croatian' },
  { id: '捷克语', name: 'czech' },
  { id: '丹麦语', name: 'danish' },
  { id: '荷兰语', name: 'dutch' },
  { id: '爱沙尼亚语', name: 'estonian' },
  { id: '芬兰语', name: 'finnish' },
  { id: '希腊语', name: 'greek' },
  { id: '匈牙利语', name: 'hungarian' },
  { id: '爱尔兰语', name: 'irish' },
  { id: '拉脱维亚语', name: 'latvian' },
  { id: '立陶宛语', name: 'lithuanian' },
  { id: '马耳他语', name: 'maltese' },
  { id: '波兰语', name: 'polish' },
  { id: '葡萄牙语', name: 'portuguese' },
  { id: '罗马尼亚语', name: 'romanian' },
  { id: '斯洛伐克语', name: 'slovak' },
  { id: '斯洛文尼亚语', name: 'slovenian' },
  { id: '瑞典语', name: 'swedish' },
] as const;
|
|
222
|
+
|
|
223
|
+
/**
 * Returns languages for Fun-ASR-Nano-2512 (中文, 英文, 日文).
 * Id is the value for modelOptions.funasrNano.language (e.g. "中文").
 *
 * @returns The shared FUNASR_NANO_LANGUAGES array itself (not a copy).
 */
export function getFunasrNanoLanguages(): readonly SttModelLanguage[] {
  return FUNASR_NANO_LANGUAGES;
}
|
|
230
|
+
|
|
231
|
+
/**
 * Returns languages for Fun-ASR-MLT-Nano-2512 (multilingual).
 * Id is the value for modelOptions.funasrNano.language (e.g. "中文").
 *
 * @returns The shared FUNASR_MLT_NANO_LANGUAGES array itself (not a copy).
 */
export function getFunasrMltNanoLanguages(): readonly SttModelLanguage[] {
  return FUNASR_MLT_NANO_LANGUAGES;
}
|