react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
|
@@ -1,441 +0,0 @@
|
|
|
1
|
-
#include "sherpa-onnx-model-detect.h"

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <string>
#include <system_error>
#include <vector>
|
|
8
|
-
|
|
9
|
-
namespace fs = std::filesystem;
|
|
10
|
-
|
|
11
|
-
namespace sherpaonnx {
|
|
12
|
-
namespace {
|
|
13
|
-
|
|
14
|
-
// Returns true when `path` names an existing filesystem entry (file or
// directory).
//
// Uses the non-throwing std::filesystem overload: a path that cannot be
// inspected (name too long, permission denied, I/O error, ...) is reported as
// "does not exist" instead of raising std::filesystem::filesystem_error, because
// the detection code calls this helper outside of any try block.
bool FileExists(const std::string& path) {
  std::error_code ec;
  return std::filesystem::exists(path, ec);
}
|
|
17
|
-
|
|
18
|
-
// Returns true when `path` exists and is a directory.
//
// Uses the non-throwing std::filesystem overload so that an uninspectable path
// (name too long, permission denied, I/O error, ...) yields false rather than a
// std::filesystem::filesystem_error escaping from model detection.
bool IsDirectory(const std::string& path) {
  std::error_code ec;
  return std::filesystem::is_directory(path, ec);
}
|
|
21
|
-
|
|
22
|
-
// Returns a copy of `value` in which every byte has been passed through
// std::tolower.
std::string ToLower(std::string value) {
  for (char& ch : value) {
    // Widen through unsigned char first: std::tolower is undefined for
    // negative values other than EOF.
    const unsigned char raw = static_cast<unsigned char>(ch);
    ch = static_cast<char>(std::tolower(raw));
  }
  return value;
}
|
|
28
|
-
|
|
29
|
-
std::string ResolveTokenizerDir(const std::string& modelDir) {
|
|
30
|
-
std::string vocabInMain = modelDir + "/vocab.json";
|
|
31
|
-
if (FileExists(vocabInMain)) {
|
|
32
|
-
return modelDir;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
try {
|
|
36
|
-
for (const auto& entry : fs::directory_iterator(modelDir)) {
|
|
37
|
-
if (entry.is_directory()) {
|
|
38
|
-
std::string dirName = entry.path().filename().string();
|
|
39
|
-
std::string dirNameLower = ToLower(dirName);
|
|
40
|
-
if (dirNameLower.find("qwen3") != std::string::npos) {
|
|
41
|
-
std::string vocabPath = entry.path().string() + "/vocab.json";
|
|
42
|
-
if (FileExists(vocabPath)) {
|
|
43
|
-
return entry.path().string();
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
} catch (const std::exception&) {
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
std::string commonPath = modelDir + "/Qwen3-0.6B";
|
|
52
|
-
if (FileExists(commonPath + "/vocab.json")) {
|
|
53
|
-
return commonPath;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
return "";
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
// Maps a user-supplied STT model type string to its SttModelKind.
// Matching is exact and case-sensitive; "ctc" is accepted as an alias of
// "zipformer_ctc". Any other string yields SttModelKind::kUnknown.
SttModelKind ParseSttModelType(const std::string& modelType) {
  struct NamedKind {
    const char* name;
    SttModelKind kind;
  };
  static const NamedKind kKnownTypes[] = {
      {"transducer", SttModelKind::kTransducer},
      {"paraformer", SttModelKind::kParaformer},
      {"nemo_ctc", SttModelKind::kNemoCtc},
      {"wenet_ctc", SttModelKind::kWenetCtc},
      {"sense_voice", SttModelKind::kSenseVoice},
      {"zipformer_ctc", SttModelKind::kZipformerCtc},
      {"ctc", SttModelKind::kZipformerCtc},
      {"whisper", SttModelKind::kWhisper},
      {"funasr_nano", SttModelKind::kFunAsrNano},
  };

  for (const NamedKind& candidate : kKnownTypes) {
    if (modelType == candidate.name) {
      return candidate.kind;
    }
  }
  return SttModelKind::kUnknown;
}
|
|
70
|
-
|
|
71
|
-
// Maps a user-supplied TTS model type string to its TtsModelKind.
// Matching is exact and case-sensitive; any unrecognised string yields
// TtsModelKind::kUnknown.
TtsModelKind ParseTtsModelType(const std::string& modelType) {
  struct NamedKind {
    const char* name;
    TtsModelKind kind;
  };
  static const NamedKind kKnownTypes[] = {
      {"vits", TtsModelKind::kVits},
      {"matcha", TtsModelKind::kMatcha},
      {"kokoro", TtsModelKind::kKokoro},
      {"kitten", TtsModelKind::kKitten},
      {"zipvoice", TtsModelKind::kZipvoice},
  };

  for (const NamedKind& candidate : kKnownTypes) {
    if (modelType == candidate.name) {
      return candidate.kind;
    }
  }
  return TtsModelKind::kUnknown;
}
|
|
79
|
-
|
|
80
|
-
} // namespace
|
|
81
|
-
|
|
82
|
-
SttDetectResult DetectSttModel(
|
|
83
|
-
const std::string& modelDir,
|
|
84
|
-
const std::optional<bool>& preferInt8,
|
|
85
|
-
const std::optional<std::string>& modelType
|
|
86
|
-
) {
|
|
87
|
-
SttDetectResult result;
|
|
88
|
-
|
|
89
|
-
if (modelDir.empty()) {
|
|
90
|
-
result.error = "Model directory is empty";
|
|
91
|
-
return result;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
if (!FileExists(modelDir) || !IsDirectory(modelDir)) {
|
|
95
|
-
result.error = "Model directory does not exist or is not a directory: " + modelDir;
|
|
96
|
-
return result;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
std::string encoderPath = modelDir + "/encoder.onnx";
|
|
100
|
-
std::string decoderPath = modelDir + "/decoder.onnx";
|
|
101
|
-
std::string joinerPath = modelDir + "/joiner.onnx";
|
|
102
|
-
std::string encoderPathInt8 = modelDir + "/encoder.int8.onnx";
|
|
103
|
-
std::string decoderPathInt8 = modelDir + "/decoder.int8.onnx";
|
|
104
|
-
std::string paraformerPathInt8 = modelDir + "/model.int8.onnx";
|
|
105
|
-
std::string paraformerPath = modelDir + "/model.onnx";
|
|
106
|
-
std::string ctcPathInt8 = modelDir + "/model.int8.onnx";
|
|
107
|
-
std::string ctcPath = modelDir + "/model.onnx";
|
|
108
|
-
std::string tokensPath = modelDir + "/tokens.txt";
|
|
109
|
-
|
|
110
|
-
std::string funasrEncoderAdaptor = modelDir + "/encoder_adaptor.onnx";
|
|
111
|
-
std::string funasrEncoderAdaptorInt8 = modelDir + "/encoder_adaptor.int8.onnx";
|
|
112
|
-
std::string funasrLLM = modelDir + "/llm.onnx";
|
|
113
|
-
std::string funasrLLMInt8 = modelDir + "/llm.int8.onnx";
|
|
114
|
-
std::string funasrEmbedding = modelDir + "/embedding.onnx";
|
|
115
|
-
std::string funasrEmbeddingInt8 = modelDir + "/embedding.int8.onnx";
|
|
116
|
-
|
|
117
|
-
std::string funasrTokenizerDir = ResolveTokenizerDir(modelDir);
|
|
118
|
-
|
|
119
|
-
std::string paraformerModelPath;
|
|
120
|
-
if (preferInt8.has_value()) {
|
|
121
|
-
if (preferInt8.value()) {
|
|
122
|
-
if (FileExists(paraformerPathInt8)) {
|
|
123
|
-
paraformerModelPath = paraformerPathInt8;
|
|
124
|
-
} else if (FileExists(paraformerPath)) {
|
|
125
|
-
paraformerModelPath = paraformerPath;
|
|
126
|
-
}
|
|
127
|
-
} else {
|
|
128
|
-
if (FileExists(paraformerPath)) {
|
|
129
|
-
paraformerModelPath = paraformerPath;
|
|
130
|
-
} else if (FileExists(paraformerPathInt8)) {
|
|
131
|
-
paraformerModelPath = paraformerPathInt8;
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
} else {
|
|
135
|
-
if (FileExists(paraformerPathInt8)) {
|
|
136
|
-
paraformerModelPath = paraformerPathInt8;
|
|
137
|
-
} else if (FileExists(paraformerPath)) {
|
|
138
|
-
paraformerModelPath = paraformerPath;
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
std::string ctcModelPath;
|
|
143
|
-
if (preferInt8.has_value()) {
|
|
144
|
-
if (preferInt8.value()) {
|
|
145
|
-
if (FileExists(ctcPathInt8)) {
|
|
146
|
-
ctcModelPath = ctcPathInt8;
|
|
147
|
-
} else if (FileExists(ctcPath)) {
|
|
148
|
-
ctcModelPath = ctcPath;
|
|
149
|
-
}
|
|
150
|
-
} else {
|
|
151
|
-
if (FileExists(ctcPath)) {
|
|
152
|
-
ctcModelPath = ctcPath;
|
|
153
|
-
} else if (FileExists(ctcPathInt8)) {
|
|
154
|
-
ctcModelPath = ctcPathInt8;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
} else {
|
|
158
|
-
if (FileExists(ctcPathInt8)) {
|
|
159
|
-
ctcModelPath = ctcPathInt8;
|
|
160
|
-
} else if (FileExists(ctcPath)) {
|
|
161
|
-
ctcModelPath = ctcPath;
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
bool hasTransducer = FileExists(encoderPath) &&
|
|
166
|
-
FileExists(decoderPath) &&
|
|
167
|
-
FileExists(joinerPath);
|
|
168
|
-
|
|
169
|
-
bool hasWhisperEncoder = FileExists(encoderPath) || FileExists(encoderPathInt8);
|
|
170
|
-
bool hasWhisperDecoder = FileExists(decoderPath) || FileExists(decoderPathInt8);
|
|
171
|
-
bool hasWhisper = hasWhisperEncoder && hasWhisperDecoder && !FileExists(joinerPath);
|
|
172
|
-
|
|
173
|
-
bool hasFunAsrEncoderAdaptor = FileExists(funasrEncoderAdaptor) || FileExists(funasrEncoderAdaptorInt8);
|
|
174
|
-
bool hasFunAsrLLM = FileExists(funasrLLM) || FileExists(funasrLLMInt8);
|
|
175
|
-
bool hasFunAsrEmbedding = FileExists(funasrEmbedding) || FileExists(funasrEmbeddingInt8);
|
|
176
|
-
bool hasFunAsrTokenizer = !funasrTokenizerDir.empty() && FileExists(funasrTokenizerDir + "/vocab.json");
|
|
177
|
-
bool hasFunAsrNano = hasFunAsrEncoderAdaptor && hasFunAsrLLM && hasFunAsrEmbedding && hasFunAsrTokenizer;
|
|
178
|
-
|
|
179
|
-
bool isLikelyNemoCtc = modelDir.find("nemo") != std::string::npos ||
|
|
180
|
-
modelDir.find("parakeet") != std::string::npos;
|
|
181
|
-
bool isLikelyWenetCtc = modelDir.find("wenet") != std::string::npos;
|
|
182
|
-
bool isLikelySenseVoice = modelDir.find("sense") != std::string::npos ||
|
|
183
|
-
modelDir.find("sensevoice") != std::string::npos;
|
|
184
|
-
bool isLikelyFunAsrNano = modelDir.find("funasr") != std::string::npos ||
|
|
185
|
-
modelDir.find("funasr-nano") != std::string::npos;
|
|
186
|
-
|
|
187
|
-
if (hasTransducer) {
|
|
188
|
-
result.detectedModels.push_back({"transducer", modelDir});
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
if (!ctcModelPath.empty() && (isLikelyNemoCtc || isLikelyWenetCtc || isLikelySenseVoice)) {
|
|
192
|
-
if (isLikelyNemoCtc) {
|
|
193
|
-
result.detectedModels.push_back({"nemo_ctc", modelDir});
|
|
194
|
-
} else if (isLikelyWenetCtc) {
|
|
195
|
-
result.detectedModels.push_back({"wenet_ctc", modelDir});
|
|
196
|
-
} else if (isLikelySenseVoice) {
|
|
197
|
-
result.detectedModels.push_back({"sense_voice", modelDir});
|
|
198
|
-
} else {
|
|
199
|
-
result.detectedModels.push_back({"ctc", modelDir});
|
|
200
|
-
}
|
|
201
|
-
} else if (!paraformerModelPath.empty()) {
|
|
202
|
-
result.detectedModels.push_back({"paraformer", modelDir});
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
if (hasWhisper) {
|
|
206
|
-
result.detectedModels.push_back({"whisper", modelDir});
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
if (hasFunAsrNano) {
|
|
210
|
-
result.detectedModels.push_back({"funasr_nano", modelDir});
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
SttModelKind selected = SttModelKind::kUnknown;
|
|
214
|
-
|
|
215
|
-
if (modelType.has_value() && modelType.value() != "auto") {
|
|
216
|
-
selected = ParseSttModelType(modelType.value());
|
|
217
|
-
if (selected == SttModelKind::kUnknown) {
|
|
218
|
-
result.error = "Unknown model type: " + modelType.value();
|
|
219
|
-
return result;
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
if (selected == SttModelKind::kTransducer && !hasTransducer) {
|
|
223
|
-
result.error = "Transducer model requested but files not found in " + modelDir;
|
|
224
|
-
return result;
|
|
225
|
-
}
|
|
226
|
-
if (selected == SttModelKind::kParaformer && paraformerModelPath.empty()) {
|
|
227
|
-
result.error = "Paraformer model requested but model.onnx not found in " + modelDir;
|
|
228
|
-
return result;
|
|
229
|
-
}
|
|
230
|
-
if ((selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
|
|
231
|
-
selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc) &&
|
|
232
|
-
ctcModelPath.empty()) {
|
|
233
|
-
result.error = "CTC model requested but model.onnx not found in " + modelDir;
|
|
234
|
-
return result;
|
|
235
|
-
}
|
|
236
|
-
if (selected == SttModelKind::kWhisper && !hasWhisper) {
|
|
237
|
-
result.error = "Whisper model requested but encoder/decoder not found in " + modelDir;
|
|
238
|
-
return result;
|
|
239
|
-
}
|
|
240
|
-
if (selected == SttModelKind::kFunAsrNano && !hasFunAsrNano) {
|
|
241
|
-
result.error = "FunASR Nano model requested but required files not found in " + modelDir;
|
|
242
|
-
return result;
|
|
243
|
-
}
|
|
244
|
-
} else {
|
|
245
|
-
if (hasTransducer) {
|
|
246
|
-
selected = SttModelKind::kTransducer;
|
|
247
|
-
} else if (!ctcModelPath.empty() && (isLikelyNemoCtc || isLikelyWenetCtc || isLikelySenseVoice)) {
|
|
248
|
-
if (isLikelyNemoCtc) {
|
|
249
|
-
selected = SttModelKind::kNemoCtc;
|
|
250
|
-
} else if (isLikelyWenetCtc) {
|
|
251
|
-
selected = SttModelKind::kWenetCtc;
|
|
252
|
-
} else {
|
|
253
|
-
selected = SttModelKind::kSenseVoice;
|
|
254
|
-
}
|
|
255
|
-
} else if (hasFunAsrNano && isLikelyFunAsrNano) {
|
|
256
|
-
selected = SttModelKind::kFunAsrNano;
|
|
257
|
-
} else if (!paraformerModelPath.empty()) {
|
|
258
|
-
selected = SttModelKind::kParaformer;
|
|
259
|
-
} else if (hasWhisper) {
|
|
260
|
-
selected = SttModelKind::kWhisper;
|
|
261
|
-
} else if (hasFunAsrNano) {
|
|
262
|
-
selected = SttModelKind::kFunAsrNano;
|
|
263
|
-
} else if (!ctcModelPath.empty()) {
|
|
264
|
-
selected = SttModelKind::kZipformerCtc;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
if (selected == SttModelKind::kUnknown) {
|
|
269
|
-
result.error = "No compatible model type detected in " + modelDir;
|
|
270
|
-
return result;
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
result.selectedKind = selected;
|
|
274
|
-
result.tokensRequired = !(selected == SttModelKind::kWhisper || selected == SttModelKind::kFunAsrNano);
|
|
275
|
-
|
|
276
|
-
if (selected == SttModelKind::kTransducer) {
|
|
277
|
-
result.paths.encoder = encoderPath;
|
|
278
|
-
result.paths.decoder = decoderPath;
|
|
279
|
-
result.paths.joiner = joinerPath;
|
|
280
|
-
} else if (selected == SttModelKind::kParaformer) {
|
|
281
|
-
result.paths.paraformerModel = paraformerModelPath;
|
|
282
|
-
} else if (selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
|
|
283
|
-
selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc) {
|
|
284
|
-
result.paths.ctcModel = ctcModelPath;
|
|
285
|
-
} else if (selected == SttModelKind::kWhisper) {
|
|
286
|
-
result.paths.whisperEncoder = FileExists(encoderPathInt8) ? encoderPathInt8 : encoderPath;
|
|
287
|
-
result.paths.whisperDecoder = FileExists(decoderPathInt8) ? decoderPathInt8 : decoderPath;
|
|
288
|
-
} else if (selected == SttModelKind::kFunAsrNano) {
|
|
289
|
-
result.paths.funasrEncoderAdaptor = FileExists(funasrEncoderAdaptorInt8) ? funasrEncoderAdaptorInt8 : funasrEncoderAdaptor;
|
|
290
|
-
result.paths.funasrLLM = FileExists(funasrLLMInt8) ? funasrLLMInt8 : funasrLLM;
|
|
291
|
-
result.paths.funasrEmbedding = FileExists(funasrEmbeddingInt8) ? funasrEmbeddingInt8 : funasrEmbedding;
|
|
292
|
-
result.paths.funasrTokenizer = funasrTokenizerDir + "/vocab.json";
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
if (FileExists(tokensPath)) {
|
|
296
|
-
result.paths.tokens = tokensPath;
|
|
297
|
-
} else if (result.tokensRequired) {
|
|
298
|
-
result.error = "Tokens file not found at " + tokensPath;
|
|
299
|
-
return result;
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
result.ok = true;
|
|
303
|
-
return result;
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
// Inspects `modelDir` and decides which text-to-speech model it contains.
//
// Parameters:
//   modelDir  - directory holding the model files.
//   modelType - explicit model type, or "auto" to auto-detect.
//
// Returns a TtsDetectResult. On success `ok` is true and `selectedKind` and
// `paths` are filled in. On failure `ok` keeps its default and `error` carries a
// message prefixed with "TTS: ". `detectedModels` lists every model family
// whose files were found.
//
// Auto-detection priority: Matcha (acoustic_model + vocoder), Zipvoice
// (encoder + decoder + vocoder), Kokoro/Kitten (voices.bin), VITS (model file).
// With voices.bin present, Kitten is chosen only when the directory name
// contains "kitten" and not "kokoro"; every other case resolves to Kokoro.
// Every model kind requires an espeak-ng-data directory and tokens.txt.
TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& modelType) {
  TtsDetectResult result;

  if (modelDir.empty()) {
    result.error = "TTS: Model directory is empty";
    return result;
  }

  if (!FileExists(modelDir) || !IsDirectory(modelDir)) {
    result.error = "TTS: Model directory does not exist or is not a directory: " + modelDir;
    return result;
  }

  const std::string modelOnnx = modelDir + "/model.onnx";
  const std::string modelFp16 = modelDir + "/model.fp16.onnx";
  const std::string modelInt8 = modelDir + "/model.int8.onnx";
  const std::string tokensFile = modelDir + "/tokens.txt";
  const std::string lexiconFile = modelDir + "/lexicon.txt";
  const std::string dataDirPath = modelDir + "/espeak-ng-data";
  const std::string voicesFile = modelDir + "/voices.bin";
  const std::string acousticModel = modelDir + "/acoustic_model.onnx";
  const std::string vocoder = modelDir + "/vocoder.onnx";
  const std::string encoder = modelDir + "/encoder.onnx";
  const std::string decoder = modelDir + "/decoder.onnx";

  // Single-file model, smallest variant first: int8, then fp16, then fp32.
  std::string ttsModel;
  if (FileExists(modelInt8)) {
    ttsModel = modelInt8;
  } else if (FileExists(modelFp16)) {
    ttsModel = modelFp16;
  } else if (FileExists(modelOnnx)) {
    ttsModel = modelOnnx;
  }

  const bool hasVits = !ttsModel.empty();
  const bool hasVocoder = FileExists(vocoder);
  const bool hasMatcha = FileExists(acousticModel) && hasVocoder;
  const bool hasVoicesFile = FileExists(voicesFile);
  const bool hasZipvoice = FileExists(encoder) && FileExists(decoder) && hasVocoder;
  const bool hasDataDir = IsDirectory(dataDirPath);

  const std::string modelDirLower = ToLower(modelDir);
  const bool isLikelyKitten = modelDirLower.find("kitten") != std::string::npos;
  const bool isLikelyKokoro = modelDirLower.find("kokoro") != std::string::npos;

  if (hasMatcha) {
    result.detectedModels.push_back({"matcha", modelDir});
  }
  if (hasZipvoice && !hasMatcha) {
    result.detectedModels.push_back({"zipvoice", modelDir});
  }
  if (hasVoicesFile) {
    // voices.bin alone cannot tell the two families apart, so report both.
    result.detectedModels.push_back({"kokoro", modelDir});
    result.detectedModels.push_back({"kitten", modelDir});
  }
  // VITS is listed when a model file exists and either voices.bin is present
  // or no Matcha/Zipvoice file set claims the directory.
  if (hasVits && (hasVoicesFile || (!hasMatcha && !hasZipvoice))) {
    result.detectedModels.push_back({"vits", modelDir});
  }

  TtsModelKind selected = TtsModelKind::kUnknown;
  if (modelType != "auto") {
    selected = ParseTtsModelType(modelType);
    if (selected == TtsModelKind::kUnknown) {
      result.error = "TTS: Unknown model type: " + modelType;
      return result;
    }
  } else if (hasMatcha) {
    selected = TtsModelKind::kMatcha;
  } else if (hasZipvoice) {
    selected = TtsModelKind::kZipvoice;
  } else if (hasVoicesFile) {
    selected = (isLikelyKitten && !isLikelyKokoro) ? TtsModelKind::kKitten
                                                   : TtsModelKind::kKokoro;
  } else if (hasVits) {
    selected = TtsModelKind::kVits;
  }

  if (selected == TtsModelKind::kUnknown) {
    result.error = "TTS: No compatible model type detected in " + modelDir;
    return result;
  }

  if (selected == TtsModelKind::kVits && !hasVits) {
    result.error = "TTS: VITS model requested but model.onnx not found in " + modelDir;
    return result;
  }
  if (selected == TtsModelKind::kMatcha && !hasMatcha) {
    result.error = "TTS: Matcha model requested but required files not found in " + modelDir;
    return result;
  }
  if ((selected == TtsModelKind::kKokoro || selected == TtsModelKind::kKitten) &&
      (!hasVits || !hasVoicesFile)) {
    result.error = "TTS: Kokoro/Kitten model requested but required files not found in " + modelDir;
    return result;
  }
  if (selected == TtsModelKind::kZipvoice && !hasZipvoice) {
    result.error = "TTS: Zipvoice model requested but required files not found in " + modelDir;
    return result;
  }
  // `selected` is a known kind here, and every known kind needs espeak-ng-data.
  if (!hasDataDir) {
    result.error = "TTS: espeak-ng-data not found in " + modelDir +
                   ". Copy espeak-ng-data into the model directory.";
    return result;
  }

  // Paths are reported whether or not the optional files exist; only the
  // lexicon is blanked when missing.
  result.selectedKind = selected;
  result.paths.ttsModel = ttsModel;
  result.paths.tokens = tokensFile;
  result.paths.lexicon = FileExists(lexiconFile) ? lexiconFile : "";
  result.paths.dataDir = dataDirPath;
  result.paths.voices = voicesFile;
  result.paths.acousticModel = acousticModel;
  result.paths.vocoder = vocoder;
  result.paths.encoder = encoder;
  result.paths.decoder = decoder;

  if (!FileExists(tokensFile)) {
    result.error = "TTS: tokens.txt not found in " + modelDir;
    return result;
  }

  result.ok = true;
  return result;
}
|
|
440
|
-
|
|
441
|
-
} // namespace sherpaonnx
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
#ifndef SHERPA_ONNX_STT_WRAPPER_H
|
|
2
|
-
#define SHERPA_ONNX_STT_WRAPPER_H
|
|
3
|
-
|
|
4
|
-
#include "sherpa-onnx-common.h"
|
|
5
|
-
#include <cstdint>
|
|
6
|
-
#include <memory>
|
|
7
|
-
#include <optional>
|
|
8
|
-
#include <string>
|
|
9
|
-
#include <vector>
|
|
10
|
-
|
|
11
|
-
namespace sherpaonnx {
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Result of STT initialization.
|
|
15
|
-
*/
|
|
16
|
-
struct SttInitializeResult {
|
|
17
|
-
bool success;
|
|
18
|
-
std::vector<DetectedModel> detectedModels; // List of detected models with type and path
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* Wrapper class for sherpa-onnx OfflineRecognizer (STT).
|
|
23
|
-
*/
|
|
24
|
-
class SttWrapper {
|
|
25
|
-
public:
|
|
26
|
-
SttWrapper();
|
|
27
|
-
~SttWrapper();
|
|
28
|
-
|
|
29
|
-
SttInitializeResult initialize(
|
|
30
|
-
const std::string& modelDir,
|
|
31
|
-
const std::optional<bool>& preferInt8 = std::nullopt,
|
|
32
|
-
const std::optional<std::string>& modelType = std::nullopt
|
|
33
|
-
);
|
|
34
|
-
|
|
35
|
-
std::string transcribeFile(const std::string& filePath);
|
|
36
|
-
|
|
37
|
-
bool isInitialized() const;
|
|
38
|
-
|
|
39
|
-
void release();
|
|
40
|
-
|
|
41
|
-
private:
|
|
42
|
-
class Impl;
|
|
43
|
-
std::unique_ptr<Impl> pImpl;
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
} // namespace sherpaonnx
|
|
47
|
-
|
|
48
|
-
#endif // SHERPA_ONNX_STT_WRAPPER_H
|
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
#include "sherpa-onnx-stt-wrapper.h"
|
|
2
|
-
#include "sherpa-onnx-model-detect.h"
|
|
3
|
-
#include <algorithm>
|
|
4
|
-
#include <cctype>
|
|
5
|
-
#include <cstring>
|
|
6
|
-
#include <fstream>
|
|
7
|
-
#include <optional>
|
|
8
|
-
#include <sstream>
|
|
9
|
-
|
|
10
|
-
// Logging helpers: forwarded to NSLog on Apple platforms, compiled out
// (arguments are discarded, not evaluated) everywhere else.
#if defined(__APPLE__)
#include <Foundation/Foundation.h>
#include <cstdio>
#define LOGI(format, ...) NSLog(@"SttWrapper: " format, ##__VA_ARGS__)
#define LOGE(format, ...) NSLog(@"SttWrapper ERROR: " format, ##__VA_ARGS__)
#else
#define LOGI(...)
#define LOGE(...)
#endif
|
|
20
|
-
|
|
21
|
-
// Use C++17 filesystem (podspec enforces C++17)
|
|
22
|
-
#include <filesystem>
|
|
23
|
-
namespace fs = std::filesystem;
|
|
24
|
-
|
|
25
|
-
// sherpa-onnx headers - use C++ API (RAII wrapper around C API)
|
|
26
|
-
#include "sherpa-onnx/c-api/cxx-api.h"
|
|
27
|
-
|
|
28
|
-
namespace sherpaonnx {
|
|
29
|
-
|
|
30
|
-
// PIMPL pattern implementation
|
|
31
|
-
class SttWrapper::Impl {
|
|
32
|
-
public:
|
|
33
|
-
bool initialized = false;
|
|
34
|
-
std::string modelDir;
|
|
35
|
-
std::optional<sherpa_onnx::cxx::OfflineRecognizer> recognizer;
|
|
36
|
-
};
|
|
37
|
-
|
|
38
|
-
SttWrapper::SttWrapper() : pImpl(std::make_unique<Impl>()) {
|
|
39
|
-
LOGI("SttWrapper created");
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
// Releases the recognizer (if one is held) before the wrapper is destroyed.
SttWrapper::~SttWrapper() {
    this->release();
    LOGI("SttWrapper destroyed");
}
|
|
46
|
-
|
|
47
|
-
/**
 * Detects the STT model stored in `modelDir` and creates the offline recognizer.
 *
 * A previously initialized recognizer is released first. Every failure path
 * logs the reason and returns a result with `success == false`; no exception
 * derived from std::exception (or any other) escapes this function.
 *
 * @param modelDir   Directory containing the model files; must not be empty.
 * @param preferInt8 Forwarded unchanged to DetectSttModel.
 * @param modelType  Forwarded unchanged to DetectSttModel.
 * @return On success, `success == true` and `detectedModels` copied from the
 *         detection result; otherwise `success == false`.
 */
SttInitializeResult SttWrapper::initialize(
    const std::string& modelDir,
    const std::optional<bool>& preferInt8,
    const std::optional<std::string>& modelType
) {
    SttInitializeResult result;
    result.success = false;

    // Re-initialization: drop the old recognizer before building a new one.
    if (pImpl->initialized) {
        release();
    }

    if (modelDir.empty()) {
        LOGE("Model directory is empty");
        return result;
    }

    try {
        sherpa_onnx::cxx::OfflineRecognizerConfig config;
        config.feat_config.sample_rate = 16000;
        config.feat_config.feature_dim = 80;

        auto detect = DetectSttModel(modelDir, preferInt8, modelType);
        if (!detect.ok) {
            LOGE("%s", detect.error.c_str());
            return result;
        }

        // Copy the detected file paths into the config section that matches
        // the selected model kind.
        switch (detect.selectedKind) {
            case SttModelKind::kTransducer:
                config.model_config.transducer.encoder = detect.paths.encoder;
                config.model_config.transducer.decoder = detect.paths.decoder;
                config.model_config.transducer.joiner = detect.paths.joiner;
                break;
            case SttModelKind::kParaformer:
                config.model_config.paraformer.model = detect.paths.paraformerModel;
                break;
            case SttModelKind::kNemoCtc:
                config.model_config.nemo_ctc.model = detect.paths.ctcModel;
                break;
            case SttModelKind::kWenetCtc:
                config.model_config.wenet_ctc.model = detect.paths.ctcModel;
                break;
            case SttModelKind::kSenseVoice:
                config.model_config.sense_voice.model = detect.paths.ctcModel;
                break;
            case SttModelKind::kZipformerCtc:
                config.model_config.zipformer_ctc.model = detect.paths.ctcModel;
                break;
            case SttModelKind::kWhisper:
                config.model_config.whisper.encoder = detect.paths.whisperEncoder;
                config.model_config.whisper.decoder = detect.paths.whisperDecoder;
                break;
            case SttModelKind::kFunAsrNano:
                config.model_config.funasr_nano.encoder_adaptor = detect.paths.funasrEncoderAdaptor;
                config.model_config.funasr_nano.llm = detect.paths.funasrLLM;
                config.model_config.funasr_nano.embedding = detect.paths.funasrEmbedding;
                config.model_config.funasr_nano.tokenizer = detect.paths.funasrTokenizer;
                break;
            case SttModelKind::kUnknown:
            default:
                LOGE("No compatible model type detected in %s", modelDir.c_str());
                return result;
        }

        if (!detect.paths.tokens.empty()) {
            config.model_config.tokens = detect.paths.tokens;
        }

        config.decoding_method = "greedy_search";
        config.model_config.num_threads = 4;
        config.model_config.provider = "cpu";

        bool isWhisperModel = !config.model_config.whisper.encoder.empty() && !config.model_config.whisper.decoder.empty();
        if (isWhisperModel) {
            LOGI("Initializing Whisper model with encoder: %s, decoder: %s", config.model_config.whisper.encoder.c_str(), config.model_config.whisper.decoder.c_str());
        } else {
            LOGI("Initializing non-Whisper model");
        }
        try {
            pImpl->recognizer = sherpa_onnx::cxx::OfflineRecognizer::Create(config);
        } catch (const std::exception& e) {
            LOGE("Failed to create recognizer: %s", e.what());
            return result;
        }

        // Create() wraps the sherpa-onnx C API, which signals an invalid
        // config by handing back a null handle instead of throwing. Without
        // this check the wrapper would report success and a later
        // transcribeFile() call would operate on a null recognizer.
        if (!pImpl->recognizer.has_value() || pImpl->recognizer->Get() == nullptr) {
            LOGE("Failed to create recognizer: %s", "sherpa-onnx returned a null handle");
            pImpl->recognizer.reset();
            return result;
        }

        pImpl->modelDir = modelDir;
        pImpl->initialized = true;

        result.success = true;
        result.detectedModels = detect.detectedModels;
        return result;
    } catch (const std::exception& e) {
        LOGE("Exception during initialization: %s", e.what());
        return result;
    } catch (...) {
        LOGE("Unknown exception during initialization");
        return result;
    }
}
|
|
147
|
-
|
|
148
|
-
std::string SttWrapper::transcribeFile(const std::string& filePath) {
|
|
149
|
-
if (!pImpl->initialized || !pImpl->recognizer.has_value()) {
|
|
150
|
-
LOGE("Not initialized. Call initialize() first.");
|
|
151
|
-
return "";
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
try {
|
|
155
|
-
// Helper function to check if file exists
|
|
156
|
-
auto fileExists = [](const std::string& path) -> bool {
|
|
157
|
-
return fs::exists(path);
|
|
158
|
-
};
|
|
159
|
-
|
|
160
|
-
if (!fileExists(filePath)) {
|
|
161
|
-
LOGE("Audio file not found: %s", filePath.c_str());
|
|
162
|
-
return "";
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
sherpa_onnx::cxx::Wave wave = sherpa_onnx::cxx::ReadWave(filePath);
|
|
166
|
-
|
|
167
|
-
if (wave.samples.empty()) {
|
|
168
|
-
LOGE("Audio file is empty or failed to read: %s", filePath.c_str());
|
|
169
|
-
return "";
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
auto stream = pImpl->recognizer.value().CreateStream();
|
|
173
|
-
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), wave.samples.size());
|
|
174
|
-
|
|
175
|
-
pImpl->recognizer.value().Decode(&stream);
|
|
176
|
-
|
|
177
|
-
auto result = pImpl->recognizer.value().GetResult(&stream);
|
|
178
|
-
|
|
179
|
-
return result.text;
|
|
180
|
-
} catch (const std::exception& e) {
|
|
181
|
-
LOGE("Exception during transcription: %s", e.what());
|
|
182
|
-
return "";
|
|
183
|
-
} catch (...) {
|
|
184
|
-
LOGE("Unknown exception during transcription");
|
|
185
|
-
return "";
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
bool SttWrapper::isInitialized() const {
|
|
190
|
-
return pImpl->initialized;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
void SttWrapper::release() {
|
|
194
|
-
if (pImpl->initialized) {
|
|
195
|
-
pImpl->recognizer.reset();
|
|
196
|
-
pImpl->initialized = false;
|
|
197
|
-
pImpl->modelDir.clear();
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
} // namespace sherpaonnx
|