react-native-sherpa-onnx 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +402 -0
- package/SherpaOnnx.podspec +84 -0
- package/android/build.gradle +193 -0
- package/android/src/main/AndroidManifest.xml +2 -0
- package/android/src/main/cpp/CMakeLists.txt +121 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +1918 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +841 -0
- package/android/src/main/cpp/jni/sherpa-onnx-jni.cpp +129 -0
- package/android/src/main/cpp/jni/sherpa-onnx-wrapper.cpp +649 -0
- package/android/src/main/cpp/jni/sherpa-onnx-wrapper.h +56 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +316 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPackage.kt +33 -0
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/SherpaOnnx.h +5 -0
- package/ios/SherpaOnnx.mm +293 -0
- package/ios/SherpaOnnx.xcconfig +19 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +1918 -0
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +841 -0
- package/ios/sherpa-onnx-wrapper.h +57 -0
- package/ios/sherpa-onnx-wrapper.mm +432 -0
- package/lib/module/NativeSherpaOnnx.js +5 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -0
- package/lib/module/diarization/index.js +54 -0
- package/lib/module/diarization/index.js.map +1 -0
- package/lib/module/enhancement/index.js +54 -0
- package/lib/module/enhancement/index.js.map +1 -0
- package/lib/module/index.js +25 -0
- package/lib/module/index.js.map +1 -0
- package/lib/module/package.json +1 -0
- package/lib/module/separation/index.js +54 -0
- package/lib/module/separation/index.js.map +1 -0
- package/lib/module/stt/index.js +79 -0
- package/lib/module/stt/index.js.map +1 -0
- package/lib/module/stt/types.js +4 -0
- package/lib/module/stt/types.js.map +1 -0
- package/lib/module/tts/index.js +54 -0
- package/lib/module/tts/index.js.map +1 -0
- package/lib/module/types.js +2 -0
- package/lib/module/types.js.map +1 -0
- package/lib/module/utils.js +93 -0
- package/lib/module/utils.js.map +1 -0
- package/lib/module/vad/index.js +54 -0
- package/lib/module/vad/index.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +39 -0
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +49 -0
- package/lib/typescript/src/diarization/index.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +47 -0
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +9 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/separation/index.d.ts +48 -0
- package/lib/typescript/src/separation/index.d.ts.map +1 -0
- package/lib/typescript/src/stt/index.d.ts +53 -0
- package/lib/typescript/src/stt/index.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +39 -0
- package/lib/typescript/src/stt/types.d.ts.map +1 -0
- package/lib/typescript/src/tts/index.d.ts +47 -0
- package/lib/typescript/src/tts/index.d.ts.map +1 -0
- package/lib/typescript/src/types.d.ts +59 -0
- package/lib/typescript/src/types.d.ts.map +1 -0
- package/lib/typescript/src/utils.d.ts +53 -0
- package/lib/typescript/src/utils.d.ts.map +1 -0
- package/lib/typescript/src/vad/index.d.ts +48 -0
- package/lib/typescript/src/vad/index.d.ts.map +1 -0
- package/package.json +221 -0
- package/scripts/copy-headers.js +184 -0
- package/scripts/setup-assets.js +323 -0
- package/scripts/setup-ios-framework.sh +282 -0
- package/scripts/switch-registry.js +75 -0
- package/src/NativeSherpaOnnx.ts +44 -0
- package/src/diarization/index.ts +69 -0
- package/src/enhancement/index.ts +67 -0
- package/src/index.tsx +30 -0
- package/src/separation/index.ts +68 -0
- package/src/stt/index.ts +83 -0
- package/src/stt/types.ts +42 -0
- package/src/tts/index.ts +67 -0
- package/src/types.ts +73 -0
- package/src/utils.ts +97 -0
- package/src/vad/index.ts +70 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#ifndef SHERPA_ONNX_WRAPPER_H
|
|
2
|
+
#define SHERPA_ONNX_WRAPPER_H
|
|
3
|
+
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <memory>
|
|
6
|
+
#include <optional>
|
|
7
|
+
|
|
8
|
+
namespace sherpaonnx {
|
|
9
|
+
|
|
10
|
+
/**
 * Wrapper class for the sherpa-onnx OfflineRecognizer.
 *
 * Provides a small C++ interface that can be easily called from iOS
 * Objective-C++. All sherpa-onnx state lives behind a PIMPL (`Impl`), so this
 * header only depends on the standard library.
 *
 * The wrapper owns its implementation through a std::unique_ptr and is
 * therefore not copyable; the copy operations are deleted explicitly so the
 * intent is visible at the call site instead of surfacing as a template error.
 */
class SherpaOnnxWrapper {
public:
    SherpaOnnxWrapper();
    ~SherpaOnnxWrapper();

    // Non-copyable: the recognizer state is uniquely owned by this instance.
    SherpaOnnxWrapper(const SherpaOnnxWrapper&) = delete;
    SherpaOnnxWrapper& operator=(const SherpaOnnxWrapper&) = delete;

    /**
     * Initialize sherpa-onnx with a model directory.
     *
     * If the wrapper is already initialized, the previous recognizer is
     * released before the new one is created.
     *
     * @param modelDir Path to the model directory
     * @param preferInt8 Optional: true = prefer int8 models, false = prefer regular models, nullopt = try int8 first (default)
     * @param modelType Optional: explicit model type ("transducer", "paraformer", "nemo_ctc",
     *                  "wenet_ctc", "sense_voice", "funasr_nano", "whisper"),
     *                  nullopt = auto-detect (default)
     * @return true if successful, false otherwise
     */
    bool initialize(
        const std::string& modelDir,
        const std::optional<bool>& preferInt8 = std::nullopt,
        const std::optional<std::string>& modelType = std::nullopt
    );

    /**
     * Transcribe an audio file.
     *
     * @param filePath Path to the audio file (WAV 16kHz mono 16-bit PCM)
     * @return Transcribed text; an empty string if the wrapper is not
     *         initialized, the file cannot be read, or recognition fails
     */
    std::string transcribeFile(const std::string& filePath);

    /**
     * Check if the recognizer is initialized.
     *
     * @return true if initialized, false otherwise
     */
    bool isInitialized() const;

    /**
     * Release the recognizer and reset the wrapper to its uninitialized state.
     * Calling this on an uninitialized wrapper is a no-op.
     */
    void release();

private:
    class Impl;                   // defined in the implementation file
    std::unique_ptr<Impl> pImpl;  // owns all sherpa-onnx state
};
|
|
54
|
+
|
|
55
|
+
} // namespace sherpaonnx
|
|
56
|
+
|
|
57
|
+
#endif // SHERPA_ONNX_WRAPPER_H
|
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
#include "sherpa-onnx-wrapper.h"
|
|
2
|
+
#include <fstream>
|
|
3
|
+
#include <sstream>
|
|
4
|
+
#include <optional>
|
|
5
|
+
#include <algorithm>
|
|
6
|
+
#include <cctype>
|
|
7
|
+
#include <cstring>
|
|
8
|
+
|
|
9
|
+
// iOS logging
|
|
10
|
+
#ifdef __APPLE__
|
|
11
|
+
#include <Foundation/Foundation.h>
|
|
12
|
+
#include <cstdio>
|
|
13
|
+
#define LOGI(fmt, ...) NSLog(@"SherpaOnnxWrapper: " fmt, ##__VA_ARGS__)
|
|
14
|
+
#define LOGE(fmt, ...) NSLog(@"SherpaOnnxWrapper ERROR: " fmt, ##__VA_ARGS__)
|
|
15
|
+
#else
|
|
16
|
+
#define LOGI(...)
|
|
17
|
+
#define LOGE(...)
|
|
18
|
+
#endif
|
|
19
|
+
|
|
20
|
+
// Use C++17 filesystem (podspec enforces C++17)
|
|
21
|
+
#include <filesystem>
|
|
22
|
+
namespace fs = std::filesystem;
|
|
23
|
+
|
|
24
|
+
// sherpa-onnx headers - use C++ API (RAII wrapper around C API)
|
|
25
|
+
#include "sherpa-onnx/c-api/cxx-api.h"
|
|
26
|
+
|
|
27
|
+
namespace sherpaonnx {
|
|
28
|
+
|
|
29
|
+
// PIMPL pattern implementation
|
|
30
|
+
class SherpaOnnxWrapper::Impl {
|
|
31
|
+
public:
|
|
32
|
+
bool initialized = false;
|
|
33
|
+
std::string modelDir;
|
|
34
|
+
std::optional<sherpa_onnx::cxx::OfflineRecognizer> recognizer;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
SherpaOnnxWrapper::SherpaOnnxWrapper() : pImpl(std::make_unique<Impl>()) {
|
|
38
|
+
LOGI("SherpaOnnxWrapper created");
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Releases the recognizer (if any) before the implementation object goes away.
SherpaOnnxWrapper::~SherpaOnnxWrapper()
{
    this->release();
    LOGI("SherpaOnnxWrapper destroyed");
}
|
|
45
|
+
|
|
46
|
+
bool SherpaOnnxWrapper::initialize(
|
|
47
|
+
const std::string& modelDir,
|
|
48
|
+
const std::optional<bool>& preferInt8,
|
|
49
|
+
const std::optional<std::string>& modelType
|
|
50
|
+
) {
|
|
51
|
+
if (pImpl->initialized) {
|
|
52
|
+
release();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (modelDir.empty()) {
|
|
56
|
+
LOGE("Model directory is empty");
|
|
57
|
+
return false;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
try {
|
|
61
|
+
// Helper function to check if file exists
|
|
62
|
+
auto fileExists = [](const std::string& path) -> bool {
|
|
63
|
+
return fs::exists(path);
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
auto isDirectory = [](const std::string& path) -> bool {
|
|
67
|
+
return fs::is_directory(path);
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
// Check if model directory exists
|
|
71
|
+
if (!fileExists(modelDir) || !isDirectory(modelDir)) {
|
|
72
|
+
LOGE("Model directory does not exist or is not a directory: %s", modelDir.c_str());
|
|
73
|
+
return false;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Setup configuration using C++ API
|
|
77
|
+
sherpa_onnx::cxx::OfflineRecognizerConfig config;
|
|
78
|
+
|
|
79
|
+
// Set default feature config (16kHz, 80-dim for most models)
|
|
80
|
+
config.feat_config.sample_rate = 16000;
|
|
81
|
+
config.feat_config.feature_dim = 80;
|
|
82
|
+
|
|
83
|
+
// Build paths for model files
|
|
84
|
+
std::string encoderPath = modelDir + "/encoder.onnx";
|
|
85
|
+
std::string decoderPath = modelDir + "/decoder.onnx";
|
|
86
|
+
std::string joinerPath = modelDir + "/joiner.onnx";
|
|
87
|
+
std::string encoderPathInt8 = modelDir + "/encoder.int8.onnx";
|
|
88
|
+
std::string decoderPathInt8 = modelDir + "/decoder.int8.onnx";
|
|
89
|
+
std::string paraformerPathInt8 = modelDir + "/model.int8.onnx";
|
|
90
|
+
std::string paraformerPath = modelDir + "/model.onnx";
|
|
91
|
+
std::string ctcPathInt8 = modelDir + "/model.int8.onnx";
|
|
92
|
+
std::string ctcPath = modelDir + "/model.onnx";
|
|
93
|
+
std::string tokensPath = modelDir + "/tokens.txt";
|
|
94
|
+
|
|
95
|
+
// FunASR Nano paths
|
|
96
|
+
std::string funasrEncoderAdaptor = modelDir + "/encoder_adaptor.onnx";
|
|
97
|
+
std::string funasrEncoderAdaptorInt8 = modelDir + "/encoder_adaptor.int8.onnx";
|
|
98
|
+
std::string funasrLLM = modelDir + "/llm.onnx";
|
|
99
|
+
std::string funasrLLMInt8 = modelDir + "/llm.int8.onnx";
|
|
100
|
+
std::string funasrEmbedding = modelDir + "/embedding.onnx";
|
|
101
|
+
std::string funasrEmbeddingInt8 = modelDir + "/embedding.int8.onnx";
|
|
102
|
+
|
|
103
|
+
// Helper function to find FunASR Nano tokenizer directory
|
|
104
|
+
auto findFunAsrTokenizer = [&fileExists, &modelDir]() -> std::string {
|
|
105
|
+
std::string vocabInMain = modelDir + "/vocab.json";
|
|
106
|
+
if (fileExists(vocabInMain)) {
|
|
107
|
+
return modelDir;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
try {
|
|
111
|
+
for (const auto& entry : fs::directory_iterator(modelDir)) {
|
|
112
|
+
if (entry.is_directory()) {
|
|
113
|
+
std::string dirName = entry.path().filename().string();
|
|
114
|
+
std::string dirNameLower = dirName;
|
|
115
|
+
std::transform(dirNameLower.begin(), dirNameLower.end(), dirNameLower.begin(),
|
|
116
|
+
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
|
|
117
|
+
if (dirNameLower.find("qwen3") != std::string::npos) {
|
|
118
|
+
std::string vocabPath = entry.path().string() + "/vocab.json";
|
|
119
|
+
if (fileExists(vocabPath)) {
|
|
120
|
+
return entry.path().string();
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
} catch (const std::exception& e) {
|
|
126
|
+
// Error accessing directory
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
std::string commonPath = modelDir + "/Qwen3-0.6B";
|
|
130
|
+
if (fileExists(commonPath + "/vocab.json")) {
|
|
131
|
+
return commonPath;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
return "";
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
std::string funasrTokenizer = findFunAsrTokenizer();
|
|
138
|
+
|
|
139
|
+
bool tokensRequired = true;
|
|
140
|
+
|
|
141
|
+
// Configure based on model type - same logic as Android version
|
|
142
|
+
std::string paraformerModelPath;
|
|
143
|
+
if (preferInt8.has_value()) {
|
|
144
|
+
if (preferInt8.value()) {
|
|
145
|
+
if (fileExists(paraformerPathInt8)) {
|
|
146
|
+
paraformerModelPath = paraformerPathInt8;
|
|
147
|
+
} else if (fileExists(paraformerPath)) {
|
|
148
|
+
paraformerModelPath = paraformerPath;
|
|
149
|
+
}
|
|
150
|
+
} else {
|
|
151
|
+
if (fileExists(paraformerPath)) {
|
|
152
|
+
paraformerModelPath = paraformerPath;
|
|
153
|
+
} else if (fileExists(paraformerPathInt8)) {
|
|
154
|
+
paraformerModelPath = paraformerPathInt8;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
} else {
|
|
158
|
+
if (fileExists(paraformerPathInt8)) {
|
|
159
|
+
paraformerModelPath = paraformerPathInt8;
|
|
160
|
+
} else if (fileExists(paraformerPath)) {
|
|
161
|
+
paraformerModelPath = paraformerPath;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
std::string ctcModelPath;
|
|
166
|
+
if (preferInt8.has_value()) {
|
|
167
|
+
if (preferInt8.value()) {
|
|
168
|
+
if (fileExists(ctcPathInt8)) {
|
|
169
|
+
ctcModelPath = ctcPathInt8;
|
|
170
|
+
} else if (fileExists(ctcPath)) {
|
|
171
|
+
ctcModelPath = ctcPath;
|
|
172
|
+
}
|
|
173
|
+
} else {
|
|
174
|
+
if (fileExists(ctcPath)) {
|
|
175
|
+
ctcModelPath = ctcPath;
|
|
176
|
+
} else if (fileExists(ctcPathInt8)) {
|
|
177
|
+
ctcModelPath = ctcPathInt8;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
} else {
|
|
181
|
+
if (fileExists(ctcPathInt8)) {
|
|
182
|
+
ctcModelPath = ctcPathInt8;
|
|
183
|
+
} else if (fileExists(ctcPath)) {
|
|
184
|
+
ctcModelPath = ctcPath;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
bool hasTransducer = fileExists(encoderPath) &&
|
|
189
|
+
fileExists(decoderPath) &&
|
|
190
|
+
fileExists(joinerPath);
|
|
191
|
+
|
|
192
|
+
bool hasWhisperEncoder = fileExists(encoderPath) || fileExists(encoderPathInt8);
|
|
193
|
+
bool hasWhisperDecoder = fileExists(decoderPath) || fileExists(decoderPathInt8);
|
|
194
|
+
bool hasWhisper = hasWhisperEncoder && hasWhisperDecoder && !fileExists(joinerPath);
|
|
195
|
+
|
|
196
|
+
bool hasFunAsrEncoderAdaptor = fileExists(funasrEncoderAdaptor) || fileExists(funasrEncoderAdaptorInt8);
|
|
197
|
+
bool hasFunAsrLLM = fileExists(funasrLLM) || fileExists(funasrLLMInt8);
|
|
198
|
+
bool hasFunAsrEmbedding = fileExists(funasrEmbedding) || fileExists(funasrEmbeddingInt8);
|
|
199
|
+
bool hasFunAsrTokenizer = !funasrTokenizer.empty() && fileExists(funasrTokenizer + "/vocab.json");
|
|
200
|
+
bool hasFunAsrNano = hasFunAsrEncoderAdaptor && hasFunAsrLLM && hasFunAsrEmbedding && hasFunAsrTokenizer;
|
|
201
|
+
|
|
202
|
+
bool isLikelyNemoCtc = modelDir.find("nemo") != std::string::npos ||
|
|
203
|
+
modelDir.find("parakeet") != std::string::npos;
|
|
204
|
+
bool isLikelyWenetCtc = modelDir.find("wenet") != std::string::npos;
|
|
205
|
+
bool isLikelySenseVoice = modelDir.find("sense") != std::string::npos ||
|
|
206
|
+
modelDir.find("sensevoice") != std::string::npos;
|
|
207
|
+
bool isLikelyFunAsrNano = modelDir.find("funasr") != std::string::npos ||
|
|
208
|
+
modelDir.find("funasr-nano") != std::string::npos;
|
|
209
|
+
bool isLikelyWhisper = modelDir.find("whisper") != std::string::npos;
|
|
210
|
+
|
|
211
|
+
bool modelConfigured = false;
|
|
212
|
+
|
|
213
|
+
// Use explicit model type if provided
|
|
214
|
+
if (modelType.has_value()) {
|
|
215
|
+
std::string type = modelType.value();
|
|
216
|
+
if (type == "transducer" && hasTransducer) {
|
|
217
|
+
LOGI("Using explicit Transducer model type");
|
|
218
|
+
config.model_config.transducer.encoder = encoderPath;
|
|
219
|
+
config.model_config.transducer.decoder = decoderPath;
|
|
220
|
+
config.model_config.transducer.joiner = joinerPath;
|
|
221
|
+
modelConfigured = true;
|
|
222
|
+
} else if (type == "paraformer" && !paraformerModelPath.empty()) {
|
|
223
|
+
LOGI("Using explicit Paraformer model type: %s", paraformerModelPath.c_str());
|
|
224
|
+
config.model_config.paraformer.model = paraformerModelPath;
|
|
225
|
+
modelConfigured = true;
|
|
226
|
+
} else if (type == "nemo_ctc" && !ctcModelPath.empty()) {
|
|
227
|
+
LOGI("Using explicit NeMo CTC model type: %s", ctcModelPath.c_str());
|
|
228
|
+
config.model_config.nemo_ctc.model = ctcModelPath;
|
|
229
|
+
modelConfigured = true;
|
|
230
|
+
} else if (type == "wenet_ctc" && !ctcModelPath.empty()) {
|
|
231
|
+
LOGI("Using explicit WeNet CTC model type: %s", ctcModelPath.c_str());
|
|
232
|
+
config.model_config.wenet_ctc.model = ctcModelPath;
|
|
233
|
+
modelConfigured = true;
|
|
234
|
+
} else if (type == "sense_voice" && !ctcModelPath.empty()) {
|
|
235
|
+
LOGI("Using explicit SenseVoice model type: %s", ctcModelPath.c_str());
|
|
236
|
+
config.model_config.sense_voice.model = ctcModelPath;
|
|
237
|
+
config.model_config.sense_voice.language = "auto";
|
|
238
|
+
config.model_config.sense_voice.use_itn = false;
|
|
239
|
+
modelConfigured = true;
|
|
240
|
+
} else if (type == "funasr_nano" && hasFunAsrNano) {
|
|
241
|
+
LOGI("Using explicit FunASR Nano model type");
|
|
242
|
+
config.model_config.funasr_nano.encoder_adaptor = fileExists(funasrEncoderAdaptorInt8) ? funasrEncoderAdaptorInt8 : funasrEncoderAdaptor;
|
|
243
|
+
config.model_config.funasr_nano.llm = fileExists(funasrLLMInt8) ? funasrLLMInt8 : funasrLLM;
|
|
244
|
+
config.model_config.funasr_nano.embedding = fileExists(funasrEmbeddingInt8) ? funasrEmbeddingInt8 : funasrEmbedding;
|
|
245
|
+
config.model_config.funasr_nano.tokenizer = funasrTokenizer;
|
|
246
|
+
tokensRequired = false;
|
|
247
|
+
modelConfigured = true;
|
|
248
|
+
} else if (type == "whisper" && hasWhisper) {
|
|
249
|
+
LOGI("Using explicit Whisper model type");
|
|
250
|
+
config.model_config.whisper.encoder = fileExists(encoderPathInt8) ? encoderPathInt8 : encoderPath;
|
|
251
|
+
config.model_config.whisper.decoder = fileExists(decoderPathInt8) ? decoderPathInt8 : decoderPath;
|
|
252
|
+
config.model_config.whisper.language = "en";
|
|
253
|
+
config.model_config.whisper.task = "transcribe";
|
|
254
|
+
tokensRequired = true;
|
|
255
|
+
if (fileExists(tokensPath)) {
|
|
256
|
+
config.model_config.tokens = tokensPath;
|
|
257
|
+
LOGI("Using tokens file for Whisper: %s", tokensPath.c_str());
|
|
258
|
+
} else {
|
|
259
|
+
LOGE("Tokens file not found for Whisper model: %s", tokensPath.c_str());
|
|
260
|
+
return false;
|
|
261
|
+
}
|
|
262
|
+
modelConfigured = true;
|
|
263
|
+
} else {
|
|
264
|
+
LOGE("Explicit model type '%s' specified but required files not found", type.c_str());
|
|
265
|
+
return false;
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// Auto-detect if no explicit type
|
|
270
|
+
if (!modelConfigured) {
|
|
271
|
+
if (hasTransducer) {
|
|
272
|
+
LOGI("Auto-detected Transducer model");
|
|
273
|
+
config.model_config.transducer.encoder = encoderPath;
|
|
274
|
+
config.model_config.transducer.decoder = decoderPath;
|
|
275
|
+
config.model_config.transducer.joiner = joinerPath;
|
|
276
|
+
modelConfigured = true;
|
|
277
|
+
} else if (hasFunAsrNano && isLikelyFunAsrNano) {
|
|
278
|
+
LOGI("Auto-detected FunASR Nano model");
|
|
279
|
+
config.model_config.funasr_nano.encoder_adaptor = fileExists(funasrEncoderAdaptorInt8) ? funasrEncoderAdaptorInt8 : funasrEncoderAdaptor;
|
|
280
|
+
config.model_config.funasr_nano.llm = fileExists(funasrLLMInt8) ? funasrLLMInt8 : funasrLLM;
|
|
281
|
+
config.model_config.funasr_nano.embedding = fileExists(funasrEmbeddingInt8) ? funasrEmbeddingInt8 : funasrEmbedding;
|
|
282
|
+
config.model_config.funasr_nano.tokenizer = funasrTokenizer;
|
|
283
|
+
tokensRequired = false;
|
|
284
|
+
modelConfigured = true;
|
|
285
|
+
} else if (hasWhisper && isLikelyWhisper) {
|
|
286
|
+
LOGI("Auto-detected Whisper model");
|
|
287
|
+
config.model_config.whisper.encoder = fileExists(encoderPathInt8) ? encoderPathInt8 : encoderPath;
|
|
288
|
+
config.model_config.whisper.decoder = fileExists(decoderPathInt8) ? decoderPathInt8 : decoderPath;
|
|
289
|
+
config.model_config.whisper.language = "en";
|
|
290
|
+
config.model_config.whisper.task = "transcribe";
|
|
291
|
+
tokensRequired = true;
|
|
292
|
+
if (fileExists(tokensPath)) {
|
|
293
|
+
config.model_config.tokens = tokensPath;
|
|
294
|
+
LOGI("Using tokens file for Whisper: %s", tokensPath.c_str());
|
|
295
|
+
} else {
|
|
296
|
+
LOGE("Tokens file not found for Whisper model: %s", tokensPath.c_str());
|
|
297
|
+
return false;
|
|
298
|
+
}
|
|
299
|
+
modelConfigured = true;
|
|
300
|
+
} else if (!ctcModelPath.empty() && isLikelySenseVoice) {
|
|
301
|
+
LOGI("Auto-detected SenseVoice model: %s", ctcModelPath.c_str());
|
|
302
|
+
config.model_config.sense_voice.model = ctcModelPath;
|
|
303
|
+
config.model_config.sense_voice.language = "auto";
|
|
304
|
+
config.model_config.sense_voice.use_itn = false;
|
|
305
|
+
modelConfigured = true;
|
|
306
|
+
} else if (!ctcModelPath.empty() && isLikelyWenetCtc) {
|
|
307
|
+
LOGI("Auto-detected WeNet CTC model: %s", ctcModelPath.c_str());
|
|
308
|
+
config.model_config.wenet_ctc.model = ctcModelPath;
|
|
309
|
+
modelConfigured = true;
|
|
310
|
+
} else if (!ctcModelPath.empty() && isLikelyNemoCtc) {
|
|
311
|
+
LOGI("Auto-detected NeMo CTC model: %s", ctcModelPath.c_str());
|
|
312
|
+
config.model_config.nemo_ctc.model = ctcModelPath;
|
|
313
|
+
modelConfigured = true;
|
|
314
|
+
} else if (!paraformerModelPath.empty()) {
|
|
315
|
+
LOGI("Auto-detected Paraformer model: %s", paraformerModelPath.c_str());
|
|
316
|
+
config.model_config.paraformer.model = paraformerModelPath;
|
|
317
|
+
modelConfigured = true;
|
|
318
|
+
} else if (!ctcModelPath.empty()) {
|
|
319
|
+
// Fallback: try as CTC model
|
|
320
|
+
LOGI("Auto-detected CTC model (fallback): %s", ctcModelPath.c_str());
|
|
321
|
+
config.model_config.nemo_ctc.model = ctcModelPath;
|
|
322
|
+
modelConfigured = true;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
if (tokensRequired) {
|
|
327
|
+
if (!fileExists(tokensPath)) {
|
|
328
|
+
LOGE("Tokens file not found: %s", tokensPath.c_str());
|
|
329
|
+
return false;
|
|
330
|
+
}
|
|
331
|
+
config.model_config.tokens = tokensPath;
|
|
332
|
+
LOGI("Using tokens file: %s", tokensPath.c_str());
|
|
333
|
+
} else if (modelConfigured && fileExists(tokensPath)) {
|
|
334
|
+
config.model_config.tokens = tokensPath;
|
|
335
|
+
LOGI("Using tokens file (optional): %s", tokensPath.c_str());
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
if (!modelConfigured) {
|
|
339
|
+
LOGE("No valid model files found in directory: %s", modelDir.c_str());
|
|
340
|
+
return false;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Set remaining config
|
|
344
|
+
config.decoding_method = "greedy_search";
|
|
345
|
+
config.model_config.num_threads = 4;
|
|
346
|
+
config.model_config.provider = "cpu";
|
|
347
|
+
config.model_config.debug = false;
|
|
348
|
+
|
|
349
|
+
// Create the recognizer using C++ API
|
|
350
|
+
try {
|
|
351
|
+
auto recognizer = sherpa_onnx::cxx::OfflineRecognizer::Create(config);
|
|
352
|
+
if (recognizer.Get() == nullptr) {
|
|
353
|
+
LOGE("Failed to create OfflineRecognizer: Create returned invalid object (nullptr)");
|
|
354
|
+
return false;
|
|
355
|
+
}
|
|
356
|
+
pImpl->recognizer = std::move(recognizer);
|
|
357
|
+
LOGI("OfflineRecognizer created successfully using C++ API");
|
|
358
|
+
} catch (const std::exception& e) {
|
|
359
|
+
LOGE("Failed to create OfflineRecognizer: %s", e.what());
|
|
360
|
+
return false;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
pImpl->modelDir = modelDir;
|
|
364
|
+
pImpl->initialized = true;
|
|
365
|
+
return true;
|
|
366
|
+
|
|
367
|
+
} catch (const std::exception& e) {
|
|
368
|
+
LOGE("Exception during initialization: %s", e.what());
|
|
369
|
+
return false;
|
|
370
|
+
} catch (...) {
|
|
371
|
+
LOGE("Unknown exception during initialization");
|
|
372
|
+
return false;
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
std::string SherpaOnnxWrapper::transcribeFile(const std::string& filePath) {
|
|
377
|
+
if (!pImpl->initialized || !pImpl->recognizer.has_value()) {
|
|
378
|
+
LOGE("Not initialized. Call initialize() first.");
|
|
379
|
+
return "";
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
try {
|
|
383
|
+
if (!fs::exists(filePath)) {
|
|
384
|
+
LOGE("Audio file does not exist: %s", filePath.c_str());
|
|
385
|
+
return "";
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
// Read the wave file using C++ API
|
|
389
|
+
sherpa_onnx::cxx::Wave wave = sherpa_onnx::cxx::ReadWave(filePath);
|
|
390
|
+
|
|
391
|
+
if (wave.samples.empty()) {
|
|
392
|
+
LOGE("Failed to read wave file or file is empty: %s", filePath.c_str());
|
|
393
|
+
return "";
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
// Create a stream
|
|
397
|
+
auto stream = pImpl->recognizer.value().CreateStream();
|
|
398
|
+
|
|
399
|
+
// Feed audio data to the stream (all samples at once for offline recognition)
|
|
400
|
+
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), wave.samples.size());
|
|
401
|
+
|
|
402
|
+
// Decode the stream
|
|
403
|
+
pImpl->recognizer.value().Decode(&stream);
|
|
404
|
+
|
|
405
|
+
// Get result
|
|
406
|
+
auto result = pImpl->recognizer.value().GetResult(&stream);
|
|
407
|
+
|
|
408
|
+
return result.text;
|
|
409
|
+
} catch (const std::exception& e) {
|
|
410
|
+
LOGE("Exception during transcription: %s", e.what());
|
|
411
|
+
return "";
|
|
412
|
+
} catch (...) {
|
|
413
|
+
LOGE("Unknown exception during transcription");
|
|
414
|
+
return "";
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
bool SherpaOnnxWrapper::isInitialized() const {
|
|
419
|
+
return pImpl->initialized;
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
void SherpaOnnxWrapper::release() {
|
|
423
|
+
if (pImpl->initialized) {
|
|
424
|
+
// OfflineRecognizer uses RAII - destruction happens automatically when optional is reset
|
|
425
|
+
pImpl->recognizer.reset();
|
|
426
|
+
pImpl->initialized = false;
|
|
427
|
+
pImpl->modelDir.clear();
|
|
428
|
+
LOGI("Resources released");
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
} // namespace sherpaonnx
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"names":["TurboModuleRegistry","getEnforcing"],"sourceRoot":"..\\..\\src","sources":["NativeSherpaOnnx.ts"],"mappings":";;AAAA,SAASA,mBAAmB,QAA0B,cAAc;AA2CpE,eAAeA,mBAAmB,CAACC,YAAY,CAAO,YAAY,CAAC","ignoreList":[]}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Speaker Diarization feature module
|
|
5
|
+
*
|
|
6
|
+
* @remarks
|
|
7
|
+
* This feature is not yet implemented. This module serves as a placeholder
|
|
8
|
+
* for future speaker diarization functionality.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // Future usage:
|
|
13
|
+
* import { initializeDiarization, diarizeAudio } from 'react-native-sherpa-onnx/diarization';
|
|
14
|
+
*
|
|
15
|
+
* await initializeDiarization({ modelPath: 'models/diarization-model' });
|
|
16
|
+
* const segments = await diarizeAudio('path/to/audio.wav');
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Diarization initialization options (placeholder)
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Speaker segment with speaker ID
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/**
 * Placeholder for initializing Speaker Diarization from a model directory.
 *
 * Always rejects: the feature has not been implemented yet.
 *
 * @throws {Error} Not yet implemented
 */
export async function initializeDiarization(_options) {
  const reason = 'Speaker Diarization feature is not yet implemented. This is a placeholder module.';
  throw new Error(reason);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Perform speaker diarization on an audio file.
 *
 * Not yet implemented. The function is async so that the failure is delivered
 * as a rejected promise, matching `initializeDiarization` and the documented
 * `await diarizeAudio(...)` usage, instead of being thrown synchronously
 * before a promise exists.
 *
 * @throws {Error} Not yet implemented (as a promise rejection)
 */
export async function diarizeAudio(_filePath) {
  throw new Error('Speaker Diarization feature is not yet implemented. This is a placeholder module.');
}
|
|
45
|
+
|
|
46
|
+
/**
 * Release diarization resources.
 *
 * Not yet implemented. The function is async so that the failure is delivered
 * as a rejected promise, matching `initializeDiarization`, instead of being
 * thrown synchronously before a promise exists.
 *
 * @throws {Error} Not yet implemented (as a promise rejection)
 */
export async function unloadDiarization() {
  throw new Error('Speaker Diarization feature is not yet implemented. This is a placeholder module.');
}
|
|
54
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"names":["initializeDiarization","_options","Error","diarizeAudio","_filePath","unloadDiarization"],"sourceRoot":"..\\..\\..\\src","sources":["diarization/index.ts"],"mappings":";;AAAA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA;AACA;AACA;;AAMA;AACA;AACA;;AAQA;AACA;AACA;AACA;AACA;AACA,OAAO,eAAeA,qBAAqBA,CACzCC,QAAsC,EACvB;EACf,MAAM,IAAIC,KAAK,CACb,mFACF,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASC,YAAYA,CAACC,SAAiB,EAA6B;EACzE,MAAM,IAAIF,KAAK,CACb,mFACF,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASG,iBAAiBA,CAAA,EAAkB;EACjD,MAAM,IAAIH,KAAK,CACb,mFACF,CAAC;AACH","ignoreList":[]}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Speech Enhancement feature module
|
|
5
|
+
*
|
|
6
|
+
* @remarks
|
|
7
|
+
* This feature is not yet implemented. This module serves as a placeholder
|
|
8
|
+
* for future speech enhancement functionality.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // Future usage:
|
|
13
|
+
* import { initializeEnhancement, enhanceAudio } from 'react-native-sherpa-onnx/enhancement';
|
|
14
|
+
*
|
|
15
|
+
* await initializeEnhancement({ modelPath: 'models/enhancement-model' });
|
|
16
|
+
* const enhancedPath = await enhanceAudio('path/to/noisy-audio.wav');
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Enhancement initialization options (placeholder)
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Enhancement result
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/**
 * Placeholder for initializing Speech Enhancement from a model directory.
 *
 * Always rejects: the feature has not been implemented yet.
 *
 * @throws {Error} Not yet implemented
 */
export async function initializeEnhancement(_options) {
  const reason = 'Speech Enhancement feature is not yet implemented. This is a placeholder module.';
  throw new Error(reason);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Enhance speech quality in an audio file.
 *
 * Not yet implemented. The function is async so that the failure is delivered
 * as a rejected promise, matching `initializeEnhancement` and the documented
 * `await enhanceAudio(...)` usage, instead of being thrown synchronously
 * before a promise exists.
 *
 * @throws {Error} Not yet implemented (as a promise rejection)
 */
export async function enhanceAudio(_filePath) {
  throw new Error('Speech Enhancement feature is not yet implemented. This is a placeholder module.');
}
|
|
45
|
+
|
|
46
|
+
/**
 * Release enhancement resources.
 *
 * Not yet implemented. The function is async so that the failure is delivered
 * as a rejected promise, matching `initializeEnhancement`, instead of being
 * thrown synchronously before a promise exists.
 *
 * @throws {Error} Not yet implemented (as a promise rejection)
 */
export async function unloadEnhancement() {
  throw new Error('Speech Enhancement feature is not yet implemented. This is a placeholder module.');
}
|
|
54
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"names":["initializeEnhancement","_options","Error","enhanceAudio","_filePath","unloadEnhancement"],"sourceRoot":"..\\..\\..\\src","sources":["enhancement/index.ts"],"mappings":";;AAAA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA;AACA;AACA;;AAMA;AACA;AACA;;AAMA;AACA;AACA;AACA;AACA;AACA,OAAO,eAAeA,qBAAqBA,CACzCC,QAAsC,EACvB;EACf,MAAM,IAAIC,KAAK,CACb,kFACF,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASC,YAAYA,CAACC,SAAiB,EAA8B;EAC1E,MAAM,IAAIF,KAAK,CACb,kFACF,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA,OAAO,SAASG,iBAAiBA,CAAA,EAAkB;EACjD,MAAM,IAAIH,KAAK,CACb,kFACF,CAAC;AACH","ignoreList":[]}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import SherpaOnnx from "./NativeSherpaOnnx.js";
|
|
4
|
+
|
|
5
|
+
// Export types and utilities
|
|
6
|
+
|
|
7
|
+
export { assetModelPath, autoModelPath, fileModelPath, getDefaultModelPath, resolveModelPath } from "./utils.js";
|
|
8
|
+
|
|
9
|
+
// Re-export STT functionality
|
|
10
|
+
export { initializeSTT, transcribeFile, unloadSTT } from "./stt/index.js";
|
|
11
|
+
// TODO: Uncomment these exports once the features are implemented
|
|
12
|
+
// Re-export other features (when implemented)
|
|
13
|
+
// export * from './tts';
|
|
14
|
+
// export * from './vad';
|
|
15
|
+
// export * from './diarization';
|
|
16
|
+
// export * from './enhancement';
|
|
17
|
+
// export * from './separation';
|
|
18
|
+
|
|
19
|
+
/**
 * Smoke test for the native side: forwards to the native module's
 * `testSherpaInit` and hands back whatever it returns.
 */
export function testSherpaInit() {
  const nativeResult = SherpaOnnx.testSherpaInit();
  return nativeResult;
}
|
|
25
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"names":["SherpaOnnx","assetModelPath","autoModelPath","fileModelPath","getDefaultModelPath","resolveModelPath","initializeSTT","transcribeFile","unloadSTT","testSherpaInit"],"sourceRoot":"..\\..\\src","sources":["index.tsx"],"mappings":";;AAAA,OAAOA,UAAU,MAAM,uBAAoB;;AAE3C;;AAEA,SACEC,cAAc,EACdC,aAAa,EACbC,aAAa,EACbC,mBAAmB,EACnBC,gBAAgB,QACX,YAAS;;AAEhB;AACA,SAASC,aAAa,EAAEC,cAAc,EAAEC,SAAS,QAAQ,gBAAO;AAGhE;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA;AACA;AACA;AACA,OAAO,SAASC,cAAcA,CAAA,EAAoB;EAChD,OAAOT,UAAU,CAACS,cAAc,CAAC,CAAC;AACpC","ignoreList":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"module"}
|