react-native-sherpa-onnx 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -77
- package/SherpaOnnx.podspec +79 -45
- package/android/build.gradle +8 -2
- package/android/prebuilt-download.gradle +70 -16
- package/android/prebuilt-versions.gradle +14 -6
- package/android/src/main/cpp/CMakeLists.txt +2 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
- package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
- package/ios/SherpaOnnx+STT.mm +2 -0
- package/ios/SherpaOnnx+TTS.mm +17 -0
- package/ios/SherpaOnnx.mm +27 -3
- package/ios/SherpaOnnxAudioConvert.h +28 -0
- package/ios/SherpaOnnxAudioConvert.mm +698 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
- package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +55 -1
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +14 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/index.js +10 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/stt/streaming.js +6 -3
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/tts/index.js +13 -1
- package/lib/module/tts/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +20 -1
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +10 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +12 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/check-model-csvs.sh +72 -0
- package/scripts/setup-ios-framework.sh +272 -191
- package/src/NativeSherpaOnnx.ts +37 -3
- package/src/audio/index.ts +84 -1
- package/src/download/ModelDownloadManager.ts +19 -0
- package/src/index.tsx +15 -0
- package/src/stt/streaming.ts +10 -5
- package/src/stt/streamingTypes.ts +1 -1
- package/src/tts/index.ts +25 -1
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/ios/scripts/patch-libarchive-includes.sh +0 -61
- package/ios/scripts/setup-ios-libarchive.sh +0 -98
|
@@ -83,4 +83,26 @@ jobject BuildDetectedModelsList(JNIEnv* env, const std::vector<DetectedModel>& m
|
|
|
83
83
|
return list;
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
+
/**
 * Build a java.util.ArrayList<String> from a vector of C++ strings.
 *
 * @param env     JNI environment of the calling thread.
 * @param strings Values to append, in order. Each one is handed to
 *                NewStringUTF, which expects modified UTF-8.
 * @return A local reference to the new list, or nullptr on failure. On
 *         failure any Java exception raised by the failing JNI call is left
 *         pending so the caller (or the Java side) can observe it.
 */
jobject BuildStringList(JNIEnv* env, const std::vector<std::string>& strings) {
    jclass listClass = env->FindClass("java/util/ArrayList");
    if (!listClass) return nullptr;

    // Look up "add" only if "<init>" resolved: a failed GetMethodID leaves an
    // exception pending, and most JNI calls must not be made in that state.
    jmethodID listInit = env->GetMethodID(listClass, "<init>", "()V");
    jmethodID listAdd = listInit
        ? env->GetMethodID(listClass, "add", "(Ljava/lang/Object;)Z")
        : nullptr;
    if (!listInit || !listAdd) {
        env->DeleteLocalRef(listClass);
        return nullptr;
    }

    jobject list = env->NewObject(listClass, listInit);
    env->DeleteLocalRef(listClass);
    if (!list) return nullptr;

    for (const auto& s : strings) {
        jstring jval = env->NewStringUTF(s.c_str());
        if (!jval) {
            // NewStringUTF returns null when the string cannot be constructed;
            // if it also raised an exception we must stop issuing JNI calls.
            if (env->ExceptionCheck()) {
                env->DeleteLocalRef(list);
                return nullptr;
            }
            continue;
        }
        env->CallBooleanMethod(list, listAdd, jval);
        env->DeleteLocalRef(jval);
        // ArrayList.add can throw (e.g. on allocation failure); do not keep
        // appending with an exception pending.
        if (env->ExceptionCheck()) {
            env->DeleteLocalRef(list);
            return nullptr;
        }
    }
    return list;
}
|
|
107
|
+
|
|
86
108
|
} // namespace sherpaonnx
|
|
@@ -14,6 +14,8 @@ namespace sherpaonnx {
|
|
|
14
14
|
bool PutString(JNIEnv* env, jobject map, jmethodID putId, const char* key, const std::string& value);
|
|
15
15
|
bool PutBoolean(JNIEnv* env, jobject map, jmethodID putId, const char* key, bool value);
|
|
16
16
|
jobject BuildDetectedModelsList(JNIEnv* env, const std::vector<DetectedModel>& models);
|
|
17
|
+
/** Build a Java ArrayList<String> from a vector of strings. Returns null on failure. */
|
|
18
|
+
jobject BuildStringList(JNIEnv* env, const std::vector<std::string>& strings);
|
|
17
19
|
|
|
18
20
|
} // namespace sherpaonnx
|
|
19
21
|
|
|
@@ -35,8 +35,8 @@ bool ContainsToken(const std::string& value, const std::string& token) {
|
|
|
35
35
|
return value.find(token) != std::string::npos;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
bool
|
|
39
|
-
return EndsWith(entry.nameLower, ".onnx");
|
|
38
|
+
bool IsOnnxOrOrtFile(const FileEntry& entry) {
|
|
39
|
+
return EndsWith(entry.nameLower, ".onnx") || EndsWith(entry.nameLower, ".ort");
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
std::string BaseName(const std::string& path) {
|
|
@@ -55,7 +55,7 @@ std::string ChooseLargest(
|
|
|
55
55
|
std::uint64_t bestSize = 0;
|
|
56
56
|
|
|
57
57
|
for (const auto& entry : files) {
|
|
58
|
-
if (!
|
|
58
|
+
if (!IsOnnxOrOrtFile(entry)) continue;
|
|
59
59
|
|
|
60
60
|
bool hasExcluded = false;
|
|
61
61
|
for (const auto& token : excludeTokens) {
|
|
@@ -212,9 +212,8 @@ std::string ToLower(std::string value) {
|
|
|
212
212
|
return value;
|
|
213
213
|
}
|
|
214
214
|
|
|
215
|
-
std::string FindFileByName(const std::
|
|
215
|
+
std::string FindFileByName(const std::vector<FileEntry>& files, const std::string& fileName) {
|
|
216
216
|
std::string target = ToLower(fileName);
|
|
217
|
-
auto files = ListFilesRecursive(baseDir, maxDepth);
|
|
218
217
|
for (const auto& entry : files) {
|
|
219
218
|
if (entry.nameLower == target) {
|
|
220
219
|
return entry.path;
|
|
@@ -223,149 +222,17 @@ std::string FindFileByName(const std::string& baseDir, const std::string& fileNa
|
|
|
223
222
|
return "";
|
|
224
223
|
}
|
|
225
224
|
|
|
226
|
-
std::string FindFileEndingWith(const std::
|
|
225
|
+
std::string FindFileEndingWith(const std::vector<FileEntry>& files, const std::string& suffix) {
|
|
227
226
|
std::string targetSuffix = ToLower(suffix);
|
|
228
|
-
auto files = ListFilesRecursive(baseDir, maxDepth);
|
|
229
|
-
// 1) exact match (e.g. "tokens.txt")
|
|
230
227
|
for (const auto& entry : files) {
|
|
231
|
-
if (entry.nameLower == targetSuffix)
|
|
232
|
-
return entry.path;
|
|
233
|
-
}
|
|
228
|
+
if (entry.nameLower == targetSuffix) return entry.path;
|
|
234
229
|
}
|
|
235
|
-
|
|
236
|
-
// 2) true suffix match (preferred over substring to avoid false positives
|
|
237
|
-
// like "tokens.txt.bak" or "mytokens.txt.tmp").
|
|
238
230
|
for (const auto& entry : files) {
|
|
239
|
-
if (
|
|
231
|
+
if (targetSuffix.size() <= entry.nameLower.size() &&
|
|
232
|
+
std::equal(targetSuffix.rbegin(), targetSuffix.rend(), entry.nameLower.rbegin())) {
|
|
240
233
|
return entry.path;
|
|
241
234
|
}
|
|
242
235
|
}
|
|
243
|
-
|
|
244
|
-
// 3) If we are looking for tokens, fallback to inspecting .txt files' contents.
|
|
245
|
-
// Heuristic: many token files are plain text with lines like "token <index>".
|
|
246
|
-
if (targetSuffix.find("tokens") != std::string::npos) {
|
|
247
|
-
auto IsLikelyTokensFile = [](const std::string& path) -> bool {
|
|
248
|
-
std::ifstream ifs(path);
|
|
249
|
-
if (!ifs.is_open()) return false;
|
|
250
|
-
std::string line;
|
|
251
|
-
int total = 0;
|
|
252
|
-
int matched = 0;
|
|
253
|
-
const int maxLines = 2000;
|
|
254
|
-
|
|
255
|
-
while (total < maxLines && std::getline(ifs, line)) {
|
|
256
|
-
++total;
|
|
257
|
-
if (line.empty()) continue;
|
|
258
|
-
// Trim trailing CR if present
|
|
259
|
-
if (!line.empty() && line.back() == '\r') line.pop_back();
|
|
260
|
-
|
|
261
|
-
// Check if the line ends with an integer index (common token format)
|
|
262
|
-
size_t sp = line.find_last_of(" \t");
|
|
263
|
-
if (sp != std::string::npos && sp + 1 < line.size()) {
|
|
264
|
-
std::string idx = line.substr(sp + 1);
|
|
265
|
-
bool allDigits = !idx.empty();
|
|
266
|
-
for (char c : idx) {
|
|
267
|
-
if (!std::isdigit(static_cast<unsigned char>(c))) { allDigits = false; break; }
|
|
268
|
-
}
|
|
269
|
-
if (allDigits) ++matched;
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
ifs.close();
|
|
274
|
-
if (total < 2) return false;
|
|
275
|
-
// Heuristic: at least half of non-empty lines should match the token pattern
|
|
276
|
-
return matched >= std::max(1, total / 2);
|
|
277
|
-
};
|
|
278
|
-
|
|
279
|
-
for (const auto& entry : files) {
|
|
280
|
-
if (EndsWith(entry.nameLower, ".txt")) {
|
|
281
|
-
if (IsLikelyTokensFile(entry.path)) {
|
|
282
|
-
return entry.path;
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
return "";
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
std::string FindDirectoryByName(const std::string& baseDir, const std::string& dirName, int maxDepth) {
|
|
291
|
-
std::string target = ToLower(dirName);
|
|
292
|
-
std::vector<std::string> toVisit = ListDirectories(baseDir);
|
|
293
|
-
int depth = 0;
|
|
294
|
-
|
|
295
|
-
while (!toVisit.empty() && depth <= maxDepth) {
|
|
296
|
-
std::vector<std::string> next;
|
|
297
|
-
for (const auto& dir : toVisit) {
|
|
298
|
-
std::string name = dir;
|
|
299
|
-
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
300
|
-
try {
|
|
301
|
-
name = fs::path(dir).filename().string();
|
|
302
|
-
} catch (const std::exception&) {
|
|
303
|
-
}
|
|
304
|
-
#elif __has_include(<experimental/filesystem>)
|
|
305
|
-
try {
|
|
306
|
-
name = fs::path(dir).filename().string();
|
|
307
|
-
} catch (const std::exception&) {
|
|
308
|
-
}
|
|
309
|
-
#else
|
|
310
|
-
name = BaseName(dir);
|
|
311
|
-
#endif
|
|
312
|
-
if (ToLower(name) == target) {
|
|
313
|
-
return dir;
|
|
314
|
-
}
|
|
315
|
-
if (depth < maxDepth) {
|
|
316
|
-
auto nested = ListDirectories(dir);
|
|
317
|
-
next.insert(next.end(), nested.begin(), nested.end());
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
toVisit.swap(next);
|
|
321
|
-
depth += 1;
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
return "";
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
std::string ResolveTokenizerDir(const std::string& modelDir) {
|
|
328
|
-
std::string vocabInMain = modelDir + "/vocab.json";
|
|
329
|
-
if (FileExists(vocabInMain)) {
|
|
330
|
-
return modelDir;
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
std::vector<std::string> toVisit = ListDirectories(modelDir);
|
|
334
|
-
int depth = 0;
|
|
335
|
-
while (!toVisit.empty() && depth <= 2) {
|
|
336
|
-
std::vector<std::string> next;
|
|
337
|
-
for (const auto& dir : toVisit) {
|
|
338
|
-
std::string dirName = dir;
|
|
339
|
-
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
340
|
-
try {
|
|
341
|
-
dirName = fs::path(dir).filename().string();
|
|
342
|
-
} catch (const std::exception&) {
|
|
343
|
-
}
|
|
344
|
-
#elif __has_include(<experimental/filesystem>)
|
|
345
|
-
try {
|
|
346
|
-
dirName = fs::path(dir).filename().string();
|
|
347
|
-
} catch (const std::exception&) {
|
|
348
|
-
}
|
|
349
|
-
#else
|
|
350
|
-
dirName = BaseName(dir);
|
|
351
|
-
#endif
|
|
352
|
-
std::string dirNameLower = ToLower(dirName);
|
|
353
|
-
if (dirNameLower.find("qwen3") != std::string::npos) {
|
|
354
|
-
std::string vocabPath = dir + "/vocab.json";
|
|
355
|
-
if (FileExists(vocabPath)) {
|
|
356
|
-
return dir;
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
if (depth < 2) {
|
|
361
|
-
auto nested = ListDirectories(dir);
|
|
362
|
-
next.insert(next.end(), nested.begin(), nested.end());
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
toVisit.swap(next);
|
|
366
|
-
depth += 1;
|
|
367
|
-
}
|
|
368
|
-
|
|
369
236
|
return "";
|
|
370
237
|
}
|
|
371
238
|
|
|
@@ -377,7 +244,7 @@ std::string FindOnnxByToken(
|
|
|
377
244
|
std::vector<FileEntry> matches;
|
|
378
245
|
std::string tokenLower = ToLower(token);
|
|
379
246
|
for (const auto& entry : files) {
|
|
380
|
-
if (!
|
|
247
|
+
if (!IsOnnxOrOrtFile(entry)) continue;
|
|
381
248
|
if (ContainsToken(entry.nameLower, tokenLower)) {
|
|
382
249
|
matches.push_back(entry);
|
|
383
250
|
}
|
|
@@ -407,6 +274,40 @@ std::string FindOnnxByAnyToken(
|
|
|
407
274
|
return "";
|
|
408
275
|
}
|
|
409
276
|
|
|
277
|
+
std::string FindOnnxByAnyTokenExcluding(
|
|
278
|
+
const std::vector<FileEntry>& files,
|
|
279
|
+
const std::vector<std::string>& tokens,
|
|
280
|
+
const std::vector<std::string>& excludeInName,
|
|
281
|
+
const std::optional<bool>& preferInt8
|
|
282
|
+
) {
|
|
283
|
+
for (const auto& token : tokens) {
|
|
284
|
+
std::string tokenLower = ToLower(token);
|
|
285
|
+
std::vector<FileEntry> matches;
|
|
286
|
+
for (const auto& entry : files) {
|
|
287
|
+
if (!IsOnnxOrOrtFile(entry)) continue;
|
|
288
|
+
if (!ContainsToken(entry.nameLower, tokenLower)) continue;
|
|
289
|
+
bool excluded = false;
|
|
290
|
+
for (const auto& ex : excludeInName) {
|
|
291
|
+
std::string exLower = ToLower(ex);
|
|
292
|
+
if (ContainsToken(entry.nameLower, exLower)) {
|
|
293
|
+
excluded = true;
|
|
294
|
+
break;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
if (!excluded) matches.push_back(entry);
|
|
298
|
+
}
|
|
299
|
+
if (matches.empty()) continue;
|
|
300
|
+
std::vector<std::string> emptyTokens;
|
|
301
|
+
bool wantInt8 = preferInt8.has_value() && preferInt8.value();
|
|
302
|
+
bool wantNonInt8 = preferInt8.has_value() && !preferInt8.value();
|
|
303
|
+
std::string chosen = ChooseLargest(matches, emptyTokens, wantInt8, wantNonInt8);
|
|
304
|
+
if (!chosen.empty()) return chosen;
|
|
305
|
+
chosen = ChooseLargest(matches, emptyTokens, false, false);
|
|
306
|
+
if (!chosen.empty()) return chosen;
|
|
307
|
+
}
|
|
308
|
+
return "";
|
|
309
|
+
}
|
|
310
|
+
|
|
410
311
|
std::string FindLargestOnnx(const std::vector<FileEntry>& files) {
|
|
411
312
|
std::vector<std::string> emptyTokens;
|
|
412
313
|
return ChooseLargest(files, emptyTokens, false, false);
|
|
@@ -435,5 +336,58 @@ bool ContainsWord(const std::string& haystack, const std::string& word) {
|
|
|
435
336
|
return false;
|
|
436
337
|
}
|
|
437
338
|
|
|
339
|
+
std::string FindDirectoryUnderRoot(
|
|
340
|
+
const std::vector<FileEntry>& files,
|
|
341
|
+
const std::string& rootDir,
|
|
342
|
+
const std::string& dirName
|
|
343
|
+
) {
|
|
344
|
+
if (dirName.empty()) return "";
|
|
345
|
+
const std::string needle = "/" + dirName + "/";
|
|
346
|
+
const size_t dirPathLen = 1 + dirName.size();
|
|
347
|
+
for (const auto& entry : files) {
|
|
348
|
+
if (entry.path.size() < rootDir.size() + needle.size()) continue;
|
|
349
|
+
if (entry.path.compare(0, rootDir.size(), rootDir) != 0) continue;
|
|
350
|
+
size_t pos = entry.path.find(needle, rootDir.size());
|
|
351
|
+
if (pos != std::string::npos) {
|
|
352
|
+
return entry.path.substr(0, pos + dirPathLen);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
return "";
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
std::vector<LexiconCandidate> FindLexiconCandidates(
|
|
359
|
+
const std::vector<FileEntry>& files,
|
|
360
|
+
const std::string& rootDir
|
|
361
|
+
) {
|
|
362
|
+
std::vector<LexiconCandidate> candidates;
|
|
363
|
+
const size_t rootLen = rootDir.size();
|
|
364
|
+
for (const auto& entry : files) {
|
|
365
|
+
if (entry.path.size() <= rootLen) continue;
|
|
366
|
+
if (rootLen > 0) {
|
|
367
|
+
if (entry.path.compare(0, rootLen, rootDir) != 0) continue;
|
|
368
|
+
// Enforce path boundary: if rootDir doesn't end with '/', require '/' after it
|
|
369
|
+
if (rootDir.back() != '/' && entry.path[rootLen] != '/') continue;
|
|
370
|
+
}
|
|
371
|
+
std::string base = BaseName(entry.path);
|
|
372
|
+
if (base.empty()) continue;
|
|
373
|
+
std::string baseLower = ToLower(base);
|
|
374
|
+
if (baseLower == "lexicon.txt") {
|
|
375
|
+
candidates.push_back({entry.path, "default"});
|
|
376
|
+
} else if (baseLower.size() > 12 &&
|
|
377
|
+
baseLower.compare(0, 8, "lexicon-") == 0 &&
|
|
378
|
+
baseLower.compare(baseLower.size() - 4, 4, ".txt") == 0) {
|
|
379
|
+
std::string languageId = baseLower.substr(8, baseLower.size() - 12);
|
|
380
|
+
candidates.push_back({entry.path, languageId});
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
std::sort(candidates.begin(), candidates.end(), [](const LexiconCandidate& a, const LexiconCandidate& b) {
|
|
384
|
+
if (a.languageId == b.languageId) return a.path < b.path;
|
|
385
|
+
if (a.languageId == "default") return true;
|
|
386
|
+
if (b.languageId == "default") return false;
|
|
387
|
+
return a.languageId < b.languageId;
|
|
388
|
+
});
|
|
389
|
+
return candidates;
|
|
390
|
+
}
|
|
391
|
+
|
|
438
392
|
} // namespace model_detect
|
|
439
393
|
} // namespace sherpaonnx
|
|
@@ -22,11 +22,11 @@ std::vector<std::string> ListDirectories(const std::string& path);
|
|
|
22
22
|
std::vector<FileEntry> ListFiles(const std::string& path);
|
|
23
23
|
std::vector<FileEntry> ListFilesRecursive(const std::string& path, int maxDepth = 2);
|
|
24
24
|
std::string ToLower(std::string value);
|
|
25
|
-
std::string ResolveTokenizerDir(const std::string& modelDir);
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
std::string
|
|
29
|
-
|
|
26
|
+
/** Find file in \p files whose name equals \p fileName (case-insensitive). Uses file tree only, no filesystem. */
|
|
27
|
+
std::string FindFileByName(const std::vector<FileEntry>& files, const std::string& fileName);
|
|
28
|
+
/** Find file in \p files whose name equals or ends with \p suffix (e.g. tokens.txt). Case-insensitive. */
|
|
29
|
+
std::string FindFileEndingWith(const std::vector<FileEntry>& files, const std::string& suffix);
|
|
30
30
|
|
|
31
31
|
std::string FindOnnxByToken(
|
|
32
32
|
const std::vector<FileEntry>& files,
|
|
@@ -40,6 +40,14 @@ std::string FindOnnxByAnyToken(
|
|
|
40
40
|
const std::optional<bool>& preferInt8
|
|
41
41
|
);
|
|
42
42
|
|
|
43
|
+
/** Like FindOnnxByAnyToken but skips any file whose nameLower contains any of \p excludeInName. */
|
|
44
|
+
std::string FindOnnxByAnyTokenExcluding(
|
|
45
|
+
const std::vector<FileEntry>& files,
|
|
46
|
+
const std::vector<std::string>& tokens,
|
|
47
|
+
const std::vector<std::string>& excludeInName,
|
|
48
|
+
const std::optional<bool>& preferInt8
|
|
49
|
+
);
|
|
50
|
+
|
|
43
51
|
std::string FindLargestOnnx(
|
|
44
52
|
const std::vector<FileEntry>& files
|
|
45
53
|
);
|
|
@@ -52,6 +60,34 @@ std::string FindLargestOnnxExcludingTokens(
|
|
|
52
60
|
/** Returns true if \p word appears in \p haystack as a standalone token (surrounded by separators: / - _ . space). */
|
|
53
61
|
bool ContainsWord(const std::string& haystack, const std::string& word);
|
|
54
62
|
|
|
63
|
+
/**
|
|
64
|
+
* Find a directory with the given name anywhere under \p rootDir in the file tree.
|
|
65
|
+
* Searches \p files for any path that starts with \p rootDir and contains "/dirName/".
|
|
66
|
+
* Returns the full path to that directory (e.g. rootDir/inner/dirName) or empty if not found.
|
|
67
|
+
* Used e.g. to find espeak-ng-data in modelDir or in modelDir/inner-model-dir/.
|
|
68
|
+
*/
|
|
69
|
+
std::string FindDirectoryUnderRoot(
|
|
70
|
+
const std::vector<FileEntry>& files,
|
|
71
|
+
const std::string& rootDir,
|
|
72
|
+
const std::string& dirName
|
|
73
|
+
);
|
|
74
|
+
|
|
75
|
+
/** Lexicon file with optional language id for multi-lang TTS (e.g. Kokoro). */
|
|
76
|
+
struct LexiconCandidate {
|
|
77
|
+
std::string path; /**< Full path to the lexicon file */
|
|
78
|
+
std::string languageId; /**< From filename: "default" for lexicon.txt, else e.g. "us-en", "zh" from lexicon-us-en.txt, lexicon-zh.txt */
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Find all lexicon files under \p rootDir: exact "lexicon.txt" and any "lexicon-*.txt".
|
|
83
|
+
* Returns a list of LexiconCandidate (path + languageId), ordered: lexicon.txt first (as "default"),
|
|
84
|
+
* then lexicon-*.txt alphabetically by language id. Used for multi-language Kokoro/Kitten TTS.
|
|
85
|
+
*/
|
|
86
|
+
std::vector<LexiconCandidate> FindLexiconCandidates(
|
|
87
|
+
const std::vector<FileEntry>& files,
|
|
88
|
+
const std::string& rootDir
|
|
89
|
+
);
|
|
90
|
+
|
|
55
91
|
} // namespace model_detect
|
|
56
92
|
} // namespace sherpaonnx
|
|
57
93
|
|