react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
#ifndef SHERPA_ONNX_COMMON_H
|
|
2
|
-
#define SHERPA_ONNX_COMMON_H
|
|
3
|
-
|
|
4
|
-
#include <string>
|
|
5
|
-
|
|
6
|
-
namespace sherpaonnx {
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Information about a detected model.
|
|
10
|
-
*/
|
|
11
|
-
// Plain aggregate pairing a detected model's type label with the directory it was found in.
struct DetectedModel {
|
|
12
|
-
std::string type; // Model type (e.g., "transducer", "paraformer", "nemo_ctc")
|
|
13
|
-
std::string modelDir; // Directory path where the model is located
|
|
14
|
-
};
|
|
15
|
-
|
|
16
|
-
} // namespace sherpaonnx
|
|
17
|
-
|
|
18
|
-
#endif // SHERPA_ONNX_COMMON_H
|
|
1
|
+
#ifndef SHERPA_ONNX_COMMON_H
|
|
2
|
+
#define SHERPA_ONNX_COMMON_H
|
|
3
|
+
|
|
4
|
+
#include <string>
|
|
5
|
+
|
|
6
|
+
namespace sherpaonnx {
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Information about a detected model.
|
|
10
|
+
*/
|
|
11
|
+
// Plain aggregate pairing a detected model's type label with the directory it was found in.
// Both members are default-constructed (empty) strings until a detector fills them in.
struct DetectedModel {
|
|
12
|
+
std::string type; // Model type (e.g., "transducer", "paraformer", "nemo_ctc")
|
|
13
|
+
std::string modelDir; // Directory path where the model is located
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
} // namespace sherpaonnx
|
|
17
|
+
|
|
18
|
+
#endif // SHERPA_ONNX_COMMON_H
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sherpa-onnx-detect-jni-common.cpp
|
|
3
|
+
*
|
|
4
|
+
* Purpose: Shared JNI helpers for building Java HashMap/ArrayList from C++ detect results
|
|
5
|
+
* (PutString, PutBoolean, BuildDetectedModelsList). Used by sherpa-onnx-stt-wrapper and
|
|
6
|
+
* sherpa-onnx-tts-wrapper.
|
|
7
|
+
*/
|
|
8
|
+
#include "sherpa-onnx-detect-jni-common.h"
|
|
9
|
+
|
|
10
|
+
namespace sherpaonnx {
|
|
11
|
+
|
|
12
|
+
/**
 * Stores a string under `key` in a Java map by invoking `putId` on `map`.
 *
 * An empty `value` is stored as a Java null instead of an empty string.
 *
 * @param env   JNI environment of the calling thread.
 * @param map   Map object that `putId` is invoked on.
 * @param putId Method ID called with (key, value) and returning an Object.
 * @param key   Key as a C string, converted with NewStringUTF.
 * @param value Value to store; empty means null.
 * @return true when the entry was stored; false when a Java string could not be
 *         created or the put call left a Java exception pending. A pending
 *         exception is not cleared here.
 */
bool PutString(JNIEnv* env, jobject map, jmethodID putId, const char* key, const std::string& value) {
    jstring jkey = env->NewStringUTF(key);
    if (!jkey) return false;

    jstring jval = nullptr;
    if (!value.empty()) {
        jval = env->NewStringUTF(value.c_str());
        if (!jval) {
            env->DeleteLocalRef(jkey);
            return false;
        }
    }

    // The call returns an Object as a fresh local reference; release it so repeated
    // calls from a loop cannot fill the local reference table.
    jobject previous = env->CallObjectMethod(map, putId, jkey, static_cast<jobject>(jval));
    const bool stored = (env->ExceptionCheck() == JNI_FALSE);
    if (previous) env->DeleteLocalRef(previous);

    env->DeleteLocalRef(jkey);
    if (jval) env->DeleteLocalRef(jval);
    return stored;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Stores a boxed java.lang.Boolean under `key` in a Java map by invoking `putId` on `map`.
 *
 * The primitive is boxed through Boolean.valueOf(boolean).
 *
 * @param env   JNI environment of the calling thread.
 * @param map   Map object that `putId` is invoked on.
 * @param putId Method ID called with (key, value) and returning an Object.
 * @param key   Key as a C string, converted with NewStringUTF.
 * @param value Boolean to store.
 * @return true when the entry was stored; false when a class/method lookup,
 *         boxing, or key creation failed, or the put call left a Java exception
 *         pending. A pending exception is not cleared here.
 */
bool PutBoolean(JNIEnv* env, jobject map, jmethodID putId, const char* key, bool value) {
    jclass boolClass = env->FindClass("java/lang/Boolean");
    if (!boolClass) return false;

    jmethodID valueOf = env->GetStaticMethodID(boolClass, "valueOf", "(Z)Ljava/lang/Boolean;");
    if (!valueOf) {
        env->DeleteLocalRef(boolClass);
        return false;
    }

    jobject boxed = env->CallStaticObjectMethod(boolClass, valueOf, value ? JNI_TRUE : JNI_FALSE);
    env->DeleteLocalRef(boolClass);
    if (!boxed) return false;

    jstring jkey = env->NewStringUTF(key);
    if (!jkey) {
        env->DeleteLocalRef(boxed);
        return false;
    }

    // The call returns an Object as a fresh local reference; release it so repeated
    // calls from a loop cannot fill the local reference table.
    jobject previous = env->CallObjectMethod(map, putId, jkey, boxed);
    const bool stored = (env->ExceptionCheck() == JNI_FALSE);
    if (previous) env->DeleteLocalRef(previous);

    env->DeleteLocalRef(jkey);
    env->DeleteLocalRef(boxed);
    return stored;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Converts detected models into a java.util.ArrayList of java.util.HashMap objects.
 *
 * Each map receives the keys "type" and "modelDir" via PutString (an empty
 * string is stored as null by PutString).
 *
 * @param env    JNI environment of the calling thread.
 * @param models Models to export, in order.
 * @return Local reference to the new list, or nullptr when a class/method lookup,
 *         an allocation, or a put/add call fails. On failure every local
 *         reference created here is released; a pending Java exception is left
 *         for the caller.
 */
jobject BuildDetectedModelsList(JNIEnv* env, const std::vector<DetectedModel>& models) {
    jclass listClass = env->FindClass("java/util/ArrayList");
    if (!listClass) return nullptr;

    // Look up the second method only if the first lookup succeeded: a failed
    // GetMethodID leaves an exception pending, and further lookups are not allowed then.
    jmethodID listInit = env->GetMethodID(listClass, "<init>", "()V");
    jmethodID listAdd = listInit ? env->GetMethodID(listClass, "add", "(Ljava/lang/Object;)Z") : nullptr;
    if (!listInit || !listAdd) {
        env->DeleteLocalRef(listClass);
        return nullptr;
    }

    jobject list = env->NewObject(listClass, listInit);
    env->DeleteLocalRef(listClass);
    if (!list) return nullptr;

    jclass mapClass = env->FindClass("java/util/HashMap");
    if (!mapClass) {
        env->DeleteLocalRef(list);
        return nullptr;
    }

    jmethodID mapInit = env->GetMethodID(mapClass, "<init>", "()V");
    jmethodID mapPut = mapInit
        ? env->GetMethodID(mapClass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;")
        : nullptr;
    if (!mapInit || !mapPut) {
        env->DeleteLocalRef(mapClass);
        env->DeleteLocalRef(list);
        return nullptr;
    }

    bool failed = false;
    for (const auto& m : models) {
        jobject modelMap = env->NewObject(mapClass, mapInit);
        if (!modelMap) {
            // Allocation failed with an exception pending; stop instead of issuing more JNI calls.
            failed = true;
            break;
        }

        const bool filled = PutString(env, modelMap, mapPut, "type", m.type) &&
                            PutString(env, modelMap, mapPut, "modelDir", m.modelDir);
        if (filled) {
            env->CallBooleanMethod(list, listAdd, modelMap);
        }
        env->DeleteLocalRef(modelMap);

        if (!filled || env->ExceptionCheck()) {
            failed = true;
            break;
        }
    }

    env->DeleteLocalRef(mapClass);
    if (failed) {
        env->DeleteLocalRef(list);
        return nullptr;
    }
    return list;
}
|
|
85
|
+
|
|
86
|
+
} // namespace sherpaonnx
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#ifndef SHERPA_ONNX_DETECT_JNI_COMMON_H
|
|
2
|
+
#define SHERPA_ONNX_DETECT_JNI_COMMON_H
|
|
3
|
+
|
|
4
|
+
#include <jni.h>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
#include "sherpa-onnx-common.h"
|
|
9
|
+
|
|
10
|
+
namespace sherpaonnx {
|
|
11
|
+
|
|
12
|
+
// Helpers for building Java HashMap/ArrayList from C++ detect results.
|
|
13
|
+
// Used by sherpa-onnx-stt-wrapper and sherpa-onnx-tts-wrapper.
|
|
14
|
+
bool PutString(JNIEnv* env, jobject map, jmethodID putId, const char* key, const std::string& value);
|
|
15
|
+
bool PutBoolean(JNIEnv* env, jobject map, jmethodID putId, const char* key, bool value);
|
|
16
|
+
jobject BuildDetectedModelsList(JNIEnv* env, const std::vector<DetectedModel>& models);
|
|
17
|
+
|
|
18
|
+
} // namespace sherpaonnx
|
|
19
|
+
|
|
20
|
+
#endif // SHERPA_ONNX_DETECT_JNI_COMMON_H
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sherpa-onnx-model-detect-helper.cpp
|
|
3
|
+
*
|
|
4
|
+
* Purpose: Shared filesystem and string helpers for model detection (file/dir listing, token-based
|
|
5
|
+
* ONNX search, path resolution). Used by sherpa-onnx-model-detect-stt.cpp and -tts.cpp on Android.
|
|
6
|
+
*/
|
|
7
|
+
#include "sherpa-onnx-model-detect-helper.h"
|
|
8
|
+
|
|
9
|
+
#include <algorithm>
|
|
10
|
+
#include <cctype>
|
|
11
|
+
#include <fstream>
|
|
12
|
+
|
|
13
|
+
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
14
|
+
#include <filesystem>
|
|
15
|
+
namespace fs = std::filesystem;
|
|
16
|
+
#elif __has_include(<experimental/filesystem>)
|
|
17
|
+
#include <experimental/filesystem>
|
|
18
|
+
namespace fs = std::experimental::filesystem;
|
|
19
|
+
#else
|
|
20
|
+
#include <dirent.h>
|
|
21
|
+
#include <sys/stat.h>
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
namespace sherpaonnx {
|
|
25
|
+
namespace model_detect {
|
|
26
|
+
|
|
27
|
+
namespace {
|
|
28
|
+
|
|
29
|
+
// Returns true when `value` finishes with `suffix` (an empty suffix always matches).
bool EndsWith(const std::string& value, const std::string& suffix) {
    const std::string::size_type valueLen = value.size();
    const std::string::size_type suffixLen = suffix.size();
    if (valueLen < suffixLen) {
        return false;
    }
    // Compare only the tail of `value` that is as long as `suffix`.
    return value.compare(valueLen - suffixLen, suffixLen, suffix) == 0;
}
|
|
33
|
+
|
|
34
|
+
// Returns true when `token` occurs anywhere inside `value` (case-sensitive substring test).
bool ContainsToken(const std::string& value, const std::string& token) {
    const std::string::size_type where = value.find(token);
    const bool absent = (where == std::string::npos);
    return !absent;
}
|
|
37
|
+
|
|
38
|
+
bool IsOnnxFile(const FileEntry& entry) {
|
|
39
|
+
return EndsWith(entry.nameLower, ".onnx");
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Returns the part of `path` after the last '/' or '\\'; the whole string when
// there is no separator, and an empty string when the path ends with one.
std::string BaseName(const std::string& path) {
    const std::string::size_type lastSep = path.find_last_of("/\\");
    const bool hasSeparator = (lastSep != std::string::npos);
    return hasSeparator ? path.substr(lastSep + 1) : path;
}
|
|
47
|
+
|
|
48
|
+
std::string ChooseLargest(
|
|
49
|
+
const std::vector<FileEntry>& files,
|
|
50
|
+
const std::vector<std::string>& excludeTokens,
|
|
51
|
+
bool onlyInt8,
|
|
52
|
+
bool onlyNonInt8
|
|
53
|
+
) {
|
|
54
|
+
std::string chosen;
|
|
55
|
+
std::uint64_t bestSize = 0;
|
|
56
|
+
|
|
57
|
+
for (const auto& entry : files) {
|
|
58
|
+
if (!IsOnnxFile(entry)) continue;
|
|
59
|
+
|
|
60
|
+
bool hasExcluded = false;
|
|
61
|
+
for (const auto& token : excludeTokens) {
|
|
62
|
+
if (ContainsToken(entry.nameLower, token)) {
|
|
63
|
+
hasExcluded = true;
|
|
64
|
+
break;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
if (hasExcluded) continue;
|
|
68
|
+
|
|
69
|
+
bool isInt8 = ContainsToken(entry.nameLower, "int8");
|
|
70
|
+
if (onlyInt8 && !isInt8) continue;
|
|
71
|
+
if (onlyNonInt8 && isInt8) continue;
|
|
72
|
+
|
|
73
|
+
if (entry.size >= bestSize) {
|
|
74
|
+
bestSize = entry.size;
|
|
75
|
+
chosen = entry.path;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
return chosen;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
} // namespace
|
|
83
|
+
|
|
84
|
+
// Returns true when `path` names an existing filesystem object.
// Never throws: with a filesystem library available the error_code overload is
// used, so a status error (e.g. over-long path, permission problem) yields false
// instead of a filesystem_error. Without one it falls back to stat().
bool FileExists(const std::string& path) {
#if __cplusplus >= 201703L && __has_include(<filesystem>)
    std::error_code ec;
    return std::filesystem::exists(path, ec);
#elif __has_include(<experimental/filesystem>)
    std::error_code ec;
    return std::experimental::filesystem::exists(path, ec);
#else
    struct stat buffer;
    return (stat(path.c_str(), &buffer) == 0);
#endif
}
|
|
94
|
+
|
|
95
|
+
// Returns true when `path` exists and is a directory.
// Never throws: with a filesystem library available the error_code overload is
// used, so a status error yields false instead of a filesystem_error. Without
// one it falls back to stat() + S_ISDIR.
bool IsDirectory(const std::string& path) {
#if __cplusplus >= 201703L && __has_include(<filesystem>)
    std::error_code ec;
    return std::filesystem::is_directory(path, ec);
#elif __has_include(<experimental/filesystem>)
    std::error_code ec;
    return std::experimental::filesystem::is_directory(path, ec);
#else
    struct stat buffer;
    if (stat(path.c_str(), &buffer) != 0) return false;
    return S_ISDIR(buffer.st_mode);
#endif
}
|
|
106
|
+
|
|
107
|
+
std::vector<std::string> ListDirectories(const std::string& path) {
|
|
108
|
+
std::vector<std::string> results;
|
|
109
|
+
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
110
|
+
try {
|
|
111
|
+
for (const auto& entry : fs::directory_iterator(path)) {
|
|
112
|
+
if (entry.is_directory()) {
|
|
113
|
+
results.push_back(entry.path().string());
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
} catch (const std::exception&) {
|
|
117
|
+
}
|
|
118
|
+
#elif __has_include(<experimental/filesystem>)
|
|
119
|
+
try {
|
|
120
|
+
for (const auto& entry : fs::directory_iterator(path)) {
|
|
121
|
+
if (entry.is_directory()) {
|
|
122
|
+
results.push_back(entry.path().string());
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
} catch (const std::exception&) {
|
|
126
|
+
}
|
|
127
|
+
#else
|
|
128
|
+
DIR* dir = opendir(path.c_str());
|
|
129
|
+
if (!dir) return results;
|
|
130
|
+
while (auto* entry = readdir(dir)) {
|
|
131
|
+
if (!entry->d_name) continue;
|
|
132
|
+
std::string name = entry->d_name;
|
|
133
|
+
if (name == "." || name == "..") continue;
|
|
134
|
+
std::string full = path + "/" + name;
|
|
135
|
+
struct stat st;
|
|
136
|
+
if (stat(full.c_str(), &st) == 0 && S_ISDIR(st.st_mode)) {
|
|
137
|
+
results.push_back(full);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
closedir(dir);
|
|
141
|
+
#endif
|
|
142
|
+
return results;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
std::vector<FileEntry> ListFiles(const std::string& path) {
|
|
146
|
+
std::vector<FileEntry> results;
|
|
147
|
+
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
148
|
+
try {
|
|
149
|
+
for (const auto& entry : fs::directory_iterator(path)) {
|
|
150
|
+
if (!entry.is_regular_file()) continue;
|
|
151
|
+
FileEntry file;
|
|
152
|
+
file.path = entry.path().string();
|
|
153
|
+
file.name = entry.path().filename().string();
|
|
154
|
+
file.nameLower = ToLower(file.name);
|
|
155
|
+
file.size = static_cast<std::uint64_t>(entry.file_size());
|
|
156
|
+
results.push_back(file);
|
|
157
|
+
}
|
|
158
|
+
} catch (const std::exception&) {
|
|
159
|
+
}
|
|
160
|
+
#elif __has_include(<experimental/filesystem>)
|
|
161
|
+
try {
|
|
162
|
+
for (const auto& entry : fs::directory_iterator(path)) {
|
|
163
|
+
if (!entry.is_regular_file()) continue;
|
|
164
|
+
FileEntry file;
|
|
165
|
+
file.path = entry.path().string();
|
|
166
|
+
file.name = entry.path().filename().string();
|
|
167
|
+
file.nameLower = ToLower(file.name);
|
|
168
|
+
file.size = static_cast<std::uint64_t>(entry.file_size());
|
|
169
|
+
results.push_back(file);
|
|
170
|
+
}
|
|
171
|
+
} catch (const std::exception&) {
|
|
172
|
+
}
|
|
173
|
+
#else
|
|
174
|
+
DIR* dir = opendir(path.c_str());
|
|
175
|
+
if (!dir) return results;
|
|
176
|
+
while (auto* entry = readdir(dir)) {
|
|
177
|
+
if (!entry->d_name) continue;
|
|
178
|
+
std::string name = entry->d_name;
|
|
179
|
+
if (name == "." || name == "..") continue;
|
|
180
|
+
std::string full = path + "/" + name;
|
|
181
|
+
struct stat st;
|
|
182
|
+
if (stat(full.c_str(), &st) == 0 && S_ISREG(st.st_mode)) {
|
|
183
|
+
FileEntry file;
|
|
184
|
+
file.path = full;
|
|
185
|
+
file.name = name;
|
|
186
|
+
file.nameLower = ToLower(name);
|
|
187
|
+
file.size = static_cast<std::uint64_t>(st.st_size);
|
|
188
|
+
results.push_back(file);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
closedir(dir);
|
|
192
|
+
#endif
|
|
193
|
+
return results;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
std::vector<FileEntry> ListFilesRecursive(const std::string& path, int maxDepth) {
|
|
197
|
+
std::vector<FileEntry> results = ListFiles(path);
|
|
198
|
+
if (maxDepth <= 0) return results;
|
|
199
|
+
|
|
200
|
+
for (const auto& dir : ListDirectories(path)) {
|
|
201
|
+
auto nested = ListFilesRecursive(dir, maxDepth - 1);
|
|
202
|
+
results.insert(results.end(), nested.begin(), nested.end());
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
return results;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Returns a copy of `value` with every byte passed through std::tolower.
std::string ToLower(std::string value) {
    for (char& ch : value) {
        // Go through unsigned char so bytes >= 0x80 are valid arguments to tolower.
        const unsigned char raw = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(raw));
    }
    return value;
}
|
|
214
|
+
|
|
215
|
+
std::string FindFileByName(const std::string& baseDir, const std::string& fileName, int maxDepth) {
|
|
216
|
+
std::string target = ToLower(fileName);
|
|
217
|
+
auto files = ListFilesRecursive(baseDir, maxDepth);
|
|
218
|
+
for (const auto& entry : files) {
|
|
219
|
+
if (entry.nameLower == target) {
|
|
220
|
+
return entry.path;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
return "";
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
std::string FindFileEndingWith(const std::string& baseDir, const std::string& suffix, int maxDepth) {
|
|
227
|
+
std::string targetSuffix = ToLower(suffix);
|
|
228
|
+
auto files = ListFilesRecursive(baseDir, maxDepth);
|
|
229
|
+
// 1) exact match (e.g. "tokens.txt")
|
|
230
|
+
for (const auto& entry : files) {
|
|
231
|
+
if (entry.nameLower == targetSuffix) {
|
|
232
|
+
return entry.path;
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// 2) true suffix match (preferred over substring to avoid false positives
|
|
237
|
+
// like "tokens.txt.bak" or "mytokens.txt.tmp").
|
|
238
|
+
for (const auto& entry : files) {
|
|
239
|
+
if (EndsWith(entry.nameLower, targetSuffix)) {
|
|
240
|
+
return entry.path;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// 3) If we are looking for tokens, fallback to inspecting .txt files' contents.
|
|
245
|
+
// Heuristic: many token files are plain text with lines like "token <index>".
|
|
246
|
+
if (targetSuffix.find("tokens") != std::string::npos) {
|
|
247
|
+
auto IsLikelyTokensFile = [](const std::string& path) -> bool {
|
|
248
|
+
std::ifstream ifs(path);
|
|
249
|
+
if (!ifs.is_open()) return false;
|
|
250
|
+
std::string line;
|
|
251
|
+
int total = 0;
|
|
252
|
+
int matched = 0;
|
|
253
|
+
const int maxLines = 2000;
|
|
254
|
+
|
|
255
|
+
while (total < maxLines && std::getline(ifs, line)) {
|
|
256
|
+
++total;
|
|
257
|
+
if (line.empty()) continue;
|
|
258
|
+
// Trim trailing CR if present
|
|
259
|
+
if (!line.empty() && line.back() == '\r') line.pop_back();
|
|
260
|
+
|
|
261
|
+
// Check if the line ends with an integer index (common token format)
|
|
262
|
+
size_t sp = line.find_last_of(" \t");
|
|
263
|
+
if (sp != std::string::npos && sp + 1 < line.size()) {
|
|
264
|
+
std::string idx = line.substr(sp + 1);
|
|
265
|
+
bool allDigits = !idx.empty();
|
|
266
|
+
for (char c : idx) {
|
|
267
|
+
if (!std::isdigit(static_cast<unsigned char>(c))) { allDigits = false; break; }
|
|
268
|
+
}
|
|
269
|
+
if (allDigits) ++matched;
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
ifs.close();
|
|
274
|
+
if (total < 2) return false;
|
|
275
|
+
// Heuristic: at least half of non-empty lines should match the token pattern
|
|
276
|
+
return matched >= std::max(1, total / 2);
|
|
277
|
+
};
|
|
278
|
+
|
|
279
|
+
for (const auto& entry : files) {
|
|
280
|
+
if (EndsWith(entry.nameLower, ".txt")) {
|
|
281
|
+
if (IsLikelyTokensFile(entry.path)) {
|
|
282
|
+
return entry.path;
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
return "";
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
std::string FindDirectoryByName(const std::string& baseDir, const std::string& dirName, int maxDepth) {
|
|
291
|
+
std::string target = ToLower(dirName);
|
|
292
|
+
std::vector<std::string> toVisit = ListDirectories(baseDir);
|
|
293
|
+
int depth = 0;
|
|
294
|
+
|
|
295
|
+
while (!toVisit.empty() && depth <= maxDepth) {
|
|
296
|
+
std::vector<std::string> next;
|
|
297
|
+
for (const auto& dir : toVisit) {
|
|
298
|
+
std::string name = dir;
|
|
299
|
+
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
300
|
+
try {
|
|
301
|
+
name = fs::path(dir).filename().string();
|
|
302
|
+
} catch (const std::exception&) {
|
|
303
|
+
}
|
|
304
|
+
#elif __has_include(<experimental/filesystem>)
|
|
305
|
+
try {
|
|
306
|
+
name = fs::path(dir).filename().string();
|
|
307
|
+
} catch (const std::exception&) {
|
|
308
|
+
}
|
|
309
|
+
#else
|
|
310
|
+
name = BaseName(dir);
|
|
311
|
+
#endif
|
|
312
|
+
if (ToLower(name) == target) {
|
|
313
|
+
return dir;
|
|
314
|
+
}
|
|
315
|
+
if (depth < maxDepth) {
|
|
316
|
+
auto nested = ListDirectories(dir);
|
|
317
|
+
next.insert(next.end(), nested.begin(), nested.end());
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
toVisit.swap(next);
|
|
321
|
+
depth += 1;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
return "";
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
std::string ResolveTokenizerDir(const std::string& modelDir) {
|
|
328
|
+
std::string vocabInMain = modelDir + "/vocab.json";
|
|
329
|
+
if (FileExists(vocabInMain)) {
|
|
330
|
+
return modelDir;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
std::vector<std::string> toVisit = ListDirectories(modelDir);
|
|
334
|
+
int depth = 0;
|
|
335
|
+
while (!toVisit.empty() && depth <= 2) {
|
|
336
|
+
std::vector<std::string> next;
|
|
337
|
+
for (const auto& dir : toVisit) {
|
|
338
|
+
std::string dirName = dir;
|
|
339
|
+
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
|
340
|
+
try {
|
|
341
|
+
dirName = fs::path(dir).filename().string();
|
|
342
|
+
} catch (const std::exception&) {
|
|
343
|
+
}
|
|
344
|
+
#elif __has_include(<experimental/filesystem>)
|
|
345
|
+
try {
|
|
346
|
+
dirName = fs::path(dir).filename().string();
|
|
347
|
+
} catch (const std::exception&) {
|
|
348
|
+
}
|
|
349
|
+
#else
|
|
350
|
+
dirName = BaseName(dir);
|
|
351
|
+
#endif
|
|
352
|
+
std::string dirNameLower = ToLower(dirName);
|
|
353
|
+
if (dirNameLower.find("qwen3") != std::string::npos) {
|
|
354
|
+
std::string vocabPath = dir + "/vocab.json";
|
|
355
|
+
if (FileExists(vocabPath)) {
|
|
356
|
+
return dir;
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
if (depth < 2) {
|
|
361
|
+
auto nested = ListDirectories(dir);
|
|
362
|
+
next.insert(next.end(), nested.begin(), nested.end());
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
toVisit.swap(next);
|
|
366
|
+
depth += 1;
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
return "";
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
std::string FindOnnxByToken(
|
|
373
|
+
const std::vector<FileEntry>& files,
|
|
374
|
+
const std::string& token,
|
|
375
|
+
const std::optional<bool>& preferInt8
|
|
376
|
+
) {
|
|
377
|
+
std::vector<FileEntry> matches;
|
|
378
|
+
std::string tokenLower = ToLower(token);
|
|
379
|
+
for (const auto& entry : files) {
|
|
380
|
+
if (!IsOnnxFile(entry)) continue;
|
|
381
|
+
if (ContainsToken(entry.nameLower, tokenLower)) {
|
|
382
|
+
matches.push_back(entry);
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
if (matches.empty()) return "";
|
|
387
|
+
|
|
388
|
+
std::vector<std::string> emptyTokens;
|
|
389
|
+
bool wantInt8 = preferInt8.has_value() && preferInt8.value();
|
|
390
|
+
bool wantNonInt8 = preferInt8.has_value() && !preferInt8.value();
|
|
391
|
+
|
|
392
|
+
std::string preferred = ChooseLargest(matches, emptyTokens, wantInt8, wantNonInt8);
|
|
393
|
+
if (!preferred.empty()) return preferred;
|
|
394
|
+
|
|
395
|
+
return ChooseLargest(matches, emptyTokens, false, false);
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
std::string FindOnnxByAnyToken(
|
|
399
|
+
const std::vector<FileEntry>& files,
|
|
400
|
+
const std::vector<std::string>& tokens,
|
|
401
|
+
const std::optional<bool>& preferInt8
|
|
402
|
+
) {
|
|
403
|
+
for (const auto& token : tokens) {
|
|
404
|
+
std::string match = FindOnnxByToken(files, token, preferInt8);
|
|
405
|
+
if (!match.empty()) return match;
|
|
406
|
+
}
|
|
407
|
+
return "";
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
std::string FindLargestOnnx(const std::vector<FileEntry>& files) {
|
|
411
|
+
std::vector<std::string> emptyTokens;
|
|
412
|
+
return ChooseLargest(files, emptyTokens, false, false);
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
std::string FindLargestOnnxExcludingTokens(
|
|
416
|
+
const std::vector<FileEntry>& files,
|
|
417
|
+
const std::vector<std::string>& excludeTokens
|
|
418
|
+
) {
|
|
419
|
+
return ChooseLargest(files, excludeTokens, false, false);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
} // namespace model_detect
|
|
423
|
+
} // namespace sherpaonnx
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#ifndef SHERPA_ONNX_MODEL_DETECT_HELPER_H
|
|
2
|
+
#define SHERPA_ONNX_MODEL_DETECT_HELPER_H
|
|
3
|
+
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
#include <optional>
|
|
6
|
+
#include <string>
|
|
7
|
+
#include <vector>
|
|
8
|
+
|
|
9
|
+
namespace sherpaonnx {
|
|
10
|
+
namespace model_detect {
|
|
11
|
+
|
|
12
|
+
// Description of one regular file, as produced by ListFiles().
struct FileEntry {
|
|
13
|
+
std::string path; // Path of the file as reported by the directory listing
|
|
14
|
+
std::string name; // File name component of `path`
|
|
15
|
+
std::string nameLower; // `name` passed through ToLower(), used for case-insensitive matching
|
|
16
|
+
std::uint64_t size = 0; // File size as reported by the filesystem; 0 until filled in
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
bool FileExists(const std::string& path);
|
|
20
|
+
bool IsDirectory(const std::string& path);
|
|
21
|
+
std::vector<std::string> ListDirectories(const std::string& path);
|
|
22
|
+
std::vector<FileEntry> ListFiles(const std::string& path);
|
|
23
|
+
std::vector<FileEntry> ListFilesRecursive(const std::string& path, int maxDepth = 2);
|
|
24
|
+
std::string ToLower(std::string value);
|
|
25
|
+
std::string ResolveTokenizerDir(const std::string& modelDir);
|
|
26
|
+
|
|
27
|
+
std::string FindFileByName(const std::string& baseDir, const std::string& fileName, int maxDepth = 2);
|
|
28
|
+
std::string FindFileEndingWith(const std::string& baseDir, const std::string& suffix, int maxDepth = 2);
|
|
29
|
+
std::string FindDirectoryByName(const std::string& baseDir, const std::string& dirName, int maxDepth = 2);
|
|
30
|
+
|
|
31
|
+
std::string FindOnnxByToken(
|
|
32
|
+
const std::vector<FileEntry>& files,
|
|
33
|
+
const std::string& token,
|
|
34
|
+
const std::optional<bool>& preferInt8
|
|
35
|
+
);
|
|
36
|
+
|
|
37
|
+
std::string FindOnnxByAnyToken(
|
|
38
|
+
const std::vector<FileEntry>& files,
|
|
39
|
+
const std::vector<std::string>& tokens,
|
|
40
|
+
const std::optional<bool>& preferInt8
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
std::string FindLargestOnnx(
|
|
44
|
+
const std::vector<FileEntry>& files
|
|
45
|
+
);
|
|
46
|
+
|
|
47
|
+
std::string FindLargestOnnxExcludingTokens(
|
|
48
|
+
const std::vector<FileEntry>& files,
|
|
49
|
+
const std::vector<std::string>& excludeTokens
|
|
50
|
+
);
|
|
51
|
+
|
|
52
|
+
} // namespace model_detect
|
|
53
|
+
} // namespace sherpaonnx
|
|
54
|
+
|
|
55
|
+
#endif // SHERPA_ONNX_MODEL_DETECT_HELPER_H
|