react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
@@ -1,18 +1,18 @@
1
- #ifndef SHERPA_ONNX_COMMON_H
2
- #define SHERPA_ONNX_COMMON_H
3
-
4
- #include <string>
5
-
6
- namespace sherpaonnx {
7
-
8
- /**
9
- * Information about a detected model.
10
- */
11
- struct DetectedModel {
12
- std::string type; // Model type (e.g., "transducer", "paraformer", "nemo_ctc")
13
- std::string modelDir; // Directory path where the model is located
14
- };
15
-
16
- } // namespace sherpaonnx
17
-
18
- #endif // SHERPA_ONNX_COMMON_H
1
+ #ifndef SHERPA_ONNX_COMMON_H
2
+ #define SHERPA_ONNX_COMMON_H
3
+
4
+ #include <string>
5
+
6
+ namespace sherpaonnx {
7
+
8
+ /**
9
+ * Information about a detected model.
10
+ */
11
+ struct DetectedModel {
12
+ std::string type; // Model type (e.g., "transducer", "paraformer", "nemo_ctc")
13
+ std::string modelDir; // Directory path where the model is located
14
+ };
15
+
16
+ } // namespace sherpaonnx
17
+
18
+ #endif // SHERPA_ONNX_COMMON_H
@@ -0,0 +1,86 @@
1
+ /**
2
+ * sherpa-onnx-detect-jni-common.cpp
3
+ *
4
+ * Purpose: Shared JNI helpers for building Java HashMap/ArrayList from C++ detect results
5
+ * (PutString, PutBoolean, BuildDetectedModelsList). Used by sherpa-onnx-stt-wrapper and
6
+ * sherpa-onnx-tts-wrapper.
7
+ */
8
+ #include "sherpa-onnx-detect-jni-common.h"
9
+
10
+ namespace sherpaonnx {
11
+
12
+ bool PutString(JNIEnv* env, jobject map, jmethodID putId, const char* key, const std::string& value) {
13
+ jstring jkey = env->NewStringUTF(key);
14
+ if (!jkey) return false;
15
+ jstring jval = value.empty() ? nullptr : env->NewStringUTF(value.c_str());
16
+ if (!value.empty() && !jval) {
17
+ env->DeleteLocalRef(jkey);
18
+ return false;
19
+ }
20
+ env->CallObjectMethod(map, putId, jkey, jval ? static_cast<jobject>(jval) : nullptr);
21
+ env->DeleteLocalRef(jkey);
22
+ if (jval) env->DeleteLocalRef(jval);
23
+ return true;
24
+ }
25
+
26
+ bool PutBoolean(JNIEnv* env, jobject map, jmethodID putId, const char* key, bool value) {
27
+ jclass boolClass = env->FindClass("java/lang/Boolean");
28
+ if (!boolClass) return false;
29
+ jmethodID valueOf = env->GetStaticMethodID(boolClass, "valueOf", "(Z)Ljava/lang/Boolean;");
30
+ if (!valueOf) {
31
+ env->DeleteLocalRef(boolClass);
32
+ return false;
33
+ }
34
+ jobject boxed = env->CallStaticObjectMethod(boolClass, valueOf, value ? JNI_TRUE : JNI_FALSE);
35
+ env->DeleteLocalRef(boolClass);
36
+ if (!boxed) return false;
37
+ jstring jkey = env->NewStringUTF(key);
38
+ if (!jkey) {
39
+ env->DeleteLocalRef(boxed);
40
+ return false;
41
+ }
42
+ env->CallObjectMethod(map, putId, jkey, boxed);
43
+ env->DeleteLocalRef(jkey);
44
+ env->DeleteLocalRef(boxed);
45
+ return true;
46
+ }
47
+
48
+ jobject BuildDetectedModelsList(JNIEnv* env, const std::vector<DetectedModel>& models) {
49
+ jclass listClass = env->FindClass("java/util/ArrayList");
50
+ if (!listClass) return nullptr;
51
+ jmethodID listInit = env->GetMethodID(listClass, "<init>", "()V");
52
+ jmethodID listAdd = env->GetMethodID(listClass, "add", "(Ljava/lang/Object;)Z");
53
+ if (!listInit || !listAdd) {
54
+ env->DeleteLocalRef(listClass);
55
+ return nullptr;
56
+ }
57
+ jobject list = env->NewObject(listClass, listInit);
58
+ env->DeleteLocalRef(listClass);
59
+ if (!list) return nullptr;
60
+
61
+ jclass mapClass = env->FindClass("java/util/HashMap");
62
+ if (!mapClass) {
63
+ env->DeleteLocalRef(list);
64
+ return nullptr;
65
+ }
66
+ jmethodID mapInit = env->GetMethodID(mapClass, "<init>", "()V");
67
+ jmethodID mapPut = env->GetMethodID(mapClass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;");
68
+ if (!mapInit || !mapPut) {
69
+ env->DeleteLocalRef(mapClass);
70
+ env->DeleteLocalRef(list);
71
+ return nullptr;
72
+ }
73
+
74
+ for (const auto& m : models) {
75
+ jobject modelMap = env->NewObject(mapClass, mapInit);
76
+ if (!modelMap) continue;
77
+ PutString(env, modelMap, mapPut, "type", m.type);
78
+ PutString(env, modelMap, mapPut, "modelDir", m.modelDir);
79
+ env->CallBooleanMethod(list, listAdd, modelMap);
80
+ env->DeleteLocalRef(modelMap);
81
+ }
82
+ env->DeleteLocalRef(mapClass);
83
+ return list;
84
+ }
85
+
86
+ } // namespace sherpaonnx
@@ -0,0 +1,20 @@
1
+ #ifndef SHERPA_ONNX_DETECT_JNI_COMMON_H
2
+ #define SHERPA_ONNX_DETECT_JNI_COMMON_H
3
+
4
+ #include <jni.h>
5
+ #include <string>
6
+ #include <vector>
7
+
8
+ #include "sherpa-onnx-common.h"
9
+
10
+ namespace sherpaonnx {
11
+
12
+ // Helpers for building Java HashMap/ArrayList from C++ detect results.
13
+ // Used by sherpa-onnx-stt-wrapper and sherpa-onnx-tts-wrapper.
14
+ bool PutString(JNIEnv* env, jobject map, jmethodID putId, const char* key, const std::string& value);
15
+ bool PutBoolean(JNIEnv* env, jobject map, jmethodID putId, const char* key, bool value);
16
+ jobject BuildDetectedModelsList(JNIEnv* env, const std::vector<DetectedModel>& models);
17
+
18
+ } // namespace sherpaonnx
19
+
20
+ #endif // SHERPA_ONNX_DETECT_JNI_COMMON_H
@@ -0,0 +1,423 @@
1
+ /**
2
+ * sherpa-onnx-model-detect-helper.cpp
3
+ *
4
+ * Purpose: Shared filesystem and string helpers for model detection (file/dir listing, token-based
5
+ * ONNX search, path resolution). Used by sherpa-onnx-model-detect-stt.cpp and -tts.cpp on Android.
6
+ */
7
+ #include "sherpa-onnx-model-detect-helper.h"
8
+
9
+ #include <algorithm>
10
+ #include <cctype>
11
+ #include <fstream>
12
+
13
+ #if __cplusplus >= 201703L && __has_include(<filesystem>)
14
+ #include <filesystem>
15
+ namespace fs = std::filesystem;
16
+ #elif __has_include(<experimental/filesystem>)
17
+ #include <experimental/filesystem>
18
+ namespace fs = std::experimental::filesystem;
19
+ #else
20
+ #include <dirent.h>
21
+ #include <sys/stat.h>
22
+ #endif
23
+
24
+ namespace sherpaonnx {
25
+ namespace model_detect {
26
+
27
+ namespace {
28
+
29
// Returns true when `value` terminates with `suffix`. An empty suffix always matches.
bool EndsWith(const std::string& value, const std::string& suffix) {
    const std::string::size_type suffixLen = suffix.size();
    const std::string::size_type valueLen = value.size();
    if (valueLen < suffixLen) {
        return false;
    }
    // Compare the tail of `value` against the whole suffix.
    return value.compare(valueLen - suffixLen, suffixLen, suffix) == 0;
}
33
+
34
// Returns true when `token` occurs anywhere inside `value` (case-sensitive).
// An empty token is found in every string.
bool ContainsToken(const std::string& value, const std::string& token) {
    const std::string::size_type where = value.find(token);
    const bool missing = (where == std::string::npos);
    return !missing;
}
37
+
38
+ bool IsOnnxFile(const FileEntry& entry) {
39
+ return EndsWith(entry.nameLower, ".onnx");
40
+ }
41
+
42
// Returns the part of `path` after its last '/' or '\\' separator.
// A path without separators is returned unchanged; a path ending in a separator
// yields an empty string.
std::string BaseName(const std::string& path) {
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    return (lastSeparator == std::string::npos) ? path : path.substr(lastSeparator + 1);
}
47
+
48
+ std::string ChooseLargest(
49
+ const std::vector<FileEntry>& files,
50
+ const std::vector<std::string>& excludeTokens,
51
+ bool onlyInt8,
52
+ bool onlyNonInt8
53
+ ) {
54
+ std::string chosen;
55
+ std::uint64_t bestSize = 0;
56
+
57
+ for (const auto& entry : files) {
58
+ if (!IsOnnxFile(entry)) continue;
59
+
60
+ bool hasExcluded = false;
61
+ for (const auto& token : excludeTokens) {
62
+ if (ContainsToken(entry.nameLower, token)) {
63
+ hasExcluded = true;
64
+ break;
65
+ }
66
+ }
67
+ if (hasExcluded) continue;
68
+
69
+ bool isInt8 = ContainsToken(entry.nameLower, "int8");
70
+ if (onlyInt8 && !isInt8) continue;
71
+ if (onlyNonInt8 && isInt8) continue;
72
+
73
+ if (entry.size >= bestSize) {
74
+ bestSize = entry.size;
75
+ chosen = entry.path;
76
+ }
77
+ }
78
+
79
+ return chosen;
80
+ }
81
+
82
+ } // namespace
83
+
84
// Returns true when `path` refers to an existing filesystem object.
//
// Never throws: the filesystem-library overloads used here raise filesystem_error when
// the status cannot be determined (e.g. permission denied, name too long). Any such
// failure is reported as "does not exist", matching the best-effort error handling of
// the directory-listing helpers in this file.
bool FileExists(const std::string& path) {
#if __cplusplus >= 201703L && __has_include(<filesystem>)
    try {
        return std::filesystem::exists(path);
    } catch (const std::exception&) {
        return false;
    }
#elif __has_include(<experimental/filesystem>)
    try {
        return std::experimental::filesystem::exists(path);
    } catch (const std::exception&) {
        return false;
    }
#else
    struct stat buffer;
    return (stat(path.c_str(), &buffer) == 0);
#endif
}
94
+
95
// Returns true when `path` refers to an existing directory.
//
// Never throws: the filesystem-library overloads used here raise filesystem_error when
// the status cannot be determined (e.g. permission denied, name too long). Any such
// failure is reported as "not a directory", matching the best-effort error handling of
// the directory-listing helpers in this file.
bool IsDirectory(const std::string& path) {
#if __cplusplus >= 201703L && __has_include(<filesystem>)
    try {
        return std::filesystem::is_directory(path);
    } catch (const std::exception&) {
        return false;
    }
#elif __has_include(<experimental/filesystem>)
    try {
        return std::experimental::filesystem::is_directory(path);
    } catch (const std::exception&) {
        return false;
    }
#else
    struct stat buffer;
    if (stat(path.c_str(), &buffer) != 0) return false;
    return S_ISDIR(buffer.st_mode);
#endif
}
106
+
107
+ std::vector<std::string> ListDirectories(const std::string& path) {
108
+ std::vector<std::string> results;
109
+ #if __cplusplus >= 201703L && __has_include(<filesystem>)
110
+ try {
111
+ for (const auto& entry : fs::directory_iterator(path)) {
112
+ if (entry.is_directory()) {
113
+ results.push_back(entry.path().string());
114
+ }
115
+ }
116
+ } catch (const std::exception&) {
117
+ }
118
+ #elif __has_include(<experimental/filesystem>)
119
+ try {
120
+ for (const auto& entry : fs::directory_iterator(path)) {
121
+ if (entry.is_directory()) {
122
+ results.push_back(entry.path().string());
123
+ }
124
+ }
125
+ } catch (const std::exception&) {
126
+ }
127
+ #else
128
+ DIR* dir = opendir(path.c_str());
129
+ if (!dir) return results;
130
+ while (auto* entry = readdir(dir)) {
131
+ if (!entry->d_name) continue;
132
+ std::string name = entry->d_name;
133
+ if (name == "." || name == "..") continue;
134
+ std::string full = path + "/" + name;
135
+ struct stat st;
136
+ if (stat(full.c_str(), &st) == 0 && S_ISDIR(st.st_mode)) {
137
+ results.push_back(full);
138
+ }
139
+ }
140
+ closedir(dir);
141
+ #endif
142
+ return results;
143
+ }
144
+
145
+ std::vector<FileEntry> ListFiles(const std::string& path) {
146
+ std::vector<FileEntry> results;
147
+ #if __cplusplus >= 201703L && __has_include(<filesystem>)
148
+ try {
149
+ for (const auto& entry : fs::directory_iterator(path)) {
150
+ if (!entry.is_regular_file()) continue;
151
+ FileEntry file;
152
+ file.path = entry.path().string();
153
+ file.name = entry.path().filename().string();
154
+ file.nameLower = ToLower(file.name);
155
+ file.size = static_cast<std::uint64_t>(entry.file_size());
156
+ results.push_back(file);
157
+ }
158
+ } catch (const std::exception&) {
159
+ }
160
+ #elif __has_include(<experimental/filesystem>)
161
+ try {
162
+ for (const auto& entry : fs::directory_iterator(path)) {
163
+ if (!entry.is_regular_file()) continue;
164
+ FileEntry file;
165
+ file.path = entry.path().string();
166
+ file.name = entry.path().filename().string();
167
+ file.nameLower = ToLower(file.name);
168
+ file.size = static_cast<std::uint64_t>(entry.file_size());
169
+ results.push_back(file);
170
+ }
171
+ } catch (const std::exception&) {
172
+ }
173
+ #else
174
+ DIR* dir = opendir(path.c_str());
175
+ if (!dir) return results;
176
+ while (auto* entry = readdir(dir)) {
177
+ if (!entry->d_name) continue;
178
+ std::string name = entry->d_name;
179
+ if (name == "." || name == "..") continue;
180
+ std::string full = path + "/" + name;
181
+ struct stat st;
182
+ if (stat(full.c_str(), &st) == 0 && S_ISREG(st.st_mode)) {
183
+ FileEntry file;
184
+ file.path = full;
185
+ file.name = name;
186
+ file.nameLower = ToLower(name);
187
+ file.size = static_cast<std::uint64_t>(st.st_size);
188
+ results.push_back(file);
189
+ }
190
+ }
191
+ closedir(dir);
192
+ #endif
193
+ return results;
194
+ }
195
+
196
+ std::vector<FileEntry> ListFilesRecursive(const std::string& path, int maxDepth) {
197
+ std::vector<FileEntry> results = ListFiles(path);
198
+ if (maxDepth <= 0) return results;
199
+
200
+ for (const auto& dir : ListDirectories(path)) {
201
+ auto nested = ListFilesRecursive(dir, maxDepth - 1);
202
+ results.insert(results.end(), nested.begin(), nested.end());
203
+ }
204
+
205
+ return results;
206
+ }
207
+
208
// Returns a copy of `value` with every character passed through std::tolower.
std::string ToLower(std::string value) {
    for (char& ch : value) {
        // Go through unsigned char so that std::tolower never sees a negative value.
        const unsigned char raw = static_cast<unsigned char>(ch);
        ch = static_cast<char>(std::tolower(raw));
    }
    return value;
}
214
+
215
+ std::string FindFileByName(const std::string& baseDir, const std::string& fileName, int maxDepth) {
216
+ std::string target = ToLower(fileName);
217
+ auto files = ListFilesRecursive(baseDir, maxDepth);
218
+ for (const auto& entry : files) {
219
+ if (entry.nameLower == target) {
220
+ return entry.path;
221
+ }
222
+ }
223
+ return "";
224
+ }
225
+
226
+ std::string FindFileEndingWith(const std::string& baseDir, const std::string& suffix, int maxDepth) {
227
+ std::string targetSuffix = ToLower(suffix);
228
+ auto files = ListFilesRecursive(baseDir, maxDepth);
229
+ // 1) exact match (e.g. "tokens.txt")
230
+ for (const auto& entry : files) {
231
+ if (entry.nameLower == targetSuffix) {
232
+ return entry.path;
233
+ }
234
+ }
235
+
236
+ // 2) true suffix match (preferred over substring to avoid false positives
237
+ // like "tokens.txt.bak" or "mytokens.txt.tmp").
238
+ for (const auto& entry : files) {
239
+ if (EndsWith(entry.nameLower, targetSuffix)) {
240
+ return entry.path;
241
+ }
242
+ }
243
+
244
+ // 3) If we are looking for tokens, fallback to inspecting .txt files' contents.
245
+ // Heuristic: many token files are plain text with lines like "token <index>".
246
+ if (targetSuffix.find("tokens") != std::string::npos) {
247
+ auto IsLikelyTokensFile = [](const std::string& path) -> bool {
248
+ std::ifstream ifs(path);
249
+ if (!ifs.is_open()) return false;
250
+ std::string line;
251
+ int total = 0;
252
+ int matched = 0;
253
+ const int maxLines = 2000;
254
+
255
+ while (total < maxLines && std::getline(ifs, line)) {
256
+ ++total;
257
+ if (line.empty()) continue;
258
+ // Trim trailing CR if present
259
+ if (!line.empty() && line.back() == '\r') line.pop_back();
260
+
261
+ // Check if the line ends with an integer index (common token format)
262
+ size_t sp = line.find_last_of(" \t");
263
+ if (sp != std::string::npos && sp + 1 < line.size()) {
264
+ std::string idx = line.substr(sp + 1);
265
+ bool allDigits = !idx.empty();
266
+ for (char c : idx) {
267
+ if (!std::isdigit(static_cast<unsigned char>(c))) { allDigits = false; break; }
268
+ }
269
+ if (allDigits) ++matched;
270
+ }
271
+ }
272
+
273
+ ifs.close();
274
+ if (total < 2) return false;
275
+ // Heuristic: at least half of non-empty lines should match the token pattern
276
+ return matched >= std::max(1, total / 2);
277
+ };
278
+
279
+ for (const auto& entry : files) {
280
+ if (EndsWith(entry.nameLower, ".txt")) {
281
+ if (IsLikelyTokensFile(entry.path)) {
282
+ return entry.path;
283
+ }
284
+ }
285
+ }
286
+ }
287
+ return "";
288
+ }
289
+
290
+ std::string FindDirectoryByName(const std::string& baseDir, const std::string& dirName, int maxDepth) {
291
+ std::string target = ToLower(dirName);
292
+ std::vector<std::string> toVisit = ListDirectories(baseDir);
293
+ int depth = 0;
294
+
295
+ while (!toVisit.empty() && depth <= maxDepth) {
296
+ std::vector<std::string> next;
297
+ for (const auto& dir : toVisit) {
298
+ std::string name = dir;
299
+ #if __cplusplus >= 201703L && __has_include(<filesystem>)
300
+ try {
301
+ name = fs::path(dir).filename().string();
302
+ } catch (const std::exception&) {
303
+ }
304
+ #elif __has_include(<experimental/filesystem>)
305
+ try {
306
+ name = fs::path(dir).filename().string();
307
+ } catch (const std::exception&) {
308
+ }
309
+ #else
310
+ name = BaseName(dir);
311
+ #endif
312
+ if (ToLower(name) == target) {
313
+ return dir;
314
+ }
315
+ if (depth < maxDepth) {
316
+ auto nested = ListDirectories(dir);
317
+ next.insert(next.end(), nested.begin(), nested.end());
318
+ }
319
+ }
320
+ toVisit.swap(next);
321
+ depth += 1;
322
+ }
323
+
324
+ return "";
325
+ }
326
+
327
+ std::string ResolveTokenizerDir(const std::string& modelDir) {
328
+ std::string vocabInMain = modelDir + "/vocab.json";
329
+ if (FileExists(vocabInMain)) {
330
+ return modelDir;
331
+ }
332
+
333
+ std::vector<std::string> toVisit = ListDirectories(modelDir);
334
+ int depth = 0;
335
+ while (!toVisit.empty() && depth <= 2) {
336
+ std::vector<std::string> next;
337
+ for (const auto& dir : toVisit) {
338
+ std::string dirName = dir;
339
+ #if __cplusplus >= 201703L && __has_include(<filesystem>)
340
+ try {
341
+ dirName = fs::path(dir).filename().string();
342
+ } catch (const std::exception&) {
343
+ }
344
+ #elif __has_include(<experimental/filesystem>)
345
+ try {
346
+ dirName = fs::path(dir).filename().string();
347
+ } catch (const std::exception&) {
348
+ }
349
+ #else
350
+ dirName = BaseName(dir);
351
+ #endif
352
+ std::string dirNameLower = ToLower(dirName);
353
+ if (dirNameLower.find("qwen3") != std::string::npos) {
354
+ std::string vocabPath = dir + "/vocab.json";
355
+ if (FileExists(vocabPath)) {
356
+ return dir;
357
+ }
358
+ }
359
+
360
+ if (depth < 2) {
361
+ auto nested = ListDirectories(dir);
362
+ next.insert(next.end(), nested.begin(), nested.end());
363
+ }
364
+ }
365
+ toVisit.swap(next);
366
+ depth += 1;
367
+ }
368
+
369
+ return "";
370
+ }
371
+
372
+ std::string FindOnnxByToken(
373
+ const std::vector<FileEntry>& files,
374
+ const std::string& token,
375
+ const std::optional<bool>& preferInt8
376
+ ) {
377
+ std::vector<FileEntry> matches;
378
+ std::string tokenLower = ToLower(token);
379
+ for (const auto& entry : files) {
380
+ if (!IsOnnxFile(entry)) continue;
381
+ if (ContainsToken(entry.nameLower, tokenLower)) {
382
+ matches.push_back(entry);
383
+ }
384
+ }
385
+
386
+ if (matches.empty()) return "";
387
+
388
+ std::vector<std::string> emptyTokens;
389
+ bool wantInt8 = preferInt8.has_value() && preferInt8.value();
390
+ bool wantNonInt8 = preferInt8.has_value() && !preferInt8.value();
391
+
392
+ std::string preferred = ChooseLargest(matches, emptyTokens, wantInt8, wantNonInt8);
393
+ if (!preferred.empty()) return preferred;
394
+
395
+ return ChooseLargest(matches, emptyTokens, false, false);
396
+ }
397
+
398
+ std::string FindOnnxByAnyToken(
399
+ const std::vector<FileEntry>& files,
400
+ const std::vector<std::string>& tokens,
401
+ const std::optional<bool>& preferInt8
402
+ ) {
403
+ for (const auto& token : tokens) {
404
+ std::string match = FindOnnxByToken(files, token, preferInt8);
405
+ if (!match.empty()) return match;
406
+ }
407
+ return "";
408
+ }
409
+
410
+ std::string FindLargestOnnx(const std::vector<FileEntry>& files) {
411
+ std::vector<std::string> emptyTokens;
412
+ return ChooseLargest(files, emptyTokens, false, false);
413
+ }
414
+
415
+ std::string FindLargestOnnxExcludingTokens(
416
+ const std::vector<FileEntry>& files,
417
+ const std::vector<std::string>& excludeTokens
418
+ ) {
419
+ return ChooseLargest(files, excludeTokens, false, false);
420
+ }
421
+
422
+ } // namespace model_detect
423
+ } // namespace sherpaonnx
@@ -0,0 +1,55 @@
1
+ #ifndef SHERPA_ONNX_MODEL_DETECT_HELPER_H
2
+ #define SHERPA_ONNX_MODEL_DETECT_HELPER_H
3
+
4
+ #include <cstdint>
5
+ #include <optional>
6
+ #include <string>
7
+ #include <vector>
8
+
9
+ namespace sherpaonnx {
10
+ namespace model_detect {
11
+
12
+ struct FileEntry { // One regular file as reported by ListFiles / ListFilesRecursive
13
+ std::string path; // Full path of the file (directory plus file name)
14
+ std::string name; // File name component only
15
+ std::string nameLower; // ToLower(name); used for case-insensitive name matching
16
+ std::uint64_t size = 0; // File size in bytes
17
+ };
18
+
19
+ bool FileExists(const std::string& path);
20
+ bool IsDirectory(const std::string& path);
21
+ std::vector<std::string> ListDirectories(const std::string& path);
22
+ std::vector<FileEntry> ListFiles(const std::string& path);
23
+ std::vector<FileEntry> ListFilesRecursive(const std::string& path, int maxDepth = 2);
24
+ std::string ToLower(std::string value);
25
+ std::string ResolveTokenizerDir(const std::string& modelDir);
26
+
27
+ std::string FindFileByName(const std::string& baseDir, const std::string& fileName, int maxDepth = 2);
28
+ std::string FindFileEndingWith(const std::string& baseDir, const std::string& suffix, int maxDepth = 2);
29
+ std::string FindDirectoryByName(const std::string& baseDir, const std::string& dirName, int maxDepth = 2);
30
+
31
+ std::string FindOnnxByToken(
32
+ const std::vector<FileEntry>& files,
33
+ const std::string& token,
34
+ const std::optional<bool>& preferInt8
35
+ );
36
+
37
+ std::string FindOnnxByAnyToken(
38
+ const std::vector<FileEntry>& files,
39
+ const std::vector<std::string>& tokens,
40
+ const std::optional<bool>& preferInt8
41
+ );
42
+
43
+ std::string FindLargestOnnx(
44
+ const std::vector<FileEntry>& files
45
+ );
46
+
47
+ std::string FindLargestOnnxExcludingTokens(
48
+ const std::vector<FileEntry>& files,
49
+ const std::vector<std::string>& excludeTokens
50
+ );
51
+
52
+ } // namespace model_detect
53
+ } // namespace sherpaonnx
54
+
55
+ #endif // SHERPA_ONNX_MODEL_DETECT_HELPER_H