react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/README.md +232 -236
  2. package/SherpaOnnx.podspec +68 -64
  3. package/android/build.gradle +182 -192
  4. package/android/codegen.gradle +57 -0
  5. package/android/prebuilt-download.gradle +428 -0
  6. package/android/prebuilt-versions.gradle +43 -0
  7. package/android/proguard-rules.pro +10 -0
  8. package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
  9. package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
  10. package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
  11. package/android/src/main/cpp/CMakeLists.txt +166 -129
  12. package/android/src/main/cpp/CMakePresets.json +54 -0
  13. package/android/src/main/cpp/crypto/sha256.cpp +174 -0
  14. package/android/src/main/cpp/crypto/sha256.h +16 -0
  15. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
  16. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
  17. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
  18. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
  19. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
  20. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
  21. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
  22. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
  23. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
  26. package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
  27. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
  28. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
  29. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
  30. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
  31. package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
  32. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
  33. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
  34. package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
  35. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
  36. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
  37. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
  38. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
  39. package/ios/SherpaOnnx+Assets.h +11 -0
  40. package/ios/SherpaOnnx+Assets.mm +325 -0
  41. package/ios/SherpaOnnx+STT.mm +455 -118
  42. package/ios/SherpaOnnx+TTS.mm +1101 -712
  43. package/ios/SherpaOnnx.h +17 -6
  44. package/ios/SherpaOnnx.mm +206 -311
  45. package/ios/SherpaOnnx.xcconfig +19 -19
  46. package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
  47. package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
  48. package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
  49. package/ios/libarchive_darwin_config.h +153 -0
  50. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
  51. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
  52. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
  53. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
  54. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
  55. package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
  56. package/ios/scripts/patch-libarchive-includes.sh +61 -0
  57. package/ios/scripts/setup-ios-libarchive.sh +98 -0
  58. package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
  59. package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
  60. package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
  61. package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
  62. package/lib/module/NativeSherpaOnnx.js +3 -0
  63. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  64. package/lib/module/audio/index.js +22 -0
  65. package/lib/module/audio/index.js.map +1 -0
  66. package/lib/module/diarization/index.js +1 -1
  67. package/lib/module/diarization/index.js.map +1 -1
  68. package/lib/module/download/ModelDownloadManager.js +918 -0
  69. package/lib/module/download/ModelDownloadManager.js.map +1 -0
  70. package/lib/module/download/extractTarBz2.js +53 -0
  71. package/lib/module/download/extractTarBz2.js.map +1 -0
  72. package/lib/module/download/index.js +6 -0
  73. package/lib/module/download/index.js.map +1 -0
  74. package/lib/module/download/validation.js +178 -0
  75. package/lib/module/download/validation.js.map +1 -0
  76. package/lib/module/enhancement/index.js +1 -1
  77. package/lib/module/enhancement/index.js.map +1 -1
  78. package/lib/module/index.js +41 -3
  79. package/lib/module/index.js.map +1 -1
  80. package/lib/module/separation/index.js +1 -1
  81. package/lib/module/separation/index.js.map +1 -1
  82. package/lib/module/stt/index.js +127 -60
  83. package/lib/module/stt/index.js.map +1 -1
  84. package/lib/module/stt/sttModelLanguages.js +512 -0
  85. package/lib/module/stt/sttModelLanguages.js.map +1 -0
  86. package/lib/module/stt/types.js +53 -1
  87. package/lib/module/stt/types.js.map +1 -1
  88. package/lib/module/tts/index.js +216 -289
  89. package/lib/module/tts/index.js.map +1 -1
  90. package/lib/module/tts/types.js +86 -1
  91. package/lib/module/tts/types.js.map +1 -1
  92. package/lib/module/types.js.map +1 -1
  93. package/lib/module/utils.js +86 -73
  94. package/lib/module/utils.js.map +1 -1
  95. package/lib/module/vad/index.js +1 -1
  96. package/lib/module/vad/index.js.map +1 -1
  97. package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
  98. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  99. package/lib/typescript/src/audio/index.d.ts +13 -0
  100. package/lib/typescript/src/audio/index.d.ts.map +1 -0
  101. package/lib/typescript/src/diarization/index.d.ts +3 -2
  102. package/lib/typescript/src/diarization/index.d.ts.map +1 -1
  103. package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
  104. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
  105. package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
  106. package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
  107. package/lib/typescript/src/download/index.d.ts +7 -0
  108. package/lib/typescript/src/download/index.d.ts.map +1 -0
  109. package/lib/typescript/src/download/validation.d.ts +57 -0
  110. package/lib/typescript/src/download/validation.d.ts.map +1 -0
  111. package/lib/typescript/src/enhancement/index.d.ts +3 -2
  112. package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
  113. package/lib/typescript/src/index.d.ts +26 -2
  114. package/lib/typescript/src/index.d.ts.map +1 -1
  115. package/lib/typescript/src/separation/index.d.ts +3 -2
  116. package/lib/typescript/src/separation/index.d.ts.map +1 -1
  117. package/lib/typescript/src/stt/index.d.ts +31 -43
  118. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  119. package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
  120. package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
  121. package/lib/typescript/src/stt/types.d.ts +196 -9
  122. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  123. package/lib/typescript/src/tts/index.d.ts +25 -211
  124. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  125. package/lib/typescript/src/tts/types.d.ts +148 -25
  126. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  127. package/lib/typescript/src/types.d.ts +0 -32
  128. package/lib/typescript/src/types.d.ts.map +1 -1
  129. package/lib/typescript/src/utils.d.ts +28 -13
  130. package/lib/typescript/src/utils.d.ts.map +1 -1
  131. package/lib/typescript/src/vad/index.d.ts +3 -2
  132. package/lib/typescript/src/vad/index.d.ts.map +1 -1
  133. package/package.json +250 -222
  134. package/scripts/check-qnn-support.sh +78 -0
  135. package/scripts/setup-ios-framework.sh +379 -282
  136. package/src/NativeSherpaOnnx.ts +474 -251
  137. package/src/audio/index.ts +32 -0
  138. package/src/diarization/index.ts +4 -2
  139. package/src/download/ModelDownloadManager.ts +1325 -0
  140. package/src/download/extractTarBz2.ts +78 -0
  141. package/src/download/index.ts +43 -0
  142. package/src/download/validation.ts +279 -0
  143. package/src/enhancement/index.ts +4 -2
  144. package/src/index.tsx +78 -27
  145. package/src/separation/index.ts +4 -2
  146. package/src/stt/index.ts +249 -89
  147. package/src/stt/sttModelLanguages.ts +237 -0
  148. package/src/stt/types.ts +263 -9
  149. package/src/tts/index.ts +470 -458
  150. package/src/tts/types.ts +373 -218
  151. package/src/types.ts +0 -44
  152. package/src/utils.ts +145 -131
  153. package/src/vad/index.ts +4 -2
  154. package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
  155. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
  156. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
  157. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
  158. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
  159. package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
  160. package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  161. package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
  162. package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
  163. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
  164. package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
  165. package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
  166. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
  167. package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
  168. package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
  169. package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
  170. package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
  171. package/ios/sherpa-onnx-model-detect.mm +0 -441
  172. package/ios/sherpa-onnx-stt-wrapper.h +0 -48
  173. package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
  174. package/scripts/copy-headers.js +0 -184
  175. package/scripts/setup-assets.js +0 -323
@@ -1,441 +0,0 @@
1
- #include "sherpa-onnx-model-detect.h"
2
-
3
- #include <algorithm>
4
- #include <cctype>
5
- #include <filesystem>
6
- #include <string>
7
- #include <vector>
8
-
9
- namespace fs = std::filesystem;
10
-
11
- namespace sherpaonnx {
12
- namespace {
13
-
14
- bool FileExists(const std::string& path) {
15
- return fs::exists(path);
16
- }
17
-
18
- bool IsDirectory(const std::string& path) {
19
- return fs::is_directory(path);
20
- }
21
-
22
- std::string ToLower(std::string value) {
23
- std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) {
24
- return static_cast<char>(std::tolower(c));
25
- });
26
- return value;
27
- }
28
-
29
- std::string ResolveTokenizerDir(const std::string& modelDir) {
30
- std::string vocabInMain = modelDir + "/vocab.json";
31
- if (FileExists(vocabInMain)) {
32
- return modelDir;
33
- }
34
-
35
- try {
36
- for (const auto& entry : fs::directory_iterator(modelDir)) {
37
- if (entry.is_directory()) {
38
- std::string dirName = entry.path().filename().string();
39
- std::string dirNameLower = ToLower(dirName);
40
- if (dirNameLower.find("qwen3") != std::string::npos) {
41
- std::string vocabPath = entry.path().string() + "/vocab.json";
42
- if (FileExists(vocabPath)) {
43
- return entry.path().string();
44
- }
45
- }
46
- }
47
- }
48
- } catch (const std::exception&) {
49
- }
50
-
51
- std::string commonPath = modelDir + "/Qwen3-0.6B";
52
- if (FileExists(commonPath + "/vocab.json")) {
53
- return commonPath;
54
- }
55
-
56
- return "";
57
- }
58
-
59
- SttModelKind ParseSttModelType(const std::string& modelType) {
60
- if (modelType == "transducer") return SttModelKind::kTransducer;
61
- if (modelType == "paraformer") return SttModelKind::kParaformer;
62
- if (modelType == "nemo_ctc") return SttModelKind::kNemoCtc;
63
- if (modelType == "wenet_ctc") return SttModelKind::kWenetCtc;
64
- if (modelType == "sense_voice") return SttModelKind::kSenseVoice;
65
- if (modelType == "zipformer_ctc" || modelType == "ctc") return SttModelKind::kZipformerCtc;
66
- if (modelType == "whisper") return SttModelKind::kWhisper;
67
- if (modelType == "funasr_nano") return SttModelKind::kFunAsrNano;
68
- return SttModelKind::kUnknown;
69
- }
70
-
71
- TtsModelKind ParseTtsModelType(const std::string& modelType) {
72
- if (modelType == "vits") return TtsModelKind::kVits;
73
- if (modelType == "matcha") return TtsModelKind::kMatcha;
74
- if (modelType == "kokoro") return TtsModelKind::kKokoro;
75
- if (modelType == "kitten") return TtsModelKind::kKitten;
76
- if (modelType == "zipvoice") return TtsModelKind::kZipvoice;
77
- return TtsModelKind::kUnknown;
78
- }
79
-
80
- } // namespace
81
-
82
- SttDetectResult DetectSttModel(
83
- const std::string& modelDir,
84
- const std::optional<bool>& preferInt8,
85
- const std::optional<std::string>& modelType
86
- ) {
87
- SttDetectResult result;
88
-
89
- if (modelDir.empty()) {
90
- result.error = "Model directory is empty";
91
- return result;
92
- }
93
-
94
- if (!FileExists(modelDir) || !IsDirectory(modelDir)) {
95
- result.error = "Model directory does not exist or is not a directory: " + modelDir;
96
- return result;
97
- }
98
-
99
- std::string encoderPath = modelDir + "/encoder.onnx";
100
- std::string decoderPath = modelDir + "/decoder.onnx";
101
- std::string joinerPath = modelDir + "/joiner.onnx";
102
- std::string encoderPathInt8 = modelDir + "/encoder.int8.onnx";
103
- std::string decoderPathInt8 = modelDir + "/decoder.int8.onnx";
104
- std::string paraformerPathInt8 = modelDir + "/model.int8.onnx";
105
- std::string paraformerPath = modelDir + "/model.onnx";
106
- std::string ctcPathInt8 = modelDir + "/model.int8.onnx";
107
- std::string ctcPath = modelDir + "/model.onnx";
108
- std::string tokensPath = modelDir + "/tokens.txt";
109
-
110
- std::string funasrEncoderAdaptor = modelDir + "/encoder_adaptor.onnx";
111
- std::string funasrEncoderAdaptorInt8 = modelDir + "/encoder_adaptor.int8.onnx";
112
- std::string funasrLLM = modelDir + "/llm.onnx";
113
- std::string funasrLLMInt8 = modelDir + "/llm.int8.onnx";
114
- std::string funasrEmbedding = modelDir + "/embedding.onnx";
115
- std::string funasrEmbeddingInt8 = modelDir + "/embedding.int8.onnx";
116
-
117
- std::string funasrTokenizerDir = ResolveTokenizerDir(modelDir);
118
-
119
- std::string paraformerModelPath;
120
- if (preferInt8.has_value()) {
121
- if (preferInt8.value()) {
122
- if (FileExists(paraformerPathInt8)) {
123
- paraformerModelPath = paraformerPathInt8;
124
- } else if (FileExists(paraformerPath)) {
125
- paraformerModelPath = paraformerPath;
126
- }
127
- } else {
128
- if (FileExists(paraformerPath)) {
129
- paraformerModelPath = paraformerPath;
130
- } else if (FileExists(paraformerPathInt8)) {
131
- paraformerModelPath = paraformerPathInt8;
132
- }
133
- }
134
- } else {
135
- if (FileExists(paraformerPathInt8)) {
136
- paraformerModelPath = paraformerPathInt8;
137
- } else if (FileExists(paraformerPath)) {
138
- paraformerModelPath = paraformerPath;
139
- }
140
- }
141
-
142
- std::string ctcModelPath;
143
- if (preferInt8.has_value()) {
144
- if (preferInt8.value()) {
145
- if (FileExists(ctcPathInt8)) {
146
- ctcModelPath = ctcPathInt8;
147
- } else if (FileExists(ctcPath)) {
148
- ctcModelPath = ctcPath;
149
- }
150
- } else {
151
- if (FileExists(ctcPath)) {
152
- ctcModelPath = ctcPath;
153
- } else if (FileExists(ctcPathInt8)) {
154
- ctcModelPath = ctcPathInt8;
155
- }
156
- }
157
- } else {
158
- if (FileExists(ctcPathInt8)) {
159
- ctcModelPath = ctcPathInt8;
160
- } else if (FileExists(ctcPath)) {
161
- ctcModelPath = ctcPath;
162
- }
163
- }
164
-
165
- bool hasTransducer = FileExists(encoderPath) &&
166
- FileExists(decoderPath) &&
167
- FileExists(joinerPath);
168
-
169
- bool hasWhisperEncoder = FileExists(encoderPath) || FileExists(encoderPathInt8);
170
- bool hasWhisperDecoder = FileExists(decoderPath) || FileExists(decoderPathInt8);
171
- bool hasWhisper = hasWhisperEncoder && hasWhisperDecoder && !FileExists(joinerPath);
172
-
173
- bool hasFunAsrEncoderAdaptor = FileExists(funasrEncoderAdaptor) || FileExists(funasrEncoderAdaptorInt8);
174
- bool hasFunAsrLLM = FileExists(funasrLLM) || FileExists(funasrLLMInt8);
175
- bool hasFunAsrEmbedding = FileExists(funasrEmbedding) || FileExists(funasrEmbeddingInt8);
176
- bool hasFunAsrTokenizer = !funasrTokenizerDir.empty() && FileExists(funasrTokenizerDir + "/vocab.json");
177
- bool hasFunAsrNano = hasFunAsrEncoderAdaptor && hasFunAsrLLM && hasFunAsrEmbedding && hasFunAsrTokenizer;
178
-
179
- bool isLikelyNemoCtc = modelDir.find("nemo") != std::string::npos ||
180
- modelDir.find("parakeet") != std::string::npos;
181
- bool isLikelyWenetCtc = modelDir.find("wenet") != std::string::npos;
182
- bool isLikelySenseVoice = modelDir.find("sense") != std::string::npos ||
183
- modelDir.find("sensevoice") != std::string::npos;
184
- bool isLikelyFunAsrNano = modelDir.find("funasr") != std::string::npos ||
185
- modelDir.find("funasr-nano") != std::string::npos;
186
-
187
- if (hasTransducer) {
188
- result.detectedModels.push_back({"transducer", modelDir});
189
- }
190
-
191
- if (!ctcModelPath.empty() && (isLikelyNemoCtc || isLikelyWenetCtc || isLikelySenseVoice)) {
192
- if (isLikelyNemoCtc) {
193
- result.detectedModels.push_back({"nemo_ctc", modelDir});
194
- } else if (isLikelyWenetCtc) {
195
- result.detectedModels.push_back({"wenet_ctc", modelDir});
196
- } else if (isLikelySenseVoice) {
197
- result.detectedModels.push_back({"sense_voice", modelDir});
198
- } else {
199
- result.detectedModels.push_back({"ctc", modelDir});
200
- }
201
- } else if (!paraformerModelPath.empty()) {
202
- result.detectedModels.push_back({"paraformer", modelDir});
203
- }
204
-
205
- if (hasWhisper) {
206
- result.detectedModels.push_back({"whisper", modelDir});
207
- }
208
-
209
- if (hasFunAsrNano) {
210
- result.detectedModels.push_back({"funasr_nano", modelDir});
211
- }
212
-
213
- SttModelKind selected = SttModelKind::kUnknown;
214
-
215
- if (modelType.has_value() && modelType.value() != "auto") {
216
- selected = ParseSttModelType(modelType.value());
217
- if (selected == SttModelKind::kUnknown) {
218
- result.error = "Unknown model type: " + modelType.value();
219
- return result;
220
- }
221
-
222
- if (selected == SttModelKind::kTransducer && !hasTransducer) {
223
- result.error = "Transducer model requested but files not found in " + modelDir;
224
- return result;
225
- }
226
- if (selected == SttModelKind::kParaformer && paraformerModelPath.empty()) {
227
- result.error = "Paraformer model requested but model.onnx not found in " + modelDir;
228
- return result;
229
- }
230
- if ((selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
231
- selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc) &&
232
- ctcModelPath.empty()) {
233
- result.error = "CTC model requested but model.onnx not found in " + modelDir;
234
- return result;
235
- }
236
- if (selected == SttModelKind::kWhisper && !hasWhisper) {
237
- result.error = "Whisper model requested but encoder/decoder not found in " + modelDir;
238
- return result;
239
- }
240
- if (selected == SttModelKind::kFunAsrNano && !hasFunAsrNano) {
241
- result.error = "FunASR Nano model requested but required files not found in " + modelDir;
242
- return result;
243
- }
244
- } else {
245
- if (hasTransducer) {
246
- selected = SttModelKind::kTransducer;
247
- } else if (!ctcModelPath.empty() && (isLikelyNemoCtc || isLikelyWenetCtc || isLikelySenseVoice)) {
248
- if (isLikelyNemoCtc) {
249
- selected = SttModelKind::kNemoCtc;
250
- } else if (isLikelyWenetCtc) {
251
- selected = SttModelKind::kWenetCtc;
252
- } else {
253
- selected = SttModelKind::kSenseVoice;
254
- }
255
- } else if (hasFunAsrNano && isLikelyFunAsrNano) {
256
- selected = SttModelKind::kFunAsrNano;
257
- } else if (!paraformerModelPath.empty()) {
258
- selected = SttModelKind::kParaformer;
259
- } else if (hasWhisper) {
260
- selected = SttModelKind::kWhisper;
261
- } else if (hasFunAsrNano) {
262
- selected = SttModelKind::kFunAsrNano;
263
- } else if (!ctcModelPath.empty()) {
264
- selected = SttModelKind::kZipformerCtc;
265
- }
266
- }
267
-
268
- if (selected == SttModelKind::kUnknown) {
269
- result.error = "No compatible model type detected in " + modelDir;
270
- return result;
271
- }
272
-
273
- result.selectedKind = selected;
274
- result.tokensRequired = !(selected == SttModelKind::kWhisper || selected == SttModelKind::kFunAsrNano);
275
-
276
- if (selected == SttModelKind::kTransducer) {
277
- result.paths.encoder = encoderPath;
278
- result.paths.decoder = decoderPath;
279
- result.paths.joiner = joinerPath;
280
- } else if (selected == SttModelKind::kParaformer) {
281
- result.paths.paraformerModel = paraformerModelPath;
282
- } else if (selected == SttModelKind::kNemoCtc || selected == SttModelKind::kWenetCtc ||
283
- selected == SttModelKind::kSenseVoice || selected == SttModelKind::kZipformerCtc) {
284
- result.paths.ctcModel = ctcModelPath;
285
- } else if (selected == SttModelKind::kWhisper) {
286
- result.paths.whisperEncoder = FileExists(encoderPathInt8) ? encoderPathInt8 : encoderPath;
287
- result.paths.whisperDecoder = FileExists(decoderPathInt8) ? decoderPathInt8 : decoderPath;
288
- } else if (selected == SttModelKind::kFunAsrNano) {
289
- result.paths.funasrEncoderAdaptor = FileExists(funasrEncoderAdaptorInt8) ? funasrEncoderAdaptorInt8 : funasrEncoderAdaptor;
290
- result.paths.funasrLLM = FileExists(funasrLLMInt8) ? funasrLLMInt8 : funasrLLM;
291
- result.paths.funasrEmbedding = FileExists(funasrEmbeddingInt8) ? funasrEmbeddingInt8 : funasrEmbedding;
292
- result.paths.funasrTokenizer = funasrTokenizerDir + "/vocab.json";
293
- }
294
-
295
- if (FileExists(tokensPath)) {
296
- result.paths.tokens = tokensPath;
297
- } else if (result.tokensRequired) {
298
- result.error = "Tokens file not found at " + tokensPath;
299
- return result;
300
- }
301
-
302
- result.ok = true;
303
- return result;
304
- }
305
-
306
- TtsDetectResult DetectTtsModel(const std::string& modelDir, const std::string& modelType) {
307
- TtsDetectResult result;
308
-
309
- if (modelDir.empty()) {
310
- result.error = "TTS: Model directory is empty";
311
- return result;
312
- }
313
-
314
- if (!FileExists(modelDir) || !IsDirectory(modelDir)) {
315
- result.error = "TTS: Model directory does not exist or is not a directory: " + modelDir;
316
- return result;
317
- }
318
-
319
- std::string modelOnnx = modelDir + "/model.onnx";
320
- std::string modelFp16 = modelDir + "/model.fp16.onnx";
321
- std::string modelInt8 = modelDir + "/model.int8.onnx";
322
- std::string tokensFile = modelDir + "/tokens.txt";
323
- std::string lexiconFile = modelDir + "/lexicon.txt";
324
- std::string dataDirPath = modelDir + "/espeak-ng-data";
325
- std::string voicesFile = modelDir + "/voices.bin";
326
- std::string acousticModel = modelDir + "/acoustic_model.onnx";
327
- std::string vocoder = modelDir + "/vocoder.onnx";
328
- std::string encoder = modelDir + "/encoder.onnx";
329
- std::string decoder = modelDir + "/decoder.onnx";
330
-
331
- bool hasVits = FileExists(modelOnnx) || FileExists(modelFp16) || FileExists(modelInt8);
332
- bool hasMatcha = FileExists(acousticModel) && FileExists(vocoder);
333
- bool hasVoicesFile = FileExists(voicesFile);
334
- bool hasZipvoice = FileExists(encoder) && FileExists(decoder) && FileExists(vocoder);
335
- bool hasDataDir = IsDirectory(dataDirPath);
336
-
337
- std::string modelDirLower = ToLower(modelDir);
338
- bool isLikelyKitten = modelDirLower.find("kitten") != std::string::npos;
339
- bool isLikelyKokoro = modelDirLower.find("kokoro") != std::string::npos;
340
-
341
- if (hasMatcha) {
342
- result.detectedModels.push_back({"matcha", modelDir});
343
- }
344
- if (hasZipvoice && !hasMatcha) {
345
- result.detectedModels.push_back({"zipvoice", modelDir});
346
- }
347
- if (hasVoicesFile) {
348
- result.detectedModels.push_back({"kokoro", modelDir});
349
- result.detectedModels.push_back({"kitten", modelDir});
350
- }
351
- if (hasVits && !hasMatcha && !hasZipvoice && !hasVoicesFile) {
352
- result.detectedModels.push_back({"vits", modelDir});
353
- } else if (hasVits && hasVoicesFile) {
354
- result.detectedModels.push_back({"vits", modelDir});
355
- }
356
-
357
- TtsModelKind selected = TtsModelKind::kUnknown;
358
- if (modelType != "auto") {
359
- selected = ParseTtsModelType(modelType);
360
- if (selected == TtsModelKind::kUnknown) {
361
- result.error = "TTS: Unknown model type: " + modelType;
362
- return result;
363
- }
364
- } else {
365
- if (hasMatcha) {
366
- selected = TtsModelKind::kMatcha;
367
- } else if (hasZipvoice) {
368
- selected = TtsModelKind::kZipvoice;
369
- } else if (hasVoicesFile) {
370
- if (isLikelyKitten && !isLikelyKokoro) {
371
- selected = TtsModelKind::kKitten;
372
- } else if (isLikelyKokoro && !isLikelyKitten) {
373
- selected = TtsModelKind::kKokoro;
374
- } else {
375
- selected = TtsModelKind::kKokoro;
376
- }
377
- } else if (hasVits) {
378
- selected = TtsModelKind::kVits;
379
- }
380
- }
381
-
382
- if (selected == TtsModelKind::kUnknown) {
383
- result.error = "TTS: No compatible model type detected in " + modelDir;
384
- return result;
385
- }
386
-
387
- if (selected == TtsModelKind::kVits && !hasVits) {
388
- result.error = "TTS: VITS model requested but model.onnx not found in " + modelDir;
389
- return result;
390
- }
391
- if (selected == TtsModelKind::kMatcha && !hasMatcha) {
392
- result.error = "TTS: Matcha model requested but required files not found in " + modelDir;
393
- return result;
394
- }
395
- if ((selected == TtsModelKind::kKokoro || selected == TtsModelKind::kKitten) && (!hasVits || !hasVoicesFile)) {
396
- result.error = "TTS: Kokoro/Kitten model requested but required files not found in " + modelDir;
397
- return result;
398
- }
399
- if (selected == TtsModelKind::kZipvoice && !hasZipvoice) {
400
- result.error = "TTS: Zipvoice model requested but required files not found in " + modelDir;
401
- return result;
402
- }
403
- if ((selected == TtsModelKind::kVits || selected == TtsModelKind::kMatcha ||
404
- selected == TtsModelKind::kKokoro || selected == TtsModelKind::kKitten ||
405
- selected == TtsModelKind::kZipvoice) &&
406
- !hasDataDir) {
407
- result.error = "TTS: espeak-ng-data not found in " + modelDir +
408
- ". Copy espeak-ng-data into the model directory.";
409
- return result;
410
- }
411
-
412
- std::string ttsModel;
413
- if (FileExists(modelInt8)) {
414
- ttsModel = modelInt8;
415
- } else if (FileExists(modelFp16)) {
416
- ttsModel = modelFp16;
417
- } else if (FileExists(modelOnnx)) {
418
- ttsModel = modelOnnx;
419
- }
420
-
421
- result.selectedKind = selected;
422
- result.paths.ttsModel = ttsModel;
423
- result.paths.tokens = tokensFile;
424
- result.paths.lexicon = FileExists(lexiconFile) ? lexiconFile : "";
425
- result.paths.dataDir = dataDirPath;
426
- result.paths.voices = voicesFile;
427
- result.paths.acousticModel = acousticModel;
428
- result.paths.vocoder = vocoder;
429
- result.paths.encoder = encoder;
430
- result.paths.decoder = decoder;
431
-
432
- if (!FileExists(tokensFile)) {
433
- result.error = "TTS: tokens.txt not found in " + modelDir;
434
- return result;
435
- }
436
-
437
- result.ok = true;
438
- return result;
439
- }
440
-
441
- } // namespace sherpaonnx
@@ -1,48 +0,0 @@
1
- #ifndef SHERPA_ONNX_STT_WRAPPER_H
2
- #define SHERPA_ONNX_STT_WRAPPER_H
3
-
4
- #include "sherpa-onnx-common.h"
5
- #include <cstdint>
6
- #include <memory>
7
- #include <optional>
8
- #include <string>
9
- #include <vector>
10
-
11
- namespace sherpaonnx {
12
-
13
- /**
14
- * Result of STT initialization.
15
- */
16
- struct SttInitializeResult {
17
- bool success;
18
- std::vector<DetectedModel> detectedModels; // List of detected models with type and path
19
- };
20
-
21
- /**
22
- * Wrapper class for sherpa-onnx OfflineRecognizer (STT).
23
- */
24
- class SttWrapper {
25
- public:
26
- SttWrapper();
27
- ~SttWrapper();
28
-
29
- SttInitializeResult initialize(
30
- const std::string& modelDir,
31
- const std::optional<bool>& preferInt8 = std::nullopt,
32
- const std::optional<std::string>& modelType = std::nullopt
33
- );
34
-
35
- std::string transcribeFile(const std::string& filePath);
36
-
37
- bool isInitialized() const;
38
-
39
- void release();
40
-
41
- private:
42
- class Impl;
43
- std::unique_ptr<Impl> pImpl;
44
- };
45
-
46
- } // namespace sherpaonnx
47
-
48
- #endif // SHERPA_ONNX_STT_WRAPPER_H
@@ -1,201 +0,0 @@
1
- #include "sherpa-onnx-stt-wrapper.h"
2
- #include "sherpa-onnx-model-detect.h"
3
- #include <algorithm>
4
- #include <cctype>
5
- #include <cstring>
6
- #include <fstream>
7
- #include <optional>
8
- #include <sstream>
9
-
10
- // iOS logging
11
- #ifdef __APPLE__
12
- #include <Foundation/Foundation.h>
13
- #include <cstdio>
14
- #define LOGI(fmt, ...) NSLog(@"SttWrapper: " fmt, ##__VA_ARGS__)
15
- #define LOGE(fmt, ...) NSLog(@"SttWrapper ERROR: " fmt, ##__VA_ARGS__)
16
- #else
17
- #define LOGI(...)
18
- #define LOGE(...)
19
- #endif
20
-
21
- // Use C++17 filesystem (podspec enforces C++17)
22
- #include <filesystem>
23
- namespace fs = std::filesystem;
24
-
25
- // sherpa-onnx headers - use C++ API (RAII wrapper around C API)
26
- #include "sherpa-onnx/c-api/cxx-api.h"
27
-
28
- namespace sherpaonnx {
29
-
30
- // PIMPL pattern implementation
31
- class SttWrapper::Impl {
32
- public:
33
- bool initialized = false;
34
- std::string modelDir;
35
- std::optional<sherpa_onnx::cxx::OfflineRecognizer> recognizer;
36
- };
37
-
38
- SttWrapper::SttWrapper() : pImpl(std::make_unique<Impl>()) {
39
- LOGI("SttWrapper created");
40
- }
41
-
42
- SttWrapper::~SttWrapper() {
43
- release();
44
- LOGI("SttWrapper destroyed");
45
- }
46
-
47
- SttInitializeResult SttWrapper::initialize(
48
- const std::string& modelDir,
49
- const std::optional<bool>& preferInt8,
50
- const std::optional<std::string>& modelType
51
- ) {
52
- SttInitializeResult result;
53
- result.success = false;
54
-
55
- if (pImpl->initialized) {
56
- release();
57
- }
58
-
59
- if (modelDir.empty()) {
60
- LOGE("Model directory is empty");
61
- return result;
62
- }
63
-
64
- try {
65
- sherpa_onnx::cxx::OfflineRecognizerConfig config;
66
- config.feat_config.sample_rate = 16000;
67
- config.feat_config.feature_dim = 80;
68
-
69
- auto detect = DetectSttModel(modelDir, preferInt8, modelType);
70
- if (!detect.ok) {
71
- LOGE("%s", detect.error.c_str());
72
- return result;
73
- }
74
-
75
- switch (detect.selectedKind) {
76
- case SttModelKind::kTransducer:
77
- config.model_config.transducer.encoder = detect.paths.encoder;
78
- config.model_config.transducer.decoder = detect.paths.decoder;
79
- config.model_config.transducer.joiner = detect.paths.joiner;
80
- break;
81
- case SttModelKind::kParaformer:
82
- config.model_config.paraformer.model = detect.paths.paraformerModel;
83
- break;
84
- case SttModelKind::kNemoCtc:
85
- config.model_config.nemo_ctc.model = detect.paths.ctcModel;
86
- break;
87
- case SttModelKind::kWenetCtc:
88
- config.model_config.wenet_ctc.model = detect.paths.ctcModel;
89
- break;
90
- case SttModelKind::kSenseVoice:
91
- config.model_config.sense_voice.model = detect.paths.ctcModel;
92
- break;
93
- case SttModelKind::kZipformerCtc:
94
- config.model_config.zipformer_ctc.model = detect.paths.ctcModel;
95
- break;
96
- case SttModelKind::kWhisper:
97
- config.model_config.whisper.encoder = detect.paths.whisperEncoder;
98
- config.model_config.whisper.decoder = detect.paths.whisperDecoder;
99
- break;
100
- case SttModelKind::kFunAsrNano:
101
- config.model_config.funasr_nano.encoder_adaptor = detect.paths.funasrEncoderAdaptor;
102
- config.model_config.funasr_nano.llm = detect.paths.funasrLLM;
103
- config.model_config.funasr_nano.embedding = detect.paths.funasrEmbedding;
104
- config.model_config.funasr_nano.tokenizer = detect.paths.funasrTokenizer;
105
- break;
106
- case SttModelKind::kUnknown:
107
- default:
108
- LOGE("No compatible model type detected in %s", modelDir.c_str());
109
- return result;
110
- }
111
-
112
- if (!detect.paths.tokens.empty()) {
113
- config.model_config.tokens = detect.paths.tokens;
114
- }
115
-
116
- config.decoding_method = "greedy_search";
117
- config.model_config.num_threads = 4;
118
- config.model_config.provider = "cpu";
119
-
120
- bool isWhisperModel = !config.model_config.whisper.encoder.empty() && !config.model_config.whisper.decoder.empty();
121
- if (isWhisperModel) {
122
- LOGI("Initializing Whisper model with encoder: %s, decoder: %s", config.model_config.whisper.encoder.c_str(), config.model_config.whisper.decoder.c_str());
123
- } else {
124
- LOGI("Initializing non-Whisper model");
125
- }
126
- try {
127
- pImpl->recognizer = sherpa_onnx::cxx::OfflineRecognizer::Create(config);
128
- } catch (const std::exception& e) {
129
- LOGE("Failed to create recognizer: %s", e.what());
130
- return result;
131
- }
132
-
133
- pImpl->modelDir = modelDir;
134
- pImpl->initialized = true;
135
-
136
- result.success = true;
137
- result.detectedModels = detect.detectedModels;
138
- return result;
139
- } catch (const std::exception& e) {
140
- LOGE("Exception during initialization: %s", e.what());
141
- return result;
142
- } catch (...) {
143
- LOGE("Unknown exception during initialization");
144
- return result;
145
- }
146
- }
147
-
148
- std::string SttWrapper::transcribeFile(const std::string& filePath) {
149
- if (!pImpl->initialized || !pImpl->recognizer.has_value()) {
150
- LOGE("Not initialized. Call initialize() first.");
151
- return "";
152
- }
153
-
154
- try {
155
- // Helper function to check if file exists
156
- auto fileExists = [](const std::string& path) -> bool {
157
- return fs::exists(path);
158
- };
159
-
160
- if (!fileExists(filePath)) {
161
- LOGE("Audio file not found: %s", filePath.c_str());
162
- return "";
163
- }
164
-
165
- sherpa_onnx::cxx::Wave wave = sherpa_onnx::cxx::ReadWave(filePath);
166
-
167
- if (wave.samples.empty()) {
168
- LOGE("Audio file is empty or failed to read: %s", filePath.c_str());
169
- return "";
170
- }
171
-
172
- auto stream = pImpl->recognizer.value().CreateStream();
173
- stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), wave.samples.size());
174
-
175
- pImpl->recognizer.value().Decode(&stream);
176
-
177
- auto result = pImpl->recognizer.value().GetResult(&stream);
178
-
179
- return result.text;
180
- } catch (const std::exception& e) {
181
- LOGE("Exception during transcription: %s", e.what());
182
- return "";
183
- } catch (...) {
184
- LOGE("Unknown exception during transcription");
185
- return "";
186
- }
187
- }
188
-
189
- bool SttWrapper::isInitialized() const {
190
- return pImpl->initialized;
191
- }
192
-
193
- void SttWrapper::release() {
194
- if (pImpl->initialized) {
195
- pImpl->recognizer.reset();
196
- pImpl->initialized = false;
197
- pImpl->modelDir.clear();
198
- }
199
- }
200
-
201
- } // namespace sherpaonnx