react-native-sherpa-onnx 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -236
- package/SherpaOnnx.podspec +68 -64
- package/android/build.gradle +182 -192
- package/android/codegen.gradle +57 -0
- package/android/prebuilt-download.gradle +428 -0
- package/android/prebuilt-versions.gradle +43 -0
- package/android/proguard-rules.pro +10 -0
- package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
- package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
- package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
- package/android/src/main/cpp/CMakeLists.txt +166 -129
- package/android/src/main/cpp/CMakePresets.json +54 -0
- package/android/src/main/cpp/crypto/sha256.cpp +174 -0
- package/android/src/main/cpp/crypto/sha256.h +16 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
- package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
- package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
- package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
- package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
- package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
- package/ios/SherpaOnnx+Assets.h +11 -0
- package/ios/SherpaOnnx+Assets.mm +325 -0
- package/ios/SherpaOnnx+STT.mm +455 -118
- package/ios/SherpaOnnx+TTS.mm +1101 -712
- package/ios/SherpaOnnx.h +17 -6
- package/ios/SherpaOnnx.mm +206 -311
- package/ios/SherpaOnnx.xcconfig +19 -19
- package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
- package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
- package/ios/libarchive_darwin_config.h +153 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
- package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
- package/ios/scripts/patch-libarchive-includes.sh +61 -0
- package/ios/scripts/setup-ios-libarchive.sh +98 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
- package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
- package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
- package/lib/module/NativeSherpaOnnx.js +3 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +22 -0
- package/lib/module/audio/index.js.map +1 -0
- package/lib/module/diarization/index.js +1 -1
- package/lib/module/diarization/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +918 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -0
- package/lib/module/download/extractTarBz2.js +53 -0
- package/lib/module/download/extractTarBz2.js.map +1 -0
- package/lib/module/download/index.js +6 -0
- package/lib/module/download/index.js.map +1 -0
- package/lib/module/download/validation.js +178 -0
- package/lib/module/download/validation.js.map +1 -0
- package/lib/module/enhancement/index.js +1 -1
- package/lib/module/enhancement/index.js.map +1 -1
- package/lib/module/index.js +41 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/separation/index.js +1 -1
- package/lib/module/separation/index.js.map +1 -1
- package/lib/module/stt/index.js +127 -60
- package/lib/module/stt/index.js.map +1 -1
- package/lib/module/stt/sttModelLanguages.js +512 -0
- package/lib/module/stt/sttModelLanguages.js.map +1 -0
- package/lib/module/stt/types.js +53 -1
- package/lib/module/stt/types.js.map +1 -1
- package/lib/module/tts/index.js +216 -289
- package/lib/module/tts/index.js.map +1 -1
- package/lib/module/tts/types.js +86 -1
- package/lib/module/tts/types.js.map +1 -1
- package/lib/module/types.js.map +1 -1
- package/lib/module/utils.js +86 -73
- package/lib/module/utils.js.map +1 -1
- package/lib/module/vad/index.js +1 -1
- package/lib/module/vad/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +13 -0
- package/lib/typescript/src/audio/index.d.ts.map +1 -0
- package/lib/typescript/src/diarization/index.d.ts +3 -2
- package/lib/typescript/src/diarization/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
- package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
- package/lib/typescript/src/download/index.d.ts +7 -0
- package/lib/typescript/src/download/index.d.ts.map +1 -0
- package/lib/typescript/src/download/validation.d.ts +57 -0
- package/lib/typescript/src/download/validation.d.ts.map +1 -0
- package/lib/typescript/src/enhancement/index.d.ts +3 -2
- package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +26 -2
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/separation/index.d.ts +3 -2
- package/lib/typescript/src/separation/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/index.d.ts +31 -43
- package/lib/typescript/src/stt/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
- package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
- package/lib/typescript/src/stt/types.d.ts +196 -9
- package/lib/typescript/src/stt/types.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +25 -211
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/lib/typescript/src/tts/types.d.ts +148 -25
- package/lib/typescript/src/tts/types.d.ts.map +1 -1
- package/lib/typescript/src/types.d.ts +0 -32
- package/lib/typescript/src/types.d.ts.map +1 -1
- package/lib/typescript/src/utils.d.ts +28 -13
- package/lib/typescript/src/utils.d.ts.map +1 -1
- package/lib/typescript/src/vad/index.d.ts +3 -2
- package/lib/typescript/src/vad/index.d.ts.map +1 -1
- package/package.json +250 -222
- package/scripts/check-qnn-support.sh +78 -0
- package/scripts/setup-ios-framework.sh +379 -282
- package/src/NativeSherpaOnnx.ts +474 -251
- package/src/audio/index.ts +32 -0
- package/src/diarization/index.ts +4 -2
- package/src/download/ModelDownloadManager.ts +1325 -0
- package/src/download/extractTarBz2.ts +78 -0
- package/src/download/index.ts +43 -0
- package/src/download/validation.ts +279 -0
- package/src/enhancement/index.ts +4 -2
- package/src/index.tsx +78 -27
- package/src/separation/index.ts +4 -2
- package/src/stt/index.ts +249 -89
- package/src/stt/sttModelLanguages.ts +237 -0
- package/src/stt/types.ts +263 -9
- package/src/tts/index.ts +470 -458
- package/src/tts/types.ts +373 -218
- package/src/types.ts +0 -44
- package/src/utils.ts +145 -131
- package/src/vad/index.ts +4 -2
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
- package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
- package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
- package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
- package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
- package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
- package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
- package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
- package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
- package/ios/sherpa-onnx-model-detect.mm +0 -441
- package/ios/sherpa-onnx-stt-wrapper.h +0 -48
- package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
- package/scripts/copy-headers.js +0 -184
- package/scripts/setup-assets.js +0 -323
package/ios/SherpaOnnx+STT.mm
CHANGED
|
@@ -1,118 +1,455 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if (
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
1
|
+
/**
|
|
2
|
+
* SherpaOnnx+STT.mm
|
|
3
|
+
*
|
|
4
|
+
* Purpose: STT (speech-to-text) TurboModule methods: initializeStt, detectSttModel, transcribeFile, transcribeSamples, setSttConfig, unloadStt.
|
|
5
|
+
* Uses sherpa-onnx-stt-wrapper for native recognition and sherpa-onnx-model-detect for model detection.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#import "SherpaOnnx.h"
|
|
9
|
+
#import <React/RCTLog.h>
|
|
10
|
+
|
|
11
|
+
#include "sherpa-onnx-stt-wrapper.h"
|
|
12
|
+
#include "sherpa-onnx-model-detect.h"
|
|
13
|
+
#include <memory>
|
|
14
|
+
#include <mutex>
|
|
15
|
+
#include <optional>
|
|
16
|
+
#include <string>
|
|
17
|
+
#include <unordered_map>
|
|
18
|
+
#include <vector>
|
|
19
|
+
|
|
20
|
+
// State held for one STT instance: owns the native recognizer wrapper.
struct SttInstanceState {
|
|
21
|
+
std::unique_ptr<sherpaonnx::SttWrapper> wrapper;
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
// Registry of STT instances, keyed by the instanceId string given to the module methods.
static std::unordered_map<std::string, std::unique_ptr<SttInstanceState>> g_stt_instances;
|
|
25
|
+
// Locked by every method in this file before it reads or modifies g_stt_instances.
static std::mutex g_stt_mutex;
|
|
26
|
+
|
|
27
|
+
/**
 * Maps an STT model kind to the string identifier reported to JavaScript.
 * Any kind without an explicit mapping is reported as @"unknown".
 */
static NSString *sttModelKindToNSString(sherpaonnx::SttModelKind kind) {
    using Kind = sherpaonnx::SttModelKind;

    NSString *label = @"unknown";
    switch (kind) {
        case Kind::kTransducer:     label = @"transducer";      break;
        case Kind::kNemoTransducer: label = @"nemo_transducer"; break;
        case Kind::kParaformer:     label = @"paraformer";      break;
        case Kind::kNemoCtc:        label = @"nemo_ctc";        break;
        case Kind::kWenetCtc:       label = @"wenet_ctc";       break;
        case Kind::kSenseVoice:     label = @"sense_voice";     break;
        case Kind::kZipformerCtc:   label = @"zipformer_ctc";   break;
        case Kind::kWhisper:        label = @"whisper";         break;
        case Kind::kFunAsrNano:     label = @"funasr_nano";     break;
        case Kind::kFireRedAsr:     label = @"fire_red_asr";    break;
        case Kind::kMoonshine:      label = @"moonshine";       break;
        case Kind::kDolphin:        label = @"dolphin";         break;
        case Kind::kCanary:         label = @"canary";          break;
        case Kind::kOmnilingual:    label = @"omnilingual";     break;
        case Kind::kMedAsr:         label = @"medasr";          break;
        case Kind::kTeleSpeechCtc:  label = @"telespeech_ctc";  break;
        default:                                                break;
    }
    return label;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Converts a std::string to NSString, substituting @"" when the bytes are not valid UTF-8
 * (+stringWithUTF8String: returns nil in that case).
 */
static NSString *sttSafeString(const std::string& s) {
    NSString *converted = [NSString stringWithUTF8String:s.c_str()];
    return converted ?: @"";
}

/**
 * Builds the dictionary handed back to JavaScript for a recognition result.
 *
 * Keys: text, tokens, timestamps, lang, emotion, event, durations.
 * A token that is not valid UTF-8 on its own is emitted as an empty string rather than
 * skipped, so the tokens array keeps one entry per element of r.tokens and never receives
 * nil (which -addObject: would reject with an exception).
 */
static NSDictionary *sttResultToDict(const sherpaonnx::SttRecognitionResult& r) {
    NSMutableArray *tokens = [NSMutableArray arrayWithCapacity:r.tokens.size()];
    for (const auto& token : r.tokens) {
        [tokens addObject:sttSafeString(token)];
    }

    NSMutableArray *timestamps = [NSMutableArray arrayWithCapacity:r.timestamps.size()];
    for (float ts : r.timestamps) {
        [timestamps addObject:@(ts)];
    }

    NSMutableArray *durations = [NSMutableArray arrayWithCapacity:r.durations.size()];
    for (float d : r.durations) {
        [durations addObject:@(d)];
    }

    return @{
        @"text": sttSafeString(r.text),
        @"tokens": tokens,
        @"timestamps": timestamps,
        @"lang": sttSafeString(r.lang),
        @"emotion": sttSafeString(r.emotion),
        @"event": sttSafeString(r.event),
        @"durations": durations
    };
}
|
|
73
|
+
|
|
74
|
+
@implementation SherpaOnnx (STT)
|
|
75
|
+
|
|
76
|
+
/**
 * Creates the registry entry for `instanceId` if it does not exist yet and calls
 * SttWrapper::initialize on its wrapper.
 *
 * Optional arguments that are nil (or empty, for strings) are forwarded as std::nullopt.
 * Each `modelOptions` sub-dictionary ("whisper", "senseVoice", "canary", "funasrNano") is
 * parsed into its own options struct and a pointer to it is forwarded only when the
 * sub-dictionary is present. Values of an unexpected type (e.g. NSNull) are ignored.
 *
 * Resolves with { success, detectedModels, modelType?, decodingMethod? }.
 * Rejects with INIT_ERROR, or with HOTWORDS_NOT_SUPPORTED / INVALID_HOTWORDS_FILE when the
 * wrapper's error message starts with that code.
 *
 * g_stt_mutex is held for the whole initialization.
 *
 * NOTE(review): `modelingUnit` and `bpeVocab` are accepted but not forwarded to the wrapper
 * by this method — confirm whether SttWrapper::initialize is expected to receive them.
 */
- (void)initializeStt:(NSString *)instanceId
             modelDir:(NSString *)modelDir
           preferInt8:(NSNumber *)preferInt8
            modelType:(NSString *)modelType
                debug:(NSNumber *)debug
         hotwordsFile:(NSString *)hotwordsFile
        hotwordsScore:(NSNumber *)hotwordsScore
           numThreads:(NSNumber *)numThreads
             provider:(NSString *)provider
             ruleFsts:(NSString *)ruleFsts
             ruleFars:(NSString *)ruleFars
               dither:(NSNumber *)dither
         modelOptions:(NSDictionary *)modelOptions
         modelingUnit:(NSString *)modelingUnit
             bpeVocab:(NSString *)bpeVocab
              resolve:(RCTPromiseResolveBlock)resolve
               reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"INIT_ERROR", @"instanceId is required", nil);
        return;
    }
    // -UTF8String on nil yields NULL, and std::string must not be constructed from NULL.
    if (modelDir == nil) {
        reject(@"INIT_ERROR", @"modelDir is required", nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    RCTLogInfo(@"Initializing STT instance %@ with modelDir: %@", instanceId, modelDir);

    // Type-checked accessors: return nil when the value is absent or of another class.
    auto stringOrNil = [](id value) -> NSString * {
        return [value isKindOfClass:[NSString class]] ? (NSString *)value : nil;
    };
    auto numberOrNil = [](id value) -> NSNumber * {
        return [value isKindOfClass:[NSNumber class]] ? (NSNumber *)value : nil;
    };
    // nil / empty string -> std::nullopt, otherwise the UTF-8 contents.
    auto optionalString = [](NSString *s) -> std::optional<std::string> {
        if (s == nil || [s length] == 0) return std::nullopt;
        return std::string([s UTF8String]);
    };

    // The C++ try is the outer one so that NSExceptions are still handled by @catch below,
    // while C++ exceptions (which @catch (NSException *) does not match) reject the promise
    // instead of terminating the process.
    try {
        @try {
            std::lock_guard<std::mutex> lock(g_stt_mutex);

            std::unique_ptr<SttInstanceState>& slot = g_stt_instances[instanceIdStr];
            if (slot == nullptr) {
                slot = std::make_unique<SttInstanceState>();
            }
            SttInstanceState *inst = slot.get();
            if (inst->wrapper == nullptr) {
                inst->wrapper = std::make_unique<sherpaonnx::SttWrapper>();
            }

            std::string modelDirStr = [modelDir UTF8String];

            std::optional<bool> preferInt8Opt = std::nullopt;
            if (preferInt8 != nil) {
                preferInt8Opt = [preferInt8 boolValue];
            }

            std::optional<std::string> modelTypeOpt = optionalString(modelType);

            bool debugVal = (debug != nil && [debug boolValue]);

            std::optional<std::string> hotwordsFileOpt = optionalString(hotwordsFile);

            std::optional<float> hotwordsScoreOpt = std::nullopt;
            if (hotwordsScore != nil) {
                hotwordsScoreOpt = [hotwordsScore floatValue];
            }

            std::optional<int32_t> numThreadsOpt = std::nullopt;
            if (numThreads != nil) {
                numThreadsOpt = [numThreads intValue];
            }

            std::optional<std::string> providerOpt = optionalString(provider);
            std::optional<std::string> ruleFstsOpt = optionalString(ruleFsts);
            std::optional<std::string> ruleFarsOpt = optionalString(ruleFars);

            std::optional<float> ditherOpt = std::nullopt;
            if (dither != nil) {
                ditherOpt = [dither floatValue];
            }

            // Parse model-specific options (only the block for the loaded model type is applied in C++).
            sherpaonnx::SttWhisperOptions whisperOpts;
            sherpaonnx::SttSenseVoiceOptions senseVoiceOpts;
            sherpaonnx::SttCanaryOptions canaryOpts;
            sherpaonnx::SttFunAsrNanoOptions funasrNanoOpts;
            const sherpaonnx::SttWhisperOptions *whisperOptsPtr = nullptr;
            const sherpaonnx::SttSenseVoiceOptions *senseVoiceOptsPtr = nullptr;
            const sherpaonnx::SttCanaryOptions *canaryOptsPtr = nullptr;
            const sherpaonnx::SttFunAsrNanoOptions *funasrNanoOptsPtr = nullptr;
            if (modelOptions != nil && [modelOptions isKindOfClass:[NSDictionary class]]) {
                NSDictionary *w = modelOptions[@"whisper"];
                if ([w isKindOfClass:[NSDictionary class]]) {
                    if (NSString *s = stringOrNil(w[@"language"])) whisperOpts.language = std::string([s UTF8String]);
                    if (NSString *s = stringOrNil(w[@"task"])) whisperOpts.task = std::string([s UTF8String]);
                    if (NSNumber *n = numberOrNil(w[@"tailPaddings"])) whisperOpts.tail_paddings = [n intValue];
                    whisperOptsPtr = &whisperOpts;
                }
                NSDictionary *sv = modelOptions[@"senseVoice"];
                if ([sv isKindOfClass:[NSDictionary class]]) {
                    if (NSString *s = stringOrNil(sv[@"language"])) senseVoiceOpts.language = std::string([s UTF8String]);
                    if (NSNumber *n = numberOrNil(sv[@"useItn"])) senseVoiceOpts.use_itn = [n boolValue];
                    senseVoiceOptsPtr = &senseVoiceOpts;
                }
                NSDictionary *c = modelOptions[@"canary"];
                if ([c isKindOfClass:[NSDictionary class]]) {
                    if (NSString *s = stringOrNil(c[@"srcLang"])) canaryOpts.src_lang = std::string([s UTF8String]);
                    if (NSString *s = stringOrNil(c[@"tgtLang"])) canaryOpts.tgt_lang = std::string([s UTF8String]);
                    if (NSNumber *n = numberOrNil(c[@"usePnc"])) canaryOpts.use_pnc = [n boolValue];
                    canaryOptsPtr = &canaryOpts;
                }
                NSDictionary *fn = modelOptions[@"funasrNano"];
                if ([fn isKindOfClass:[NSDictionary class]]) {
                    if (NSString *s = stringOrNil(fn[@"systemPrompt"])) funasrNanoOpts.system_prompt = std::string([s UTF8String]);
                    if (NSString *s = stringOrNil(fn[@"userPrompt"])) funasrNanoOpts.user_prompt = std::string([s UTF8String]);
                    if (NSNumber *n = numberOrNil(fn[@"maxNewTokens"])) funasrNanoOpts.max_new_tokens = [n intValue];
                    if (NSNumber *n = numberOrNil(fn[@"temperature"])) funasrNanoOpts.temperature = [n floatValue];
                    if (NSNumber *n = numberOrNil(fn[@"topP"])) funasrNanoOpts.top_p = [n floatValue];
                    if (NSNumber *n = numberOrNil(fn[@"seed"])) funasrNanoOpts.seed = [n intValue];
                    if (NSString *s = stringOrNil(fn[@"language"])) funasrNanoOpts.language = std::string([s UTF8String]);
                    if (NSNumber *n = numberOrNil(fn[@"itn"])) funasrNanoOpts.itn = [n boolValue];
                    if (NSString *s = stringOrNil(fn[@"hotwords"])) funasrNanoOpts.hotwords = std::string([s UTF8String]);
                    funasrNanoOptsPtr = &funasrNanoOpts;
                }
            }

            sherpaonnx::SttInitializeResult result = inst->wrapper->initialize(
                modelDirStr, preferInt8Opt, modelTypeOpt, debugVal, hotwordsFileOpt, hotwordsScoreOpt,
                numThreadsOpt, providerOpt, ruleFstsOpt, ruleFarsOpt, ditherOpt,
                whisperOptsPtr, senseVoiceOptsPtr, canaryOptsPtr, funasrNanoOptsPtr);

            if (result.success) {
                RCTLogInfo(@"Sherpa-onnx initialized successfully");

                NSMutableDictionary *resultDict = [NSMutableDictionary dictionary];
                resultDict[@"success"] = @YES;

                NSMutableArray *detectedModelsArray = [NSMutableArray array];
                for (const auto& model : result.detectedModels) {
                    NSMutableDictionary *modelDict = [NSMutableDictionary dictionary];
                    modelDict[@"type"] = [NSString stringWithUTF8String:model.type.c_str()];
                    modelDict[@"modelDir"] = [NSString stringWithUTF8String:model.modelDir.c_str()];
                    [detectedModelsArray addObject:modelDict];
                }
                resultDict[@"detectedModels"] = detectedModelsArray;
                if (!result.modelType.empty()) {
                    resultDict[@"modelType"] = [NSString stringWithUTF8String:result.modelType.c_str()];
                }
                if (!result.decodingMethod.empty()) {
                    resultDict[@"decodingMethod"] = [NSString stringWithUTF8String:result.decodingMethod.c_str()];
                }

                resolve(resultDict);
            } else {
                NSString *errorMsg = nil;
                if (!result.error.empty()) {
                    errorMsg = [NSString stringWithUTF8String:result.error.c_str()];
                }
                // Also covers an error string that is not valid UTF-8 (conversion returned nil).
                if (errorMsg == nil) {
                    errorMsg = [NSString stringWithFormat:@"Failed to initialize sherpa-onnx with model directory: %@", modelDir];
                }
                NSString *code = @"INIT_ERROR";
                if ([errorMsg hasPrefix:@"HOTWORDS_NOT_SUPPORTED"]) code = @"HOTWORDS_NOT_SUPPORTED";
                else if ([errorMsg hasPrefix:@"INVALID_HOTWORDS_FILE"]) code = @"INVALID_HOTWORDS_FILE";
                RCTLogError(@"%@", errorMsg);
                reject(code, errorMsg, nil);
            }
        } @catch (NSException *exception) {
            NSString *errorMsg = [NSString stringWithFormat:@"Exception during initialization: %@", exception.reason];
            RCTLogError(@"%@", errorMsg);
            reject(@"INIT_ERROR", errorMsg, nil);
        }
    } catch (const std::exception& e) {
        NSString *what = e.what() ? [NSString stringWithUTF8String:e.what()] : nil;
        NSString *errorMsg = [NSString stringWithFormat:@"Exception during initialization: %@", what ?: @"unknown C++ exception"];
        RCTLogError(@"%@", errorMsg);
        reject(@"INIT_ERROR", errorMsg, nil);
    } catch (...) {
        NSString *errorMsg = @"Exception during initialization: unknown C++ exception";
        RCTLogError(@"%@", errorMsg);
        reject(@"INIT_ERROR", errorMsg, nil);
    }
}
|
|
249
|
+
|
|
250
|
+
/**
 * Runs STT model detection on `modelDir` without creating or touching any STT instance.
 *
 * `preferInt8` nil is forwarded as std::nullopt. `modelType` nil, empty or "auto" is
 * forwarded as std::nullopt.
 *
 * Resolves with { success, error?, detectedModels, modelType }, where `success` mirrors the
 * detector's `ok` flag (a failed detection still resolves). Rejects with DETECT_ERROR when
 * `modelDir` is nil or when an exception is raised.
 */
- (void)detectSttModel:(NSString *)modelDir
            preferInt8:(NSNumber *)preferInt8
             modelType:(NSString *)modelType
               resolve:(RCTPromiseResolveBlock)resolve
                reject:(RCTPromiseRejectBlock)reject
{
    // -UTF8String on nil yields NULL, and std::string must not be constructed from NULL.
    if (modelDir == nil) {
        reject(@"DETECT_ERROR", @"modelDir is required", nil);
        return;
    }
    RCTLogInfo(@"Detecting STT model in: %@", modelDir);

    // Outer C++ try: @catch (NSException *) does not match C++ exceptions, so they are
    // handled here instead of escaping the method.
    try {
        @try {
            std::string modelDirStr = [modelDir UTF8String];
            std::optional<bool> preferInt8Opt = std::nullopt;
            if (preferInt8 != nil) {
                preferInt8Opt = [preferInt8 boolValue];
            }
            std::optional<std::string> modelTypeOpt = std::nullopt;
            if (modelType != nil && [modelType length] > 0 && ![modelType isEqualToString:@"auto"]) {
                modelTypeOpt = [modelType UTF8String];
            }
            sherpaonnx::SttDetectResult result = sherpaonnx::DetectSttModel(modelDirStr, preferInt8Opt, modelTypeOpt, false);

            NSMutableDictionary *resultDict = [NSMutableDictionary dictionary];
            resultDict[@"success"] = @(result.ok);
            if (!result.error.empty()) {
                resultDict[@"error"] = [NSString stringWithUTF8String:result.error.c_str()];
            }
            NSMutableArray *detectedModelsArray = [NSMutableArray array];
            for (const auto& model : result.detectedModels) {
                NSMutableDictionary *modelDict = [NSMutableDictionary dictionary];
                modelDict[@"type"] = [NSString stringWithUTF8String:model.type.c_str()];
                modelDict[@"modelDir"] = [NSString stringWithUTF8String:model.modelDir.c_str()];
                [detectedModelsArray addObject:modelDict];
            }
            resultDict[@"detectedModels"] = detectedModelsArray;
            resultDict[@"modelType"] = sttModelKindToNSString(result.selectedKind);
            resolve(resultDict);
        } @catch (NSException *exception) {
            NSString *errorMsg = [NSString stringWithFormat:@"STT model detection failed: %@", exception.reason];
            RCTLogError(@"%@", errorMsg);
            reject(@"DETECT_ERROR", errorMsg, nil);
        }
    } catch (const std::exception& e) {
        NSString *what = e.what() ? [NSString stringWithUTF8String:e.what()] : nil;
        NSString *errorMsg = [NSString stringWithFormat:@"STT model detection failed: %@", what ?: @"unknown C++ exception"];
        RCTLogError(@"%@", errorMsg);
        reject(@"DETECT_ERROR", errorMsg, nil);
    } catch (...) {
        NSString *errorMsg = @"STT model detection failed: unknown C++ exception";
        RCTLogError(@"%@", errorMsg);
        reject(@"DETECT_ERROR", errorMsg, nil);
    }
}
|
|
290
|
+
|
|
291
|
+
/**
 * Transcribes the audio file at `filePath` with the STT instance registered under
 * `instanceId`.
 *
 * Resolves with the dictionary produced by sttResultToDict. Rejects with TRANSCRIBE_ERROR
 * when an argument is missing, the instance is unknown or not initialized, or the wrapper
 * throws. g_stt_mutex is held for the whole call.
 */
- (void)transcribeFile:(NSString *)instanceId
              filePath:(NSString *)filePath
               resolve:(RCTPromiseResolveBlock)resolve
                reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"TRANSCRIBE_ERROR", @"instanceId is required", nil);
        return;
    }
    // -UTF8String on nil yields NULL, and std::string must not be constructed from NULL.
    if (filePath == nil) {
        reject(@"TRANSCRIBE_ERROR", @"filePath is required", nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    std::lock_guard<std::mutex> lock(g_stt_mutex);
    auto it = g_stt_instances.find(instanceIdStr);
    if (it == g_stt_instances.end() || it->second->wrapper == nullptr || !it->second->wrapper->isInitialized()) {
        reject(@"TRANSCRIBE_ERROR", @"STT not initialized. Call initializeStt first.", nil);
        return;
    }
    sherpaonnx::SttWrapper *wrapper = it->second->wrapper.get();
    try {
        std::string filePathStr = [filePath UTF8String];
        sherpaonnx::SttRecognitionResult result = wrapper->transcribeFile(filePathStr);
        resolve(sttResultToDict(result));
    } catch (const std::exception& e) {
        NSString *errorMsg = e.what() ? [NSString stringWithUTF8String:e.what()] : @"Recognition failed.";
        // stringWithUTF8String: returns nil when what() is not valid UTF-8.
        if (!errorMsg) errorMsg = @"Recognition failed.";
        RCTLogError(@"Transcribe error: %@", errorMsg);
        reject(@"TRANSCRIBE_ERROR", errorMsg, nil);
    } catch (...) {
        NSString *errorMsg = @"Unknown error during transcription";
        RCTLogError(@"Transcribe error: %@", errorMsg);
        reject(@"TRANSCRIBE_ERROR", errorMsg, nil);
    }
}
|
|
323
|
+
|
|
324
|
+
/**
 * Transcribes in-memory audio with the STT instance registered under `instanceId`.
 *
 * Each element of `samples` is converted with -floatValue and `sampleRate` is truncated to
 * int32_t before both are handed to the wrapper. Resolves with the dictionary produced by
 * sttResultToDict; rejects with TRANSCRIBE_ERROR when the instance id is missing, the
 * instance is unknown or not initialized, or the wrapper throws. g_stt_mutex is held for
 * the whole call.
 */
- (void)transcribeSamples:(NSString *)instanceId
                  samples:(NSArray<NSNumber *> *)samples
               sampleRate:(double)sampleRate
                  resolve:(RCTPromiseResolveBlock)resolve
                   reject:(RCTPromiseRejectBlock)reject
{
    const BOOL hasInstanceId = (instanceId != nil && [instanceId length] > 0);
    if (!hasInstanceId) {
        reject(@"TRANSCRIBE_ERROR", @"instanceId is required", nil);
        return;
    }

    const std::string key = [instanceId UTF8String];
    std::lock_guard<std::mutex> guard(g_stt_mutex);

    auto entry = g_stt_instances.find(key);
    const bool ready = entry != g_stt_instances.end()
        && entry->second->wrapper != nullptr
        && entry->second->wrapper->isInitialized();
    if (!ready) {
        reject(@"TRANSCRIBE_ERROR", @"STT not initialized. Call initializeStt first.", nil);
        return;
    }
    sherpaonnx::SttWrapper *recognizer = entry->second->wrapper.get();

    try {
        const NSUInteger total = [samples count];
        std::vector<float> pcm;
        pcm.reserve(total);
        for (NSUInteger i = 0; i < total; ++i) {
            pcm.push_back([samples[i] floatValue]);
        }

        const int32_t rate = static_cast<int32_t>(sampleRate);
        sherpaonnx::SttRecognitionResult recognition = recognizer->transcribeSamples(pcm, rate);
        resolve(sttResultToDict(recognition));
    } catch (const std::exception& e) {
        NSString *message = nil;
        if (e.what()) {
            message = [NSString stringWithUTF8String:e.what()];
        }
        if (message == nil) {
            message = @"Recognition failed.";
        }
        RCTLogError(@"TranscribeSamples error: %@", message);
        reject(@"TRANSCRIBE_ERROR", message, nil);
    } catch (...) {
        NSString *message = @"Unknown error during transcription";
        RCTLogError(@"TranscribeSamples error: %@", message);
        reject(@"TRANSCRIBE_ERROR", message, nil);
    }
}
|
|
361
|
+
|
|
362
|
+
/**
 * Applies runtime configuration to an initialized STT instance.
 *
 * Recognized `options` keys: decodingMethod, maxActivePaths, hotwordsFile, hotwordsScore,
 * blankPenalty, ruleFsts, ruleFars. A key whose value has an unexpected type is ignored.
 *
 * Resolves with nil on success. Rejects with CONFIG_ERROR, or with
 * HOTWORDS_NOT_SUPPORTED / INVALID_HOTWORDS_FILE when the failure reason starts with that
 * code. g_stt_mutex is held for the whole call.
 */
- (void)setSttConfig:(NSString *)instanceId
             options:(NSDictionary *)options
             resolve:(RCTPromiseResolveBlock)resolve
              reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        reject(@"CONFIG_ERROR", @"instanceId is required", nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    std::lock_guard<std::mutex> lock(g_stt_mutex);
    auto it = g_stt_instances.find(instanceIdStr);
    if (it == g_stt_instances.end() || it->second->wrapper == nullptr || !it->second->wrapper->isInitialized()) {
        reject(@"CONFIG_ERROR", @"STT not initialized. Call initializeStt first.", nil);
        return;
    }
    sherpaonnx::SttWrapper *wrapper = it->second->wrapper.get();

    // Derives the promise rejection code from the prefix of the failure reason.
    auto errorCodeFor = [](NSString *reason) -> NSString * {
        if ([reason hasPrefix:@"HOTWORDS_NOT_SUPPORTED"]) return @"HOTWORDS_NOT_SUPPORTED";
        if ([reason hasPrefix:@"INVALID_HOTWORDS_FILE"]) return @"INVALID_HOTWORDS_FILE";
        return @"CONFIG_ERROR";
    };

    // The outermost C++ try catches exceptions that are neither NSException nor
    // std::exception, so they reject the promise instead of escaping the method.
    try {
        @try {
            sherpaonnx::SttRuntimeConfigOptions opts;
            if (options[@"decodingMethod"] != nil) {
                opts.decoding_method = [options[@"decodingMethod"] isKindOfClass:[NSString class]]
                    ? std::optional<std::string>([(NSString *)options[@"decodingMethod"] UTF8String])
                    : std::nullopt;
            }
            if (options[@"maxActivePaths"] != nil) {
                NSNumber *n = options[@"maxActivePaths"];
                if ([n isKindOfClass:[NSNumber class]]) opts.max_active_paths = [n intValue];
            }
            if (options[@"hotwordsFile"] != nil && [options[@"hotwordsFile"] isKindOfClass:[NSString class]]) {
                opts.hotwords_file = [(NSString *)options[@"hotwordsFile"] UTF8String];
            }
            if (options[@"hotwordsScore"] != nil) {
                NSNumber *n = options[@"hotwordsScore"];
                if ([n isKindOfClass:[NSNumber class]]) opts.hotwords_score = [n floatValue];
            }
            if (options[@"blankPenalty"] != nil) {
                NSNumber *n = options[@"blankPenalty"];
                if ([n isKindOfClass:[NSNumber class]]) opts.blank_penalty = [n floatValue];
            }
            if (options[@"ruleFsts"] != nil && [options[@"ruleFsts"] isKindOfClass:[NSString class]]) {
                opts.rule_fsts = [(NSString *)options[@"ruleFsts"] UTF8String];
            }
            if (options[@"ruleFars"] != nil && [options[@"ruleFars"] isKindOfClass:[NSString class]]) {
                opts.rule_fars = [(NSString *)options[@"ruleFars"] UTF8String];
            }
            try {
                wrapper->setConfig(opts);
                resolve(nil);
            } catch (const std::exception& e) {
                NSString *reason = e.what() ? [NSString stringWithUTF8String:e.what()] : nil;
                // stringWithUTF8String: returns nil when what() is not valid UTF-8.
                if (reason == nil) reason = @"Unknown error";
                RCTLogError(@"setSttConfig: %@", reason);
                reject(errorCodeFor(reason), reason, nil);
            }
        } @catch (NSException *exception) {
            NSString *reason = exception.reason ?: @"";
            NSString *errorMsg = [NSString stringWithFormat:@"Exception in setSttConfig: %@", reason];
            RCTLogError(@"%@", errorMsg);
            reject(errorCodeFor(reason), errorMsg, nil);
        }
    } catch (...) {
        NSString *errorMsg = @"Unknown error in setSttConfig";
        RCTLogError(@"%@", errorMsg);
        reject(@"CONFIG_ERROR", errorMsg, nil);
    }
}
|
|
428
|
+
|
|
429
|
+
/**
 * Releases the STT instance registered under `instanceId` and removes it from the registry.
 *
 * Resolves with nil when `instanceId` is nil/empty, when no such instance exists, and after
 * a successful release. Rejects with CLEANUP_ERROR if an NSException is raised.
 */
- (void)unloadStt:(NSString *)instanceId
          resolve:(RCTPromiseResolveBlock)resolve
           reject:(RCTPromiseRejectBlock)reject
{
    if (instanceId == nil || [instanceId length] == 0) {
        resolve(nil);
        return;
    }
    std::string instanceIdStr = [instanceId UTF8String];
    @try {
        std::lock_guard<std::mutex> lock(g_stt_mutex);
        auto it = g_stt_instances.find(instanceIdStr);
        if (it != g_stt_instances.end()) {
            // The other methods in this file treat a null wrapper as possible; do the same
            // here instead of dereferencing it unconditionally.
            if (it->second != nullptr && it->second->wrapper != nullptr) {
                it->second->wrapper->release();
                it->second->wrapper.reset();
            }
            g_stt_instances.erase(it);
        }
        RCTLogInfo(@"STT instance %@ released", instanceId);
        resolve(nil);
    } @catch (NSException *exception) {
        NSString *errorMsg = [NSString stringWithFormat:@"Exception during cleanup: %@", exception.reason];
        RCTLogError(@"%@", errorMsg);
        reject(@"CLEANUP_ERROR", errorMsg, nil);
    }
}
|
|
454
|
+
|
|
455
|
+
@end
|