react-native-sherpa-onnx 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -77
- package/SherpaOnnx.podspec +79 -45
- package/android/build.gradle +8 -2
- package/android/prebuilt-download.gradle +70 -16
- package/android/prebuilt-versions.gradle +14 -6
- package/android/src/main/cpp/CMakeLists.txt +2 -0
- package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +202 -328
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +22 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +2 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +96 -142
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +40 -4
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +774 -316
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +208 -122
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +92 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +14 -2
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.cpp +229 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +144 -0
- package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +1 -1
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +157 -11
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +75 -24
- package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +52 -1
- package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
- package/ios/SherpaOnnx+STT.mm +2 -0
- package/ios/SherpaOnnx+TTS.mm +17 -0
- package/ios/SherpaOnnx.mm +27 -3
- package/ios/SherpaOnnxAudioConvert.h +28 -0
- package/ios/SherpaOnnxAudioConvert.mm +698 -0
- package/ios/archive/sherpa-onnx-archive-helper.mm +12 -0
- package/ios/model_detect/sherpa-onnx-model-detect-helper.h +37 -3
- package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +80 -45
- package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +629 -267
- package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +148 -56
- package/ios/model_detect/sherpa-onnx-model-detect.h +72 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-stt.mm +229 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.h +38 -0
- package/ios/model_detect/sherpa-onnx-validate-tts.mm +144 -0
- package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
- package/lib/module/NativeSherpaOnnx.js.map +1 -1
- package/lib/module/audio/index.js +55 -1
- package/lib/module/audio/index.js.map +1 -1
- package/lib/module/download/ModelDownloadManager.js +14 -0
- package/lib/module/download/ModelDownloadManager.js.map +1 -1
- package/lib/module/index.js +10 -0
- package/lib/module/index.js.map +1 -1
- package/lib/module/stt/streaming.js +6 -3
- package/lib/module/stt/streaming.js.map +1 -1
- package/lib/module/tts/index.js +13 -1
- package/lib/module/tts/index.js.map +1 -1
- package/lib/typescript/src/NativeSherpaOnnx.d.ts +32 -3
- package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
- package/lib/typescript/src/audio/index.d.ts +20 -1
- package/lib/typescript/src/audio/index.d.ts.map +1 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts +2 -1
- package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
- package/lib/typescript/src/index.d.ts +10 -0
- package/lib/typescript/src/index.d.ts.map +1 -1
- package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
- package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
- package/lib/typescript/src/tts/index.d.ts +12 -1
- package/lib/typescript/src/tts/index.d.ts.map +1 -1
- package/package.json +6 -1
- package/scripts/check-model-csvs.sh +72 -0
- package/scripts/setup-ios-framework.sh +272 -191
- package/src/NativeSherpaOnnx.ts +37 -3
- package/src/audio/index.ts +84 -1
- package/src/download/ModelDownloadManager.ts +19 -0
- package/src/index.tsx +15 -0
- package/src/stt/streaming.ts +10 -5
- package/src/stt/streamingTypes.ts +1 -1
- package/src/tts/index.ts +25 -1
- package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
- package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
- package/ios/scripts/patch-libarchive-includes.sh +0 -61
- package/ios/scripts/setup-ios-libarchive.sh +0 -98
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SherpaOnnx+PcmLiveStream.mm
|
|
3
|
+
*
|
|
4
|
+
* Native PCM live capture from the microphone via Audio Queue API (AudioQueueNewInput).
|
|
5
|
+
* Captures at a supported hardware rate (16000, 44100, 48000), resamples to the requested
|
|
6
|
+
* target rate, and emits pcmLiveStreamData at target rate (same behavior as Android).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#import "SherpaOnnx.h"
|
|
10
|
+
#import <AVFoundation/AVFoundation.h>
|
|
11
|
+
#import <AudioToolbox/AudioToolbox.h>
|
|
12
|
+
#import <React/RCTLog.h>
|
|
13
|
+
#import <stdlib.h>
|
|
14
|
+
|
|
15
|
+
static const UInt32 kPcmLiveAQNumberBuffers = 3;
|
|
16
|
+
/** Capture sample rates to try in order (match Android CAPTURE_RATES). */
|
|
17
|
+
static const int kPcmLiveCaptureRates[] = { 16000, 44100, 48000 };
|
|
18
|
+
static const size_t kPcmLiveCaptureRatesCount = sizeof(kPcmLiveCaptureRates) / sizeof(kPcmLiveCaptureRates[0]);
|
|
19
|
+
|
|
20
|
+
static NSInteger _pcmLiveTargetSampleRate = 16000;
|
|
21
|
+
static NSInteger _pcmLiveCaptureRate = 16000;
|
|
22
|
+
static __weak SherpaOnnx *_pcmLiveModule = nil;
|
|
23
|
+
static AudioQueueRef _pcmLiveAudioQueue = NULL;
|
|
24
|
+
static AudioQueueBufferRef _pcmLiveAQBuffers[kPcmLiveAQNumberBuffers];
|
|
25
|
+
static volatile BOOL _pcmLiveAQRunning = NO;
|
|
26
|
+
|
|
27
|
+
/**
 * Sends one chunk of 16-bit PCM samples to JS as a "pcmLiveStreamData" event.
 *
 * @param module     Module used to send the event; the call is a no-op when nil.
 * @param samples    Pointer to `count` int16 samples. The bytes are copied before
 *                   this function returns.
 * @param count      Number of samples; the call is a no-op when 0.
 * @param sampleRate Sample rate reported in the event body.
 */
static void emitPcmChunk(SherpaOnnx *module, const int16_t *samples, NSUInteger count, NSInteger sampleRate) {
  if (module == nil) return;
  if (count == 0) return;

  // Snapshot the samples synchronously so the caller is free to reuse or release
  // its buffer as soon as we return.
  NSUInteger byteLength = count * sizeof(int16_t);
  NSData *pcmBytes = [NSData dataWithBytes:samples length:byteLength];

  // Base64 encoding and the event emission itself both run on the main queue.
  dispatch_async(dispatch_get_main_queue(), ^{
    NSString *encoded = [pcmBytes base64EncodedStringWithOptions:0];
    NSDictionary *payload = @{ @"base64Pcm": encoded, @"sampleRate": @(sampleRate) };
    [module sendEventWithName:@"pcmLiveStreamData" body:payload];
  });
}
|
|
40
|
+
|
|
41
|
+
/**
 * Sends a "pcmLiveStreamError" event to JS on the main queue.
 *
 * @param module  Module used to send the event; the call is a no-op when nil.
 * @param message Error text; a nil message is reported as an empty string.
 */
static void emitPcmError(SherpaOnnx *module, NSString *message) {
  if (module == nil) return;

  dispatch_async(dispatch_get_main_queue(), ^{
    // Dictionary literals cannot hold nil, so substitute an empty string.
    NSString *text = (message != nil) ? message : @"";
    [module sendEventWithName:@"pcmLiveStreamError" body:@{ @"message": text }];
  });
}
|
|
49
|
+
|
|
50
|
+
/**
 * Resamples mono Int16 PCM from `fromRate` to `toRate` using linear interpolation
 * (kept in step with the Android resampleInt16 helper).
 *
 * @param input          Source samples.
 * @param inputFrames    Number of samples available in `input`.
 * @param fromRate       Sample rate of `input`, in Hz.
 * @param toRate         Desired output sample rate, in Hz.
 * @param output         Destination buffer.
 * @param outputCapacity Maximum number of samples that may be written to `output`.
 * @return Number of samples written. Returns 0 when there is nothing to convert,
 *         when a buffer pointer is NULL, or when the rates differ and either one
 *         is not positive.
 */
static NSUInteger pcmLiveResampleInt16(const int16_t *input, NSUInteger inputFrames,
                                       int fromRate, int toRate,
                                       int16_t *output, size_t outputCapacity) {
  // Nothing can be read or written; also keeps NULL away from memcpy.
  if (input == NULL || output == NULL || inputFrames == 0 || outputCapacity == 0) {
    return 0;
  }

  if (fromRate == toRate) {
    size_t copy = (inputFrames < outputCapacity) ? inputFrames : outputCapacity;
    memcpy(output, input, copy * sizeof(int16_t));
    return copy;
  }

  // A zero or negative rate would make the ratio zero, negative or infinite, and
  // converting the resulting length to an unsigned integer is undefined behaviour.
  if (fromRate <= 0 || toRate <= 0) {
    return 0;
  }

  double ratio = (double)fromRate / (double)toRate;
  NSUInteger outLength = (NSUInteger)((double)inputFrames / ratio);
  if (outLength > outputCapacity) outLength = outputCapacity;
  if (outLength == 0) return 0;

  for (NSUInteger i = 0; i < outLength; i++) {
    double srcIdx = (double)i * ratio;
    // Clamp both neighbours to the last valid sample so the tail is held flat.
    NSUInteger idx0 = (NSUInteger)srcIdx;
    if (idx0 >= inputFrames) idx0 = inputFrames - 1;
    NSUInteger idx1 = idx0 + 1;
    if (idx1 >= inputFrames) idx1 = inputFrames - 1;
    float frac = (float)(srcIdx - (double)idx0);
    int v0 = (int)input[idx0];
    int v1 = (int)input[idx1];
    int v = (int)(v0 + (v1 - v0) * frac);
    if (v < -32768) v = -32768;
    if (v > 32767) v = 32767;
    output[i] = (int16_t)v;
  }
  return outLength;
}
|
|
79
|
+
|
|
80
|
+
/**
 * AudioQueue input callback: forwards one captured buffer to JS and hands the
 * buffer back to the queue.
 *
 * Only `inAQ` and `inBuffer` are used. When the stream is flagged as not running,
 * or the owning module is gone, the callback returns without re-enqueueing the
 * buffer. In every other case the buffer is re-enqueued exactly once, after any
 * emission.
 */
static void pcmLiveAQInputCallback(void *inUserData,
                                   AudioQueueRef inAQ,
                                   AudioQueueBufferRef inBuffer,
                                   const AudioTimeStamp *inStartTime,
                                   UInt32 inNumPackets,
                                   const AudioStreamPacketDescription *inPacketDesc) {
  (void)inUserData;
  (void)inStartTime;
  (void)inNumPackets;
  (void)inPacketDesc;

  if (!_pcmLiveAQRunning) return;
  SherpaOnnx *owner = _pcmLiveModule;
  if (owner == nil) return;

  const UInt32 capturedBytes = inBuffer->mAudioDataByteSize;
  if (capturedBytes > 0) {
    const int16_t *captured = (const int16_t *)inBuffer->mAudioData;
    const NSUInteger capturedFrames = capturedBytes / sizeof(int16_t);
    const NSInteger outRate = _pcmLiveTargetSampleRate;
    const NSInteger inRate = _pcmLiveCaptureRate;

    if (inRate == outRate) {
      // Capture already matches the requested rate: pass the samples through.
      emitPcmChunk(owner, captured, capturedFrames, outRate);
    } else {
      // Ceiling of capturedFrames * outRate / inRate: an upper bound on the
      // number of frames the resampler can produce.
      const NSUInteger capacity =
          (capturedFrames * (NSUInteger)outRate + (NSUInteger)inRate - 1) / (NSUInteger)inRate;
      if (capacity > 0) {
        int16_t *converted = (int16_t *)malloc(capacity * sizeof(int16_t));
        if (converted == NULL) {
          emitPcmError(owner, @"Failed to allocate resample buffer");
        } else {
          NSUInteger produced = pcmLiveResampleInt16(captured, capturedFrames,
                                                     (int)inRate, (int)outRate,
                                                     converted, capacity);
          if (produced > 0) {
            emitPcmChunk(owner, converted, produced, outRate);
          }
          // emitPcmChunk copies the samples, so the scratch buffer can go now.
          free(converted);
        }
      }
    }
  }

  AudioQueueEnqueueBuffer(inAQ, inBuffer, 0, NULL);
}
|
|
129
|
+
|
|
130
|
+
/**
 * Tears down the live-capture AudioQueue, if one exists.
 *
 * Clears the running flag, stops the queue, frees every allocated buffer slot,
 * disposes the queue and resets the global handle to NULL. Does nothing when no
 * queue is currently set.
 */
static void pcmLiveStopQueue(void) {
  AudioQueueRef queue = _pcmLiveAudioQueue;
  if (queue == NULL) return;

  // Clear the flag first so the input callback bails out while we tear down.
  _pcmLiveAQRunning = NO;
  AudioQueueStop(queue, true);

  for (UInt32 slot = 0; slot < kPcmLiveAQNumberBuffers; slot++) {
    AudioQueueBufferRef buffer = _pcmLiveAQBuffers[slot];
    if (buffer == NULL) continue;
    AudioQueueFreeBuffer(queue, buffer);
    _pcmLiveAQBuffers[slot] = NULL;
  }

  AudioQueueDispose(queue, true);
  _pcmLiveAudioQueue = NULL;
}
|
|
143
|
+
|
|
144
|
+
@implementation SherpaOnnx (PcmLiveStream)

/** Frame count matching the 32768-byte cap applied to each capture buffer (16-bit mono). */
static const UInt32 kPcmLiveMaxBufferFrames = 16384;

/**
 * Converts a JS-supplied buffer size to a frame count without overflow.
 *
 * Non-positive values and NaN map to 0 ("use the default size"). Values at or
 * above the cap are pinned to the cap, because converting an out-of-range double
 * to UInt32 is undefined behaviour.
 */
static UInt32 pcmLiveBufferFramesFromDouble(double value) {
  if (!(value > 0)) return 0;
  if (value >= (double)kPcmLiveMaxBufferFrames) return kPcmLiveMaxBufferFrames;
  return (UInt32)value;
}

/**
 * Starts microphone capture (fallback / non-codegen entry point).
 *
 * @param optionsArg Optional NSDictionary. `sampleRate` (> 0) sets the target rate,
 *                   default 16000. `bufferSizeFrames` (> 0) sets the per-buffer
 *                   frame count, default 0 meaning "use the built-in size".
 *                   Any other object type is ignored.
 * @param resolve    Called with nil once the queue has started.
 * @param reject     Called with code PCM_LIVE_STREAM_ERROR on failure.
 */
- (void)startPcmLiveStream:(id __unsafe_unretained)optionsArg
                   resolve:(RCTPromiseResolveBlock)resolve
                    reject:(RCTPromiseRejectBlock)reject
{
  int targetRate = 16000;
  UInt32 bufferSizeFrames = 0;

  // Parse optionsArg coming from JS (fallback / non-codegen path).
  if ([optionsArg isKindOfClass:[NSDictionary class]]) {
    NSDictionary *dict = (NSDictionary *)optionsArg;

    id sampleRateValue = dict[@"sampleRate"];
    if ([sampleRateValue respondsToSelector:@selector(intValue)]) {
      int v = (int)[sampleRateValue intValue];
      if (v > 0) targetRate = v;
    }

    id bufferSizeValue = dict[@"bufferSizeFrames"];
    if ([bufferSizeValue respondsToSelector:@selector(doubleValue)]) {
      bufferSizeFrames = pcmLiveBufferFramesFromDouble([bufferSizeValue doubleValue]);
    }
  }

  [self _startPcmLiveStreamWithTargetRate:targetRate bufferSizeFrames:bufferSizeFrames resolve:resolve reject:reject];
}

#if __has_include(<SherpaOnnxSpec/SherpaOnnxSpec.h>)
/**
 * Starts microphone capture (codegen entry point). Reads the same two options as
 * the dictionary-based variant, with the same defaults.
 */
- (void)startPcmLiveStreamWithOptions:(JS::NativeSherpaOnnx::SpecStartPcmLiveStreamOptions &)options
                              resolve:(RCTPromiseResolveBlock)resolve
                               reject:(RCTPromiseRejectBlock)reject
{
  int targetRate = 16000;
  if (options.sampleRate()) {
    targetRate = (int)options.sampleRate();
    if (targetRate <= 0) targetRate = 16000;
  }
  UInt32 bufferSizeFrames = 0;
  if (options.bufferSizeFrames().has_value()) {
    bufferSizeFrames = pcmLiveBufferFramesFromDouble(options.bufferSizeFrames().value());
  }
  [self _startPcmLiveStreamWithTargetRate:targetRate bufferSizeFrames:bufferSizeFrames resolve:resolve reject:reject];
}
#endif

/**
 * Shared start logic: stops any previous queue, configures the audio session,
 * creates the input queue, allocates and enqueues the buffers, and starts capture.
 *
 * @param targetRate       Sample rate reported to JS; captured audio is resampled
 *                         to it when the capture rate differs.
 * @param bufferSizeFrames Requested frames per buffer. 0 selects 2048 bytes;
 *                         otherwise the byte size is clamped to [1024, 32768].
 * @param resolve          Called with nil after AudioQueueStart succeeds.
 * @param reject           Called with code PCM_LIVE_STREAM_ERROR on any failure.
 */
- (void)_startPcmLiveStreamWithTargetRate:(int)targetRate
                         bufferSizeFrames:(UInt32)bufferSizeFrames
                                  resolve:(RCTPromiseResolveBlock)resolve
                                   reject:(RCTPromiseRejectBlock)reject
{
  // Restarting is allowed: tear down whatever queue is still around first.
  pcmLiveStopQueue();

  _pcmLiveTargetSampleRate = targetRate;
  _pcmLiveModule = self;

  NSError *error = nil;
  AVAudioSession *session = [AVAudioSession sharedInstance];
  if (![session setCategory:AVAudioSessionCategoryPlayAndRecord
                       mode:AVAudioSessionModeDefault
                    options:AVAudioSessionCategoryOptionDefaultToSpeaker | AVAudioSessionCategoryOptionAllowBluetooth
                      error:&error]) {
    RCTLog(@"%@", [NSString stringWithFormat:@"[SherpaOnnx PcmLive] setCategory error: %@", error]);
    reject(@"PCM_LIVE_STREAM_ERROR", error.localizedDescription ?: @"Failed to set audio session", error);
    return;
  }
  if (![session setActive:YES withOptions:0 error:&error]) {
    RCTLog(@"%@", [NSString stringWithFormat:@"[SherpaOnnx PcmLive] setActive error: %@", error]);
    reject(@"PCM_LIVE_STREAM_ERROR", error.localizedDescription ?: @"Failed to activate audio session", error);
    return;
  }

  // 16-bit signed, packed, mono linear PCM; only the sample rate varies below.
  AudioStreamBasicDescription fmt;
  memset(&fmt, 0, sizeof(fmt));
  fmt.mFormatID = kAudioFormatLinearPCM;
  fmt.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  fmt.mChannelsPerFrame = 1;
  fmt.mBitsPerChannel = 16;
  fmt.mBytesPerPacket = 2;
  fmt.mBytesPerFrame = 2;
  fmt.mFramesPerPacket = 1;

  // Try each candidate capture rate in order and keep the first one that works.
  OSStatus status = noErr;
  int chosenCaptureRate = 16000;
  for (size_t r = 0; r < kPcmLiveCaptureRatesCount; r++) {
    chosenCaptureRate = kPcmLiveCaptureRates[r];
    fmt.mSampleRate = (Float64)chosenCaptureRate;
    status = AudioQueueNewInput(&fmt, pcmLiveAQInputCallback, NULL, NULL, NULL, 0, &_pcmLiveAudioQueue);
    if (status == noErr) break;
    _pcmLiveAudioQueue = NULL;
  }
  if (status != noErr || _pcmLiveAudioQueue == NULL) {
    [session setActive:NO withOptions:0 error:nil];
    reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueNewInput failed for all rates (last: %d)", (int)status], nil);
    return;
  }
  _pcmLiveCaptureRate = chosenCaptureRate;

  UInt32 bufferByteSize = 2048;
  if (bufferSizeFrames > 0) {
    // Cap the frame count before multiplying so a huge request cannot wrap
    // around in 32 bits and end up at the minimum size.
    UInt32 frames = (bufferSizeFrames > kPcmLiveMaxBufferFrames) ? kPcmLiveMaxBufferFrames : bufferSizeFrames;
    bufferByteSize = frames * 2; /* 16-bit mono */
    if (bufferByteSize < 1024) bufferByteSize = 1024;
    if (bufferByteSize > 32768) bufferByteSize = 32768;
  }

  for (UInt32 i = 0; i < kPcmLiveAQNumberBuffers; i++) {
    status = AudioQueueAllocateBuffer(_pcmLiveAudioQueue, bufferByteSize, &_pcmLiveAQBuffers[i]);
    if (status != noErr) {
      pcmLiveStopQueue();
      [session setActive:NO withOptions:0 error:nil];
      reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueAllocateBuffer failed: %d", (int)status], nil);
      return;
    }
    OSStatus enqueueStatus = AudioQueueEnqueueBuffer(_pcmLiveAudioQueue, _pcmLiveAQBuffers[i], 0, NULL);
    if (enqueueStatus != noErr) {
      // Capture can continue with the remaining buffers, so only log the failure.
      RCTLog(@"%@", [NSString stringWithFormat:@"[SherpaOnnx PcmLive] AudioQueueEnqueueBuffer error: %d", (int)enqueueStatus]);
    }
  }

  // Set before starting so the very first callback is not dropped.
  _pcmLiveAQRunning = YES;
  status = AudioQueueStart(_pcmLiveAudioQueue, NULL);
  if (status != noErr) {
    pcmLiveStopQueue();
    [session setActive:NO withOptions:0 error:nil];
    reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueStart failed: %d", (int)status], nil);
    return;
  }

  resolve(nil);
}

/** Stops capture; forwards to -stopPcmLiveStreamWithResolve:reject:. */
- (void)stopPcmLiveStream:(RCTPromiseResolveBlock)resolve
                   reject:(RCTPromiseRejectBlock)reject
{
  [self stopPcmLiveStreamWithResolve:resolve reject:reject];
}

/**
 * Stops and disposes the capture queue, deactivates the shared audio session
 * (notifying other audio clients), and resolves with nil. `reject` is never called.
 */
- (void)stopPcmLiveStreamWithResolve:(RCTPromiseResolveBlock)resolve
                              reject:(RCTPromiseRejectBlock)reject
{
  pcmLiveStopQueue();
  [[AVAudioSession sharedInstance] setActive:NO withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:nil];
  resolve(nil);
}

@end
|
package/ios/SherpaOnnx+STT.mm
CHANGED
|
@@ -38,6 +38,7 @@ static NSString *sttModelKindToNSString(sherpaonnx::SttModelKind kind) {
|
|
|
38
38
|
case K::kFunAsrNano: return @"funasr_nano";
|
|
39
39
|
case K::kFireRedAsr: return @"fire_red_asr";
|
|
40
40
|
case K::kMoonshine: return @"moonshine";
|
|
41
|
+
case K::kMoonshineV2: return @"moonshine_v2";
|
|
41
42
|
case K::kDolphin: return @"dolphin";
|
|
42
43
|
case K::kCanary: return @"canary";
|
|
43
44
|
case K::kOmnilingual: return @"omnilingual";
|
|
@@ -268,6 +269,7 @@ static NSDictionary *sttResultToDict(const sherpaonnx::SttRecognitionResult& r)
|
|
|
268
269
|
|
|
269
270
|
NSMutableDictionary *resultDict = [NSMutableDictionary dictionary];
|
|
270
271
|
resultDict[@"success"] = @(result.ok);
|
|
272
|
+
resultDict[@"isHardwareSpecificUnsupported"] = @(result.isHardwareSpecificUnsupported);
|
|
271
273
|
if (!result.error.empty()) {
|
|
272
274
|
resultDict[@"error"] = [NSString stringWithUTF8String:result.error.c_str()];
|
|
273
275
|
}
|
package/ios/SherpaOnnx+TTS.mm
CHANGED
|
@@ -232,6 +232,13 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
232
232
|
}
|
|
233
233
|
resultDict[@"detectedModels"] = detectedModelsArray;
|
|
234
234
|
resultDict[@"modelType"] = ttsModelKindToNSString(result.selectedKind);
|
|
235
|
+
if (!result.lexiconLanguageCandidates.empty()) {
|
|
236
|
+
NSMutableArray *langCandidates = [NSMutableArray array];
|
|
237
|
+
for (const auto& id : result.lexiconLanguageCandidates) {
|
|
238
|
+
[langCandidates addObject:[NSString stringWithUTF8String:id.c_str()]];
|
|
239
|
+
}
|
|
240
|
+
resultDict[@"lexiconLanguageCandidates"] = langCandidates;
|
|
241
|
+
}
|
|
235
242
|
resolve(resultDict);
|
|
236
243
|
} @catch (NSException *exception) {
|
|
237
244
|
NSString *errorMsg = [NSString stringWithFormat:@"TTS model detection failed: %@", exception.reason];
|
|
@@ -1041,6 +1048,16 @@ std::vector<std::string> SplitTtsTokens(const std::string &text) {
|
|
|
1041
1048
|
}
|
|
1042
1049
|
}
|
|
1043
1050
|
|
|
1051
|
+
/**
 * iOS stub for the Android-only "copy file to content URI" call.
 *
 * None of the path arguments are used and `resolve` is never called: the promise
 * is always rejected with code TTS_SAVE_ERROR.
 */
- (void)copyFileToContentUri:(NSString *)filePath
                directoryUri:(NSString *)directoryUri
                    filename:(NSString *)filename
                    mimeType:(NSString *)mimeType
                     resolve:(RCTPromiseResolveBlock)resolve
                      reject:(RCTPromiseRejectBlock)reject
{
  NSString *unsupportedMessage = @"Copy file to content URI is not supported on iOS (Android SAF only)";
  reject(@"TTS_SAVE_ERROR", unsupportedMessage, nil);
}
|
|
1060
|
+
|
|
1044
1061
|
- (void)saveTtsTextToContentUri:(NSString *)text
|
|
1045
1062
|
directoryUri:(NSString *)directoryUri
|
|
1046
1063
|
filename:(NSString *)filename
|
package/ios/SherpaOnnx.mm
CHANGED
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
#import "SherpaOnnx+Assets.h"
|
|
12
12
|
#import "sherpa-onnx-archive-helper.h"
|
|
13
13
|
#import <React/RCTLog.h>
|
|
14
|
+
#import <AVFoundation/AVFoundation.h>
|
|
15
|
+
#import "SherpaOnnxAudioConvert.h"
|
|
14
16
|
#if __has_include("SherpaOnnx-Swift.h")
|
|
15
17
|
#import "SherpaOnnx-Swift.h"
|
|
16
18
|
#endif
|
|
@@ -36,7 +38,7 @@
|
|
|
36
38
|
|
|
37
39
|
- (NSArray<NSString *> *)supportedEvents
|
|
38
40
|
{
|
|
39
|
-
return @[ @"ttsStreamChunk", @"ttsStreamEnd", @"ttsStreamError", @"extractTarBz2Progress" ];
|
|
41
|
+
return @[ @"ttsStreamChunk", @"ttsStreamEnd", @"ttsStreamError", @"extractTarBz2Progress", @"pcmLiveStreamData", @"pcmLiveStreamError" ];
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
- (void)resolveModelPath:(JS::NativeSherpaOnnx::SpecResolveModelPathConfig &)config
|
|
@@ -93,6 +95,12 @@
|
|
|
93
95
|
resolve(@{ @"providerCompiled": @NO, @"hasAccelerator": @NO, @"canInit": @NO });
|
|
94
96
|
}
|
|
95
97
|
|
|
98
|
+
/**
 * Reports QNN SoC information. On iOS this always resolves with a null `soc` and
 * `isSupported` set to NO; `reject` is never called.
 */
- (void)getDeviceQnnSoc:(RCTPromiseResolveBlock)resolve
                 reject:(RCTPromiseRejectBlock)reject
{
  NSDictionary *socInfo = @{ @"soc": [NSNull null], @"isSupported": @NO };
  resolve(socInfo);
}
|
|
103
|
+
|
|
96
104
|
// NNAPI is Android-only; on iOS we always return no support.
|
|
97
105
|
- (void)getNnapiSupport:(NSString *)modelBase64
|
|
98
106
|
resolve:(RCTPromiseResolveBlock)resolve
|
|
@@ -183,7 +191,16 @@
|
|
|
183
191
|
resolve:(RCTPromiseResolveBlock)resolve
|
|
184
192
|
reject:(RCTPromiseRejectBlock)reject
|
|
185
193
|
{
|
|
186
|
-
|
|
194
|
+
NSError *error = nil;
|
|
195
|
+
if (![SherpaOnnxAudioConvert convertAudioToFormat:inputPath
|
|
196
|
+
outputPath:outputPath
|
|
197
|
+
format:format
|
|
198
|
+
outputSampleRateHz:outputSampleRateHz.intValue
|
|
199
|
+
error:&error]) {
|
|
200
|
+
reject(@"CONVERT_ERROR", error ? error.localizedDescription : @"Conversion failed", error);
|
|
201
|
+
return;
|
|
202
|
+
}
|
|
203
|
+
resolve(nil);
|
|
187
204
|
}
|
|
188
205
|
|
|
189
206
|
- (void)convertAudioToWav16k:(NSString *)inputPath
|
|
@@ -191,7 +208,14 @@
|
|
|
191
208
|
resolve:(RCTPromiseResolveBlock)resolve
|
|
192
209
|
reject:(RCTPromiseRejectBlock)reject
|
|
193
210
|
{
|
|
194
|
-
|
|
211
|
+
NSError *error = nil;
|
|
212
|
+
if (![SherpaOnnxAudioConvert convertAudioToWav16k:inputPath
|
|
213
|
+
outputPath:outputPath
|
|
214
|
+
error:&error]) {
|
|
215
|
+
reject(@"CONVERT_ERROR", error ? error.localizedDescription : @"Conversion to WAV 16kHz mono failed", error);
|
|
216
|
+
return;
|
|
217
|
+
}
|
|
218
|
+
resolve(nil);
|
|
195
219
|
}
|
|
196
220
|
|
|
197
221
|
- (void)getAvailableProviders:(RCTPromiseResolveBlock)resolve
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#import <Foundation/Foundation.h>
|
|
2
|
+
|
|
3
|
+
NS_ASSUME_NONNULL_BEGIN
|
|
4
|
+
|
|
5
|
+
@interface SherpaOnnxAudioConvert : NSObject

/// Converts any supported audio file to a 16 kHz, mono, 16-bit PCM WAV file.
///
/// @param inputPath  Path of the audio file to read.
/// @param outputPath Path the WAV file is written to.
/// @param error      Populated when the conversion fails.
/// @return YES on success, NO on failure.
+ (BOOL)convertAudioToWav16k:(NSString *)inputPath
                  outputPath:(NSString *)outputPath
                       error:(NSError **)error;

/// Converts an arbitrary audio file to the requested format.
///
/// @param inputPath          Path of the audio file to read.
/// @param outputPath         Path the converted file is written to.
/// @param format             Target format name, e.g. "mp3", "flac" or "wav".
/// @param outputSampleRateHz Output sample rate in Hz; mostly used for MP3 encoding.
/// @param error              Populated when the conversion fails.
/// @return YES on success, NO on failure.
+ (BOOL)convertAudioToFormat:(NSString *)inputPath
                  outputPath:(NSString *)outputPath
                      format:(NSString *)format
          outputSampleRateHz:(int)outputSampleRateHz
                       error:(NSError **)error;

@end
|
|
27
|
+
|
|
28
|
+
NS_ASSUME_NONNULL_END
|