react-native-sherpa-onnx 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +28 -15
  2. package/SherpaOnnx.podspec +13 -5
  3. package/android/prebuilt-download.gradle +18 -5
  4. package/android/prebuilt-versions.gradle +8 -4
  5. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +43 -142
  6. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +12 -4
  7. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +694 -307
  8. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +194 -99
  9. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect.h +90 -0
  10. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +3 -0
  11. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +70 -0
  12. package/android/src/main/java/com/sherpaonnx/SherpaOnnxPcmCapture.kt +150 -0
  13. package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +39 -19
  14. package/ios/SherpaOnnx+PcmLiveStream.mm +288 -0
  15. package/ios/SherpaOnnx+STT.mm +2 -0
  16. package/ios/SherpaOnnx.mm +1 -1
  17. package/ios/model_detect/sherpa-onnx-model-detect-helper.h +9 -3
  18. package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +38 -54
  19. package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +620 -267
  20. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +131 -28
  21. package/ios/model_detect/sherpa-onnx-model-detect.h +70 -0
  22. package/ios/stt/sherpa-onnx-stt-wrapper.mm +4 -0
  23. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  24. package/lib/module/audio/index.js +52 -0
  25. package/lib/module/audio/index.js.map +1 -1
  26. package/lib/module/stt/streaming.js +6 -3
  27. package/lib/module/stt/streaming.js.map +1 -1
  28. package/lib/typescript/src/NativeSherpaOnnx.d.ts +16 -2
  29. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  30. package/lib/typescript/src/audio/index.d.ts +17 -0
  31. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  32. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  33. package/lib/typescript/src/stt/streamingTypes.d.ts +1 -1
  34. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  35. package/package.json +6 -1
  36. package/scripts/check-model-csvs.sh +72 -0
  37. package/scripts/setup-ios-framework.sh +48 -48
  38. package/src/NativeSherpaOnnx.ts +18 -2
  39. package/src/audio/index.ts +81 -0
  40. package/src/stt/streaming.ts +10 -5
  41. package/src/stt/streamingTypes.ts +1 -1
  42. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  43. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
@@ -0,0 +1,150 @@
1
+ package com.sherpaonnx
2
+
3
+ import android.media.AudioFormat
4
+ import android.media.AudioRecord
5
+ import android.media.MediaRecorder
6
+ import android.util.Base64
7
+ import android.util.Log
8
+ import java.nio.ByteBuffer
9
+ import java.nio.ByteOrder
10
+ import kotlin.concurrent.thread
11
+ import kotlin.math.round
12
+
13
/**
 * Native PCM capture from the microphone with optional resampling to a target sample rate.
 *
 * Opens a mono, 16-bit [AudioRecord] at the first usable rate ([targetSampleRate] is tried first,
 * then the [CAPTURE_RATES] fallbacks) and, when the capture rate differs from the target rate,
 * resamples every chunk with linear interpolation. Each chunk is delivered to [onChunk] as a
 * Base64 (NO_WRAP) string of little-endian Int16 samples together with [targetSampleRate].
 *
 * @param targetSampleRate sample rate (Hz) of the PCM handed to [onChunk]; must be positive.
 * @param channelCount accepted for API compatibility; this class does not read it and always
 *   records with [AudioFormat.CHANNEL_IN_MONO].
 * @param bufferSizeFrames requested buffer size in frames; values <= 0 select 0.1 s worth of
 *   frames at [targetSampleRate]. The platform minimum buffer size is used when it is larger.
 * @param onChunk called on the capture thread for every non-empty chunk.
 * @param onError called with a human-readable message when capture cannot start or fails.
 * @param logTag tag used for logcat output.
 */
class SherpaOnnxPcmCapture(
    private val targetSampleRate: Int,
    private val channelCount: Int,
    private val bufferSizeFrames: Int,
    private val onChunk: (base64Pcm: String, sampleRate: Int) -> Unit,
    private val onError: (message: String) -> Unit,
    private val logTag: String = "SherpaOnnxPcmCapture"
) {
    // Written by start()/stop() on the caller's thread and cleared by the capture thread.
    @Volatile
    private var audioRecord: AudioRecord? = null

    @Volatile
    private var running = false
    private var captureThread: Thread? = null

    companion object {
        /** Fallback capture sample rates to try in order (device-dependent). */
        private val CAPTURE_RATES = intArrayOf(16000, 44100, 48000)

        /**
         * Resample Int16 PCM from [fromRate] to [toRate] using linear interpolation.
         *
         * Returns [input] itself when the rates are equal; otherwise a new array whose length is
         * `input.size * toRate / fromRate` rounded to the nearest integer (never negative).
         */
        private fun resampleInt16(
            input: ShortArray,
            fromRate: Int,
            toRate: Int
        ): ShortArray {
            if (fromRate == toRate) return input
            val ratio = fromRate.toDouble() / toRate
            val outLength = round(input.size / ratio).toInt().coerceAtLeast(0)
            val result = ShortArray(outLength)
            for (i in 0 until outLength) {
                val srcIdx = i * ratio
                // Clamp both neighbours so the last output sample never reads past the input.
                val idx0 = srcIdx.toInt().coerceIn(0, input.size - 1)
                val idx1 = (idx0 + 1).coerceAtMost(input.size - 1)
                val frac = (srcIdx - idx0).toFloat()
                val v0 = input[idx0].toInt()
                val v1 = input[idx1].toInt()
                result[i] = (v0 + (v1 - v0) * frac).toInt().toShort()
            }
            return result
        }
    }

    /**
     * Start capture. Uses a supported capture rate and resamples to [targetSampleRate] before emitting.
     *
     * Does nothing (besides logging) when capture is already running. Failures to set up the
     * recorder are reported through [onError] on the calling thread; failures while recording are
     * reported through [onError] on the capture thread.
     */
    fun start() {
        if (running) {
            Log.w(logTag, "start: already running")
            return
        }
        if (targetSampleRate <= 0) {
            // A non-positive target rate would otherwise produce an endless stream of empty chunks.
            Log.e(logTag, "start: invalid target sample rate $targetSampleRate")
            onError("Invalid target sample rate: $targetSampleRate")
            return
        }
        val bufferSizeBytes = if (bufferSizeFrames > 0) {
            bufferSizeFrames * 2 // 2 bytes per sample (16-bit mono)
        } else {
            (0.1 * targetSampleRate).toInt() * 2 // 0.1 s default (16-bit mono)
        }
        // Prefer capturing directly at the target rate so no resampling is needed; fall back to
        // the common hardware rates when the platform rejects it.
        val captureRate = (intArrayOf(targetSampleRate) + CAPTURE_RATES).firstOrNull { rate ->
            val size = AudioRecord.getMinBufferSize(rate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
            size != AudioRecord.ERROR && size != AudioRecord.ERROR_BAD_VALUE
        } ?: 44100
        val minBuf = AudioRecord.getMinBufferSize(captureRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT)
        val bufSize = minBuf.coerceAtLeast(bufferSizeBytes)
        val record = try {
            AudioRecord(
                MediaRecorder.AudioSource.VOICE_RECOGNITION,
                captureRate,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                bufSize
            )
        } catch (e: SecurityException) {
            Log.e(logTag, "start: RECORD_AUDIO permission not granted", e)
            onError("RECORD_AUDIO permission not granted")
            return
        } catch (e: IllegalArgumentException) {
            // Thrown by the AudioRecord constructor for unsupported parameters.
            Log.e(logTag, "start: invalid AudioRecord parameters", e)
            onError(e.message ?: "AudioRecord failed to initialize")
            return
        }
        if (record.state != AudioRecord.STATE_INITIALIZED) {
            Log.e(logTag, "start: AudioRecord not initialized")
            onError("AudioRecord failed to initialize")
            record.release()
            return
        }
        audioRecord = record
        running = true
        captureThread = thread(name = "SherpaOnnxPcmCapture") {
            val shortBuf = ShortArray(bufSize / 2)
            try {
                record.startRecording()
                if (running && record.recordingState != AudioRecord.RECORDSTATE_RECORDING) {
                    Log.e(logTag, "Capture thread: AudioRecord did not enter recording state")
                    onError("AudioRecord failed to start recording")
                }
                while (running && record.recordingState == AudioRecord.RECORDSTATE_RECORDING) {
                    val read = record.read(shortBuf, 0, shortBuf.size)
                    if (read < 0) {
                        // Negative values are AudioRecord error codes; retrying would busy-spin.
                        if (running) {
                            Log.e(logTag, "Capture thread: AudioRecord.read failed with code $read")
                            onError("AudioRecord read failed: $read")
                        }
                        break
                    }
                    if (read == 0) continue
                    val chunk = shortBuf.copyOf(read)
                    val toEmit = if (captureRate != targetSampleRate) {
                        resampleInt16(chunk, captureRate, targetSampleRate)
                    } else {
                        chunk
                    }
                    // Very short reads can resample to zero frames; there is nothing to emit then.
                    if (toEmit.isEmpty()) continue
                    val byteBuf = ByteBuffer.allocate(toEmit.size * 2).order(ByteOrder.LITTLE_ENDIAN)
                    for (s in toEmit) byteBuf.putShort(s)
                    val base64 = Base64.encodeToString(byteBuf.array(), Base64.NO_WRAP)
                    onChunk(base64, targetSampleRate)
                }
            } catch (e: Exception) {
                // Exceptions caused by stop() tearing the recorder down are expected; stay quiet.
                if (running) {
                    Log.e(logTag, "Capture thread error", e)
                    onError(e.message ?: "Capture error")
                }
            } finally {
                try {
                    record.stop()
                } catch (_: Exception) { }
                record.release()
                // Only reset shared state if it still belongs to this session, so a late-finishing
                // thread cannot clobber a newer start(). Resetting `running` lets start() work
                // again after the thread ended on its own (error or recorder stopped).
                if (audioRecord === record) {
                    audioRecord = null
                    running = false
                }
            }
        }
    }

    /** Stop capture and release resources. Waits up to 2 s for the capture thread to finish. */
    fun stop() {
        running = false
        // Actively stop AudioRecord to unblock any pending read()
        val record = audioRecord
        if (record != null) {
            try {
                record.stop()
            } catch (_: Exception) {
                // Ignore; the capture thread's finally block also handles stop/release safely
            }
        }
        val worker = captureThread
        // Joining the current thread would only stall for the full timeout (stop() called from a callback).
        if (worker != null && worker !== Thread.currentThread()) {
            worker.join(2000)
        }
        captureThread = null
        audioRecord = null
    }
}
@@ -2,6 +2,7 @@ package com.sherpaonnx
2
2
 
3
3
  import android.content.Context
4
4
  import android.net.Uri
5
+ import android.os.HandlerThread
5
6
  import android.util.Log
6
7
  import com.facebook.react.bridge.Arguments
7
8
  import com.facebook.react.bridge.Promise
@@ -51,6 +52,9 @@ internal class SherpaOnnxSttHelper(
51
52
 
52
53
  private val instances = ConcurrentHashMap<String, SttEngineInstance>()
53
54
 
55
+ private val initThread = HandlerThread("stt-init").also { it.start() }
56
+ private val initHandler = android.os.Handler(initThread.looper)
57
+
54
58
  private fun getInstance(instanceId: String): SttEngineInstance? = instances[instanceId]
55
59
 
56
60
  /** Hotwords are supported for transducer and NeMo transducer models (sherpa-onnx; NeMo: https://github.com/k2-fsa/sherpa-onnx/pull/3077). */
@@ -277,26 +281,33 @@ internal class SherpaOnnxSttHelper(
277
281
  )
278
282
  inst.lastRecognizerConfig = config
279
283
  inst.currentSttModelType = modelTypeStr
280
- inst.recognizer = OfflineRecognizer(config = config)
281
-
282
-
283
-
284
- val resultMap = Arguments.createMap()
285
- resultMap.putBoolean("success", true)
286
- resultMap.putString("modelType", modelTypeStr)
287
- resultMap.putString("decodingMethod", config.decodingMethod)
288
- val detectedModelsArray = Arguments.createArray()
289
- for (model in detectedModels) {
290
- val modelMap = model as? HashMap<*, *>
291
- if (modelMap != null) {
292
- val modelResultMap = Arguments.createMap()
293
- modelResultMap.putString("type", modelMap["type"] as? String ?: "")
294
- modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
295
- detectedModelsArray.pushMap(modelResultMap)
284
+ // Defer recognizer creation to the dedicated background thread so release() of the previous
285
+ // recognizer can complete off the UI thread (avoids "destroyed mutex" / SIGSEGV when switching models).
286
+ initHandler.post {
287
+ try {
288
+ inst.recognizer = OfflineRecognizer(config = config)
289
+ val resultMap = Arguments.createMap()
290
+ resultMap.putBoolean("success", true)
291
+ resultMap.putString("modelType", modelTypeStr)
292
+ resultMap.putString("decodingMethod", config.decodingMethod)
293
+ val detectedModelsArray = Arguments.createArray()
294
+ for (model in detectedModels) {
295
+ val modelMap = model as? HashMap<*, *>
296
+ if (modelMap != null) {
297
+ val modelResultMap = Arguments.createMap()
298
+ modelResultMap.putString("type", modelMap["type"] as? String ?: "")
299
+ modelResultMap.putString("modelDir", modelMap["modelDir"] as? String ?: "")
300
+ detectedModelsArray.pushMap(modelResultMap)
301
+ }
302
+ }
303
+ resultMap.putArray("detectedModels", detectedModelsArray)
304
+ promise.resolve(resultMap)
305
+ } catch (e: Exception) {
306
+ val errorMsg = "Exception creating recognizer: ${e.message ?: e.javaClass.simpleName}"
307
+ Log.e(logTag, errorMsg, e)
308
+ promise.reject("INIT_ERROR", errorMsg, e)
296
309
  }
297
310
  }
298
- resultMap.putArray("detectedModels", detectedModelsArray)
299
- promise.resolve(resultMap)
300
311
  } catch (e: Exception) {
301
312
  val errorMsg = "Exception during initialization: ${e.message ?: e.javaClass.simpleName}"
302
313
  Log.e(logTag, errorMsg, e)
@@ -588,7 +599,16 @@ internal class SherpaOnnxSttHelper(
588
599
  preprocessor = path(paths, "moonshinePreprocessor"),
589
600
  encoder = path(paths, "moonshineEncoder"),
590
601
  uncachedDecoder = path(paths, "moonshineUncachedDecoder"),
591
- cachedDecoder = path(paths, "moonshineCachedDecoder")
602
+ cachedDecoder = path(paths, "moonshineCachedDecoder"),
603
+ mergedDecoder = ""
604
+ ),
605
+ tokens = path(paths, "tokens"),
606
+ modelType = "moonshine"
607
+ )
608
+ "moonshine_v2" -> OfflineModelConfig(
609
+ moonshine = OfflineMoonshineModelConfig(
610
+ encoder = path(paths, "moonshineEncoder"),
611
+ mergedDecoder = path(paths, "moonshineMergedDecoder")
592
612
  ),
593
613
  tokens = path(paths, "tokens"),
594
614
  modelType = "moonshine"
@@ -0,0 +1,288 @@
1
+ /**
2
+ * SherpaOnnx+PcmLiveStream.mm
3
+ *
4
+ * Native PCM live capture from the microphone via Audio Queue API (AudioQueueNewInput).
5
+ * Captures at a supported hardware rate (16000, 44100, 48000), resamples to the requested
6
+ * target rate, and emits pcmLiveStreamData at target rate (same behavior as Android).
7
+ */
8
+
9
+ #import "SherpaOnnx.h"
10
+ #import <AVFoundation/AVFoundation.h>
11
+ #import <AudioToolbox/AudioToolbox.h>
12
+ #import <React/RCTLog.h>
13
+ #import <stdlib.h>
14
+
15
+ static const UInt32 kPcmLiveAQNumberBuffers = 3;
16
+ /** Capture sample rates to try in order (match Android CAPTURE_RATES). */
17
+ static const int kPcmLiveCaptureRates[] = { 16000, 44100, 48000 };
18
+ static const size_t kPcmLiveCaptureRatesCount = sizeof(kPcmLiveCaptureRates) / sizeof(kPcmLiveCaptureRates[0]);
19
+
20
+ static NSInteger _pcmLiveTargetSampleRate = 16000;
21
+ static NSInteger _pcmLiveCaptureRate = 16000;
22
+ static __weak SherpaOnnx *_pcmLiveModule = nil;
23
+ static AudioQueueRef _pcmLiveAudioQueue = NULL;
24
+ static AudioQueueBufferRef _pcmLiveAQBuffers[kPcmLiveAQNumberBuffers];
25
+ static volatile BOOL _pcmLiveAQRunning = NO;
26
+
27
/**
 * Emit one chunk of Int16 PCM to JS as a "pcmLiveStreamData" event.
 *
 * @param module     Event emitter; nothing happens when nil.
 * @param samples    Pointer to `count` Int16 samples; only read during this call.
 * @param count      Number of samples; nothing happens when 0.
 * @param sampleRate Sample rate reported in the event body.
 */
static void emitPcmChunk(SherpaOnnx *module, const int16_t *samples, NSUInteger count, NSInteger sampleRate) {
  if (!module || samples == NULL || count == 0) return;
  NSString *base64 = nil;
  @autoreleasepool {
    // Encode on the calling thread: the Base64 string is an independent copy, so the
    // samples do not have to outlive this call and the main queue is spared the encoding work.
    NSData *view = [NSData dataWithBytesNoCopy:(void *)samples
                                        length:count * sizeof(int16_t)
                                  freeWhenDone:NO];
    base64 = [view base64EncodedStringWithOptions:0];
  }
  // Dispatch the React Native event emission to the main queue to avoid
  // bridge thread-safety issues.
  dispatch_async(dispatch_get_main_queue(), ^{
    [module sendEventWithName:@"pcmLiveStreamData"
                         body:@{ @"base64Pcm": base64, @"sampleRate": @(sampleRate) }];
  });
}
40
+
41
/**
 * Emit a "pcmLiveStreamError" event carrying `message` (empty string when nil).
 * Does nothing when `module` is nil. The event is sent from the main queue, like
 * the other React Native events in this file.
 */
static void emitPcmError(SherpaOnnx *module, NSString *message) {
  if (module == nil) {
    return;
  }
  dispatch_queue_t mainQueue = dispatch_get_main_queue();
  dispatch_async(mainQueue, ^{
    NSDictionary *payload = @{ @"message": message ?: @"" };
    [module sendEventWithName:@"pcmLiveStreamError" body:payload];
  });
}
49
+
50
/**
 * Resample Int16 PCM from fromRate to toRate using linear interpolation.
 *
 * The output length is inputFrames * toRate / fromRate rounded to the nearest integer
 * (the Android resampleInt16 counterpart rounds as well), then limited to outputCapacity.
 *
 * @param input          Source samples (inputFrames entries).
 * @param inputFrames    Number of source samples.
 * @param fromRate       Source sample rate in Hz; must be > 0.
 * @param toRate         Destination sample rate in Hz; must be > 0.
 * @param output         Destination buffer.
 * @param outputCapacity Capacity of `output` in samples.
 * @return Number of samples written to `output`; 0 for empty or invalid arguments.
 */
static NSUInteger pcmLiveResampleInt16(const int16_t *input, NSUInteger inputFrames,
                                       int fromRate, int toRate,
                                       int16_t *output, size_t outputCapacity) {
  if (input == NULL || output == NULL || inputFrames == 0 || outputCapacity == 0) return 0;
  // Non-positive rates would give a zero, negative or infinite ratio below.
  if (fromRate <= 0 || toRate <= 0) return 0;
  if (fromRate == toRate) {
    size_t copy = (inputFrames < outputCapacity) ? inputFrames : outputCapacity;
    memcpy(output, input, copy * sizeof(int16_t));
    return copy;
  }
  double ratio = (double)fromRate / (double)toRate;
  // Round to nearest instead of truncating so chunk lengths do not systematically lose samples.
  NSUInteger outLength = (NSUInteger)((double)inputFrames / ratio + 0.5);
  if (outLength > outputCapacity) outLength = outputCapacity;
  if (outLength == 0) return 0;
  for (NSUInteger i = 0; i < outLength; i++) {
    double srcIdx = (double)i * ratio;
    // Clamp both neighbours so the tail of the output never reads past the input.
    NSUInteger idx0 = (NSUInteger)srcIdx;
    if (idx0 >= inputFrames) idx0 = inputFrames - 1;
    NSUInteger idx1 = idx0 + 1;
    if (idx1 >= inputFrames) idx1 = inputFrames - 1;
    float frac = (float)(srcIdx - (double)idx0);
    int v0 = (int)input[idx0];
    int v1 = (int)input[idx1];
    int v = (int)(v0 + (v1 - v0) * frac);
    if (v < -32768) v = -32768;
    if (v > 32767) v = 32767;
    output[i] = (int16_t)v;
  }
  return outLength;
}
79
+
80
/**
 * Audio Queue input callback: forwards the filled buffer to JS and hands the buffer back to the queue.
 *
 * Returns without re-enqueueing when the stream is flagged as not running or the module is gone.
 * Otherwise the samples are emitted as-is when capture and target rate match, or resampled into a
 * temporary heap buffer first; in every such case the buffer is re-enqueued exactly once.
 */
static void pcmLiveAQInputCallback(void *inUserData,
                                   AudioQueueRef inAQ,
                                   AudioQueueBufferRef inBuffer,
                                   const AudioTimeStamp *inStartTime,
                                   UInt32 inNumPackets,
                                   const AudioStreamPacketDescription *inPacketDesc) {
  (void)inUserData;
  (void)inStartTime;
  (void)inNumPackets;
  (void)inPacketDesc;

  if (!_pcmLiveAQRunning) return;
  SherpaOnnx *owner = _pcmLiveModule;
  if (owner == nil) return;

  const UInt32 filledBytes = inBuffer->mAudioDataByteSize;
  if (filledBytes != 0) {
    const int16_t *pcm = (const int16_t *)inBuffer->mAudioData;
    const NSUInteger pcmFrames = filledBytes / sizeof(int16_t);
    const NSInteger dstRate = _pcmLiveTargetSampleRate;
    const NSInteger srcRate = _pcmLiveCaptureRate;

    if (srcRate == dstRate) {
      emitPcmChunk(owner, pcm, pcmFrames, dstRate);
    } else {
      // Ceiling of pcmFrames * dstRate / srcRate: an upper bound for the resampled frame count.
      const NSUInteger scratchFrames =
          (pcmFrames * (NSUInteger)dstRate + (NSUInteger)srcRate - 1) / (NSUInteger)srcRate;
      if (scratchFrames != 0) {
        int16_t *scratch = (int16_t *)malloc(scratchFrames * sizeof(int16_t));
        if (scratch != NULL) {
          const NSUInteger produced = pcmLiveResampleInt16(pcm, pcmFrames,
                                                           (int)srcRate, (int)dstRate,
                                                           scratch, scratchFrames);
          if (produced > 0) {
            emitPcmChunk(owner, scratch, produced, dstRate);
          }
          free(scratch);
        } else {
          emitPcmError(owner, @"Failed to allocate resample buffer");
        }
      }
    }
  }

  // Give the buffer back so the queue can keep recording into it.
  AudioQueueEnqueueBuffer(inAQ, inBuffer, 0, NULL);
}
129
+
130
/**
 * Tear down the live-capture audio queue, if there is one: clear the running flag,
 * stop the queue synchronously, free every allocated buffer, dispose the queue and
 * reset the global queue reference.
 */
static void pcmLiveStopQueue(void) {
  AudioQueueRef queue = _pcmLiveAudioQueue;
  if (queue == NULL) {
    return;
  }

  _pcmLiveAQRunning = NO;
  AudioQueueStop(queue, true);

  UInt32 slot = 0;
  while (slot < kPcmLiveAQNumberBuffers) {
    AudioQueueBufferRef buffer = _pcmLiveAQBuffers[slot];
    if (buffer != NULL) {
      AudioQueueFreeBuffer(queue, buffer);
      _pcmLiveAQBuffers[slot] = NULL;
    }
    slot++;
  }

  AudioQueueDispose(queue, true);
  _pcmLiveAudioQueue = NULL;
}
143
+
144
@implementation SherpaOnnx (PcmLiveStream)

/**
 * Start live PCM capture (fallback / non-codegen entry point).
 *
 * Reads `sampleRate` (default 16000) and `bufferSizeFrames` (default 0 = built-in buffer size)
 * from `optionsArg` when it is a dictionary, then delegates to the shared start routine.
 */
- (void)startPcmLiveStream:(id __unsafe_unretained)optionsArg
                   resolve:(RCTPromiseResolveBlock)resolve
                    reject:(RCTPromiseRejectBlock)reject
{
  int targetRate = 16000;
  UInt32 bufferSizeFrames = 0;

  // Parse optionsArg coming from JS (fallback / non-codegen path).
  if ([optionsArg isKindOfClass:[NSDictionary class]]) {
    NSDictionary *dict = (NSDictionary *)optionsArg;

    id sampleRateValue = dict[@"sampleRate"];
    if ([sampleRateValue respondsToSelector:@selector(intValue)]) {
      int v = (int)[sampleRateValue intValue];
      if (v > 0) targetRate = v;
    }

    id bufferSizeValue = dict[@"bufferSizeFrames"];
    if ([bufferSizeValue respondsToSelector:@selector(doubleValue)]) {
      double v = [bufferSizeValue doubleValue];
      // Saturate before converting: casting an out-of-range double to UInt32 is undefined.
      if (v > 0) bufferSizeFrames = (v >= 4294967295.0) ? UINT32_MAX : (UInt32)v;
    }
  }

  [self _startPcmLiveStreamWithTargetRate:targetRate bufferSizeFrames:bufferSizeFrames resolve:resolve reject:reject];
}

#if __has_include(<SherpaOnnxSpec/SherpaOnnxSpec.h>)
/**
 * Start live PCM capture (codegen entry point). Same defaults as the fallback entry point:
 * 16000 Hz when no positive sample rate is given, built-in buffer size when no positive
 * bufferSizeFrames is given.
 */
- (void)startPcmLiveStreamWithOptions:(JS::NativeSherpaOnnx::SpecStartPcmLiveStreamOptions &)options
                              resolve:(RCTPromiseResolveBlock)resolve
                               reject:(RCTPromiseRejectBlock)reject
{
  int targetRate = 16000;
  if (options.sampleRate()) {
    targetRate = (int)options.sampleRate();
    if (targetRate <= 0) targetRate = 16000;
  }
  UInt32 bufferSizeFrames = 0;
  if (options.bufferSizeFrames().has_value()) {
    double v = options.bufferSizeFrames().value();
    // Saturate before converting: casting an out-of-range double to UInt32 is undefined.
    if (v > 0) bufferSizeFrames = (v >= 4294967295.0) ? UINT32_MAX : (UInt32)v;
  }
  [self _startPcmLiveStreamWithTargetRate:targetRate bufferSizeFrames:bufferSizeFrames resolve:resolve reject:reject];
}
#endif

/**
 * Shared start routine: stops any previous queue, activates a PlayAndRecord audio session,
 * creates a mono 16-bit input queue, allocates and enqueues its buffers and starts it.
 *
 * @param targetRate       Sample rate of the PCM emitted to JS.
 * @param bufferSizeFrames Requested per-buffer size in frames; 0 selects 2048 bytes. Non-zero
 *                         values are limited to 512...16384 frames (1024...32768 bytes).
 * @param resolve          Called with nil once the queue is running.
 * @param reject           Called with code PCM_LIVE_STREAM_ERROR on any failure.
 */
- (void)_startPcmLiveStreamWithTargetRate:(int)targetRate
                         bufferSizeFrames:(UInt32)bufferSizeFrames
                                  resolve:(RCTPromiseResolveBlock)resolve
                                   reject:(RCTPromiseRejectBlock)reject
{
  pcmLiveStopQueue();

  _pcmLiveTargetSampleRate = targetRate;
  _pcmLiveModule = self;

  NSError *error = nil;
  AVAudioSession *session = [AVAudioSession sharedInstance];
  if (![session setCategory:AVAudioSessionCategoryPlayAndRecord
                       mode:AVAudioSessionModeDefault
                    options:AVAudioSessionCategoryOptionDefaultToSpeaker | AVAudioSessionCategoryOptionAllowBluetooth
                      error:&error]) {
    RCTLog(@"%@", [NSString stringWithFormat:@"[SherpaOnnx PcmLive] setCategory error: %@", error]);
    reject(@"PCM_LIVE_STREAM_ERROR", error.localizedDescription ?: @"Failed to set audio session", error);
    return;
  }
  if (![session setActive:YES withOptions:0 error:&error]) {
    RCTLog(@"%@", [NSString stringWithFormat:@"[SherpaOnnx PcmLive] setActive error: %@", error]);
    reject(@"PCM_LIVE_STREAM_ERROR", error.localizedDescription ?: @"Failed to activate audio session", error);
    return;
  }

  AudioStreamBasicDescription fmt;
  memset(&fmt, 0, sizeof(fmt));
  fmt.mFormatID = kAudioFormatLinearPCM;
  fmt.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  fmt.mChannelsPerFrame = 1;
  fmt.mBitsPerChannel = 16;
  fmt.mBytesPerPacket = 2;
  fmt.mBytesPerFrame = 2;
  fmt.mFramesPerPacket = 1;

  OSStatus status = noErr;
  int chosenCaptureRate = 16000;
  // Try the requested target rate first so no resampling is needed when the queue accepts it,
  // then fall back to the fixed capture rates (skipping a duplicate of the target rate).
  for (size_t r = 0; r <= kPcmLiveCaptureRatesCount; r++) {
    int candidate = (r == 0) ? targetRate : kPcmLiveCaptureRates[r - 1];
    if (r > 0 && candidate == targetRate) continue;
    chosenCaptureRate = candidate;
    fmt.mSampleRate = (Float64)chosenCaptureRate;
    status = AudioQueueNewInput(&fmt, pcmLiveAQInputCallback, NULL, NULL, NULL, 0, &_pcmLiveAudioQueue);
    if (status == noErr) break;
    _pcmLiveAudioQueue = NULL;
  }
  if (status != noErr || _pcmLiveAudioQueue == NULL) {
    [session setActive:NO withOptions:0 error:nil];
    reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueNewInput failed for all rates (last: %d)", (int)status], nil);
    return;
  }
  _pcmLiveCaptureRate = chosenCaptureRate;

  UInt32 bufferByteSize = 2048;
  if (bufferSizeFrames > 0) {
    // Clamp in frames before multiplying so a huge request cannot wrap around in UInt32.
    UInt32 frames = bufferSizeFrames;
    if (frames < 512) frames = 512;
    if (frames > 16384) frames = 16384;
    bufferByteSize = frames * 2; /* 16-bit mono */
  }

  for (UInt32 i = 0; i < kPcmLiveAQNumberBuffers; i++) {
    status = AudioQueueAllocateBuffer(_pcmLiveAudioQueue, bufferByteSize, &_pcmLiveAQBuffers[i]);
    if (status != noErr) {
      pcmLiveStopQueue();
      [session setActive:NO withOptions:0 error:nil];
      reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueAllocateBuffer failed: %d", (int)status], nil);
      return;
    }
    status = AudioQueueEnqueueBuffer(_pcmLiveAudioQueue, _pcmLiveAQBuffers[i], 0, NULL);
    if (status != noErr) {
      // A buffer that was never enqueued is never filled; fail loudly instead of starting a starved queue.
      pcmLiveStopQueue();
      [session setActive:NO withOptions:0 error:nil];
      reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueEnqueueBuffer failed: %d", (int)status], nil);
      return;
    }
  }

  // Set the flag before starting: the input callback drops buffers while it is NO.
  _pcmLiveAQRunning = YES;
  status = AudioQueueStart(_pcmLiveAudioQueue, NULL);
  if (status != noErr) {
    pcmLiveStopQueue();
    [session setActive:NO withOptions:0 error:nil];
    reject(@"PCM_LIVE_STREAM_ERROR", [NSString stringWithFormat:@"AudioQueueStart failed: %d", (int)status], nil);
    return;
  }

  resolve(nil);
}

/** Stop live PCM capture; forwards to -stopPcmLiveStreamWithResolve:reject:. */
- (void)stopPcmLiveStream:(RCTPromiseResolveBlock)resolve
                   reject:(RCTPromiseRejectBlock)reject
{
  [self stopPcmLiveStreamWithResolve:resolve reject:reject];
}

/**
 * Stop live PCM capture: tears down the audio queue, deactivates the audio session
 * (notifying other audio clients) and resolves with nil. `reject` is never called.
 */
- (void)stopPcmLiveStreamWithResolve:(RCTPromiseResolveBlock)resolve
                              reject:(RCTPromiseRejectBlock)reject
{
  pcmLiveStopQueue();
  [[AVAudioSession sharedInstance] setActive:NO withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:nil];
  resolve(nil);
}

@end
@@ -38,6 +38,7 @@ static NSString *sttModelKindToNSString(sherpaonnx::SttModelKind kind) {
38
38
  case K::kFunAsrNano: return @"funasr_nano";
39
39
  case K::kFireRedAsr: return @"fire_red_asr";
40
40
  case K::kMoonshine: return @"moonshine";
41
+ case K::kMoonshineV2: return @"moonshine_v2";
41
42
  case K::kDolphin: return @"dolphin";
42
43
  case K::kCanary: return @"canary";
43
44
  case K::kOmnilingual: return @"omnilingual";
@@ -268,6 +269,7 @@ static NSDictionary *sttResultToDict(const sherpaonnx::SttRecognitionResult& r)
268
269
 
269
270
  NSMutableDictionary *resultDict = [NSMutableDictionary dictionary];
270
271
  resultDict[@"success"] = @(result.ok);
272
+ resultDict[@"isHardwareSpecificUnsupported"] = @(result.isHardwareSpecificUnsupported);
271
273
  if (!result.error.empty()) {
272
274
  resultDict[@"error"] = [NSString stringWithUTF8String:result.error.c_str()];
273
275
  }
package/ios/SherpaOnnx.mm CHANGED
@@ -36,7 +36,7 @@
36
36
 
37
37
  - (NSArray<NSString *> *)supportedEvents
38
38
  {
39
- return @[ @"ttsStreamChunk", @"ttsStreamEnd", @"ttsStreamError", @"extractTarBz2Progress" ];
39
+ return @[ @"ttsStreamChunk", @"ttsStreamEnd", @"ttsStreamError", @"extractTarBz2Progress", @"pcmLiveStreamData", @"pcmLiveStreamError" ];
40
40
  }
41
41
 
42
42
  - (void)resolveModelPath:(JS::NativeSherpaOnnx::SpecResolveModelPathConfig &)config
@@ -21,12 +21,11 @@ std::vector<std::string> ListDirectories(const std::string& path);
21
21
  std::vector<FileEntry> ListFiles(const std::string& path);
22
22
  std::vector<FileEntry> ListFilesRecursive(const std::string& path, int maxDepth);
23
23
  std::string ToLower(std::string value);
24
- std::string ResolveTokenizerDir(const std::string& modelDir);
25
24
 
26
- std::string FindFileByName(const std::string& baseDir, const std::string& fileName, int maxDepth);
25
+ /** Find file in \p files whose name equals \p fileName (case-insensitive). Uses file tree only, no filesystem. */
26
+ std::string FindFileByName(const std::vector<FileEntry>& files, const std::string& fileName);
27
27
  /** Find file whose name equals or ends with suffix (e.g. tokens.txt, tiny-tokens.txt) in a pre-built file list. */
28
28
  std::string FindFileEndingWith(const std::vector<FileEntry>& files, const std::string& suffix);
29
- std::string FindDirectoryByName(const std::string& baseDir, const std::string& dirName, int maxDepth);
30
29
 
31
30
  std::string FindOnnxByToken(
32
31
  const std::vector<FileEntry>& files,
@@ -38,6 +37,13 @@ std::string FindOnnxByAnyToken(
38
37
  const std::vector<std::string>& tokens,
39
38
  const std::optional<bool>& preferInt8
40
39
  );
40
+ /** Like FindOnnxByAnyToken but skips any file whose nameLower contains any of \p excludeInName. */
41
+ std::string FindOnnxByAnyTokenExcluding(
42
+ const std::vector<FileEntry>& files,
43
+ const std::vector<std::string>& tokens,
44
+ const std::vector<std::string>& excludeInName,
45
+ const std::optional<bool>& preferInt8
46
+ );
41
47
  std::string FindLargestOnnxExcludingTokens(
42
48
  const std::vector<FileEntry>& files,
43
49
  const std::vector<std::string>& excludeTokens