@gmessier/nitro-speech 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.md +16 -4
  2. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AudioPermissionRequester.kt +12 -6
  3. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +57 -39
  4. package/ios/Engines/AnalyzerEngine.swift +11 -7
  5. package/ios/Engines/RecognizerEngine.swift +140 -82
  6. package/ios/Engines/SFSpeechEngine.swift +7 -3
  7. package/ios/HybridRecognizer.swift +18 -5
  8. package/ios/Shared/Permissions.swift +12 -47
  9. package/lib/Recognizer/methods.d.ts +10 -10
  10. package/lib/Recognizer/methods.js +2 -2
  11. package/lib/Recognizer/types.d.ts +2 -1
  12. package/lib/index.d.ts +1 -1
  13. package/lib/specs/Recognizer.nitro.d.ts +2 -1
  14. package/lib/specs/SpeechRecognitionConfig.d.ts +2 -0
  15. package/lib/specs/SpeechRecognitionPrewarm.d.ts +10 -0
  16. package/lib/specs/SpeechRecognitionPrewarm.js +1 -0
  17. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +7 -3
  18. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +1 -1
  19. package/nitrogen/generated/android/c++/JSpeechRecognitionPrewarm.hpp +57 -0
  20. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +1 -1
  21. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionPrewarm.kt +51 -0
  22. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +18 -0
  23. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +3 -0
  24. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +5 -2
  25. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +1 -1
  26. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +2 -2
  27. package/nitrogen/generated/ios/swift/SpeechRecognitionPrewarm.swift +42 -0
  28. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +4 -1
  29. package/nitrogen/generated/shared/c++/SpeechRecognitionPrewarm.hpp +83 -0
  30. package/package.json +1 -1
  31. package/src/Recognizer/methods.ts +38 -31
  32. package/src/Recognizer/types.ts +2 -0
  33. package/src/index.ts +1 -0
  34. package/src/specs/Recognizer.nitro.ts +5 -1
  35. package/src/specs/SpeechRecognitionConfig.ts +2 -0
  36. package/src/specs/SpeechRecognitionPrewarm.ts +10 -0
package/README.md CHANGED
@@ -126,7 +126,7 @@ Both permissions are required for speech recognition to work on iOS.
126
126
  | **Reset Auto-finish Time** | Resets the Timer to the threshold | ✅ | ✅ |
127
127
  | **Voice input volume** | `useVoiceInputVolume`, `getVoiceInputVolume()`, `onVolumeChange` | ✅ | ✅ |
128
128
  | **Reset Auto-finish Sensitivity** | The voice detector sensitivity to reset the Auto-finish time | ✅ | ✅ |
129
- | **Prewarm** | Prepares resources, downloads assets, confirms locale availability | ✅ | ✅ |
129
+ | **Prewarm** | Prepares resources, downloads assets, confirms locale availability, requests permissions | ✅ | ✅ |
130
130
  | **Update config** | Static method `updateConfig` allows updating the config on the fly | ✅ | ✅ |
131
131
  | **Is Active** | Static method `getIsActive()` | ✅ | ✅ |
132
132
  | **Haptic feedback** | Haptic feedback on recording start/stop | ✅ | ✅ |
@@ -230,6 +230,17 @@ function MyComponent() {
230
230
  )>
231
231
  <Text>Update Timer to 12s, 500ms interval, 0.65 sensitivity, with reset</Text>
232
232
  </TouchableOpacity>
233
+ <TouchableOpacity
234
+ onPress={() => {
235
+ scheduleOnRuntime(workletRuntime, () => {
236
+ RecognizerRef.prewarm({
237
+ iosPreset: 'speed',
238
+ }, { requestPermission: true });
239
+ });
240
+ }}
241
+ >
242
+ <Text>Prewarm from worklet with permission request (default behavior)</Text>
243
+ </TouchableOpacity>
233
244
  </View>
234
245
  );
235
246
  }
@@ -262,7 +273,7 @@ If you need to call recognizer methods from other components without prop drilli
262
273
  ```typescript
263
274
  import { RecognizerRef } from '@gmessier/nitro-speech';
264
275
 
265
- RecognizerRef.prewarm({ locale: 'en-US' });
276
+ RecognizerRef.prewarm({ locale: 'en-US' }, { requestPermission: true });
266
277
  RecognizerRef.startListening({ locale: 'en-US' });
267
278
  RecognizerRef.addAutoFinishTime(5000);
268
279
  RecognizerRef.resetAutoFinishTime();
@@ -407,7 +418,8 @@ SpeechRecognizer.onVolumeChange = (volume) => {
407
418
  SpeechRecognizer.prewarm({
408
419
  locale: 'en-US',
409
420
  // ... your config to prepare
410
- });
421
+ }, { requestPermission: true });
422
+ );
411
423
  // OR `await` if you want to react to the success
412
424
  await SpeechRecognizer.prewarm({
413
425
  locale: 'en-US',
@@ -418,7 +430,7 @@ scheduleOnRuntime(workletRuntime, () => {
418
430
  SpeechRecognizer.prewarm({
419
431
  locale: 'en-US',
420
432
  // ... your config to prepare
421
- });
433
+ }, { requestPermission: false });
422
434
  });
423
435
 
424
436
  // Start listening
@@ -6,6 +6,8 @@ import android.content.pm.PackageManager
6
6
  import androidx.activity.ComponentActivity
7
7
  import androidx.activity.result.contract.ActivityResultContracts
8
8
  import androidx.core.content.ContextCompat
9
+ import kotlinx.coroutines.suspendCancellableCoroutine
10
+ import kotlin.coroutines.resume
9
11
 
10
12
  class AudioPermissionRequester (
11
13
  private val activity: Activity
@@ -16,12 +18,13 @@ class AudioPermissionRequester (
16
18
  private var callback: ((Boolean) -> Unit)? = null
17
19
 
18
20
  private val launcher = componentActivity.activityResultRegistry.register(
19
- "record_audio_key", ActivityResultContracts.RequestPermission()
21
+ "record_audio_key",
22
+ ActivityResultContracts.RequestPermission()
20
23
  ) { granted ->
21
24
  callback?.invoke(granted)
22
25
  }
23
26
 
24
- fun checkAndRequest(onResult: (Boolean) -> Unit) {
27
+ suspend fun checkAndRequest(): Boolean {
25
28
  val audioGranted =
26
29
  ContextCompat.checkSelfPermission(
27
30
  activity,
@@ -29,11 +32,14 @@ class AudioPermissionRequester (
29
32
  ) == PackageManager.PERMISSION_GRANTED
30
33
 
31
34
  if (audioGranted) {
32
- onResult(true)
33
- return
35
+ return true
34
36
  }
35
37
 
36
- callback = onResult
37
- launcher.launch(recordAudioPermission)
38
+ return suspendCancellableCoroutine { cont ->
39
+ callback = { granted ->
40
+ if (cont.isActive) cont.resume(granted)
41
+ }
42
+ launcher.launch(recordAudioPermission)
43
+ }
38
44
  }
39
45
  }
@@ -14,6 +14,7 @@ import com.margelo.nitro.core.Promise
14
14
  import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
15
15
  import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
16
16
  import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
17
+ import com.margelo.nitro.nitrospeech.SpeechRecognitionPrewarm
17
18
  import com.margelo.nitro.nitrospeech.VolumeChangeEvent
18
19
 
19
20
  @DoNotStrip
@@ -43,52 +44,27 @@ class HybridRecognizer: HybridRecognizerSpec() {
43
44
 
44
45
  @DoNotStrip
45
46
  @Keep
46
- override fun prewarm(defaultParams: SpeechRecognitionConfig?): Promise<Unit> {
47
- // no-op
47
+ override fun prewarm(
48
+ defaultParams: SpeechRecognitionConfig?,
49
+ options: SpeechRecognitionPrewarm?
50
+ ): Promise<Unit> {
51
+ logger.log("prewarm called")
48
52
  // nothing to prewarm
49
- return Promise()
53
+ // only request permissions
54
+ return Promise.async {
55
+ // Enabled by default for user
56
+ if (options?.requestPermission != false) {
57
+ preparePermissions(null, isPrewarm = true)
58
+ }
59
+ }
50
60
  }
51
61
 
52
62
  @DoNotStrip
53
63
  @Keep
54
64
  override fun startListening(params: SpeechRecognitionConfig?) {
55
65
  logger.log("startListening: $params")
56
- if (isActive) {
57
- onFinishRecognition(
58
- null,
59
- "Error at startListening: Previous SpeechRecognizer is still active",
60
- false
61
- )
62
- return
63
- }
64
-
65
- val context = NitroModules.applicationContext
66
- if (context == null) {
67
- onFinishRecognition(
68
- null,
69
- "Error at startListening: Context not available",
70
- true
71
- )
72
- return
73
- }
74
- val activity = context.currentActivity
75
- if (activity == null) {
76
- onFinishRecognition(
77
- null,
78
- "Error at startListening: Activity not found",
79
- true
80
- )
81
- return
82
- }
83
-
84
- val permissionRequester = AudioPermissionRequester(activity)
85
- permissionRequester.checkAndRequest { granted ->
86
- if (!granted) {
87
- onPermissionDenied?.invoke()
88
- return@checkAndRequest
89
- }
90
- config = params
91
- start(context)
66
+ Promise.async {
67
+ preparePermissions(params, isPrewarm = false)
92
68
  }
93
69
  }
94
70
 
@@ -196,6 +172,48 @@ class HybridRecognizer: HybridRecognizerSpec() {
196
172
  stopListening()
197
173
  }
198
174
 
175
+ private suspend fun preparePermissions(params: SpeechRecognitionConfig?, isPrewarm: Boolean) {
176
+ if (isActive) {
177
+ onFinishRecognition(
178
+ null,
179
+ "Error: SpeechRecognizer is already active",
180
+ false
181
+ )
182
+ return
183
+ }
184
+
185
+ val context = NitroModules.applicationContext
186
+ if (context == null) {
187
+ onFinishRecognition(
188
+ null,
189
+ "Error: Context not available",
190
+ true
191
+ )
192
+ return
193
+ }
194
+ val activity = context.currentActivity
195
+ if (activity == null) {
196
+ onFinishRecognition(
197
+ null,
198
+ "Error: Activity not found",
199
+ true
200
+ )
201
+ return
202
+ }
203
+
204
+ val permissionRequester = AudioPermissionRequester(activity)
205
+ val granted = permissionRequester.checkAndRequest()
206
+ if (!granted) {
207
+ onPermissionDenied?.invoke()
208
+ return
209
+ }
210
+ if (isPrewarm) {
211
+ return
212
+ }
213
+ config = params
214
+ start(context)
215
+ }
216
+
199
217
  private fun start(context: Context) {
200
218
  mainHandler.post {
201
219
  try {
@@ -46,17 +46,21 @@ final class AnalyzerEngine: RecognizerEngine {
46
46
  }
47
47
  }
48
48
 
49
- override func prewarm(for type: FailureType) async {
50
- await super.prewarm(for: type)
49
+ override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
50
+ await super.prewarm(for: type, options)
51
51
  do {
52
52
  // Create transcriber and install assets
53
53
  try await transcriber.create(config: self.recognizerDelegate?.config)
54
54
  }
55
55
  catch {
56
+ let failureType: FailureType = switch type {
57
+ case .prewarm: .prewarm
58
+ case .start: .start
59
+ }
56
60
  self.reportFailure(
57
61
  from: "prewarm.assets",
58
62
  message: "Failed to create transcriber",
59
- type: type
63
+ type: failureType
60
64
  )
61
65
  }
62
66
  }
@@ -93,7 +97,7 @@ final class AnalyzerEngine: RecognizerEngine {
93
97
  self?.outputContinuation?.yield(buffer)
94
98
  }
95
99
  )
96
- guard let hardwareFormat else { return }
100
+ guard let hardwareFormat = recognizerDelegate?.hardwareFormat else { return }
97
101
  let stream = AsyncStream(
98
102
  AVAudioPCMBuffer.self,
99
103
  bufferingPolicy: .unbounded
@@ -225,14 +229,14 @@ final class AnalyzerEngine: RecognizerEngine {
225
229
  if !disableRepeatingFilter {
226
230
  newBatch = Utils.repeatingFilter(newBatch)
227
231
  }
228
- Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
232
+ // Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
229
233
  if self.resultBatches.isEmpty {
230
234
  self.resultBatches.append(newBatch)
231
235
  } else if CMTimeGetSeconds(rangeStart) == self.lastBatchStartTime || isFinal {
232
- Log.log("[2] replace, isFinal: \(isFinal)")
236
+ // Log.log("[2] replace, isFinal: \(isFinal)")
233
237
  self.resultBatches[self.resultBatches.count - 1] = newBatch
234
238
  } else {
235
- Log.log("[2] add new batch")
239
+ // Log.log("[2] add new batch")
236
240
  self.resultBatches.append(newBatch)
237
241
  }
238
242
  self.lastBatchStartTime = CMTimeGetSeconds(rangeStart)
@@ -12,10 +12,14 @@ enum FailureType {
12
12
  case onSession
13
13
  }
14
14
 
15
+ enum PrewarmType {
16
+ case start
17
+ case prewarm
18
+ }
19
+
15
20
  class RecognizerEngine {
16
21
  var isActive = false
17
22
  var isStopping = false
18
- var hardwareFormat: AVAudioFormat?
19
23
  weak var recognizerDelegate: RecognizerDelegate?
20
24
 
21
25
  private let audioLevelTracker = AudioLevelTracker()
@@ -33,21 +37,48 @@ class RecognizerEngine {
33
37
 
34
38
  // MARK: - Recognizer Methods
35
39
 
36
- func prewarm(for: FailureType) async {
37
- self.prepareAudioEngine()
40
+ func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
41
+ // Prepare audioEngine
42
+ audioEngine = AVAudioEngine()
43
+ lg.log("[prewarm.audioEngine]")
44
+
45
+ guard let recognizerDelegate else { return }
46
+
47
+ // Everything is set, return early
48
+ if type == .prewarm, recognizerDelegate.hardwareFormat != nil {
49
+ lg.log("[prewarm.return]: Everything set")
50
+ return
51
+ }
52
+
53
+ // User explicitly asked for prewarm without requesting permissions, return early
54
+ if type == .prewarm, options?.requestPermission == false {
55
+ lg.log("[prewarm.return]: requestPermission: false")
56
+ return
57
+ }
58
+
59
+ if type == .prewarm {
60
+ // options.requestPermission is true by default
61
+ // Start Permission sequence
62
+ let granted = await requestPermissions()
63
+ if granted {
64
+ self.prewarmAudioSession(for: type)
65
+ }
66
+ } else {
67
+ self.prewarmAudioSession(for: type)
68
+ }
69
+
38
70
  // for SpeechTranscriber: .isAvailable and async assets
39
71
  // for Dictation: only async assets
40
72
  // for legacy SF: only sync .isAvailable
41
73
  }
42
74
 
43
- func start() {
44
- guard let recognizerDelegate, !isActive else { return }
75
+ func start() async {
76
+ guard !isActive else { return }
45
77
 
46
- Permissions(
47
- onGranted: self.startSession,
48
- onDenied: recognizerDelegate.permissionDenied,
49
- onError: recognizerDelegate.error
50
- ).requestAuthorization()
78
+ let granted = await requestPermissions()
79
+ if granted {
80
+ await startSession()
81
+ }
51
82
  }
52
83
 
53
84
  func stop() {
@@ -56,6 +87,55 @@ class RecognizerEngine {
56
87
  HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
57
88
  }
58
89
 
90
+ func updateSession(
91
+ newConfig: MutableSpeechRecognitionConfig? = nil,
92
+ addMsToTimer: Double? = nil,
93
+ resetTimer: Bool? = nil
94
+ ) {
95
+ guard let recognizerDelegate, isActive, !isStopping else { return }
96
+ let currentConfig = recognizerDelegate.config
97
+ // Update AutoFinish time
98
+ if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
99
+ newAutoFinish != currentConfig?.autoFinishRecognitionMs {
100
+ autoStopper?.updateThreshold(
101
+ newAutoFinish,
102
+ from: "updateSession"
103
+ )
104
+ }
105
+ // Update AutoFinish progress interval
106
+ if let newInterval = newConfig?.autoFinishProgressIntervalMs,
107
+ newInterval != currentConfig?.autoFinishProgressIntervalMs {
108
+ autoStopper?.updateProgressInterval(
109
+ newInterval,
110
+ from: "updateSession"
111
+ )
112
+ }
113
+
114
+ if let addMsToTimer {
115
+ // Add time to the timer once
116
+ autoStopper?.addMsOnce(
117
+ addMsToTimer,
118
+ from: "updateSession"
119
+ )
120
+ } else if resetTimer == true {
121
+ // Reset to current baseline threshold.
122
+ autoStopper?.resetTimer(from: "updateSession")
123
+ }
124
+ // Only update new non-nil values in the config
125
+ recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
126
+ }
127
+
128
+ func getVoiceInputVolume() -> VolumeChangeEvent? {
129
+ guard let currentSample = audioLevelTracker.currentSample else { return nil }
130
+ return VolumeChangeEvent(
131
+ smoothedVolume: currentSample.smoothed,
132
+ rawVolume: currentSample.raw,
133
+ db: currentSample.db
134
+ )
135
+ }
136
+
137
+ // MARK: Helpers
138
+
59
139
  func startSession() async {
60
140
  lg.log("[startSession.startSession]")
61
141
  // Init everything
@@ -66,15 +146,13 @@ class RecognizerEngine {
66
146
  lg.log("[startSession.initAutoStop]")
67
147
  startAppStateObserver()
68
148
  lg.log("[startSession.startAppStateObserver]")
69
- startAudioSession()
70
- lg.log("[startSession.startAudioSession]")
71
149
  }
72
150
 
73
151
  func startAudioEngine(
74
152
  onBuffer: @escaping (AVAudioPCMBuffer) -> Void
75
153
  ) {
76
154
  lg.log("[startAudioEngine]")
77
- guard let audioEngine, let hardwareFormat else { return }
155
+ guard let audioEngine, let hardwareFormat = self.recognizerDelegate?.hardwareFormat else { return }
78
156
  audioEngine.inputNode.installTap(
79
157
  onBus: 0,
80
158
  bufferSize: 1024,
@@ -124,53 +202,6 @@ class RecognizerEngine {
124
202
  recognizerDelegate.readyForSpeech()
125
203
  recognizerDelegate.result(batches: [])
126
204
  }
127
-
128
- func updateSession(
129
- newConfig: MutableSpeechRecognitionConfig? = nil,
130
- addMsToTimer: Double? = nil,
131
- resetTimer: Bool? = nil
132
- ) {
133
- guard let recognizerDelegate, isActive, !isStopping else { return }
134
- let currentConfig = recognizerDelegate.config
135
- // Update AutoFinish time
136
- if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
137
- newAutoFinish != currentConfig?.autoFinishRecognitionMs {
138
- autoStopper?.updateThreshold(
139
- newAutoFinish,
140
- from: "updateSession"
141
- )
142
- }
143
- // Update AutoFinish progress interval
144
- if let newInterval = newConfig?.autoFinishProgressIntervalMs,
145
- newInterval != currentConfig?.autoFinishProgressIntervalMs {
146
- autoStopper?.updateProgressInterval(
147
- newInterval,
148
- from: "updateSession"
149
- )
150
- }
151
-
152
- if let addMsToTimer {
153
- // Add time to the timer once
154
- autoStopper?.addMsOnce(
155
- addMsToTimer,
156
- from: "updateSession"
157
- )
158
- } else if resetTimer == true {
159
- // Reset to current baseline threshold.
160
- autoStopper?.resetTimer(from: "updateSession")
161
- }
162
- // Only update new non-nil values in the config
163
- recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
164
- }
165
-
166
- func getVoiceInputVolume() -> VolumeChangeEvent? {
167
- guard let currentSample = audioLevelTracker.currentSample else { return nil }
168
- return VolumeChangeEvent(
169
- smoothedVolume: currentSample.smoothed,
170
- rawVolume: currentSample.raw,
171
- db: currentSample.db
172
- )
173
- }
174
205
 
175
206
  func cleanup(from: String) {
176
207
  lg.log("[cleanup]: \(from)")
@@ -226,29 +257,32 @@ class RecognizerEngine {
226
257
  }
227
258
  }
228
259
 
229
- // MARK: - AudioEngine heavy prepare
260
+ // MARK: Permissions
230
261
 
231
- private func prepareAudioEngine() {
232
- lg.log("[prewarm.start]")
233
- audioEngine = AVAudioEngine()
234
- guard let audioEngine else {
235
- self.reportFailure(
236
- from: "Audio Engine",
237
- message: "Audio Engine failed to initiate",
238
- // RecognizerEngine-agnostic Error
239
- type: .system
240
- )
241
- return
262
+ private func requestPermissions() async -> Bool {
263
+ guard let recognizerDelegate else { return false }
264
+ let authStatus = await Permissions.requestAuthorization()
265
+ if authStatus == .denied || authStatus == .restricted {
266
+ recognizerDelegate.permissionDenied()
267
+ return false
242
268
  }
243
- lg.log("[prewarm.audioEngine]")
244
- // heavy first hardwareFormat retrieval
245
- if hardwareFormat == nil {
246
- hardwareFormat = audioEngine.inputNode.outputFormat(forBus: 0)
247
- lg.log("[prewarm.hardwareFormat]")
269
+
270
+ if authStatus != .authorized {
271
+ // .notDetermined or unknown issue
272
+ recognizerDelegate.error(message: "Speech recognition permission is not determined")
273
+ return false
274
+ }
275
+
276
+ if !(await Permissions.requestMicrophonePermission()) {
277
+ recognizerDelegate.permissionDenied()
278
+ return false
248
279
  }
280
+
281
+ // Granted
282
+ return true
249
283
  }
250
284
 
251
- // MARK: - AutoStopper
285
+ // MARK: Auto Stopper
252
286
 
253
287
  private func initAutoStop() {
254
288
  let config = self.recognizerDelegate?.config
@@ -271,7 +305,7 @@ class RecognizerEngine {
271
305
  autoStopper = nil
272
306
  }
273
307
 
274
- // MARK: - App State Observer
308
+ // MARK: App State Observer
275
309
 
276
310
  private func startAppStateObserver() {
277
311
  appStateObserver = AppStateObserver { [weak self] in
@@ -285,12 +319,37 @@ class RecognizerEngine {
285
319
  appStateObserver = nil
286
320
  }
287
321
 
288
- // MARK: - Audio Session
322
+ // MARK: Audio Session
323
+
324
+ private func prewarmAudioSession(for type: PrewarmType) {
325
+ guard let audioEngine else {
326
+ self.reportFailure(
327
+ from: "Audio Engine",
328
+ message: "Audio Engine failed to initiate",
329
+ // RecognizerEngine-agnostic Error
330
+ type: .system
331
+ )
332
+ return
333
+ }
334
+ startAudioSession()
335
+ lg.log("[prewarmAudioSession.audioSession]")
336
+ // heavy first hardwareFormat retrieval
337
+ if let recognizerDelegate, recognizerDelegate.hardwareFormat == nil {
338
+ let format = audioEngine.inputNode.outputFormat(forBus: 0)
339
+ recognizerDelegate.setHardwareFormat(format: format)
340
+ lg.log("[prewarmAudioSession.hardwareFormat]")
341
+ }
342
+
343
+ if type == .prewarm {
344
+ stopAudioSession()
345
+ lg.log("[prewarmAudioSession.stopAudioSession]")
346
+ }
347
+ }
289
348
 
290
349
  private func startAudioSession() {
291
350
  do {
292
351
  let audioSession = AVAudioSession.sharedInstance()
293
- try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
352
+ try audioSession.setCategory(.playAndRecord, mode: .default, options: .duckOthers)
294
353
  // Required for haptic feedback
295
354
  try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
296
355
  try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
@@ -305,7 +364,6 @@ class RecognizerEngine {
305
364
  }
306
365
  private func stopAudioSession() {
307
366
  do {
308
- // TODO: check unduck
309
367
  try AVAudioSession.sharedInstance().setActive(false)
310
368
  } catch {
311
369
  // Just log and no-op - not critical
@@ -15,18 +15,22 @@ final class SFSpeechEngine: RecognizerEngine {
15
15
  recognitionTask?.finish()
16
16
  }
17
17
 
18
- override func prewarm(for type: FailureType) async {
18
+ override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
19
19
  speechRecognizer = SFSpeechRecognizer(
20
20
  locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
21
21
  )
22
22
  if speechRecognizer?.isAvailable != true {
23
+ let failureType: FailureType = switch type {
24
+ case .prewarm: .prewarm
25
+ case .start: .start
26
+ }
23
27
  self.reportFailure(
24
28
  from: "prewarm",
25
29
  message: "SFSpeechRecognizer is not available",
26
- type: type
30
+ type: failureType
27
31
  )
28
32
  }
29
- await super.prewarm(for: type)
33
+ await super.prewarm(for: type, options)
30
34
  }
31
35
 
32
36
  override func startSession() async {
@@ -1,8 +1,11 @@
1
1
  import Foundation
2
2
  import NitroModules
3
+ import AVFoundation
3
4
 
4
5
  class HybridRecognizer: HybridRecognizerSpec {
6
+ var prewarmOptions: SpeechRecognitionPrewarm?
5
7
  var config: SpeechRecognitionConfig?
8
+ var hardwareFormat: AVAudioFormat?
6
9
 
7
10
  var onReadyForSpeech: (() -> Void)?
8
11
  var onRecordingStopped: (() -> Void)?
@@ -27,12 +30,18 @@ class HybridRecognizer: HybridRecognizerSpec {
27
30
  private let lg = Lg(prefix: "HybridRecognizer")
28
31
 
29
32
  @discardableResult
30
- func prewarm(defaultParams: SpeechRecognitionConfig?) -> Promise<Void> {
33
+ func prewarm(
34
+ defaultParams: SpeechRecognitionConfig?,
35
+ options: SpeechRecognitionPrewarm?
36
+ ) -> Promise<Void> {
37
+ prewarmOptions = options
31
38
  return Promise.async(.userInitiated) { [weak self] in
39
+ // Ignore when standalone prewarm triggered for active session
40
+ guard self?.engine?.isActive != true else { return }
32
41
  // Ensure correct engine is selected based on params and ios version
33
42
  await self?.ensureEngine(params: defaultParams)
34
43
  // try to preload assets and check if speech engine is available on OS level
35
- await self?.engine?.prewarm(for: .prewarm)
44
+ await self?.engine?.prewarm(for: .prewarm, options)
36
45
  }
37
46
  }
38
47
 
@@ -40,7 +49,7 @@ class HybridRecognizer: HybridRecognizerSpec {
40
49
  Task {
41
50
  // Ensure correct engine is selected based on params and ios version
42
51
  await ensureEngine(params: params)
43
- engine?.start()
52
+ await engine?.start()
44
53
  }
45
54
  }
46
55
 
@@ -111,6 +120,8 @@ class HybridRecognizer: HybridRecognizerSpec {
111
120
 
112
121
  protocol RecognizerDelegate: AnyObject {
113
122
  var config: SpeechRecognitionConfig? { get }
123
+ var hardwareFormat: AVAudioFormat? { get }
124
+ func setHardwareFormat(format: AVAudioFormat)
114
125
  func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?)
115
126
  func reselectEngine(forPrewarm: Bool)
116
127
  func readyForSpeech()
@@ -123,6 +134,9 @@ protocol RecognizerDelegate: AnyObject {
123
134
  }
124
135
 
125
136
  extension HybridRecognizer: RecognizerDelegate {
137
+ func setHardwareFormat(format: AVAudioFormat) {
138
+ hardwareFormat = format
139
+ }
126
140
  func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?) {
127
141
  if let newConfig {
128
142
  config = SpeechRecognitionConfig(
@@ -193,7 +207,6 @@ extension HybridRecognizer: RecognizerDelegate {
193
207
  }
194
208
 
195
209
  func volumeChange(event: VolumeChangeEvent) {
196
- // self.lg.log("[onVolumeChange] \(event.rawVolume)")
197
210
  if onVolumeChange != nil {
198
211
  onVolumeChangeFallback = onVolumeChange
199
212
  }
@@ -212,7 +225,7 @@ extension HybridRecognizer: RecognizerDelegate {
212
225
  engine = nil
213
226
  // Try to prewarm with another candidate
214
227
  if forPrewarm {
215
- self.prewarm(defaultParams: config)
228
+ self.prewarm(defaultParams: config, options: prewarmOptions)
216
229
  } else {
217
230
  // Try to start with another candidate
218
231
  self.startListening(params: config)