@gmessier/nitro-speech 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -4
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AudioPermissionRequester.kt +12 -6
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +57 -39
- package/ios/Engines/AnalyzerEngine.swift +11 -7
- package/ios/Engines/RecognizerEngine.swift +140 -82
- package/ios/Engines/SFSpeechEngine.swift +7 -3
- package/ios/HybridRecognizer.swift +18 -5
- package/ios/Shared/Permissions.swift +12 -47
- package/lib/Recognizer/methods.d.ts +10 -10
- package/lib/Recognizer/methods.js +2 -2
- package/lib/Recognizer/types.d.ts +2 -1
- package/lib/index.d.ts +1 -1
- package/lib/specs/Recognizer.nitro.d.ts +2 -1
- package/lib/specs/SpeechRecognitionConfig.d.ts +2 -0
- package/lib/specs/SpeechRecognitionPrewarm.d.ts +10 -0
- package/lib/specs/SpeechRecognitionPrewarm.js +1 -0
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +7 -3
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +1 -1
- package/nitrogen/generated/android/c++/JSpeechRecognitionPrewarm.hpp +57 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionPrewarm.kt +51 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +18 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +3 -0
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +5 -2
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +2 -2
- package/nitrogen/generated/ios/swift/SpeechRecognitionPrewarm.swift +42 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +4 -1
- package/nitrogen/generated/shared/c++/SpeechRecognitionPrewarm.hpp +83 -0
- package/package.json +1 -1
- package/src/Recognizer/methods.ts +38 -31
- package/src/Recognizer/types.ts +2 -0
- package/src/index.ts +1 -0
- package/src/specs/Recognizer.nitro.ts +5 -1
- package/src/specs/SpeechRecognitionConfig.ts +2 -0
- package/src/specs/SpeechRecognitionPrewarm.ts +10 -0
package/README.md
CHANGED
|
@@ -126,7 +126,7 @@ Both permissions are required for speech recognition to work on iOS.
|
|
|
126
126
|
| **Reset Auto-finish Time** | Resets the Timer to the threshold | ✅ | ✅ |
|
|
127
127
|
| **Voice input volume** | `useVoiceInputVolume`, `getVoiceInputVolume()`, `onVolumeChange` | ✅ | ✅ |
|
|
128
128
|
| **Reset Auto-finish Sensitivity** | The voice detector sensitivity to reset the Auto-finish time | ✅ | ✅ |
|
|
129
|
-
| **Prewarm** | Prepares resources, downloads assets, confirms locale availability | ✅ | ✅ |
|
|
129
|
+
| **Prewarm** | Prepares resources, downloads assets, confirms locale availability, requests permissions | ✅ | ✅ |
|
|
130
130
|
| **Update config** | Static method `updateConfig` allows updating the config on the fly | ✅ | ✅ |
|
|
131
131
|
| **Is Active** | Static method `getIsActive()` | ✅ | ✅ |
|
|
132
132
|
| **Haptic feedback** | Haptic feedback on recording start/stop | ✅ | ✅ |
|
|
@@ -230,6 +230,17 @@ function MyComponent() {
|
|
|
230
230
|
)>
|
|
231
231
|
<Text>Update Timer to 12s, 500ms interval, 0.65 sensitivity, with reset</Text>
|
|
232
232
|
</TouchableOpacity>
|
|
233
|
+
<TouchableOpacity
|
|
234
|
+
onPress={() => {
|
|
235
|
+
scheduleOnRuntime(workletRuntime, () => {
|
|
236
|
+
RecognizerRef.prewarm({
|
|
237
|
+
iosPreset: 'speed',
|
|
238
|
+
}, { requestPermission: true });
|
|
239
|
+
});
|
|
240
|
+
}}
|
|
241
|
+
>
|
|
242
|
+
<Text>Prewarm from worklet with permission request (default behavior)</Text>
|
|
243
|
+
</TouchableOpacity>
|
|
233
244
|
</View>
|
|
234
245
|
);
|
|
235
246
|
}
|
|
@@ -262,7 +273,7 @@ If you need to call recognizer methods from other components without prop drilli
|
|
|
262
273
|
```typescript
|
|
263
274
|
import { RecognizerRef } from '@gmessier/nitro-speech';
|
|
264
275
|
|
|
265
|
-
RecognizerRef.prewarm({ locale: 'en-US' });
|
|
276
|
+
RecognizerRef.prewarm({ locale: 'en-US' }, { requestPermission: true });
|
|
266
277
|
RecognizerRef.startListening({ locale: 'en-US' });
|
|
267
278
|
RecognizerRef.addAutoFinishTime(5000);
|
|
268
279
|
RecognizerRef.resetAutoFinishTime();
|
|
@@ -407,7 +418,8 @@ SpeechRecognizer.onVolumeChange = (volume) => {
|
|
|
407
418
|
SpeechRecognizer.prewarm({
|
|
408
419
|
locale: 'en-US',
|
|
409
420
|
// ... your config to prepare
|
|
410
|
-
});
|
|
421
|
+
}, { requestPermission: true });
|
|
422
|
+
);
|
|
411
423
|
// OR `await` if you want to react to the success
|
|
412
424
|
await SpeechRecognizer.prewarm({
|
|
413
425
|
locale: 'en-US',
|
|
@@ -418,7 +430,7 @@ scheduleOnRuntime(workletRuntime, () => {
|
|
|
418
430
|
SpeechRecognizer.prewarm({
|
|
419
431
|
locale: 'en-US',
|
|
420
432
|
// ... your config to prepare
|
|
421
|
-
});
|
|
433
|
+
}, { requestPermission: false });
|
|
422
434
|
});
|
|
423
435
|
|
|
424
436
|
// Start listening
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AudioPermissionRequester.kt
CHANGED
|
@@ -6,6 +6,8 @@ import android.content.pm.PackageManager
|
|
|
6
6
|
import androidx.activity.ComponentActivity
|
|
7
7
|
import androidx.activity.result.contract.ActivityResultContracts
|
|
8
8
|
import androidx.core.content.ContextCompat
|
|
9
|
+
import kotlinx.coroutines.suspendCancellableCoroutine
|
|
10
|
+
import kotlin.coroutines.resume
|
|
9
11
|
|
|
10
12
|
class AudioPermissionRequester (
|
|
11
13
|
private val activity: Activity
|
|
@@ -16,12 +18,13 @@ class AudioPermissionRequester (
|
|
|
16
18
|
private var callback: ((Boolean) -> Unit)? = null
|
|
17
19
|
|
|
18
20
|
private val launcher = componentActivity.activityResultRegistry.register(
|
|
19
|
-
"record_audio_key",
|
|
21
|
+
"record_audio_key",
|
|
22
|
+
ActivityResultContracts.RequestPermission()
|
|
20
23
|
) { granted ->
|
|
21
24
|
callback?.invoke(granted)
|
|
22
25
|
}
|
|
23
26
|
|
|
24
|
-
fun checkAndRequest(
|
|
27
|
+
suspend fun checkAndRequest(): Boolean {
|
|
25
28
|
val audioGranted =
|
|
26
29
|
ContextCompat.checkSelfPermission(
|
|
27
30
|
activity,
|
|
@@ -29,11 +32,14 @@ class AudioPermissionRequester (
|
|
|
29
32
|
) == PackageManager.PERMISSION_GRANTED
|
|
30
33
|
|
|
31
34
|
if (audioGranted) {
|
|
32
|
-
|
|
33
|
-
return
|
|
35
|
+
return true
|
|
34
36
|
}
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
return suspendCancellableCoroutine { cont ->
|
|
39
|
+
callback = { granted ->
|
|
40
|
+
if (cont.isActive) cont.resume(granted)
|
|
41
|
+
}
|
|
42
|
+
launcher.launch(recordAudioPermission)
|
|
43
|
+
}
|
|
38
44
|
}
|
|
39
45
|
}
|
|
@@ -14,6 +14,7 @@ import com.margelo.nitro.core.Promise
|
|
|
14
14
|
import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
|
|
15
15
|
import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
|
|
16
16
|
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
17
|
+
import com.margelo.nitro.nitrospeech.SpeechRecognitionPrewarm
|
|
17
18
|
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
18
19
|
|
|
19
20
|
@DoNotStrip
|
|
@@ -43,52 +44,27 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
43
44
|
|
|
44
45
|
@DoNotStrip
|
|
45
46
|
@Keep
|
|
46
|
-
override fun prewarm(
|
|
47
|
-
|
|
47
|
+
override fun prewarm(
|
|
48
|
+
defaultParams: SpeechRecognitionConfig?,
|
|
49
|
+
options: SpeechRecognitionPrewarm?
|
|
50
|
+
): Promise<Unit> {
|
|
51
|
+
logger.log("prewarm called")
|
|
48
52
|
// nothing to prewarm
|
|
49
|
-
|
|
53
|
+
// only request permissions
|
|
54
|
+
return Promise.async {
|
|
55
|
+
// Enabled by default for user
|
|
56
|
+
if (options?.requestPermission != false) {
|
|
57
|
+
preparePermissions(null, isPrewarm = true)
|
|
58
|
+
}
|
|
59
|
+
}
|
|
50
60
|
}
|
|
51
61
|
|
|
52
62
|
@DoNotStrip
|
|
53
63
|
@Keep
|
|
54
64
|
override fun startListening(params: SpeechRecognitionConfig?) {
|
|
55
65
|
logger.log("startListening: $params")
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
null,
|
|
59
|
-
"Error at startListening: Previous SpeechRecognizer is still active",
|
|
60
|
-
false
|
|
61
|
-
)
|
|
62
|
-
return
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
val context = NitroModules.applicationContext
|
|
66
|
-
if (context == null) {
|
|
67
|
-
onFinishRecognition(
|
|
68
|
-
null,
|
|
69
|
-
"Error at startListening: Context not available",
|
|
70
|
-
true
|
|
71
|
-
)
|
|
72
|
-
return
|
|
73
|
-
}
|
|
74
|
-
val activity = context.currentActivity
|
|
75
|
-
if (activity == null) {
|
|
76
|
-
onFinishRecognition(
|
|
77
|
-
null,
|
|
78
|
-
"Error at startListening: Activity not found",
|
|
79
|
-
true
|
|
80
|
-
)
|
|
81
|
-
return
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
val permissionRequester = AudioPermissionRequester(activity)
|
|
85
|
-
permissionRequester.checkAndRequest { granted ->
|
|
86
|
-
if (!granted) {
|
|
87
|
-
onPermissionDenied?.invoke()
|
|
88
|
-
return@checkAndRequest
|
|
89
|
-
}
|
|
90
|
-
config = params
|
|
91
|
-
start(context)
|
|
66
|
+
Promise.async {
|
|
67
|
+
preparePermissions(params, isPrewarm = false)
|
|
92
68
|
}
|
|
93
69
|
}
|
|
94
70
|
|
|
@@ -196,6 +172,48 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
196
172
|
stopListening()
|
|
197
173
|
}
|
|
198
174
|
|
|
175
|
+
private suspend fun preparePermissions(params: SpeechRecognitionConfig?, isPrewarm: Boolean) {
|
|
176
|
+
if (isActive) {
|
|
177
|
+
onFinishRecognition(
|
|
178
|
+
null,
|
|
179
|
+
"Error: SpeechRecognizer is already active",
|
|
180
|
+
false
|
|
181
|
+
)
|
|
182
|
+
return
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
val context = NitroModules.applicationContext
|
|
186
|
+
if (context == null) {
|
|
187
|
+
onFinishRecognition(
|
|
188
|
+
null,
|
|
189
|
+
"Error: Context not available",
|
|
190
|
+
true
|
|
191
|
+
)
|
|
192
|
+
return
|
|
193
|
+
}
|
|
194
|
+
val activity = context.currentActivity
|
|
195
|
+
if (activity == null) {
|
|
196
|
+
onFinishRecognition(
|
|
197
|
+
null,
|
|
198
|
+
"Error: Activity not found",
|
|
199
|
+
true
|
|
200
|
+
)
|
|
201
|
+
return
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
val permissionRequester = AudioPermissionRequester(activity)
|
|
205
|
+
val granted = permissionRequester.checkAndRequest()
|
|
206
|
+
if (!granted) {
|
|
207
|
+
onPermissionDenied?.invoke()
|
|
208
|
+
return
|
|
209
|
+
}
|
|
210
|
+
if (isPrewarm) {
|
|
211
|
+
return
|
|
212
|
+
}
|
|
213
|
+
config = params
|
|
214
|
+
start(context)
|
|
215
|
+
}
|
|
216
|
+
|
|
199
217
|
private fun start(context: Context) {
|
|
200
218
|
mainHandler.post {
|
|
201
219
|
try {
|
|
@@ -46,17 +46,21 @@ final class AnalyzerEngine: RecognizerEngine {
|
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
override func prewarm(for type:
|
|
50
|
-
await super.prewarm(for: type)
|
|
49
|
+
override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
|
|
50
|
+
await super.prewarm(for: type, options)
|
|
51
51
|
do {
|
|
52
52
|
// Create transcriber and install assets
|
|
53
53
|
try await transcriber.create(config: self.recognizerDelegate?.config)
|
|
54
54
|
}
|
|
55
55
|
catch {
|
|
56
|
+
let failureType: FailureType = switch type {
|
|
57
|
+
case .prewarm: .prewarm
|
|
58
|
+
case .start: .start
|
|
59
|
+
}
|
|
56
60
|
self.reportFailure(
|
|
57
61
|
from: "prewarm.assets",
|
|
58
62
|
message: "Failed to create transcriber",
|
|
59
|
-
type:
|
|
63
|
+
type: failureType
|
|
60
64
|
)
|
|
61
65
|
}
|
|
62
66
|
}
|
|
@@ -93,7 +97,7 @@ final class AnalyzerEngine: RecognizerEngine {
|
|
|
93
97
|
self?.outputContinuation?.yield(buffer)
|
|
94
98
|
}
|
|
95
99
|
)
|
|
96
|
-
guard let hardwareFormat else { return }
|
|
100
|
+
guard let hardwareFormat = recognizerDelegate?.hardwareFormat else { return }
|
|
97
101
|
let stream = AsyncStream(
|
|
98
102
|
AVAudioPCMBuffer.self,
|
|
99
103
|
bufferingPolicy: .unbounded
|
|
@@ -225,14 +229,14 @@ final class AnalyzerEngine: RecognizerEngine {
|
|
|
225
229
|
if !disableRepeatingFilter {
|
|
226
230
|
newBatch = Utils.repeatingFilter(newBatch)
|
|
227
231
|
}
|
|
228
|
-
Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
|
|
232
|
+
// Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
|
|
229
233
|
if self.resultBatches.isEmpty {
|
|
230
234
|
self.resultBatches.append(newBatch)
|
|
231
235
|
} else if CMTimeGetSeconds(rangeStart) == self.lastBatchStartTime || isFinal {
|
|
232
|
-
Log.log("[2] replace, isFinal: \(isFinal)")
|
|
236
|
+
// Log.log("[2] replace, isFinal: \(isFinal)")
|
|
233
237
|
self.resultBatches[self.resultBatches.count - 1] = newBatch
|
|
234
238
|
} else {
|
|
235
|
-
Log.log("[2] add new batch")
|
|
239
|
+
// Log.log("[2] add new batch")
|
|
236
240
|
self.resultBatches.append(newBatch)
|
|
237
241
|
}
|
|
238
242
|
self.lastBatchStartTime = CMTimeGetSeconds(rangeStart)
|
|
@@ -12,10 +12,14 @@ enum FailureType {
|
|
|
12
12
|
case onSession
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
enum PrewarmType {
|
|
16
|
+
case start
|
|
17
|
+
case prewarm
|
|
18
|
+
}
|
|
19
|
+
|
|
15
20
|
class RecognizerEngine {
|
|
16
21
|
var isActive = false
|
|
17
22
|
var isStopping = false
|
|
18
|
-
var hardwareFormat: AVAudioFormat?
|
|
19
23
|
weak var recognizerDelegate: RecognizerDelegate?
|
|
20
24
|
|
|
21
25
|
private let audioLevelTracker = AudioLevelTracker()
|
|
@@ -33,21 +37,48 @@ class RecognizerEngine {
|
|
|
33
37
|
|
|
34
38
|
// MARK: - Recognizer Methods
|
|
35
39
|
|
|
36
|
-
func prewarm(for:
|
|
37
|
-
|
|
40
|
+
func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
|
|
41
|
+
// Prepare audioEngine
|
|
42
|
+
audioEngine = AVAudioEngine()
|
|
43
|
+
lg.log("[prewarm.audioEngine]")
|
|
44
|
+
|
|
45
|
+
guard let recognizerDelegate else { return }
|
|
46
|
+
|
|
47
|
+
// Everything is set, return early
|
|
48
|
+
if type == .prewarm, recognizerDelegate.hardwareFormat != nil {
|
|
49
|
+
lg.log("[prewarm.return]: Everything set")
|
|
50
|
+
return
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// User explicitly asked for prewarm without requesting permissions, return early
|
|
54
|
+
if type == .prewarm, options?.requestPermission == false {
|
|
55
|
+
lg.log("[prewarm.return]: requestPermission: false")
|
|
56
|
+
return
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
if type == .prewarm {
|
|
60
|
+
// options.requestPermission is true by default
|
|
61
|
+
// Start Permission sequence
|
|
62
|
+
let granted = await requestPermissions()
|
|
63
|
+
if granted {
|
|
64
|
+
self.prewarmAudioSession(for: type)
|
|
65
|
+
}
|
|
66
|
+
} else {
|
|
67
|
+
self.prewarmAudioSession(for: type)
|
|
68
|
+
}
|
|
69
|
+
|
|
38
70
|
// for SpeechTranscriber: .isAvailable and async assets
|
|
39
71
|
// for Dictation: only async assets
|
|
40
72
|
// for legacy SF: only sync .isAvailable
|
|
41
73
|
}
|
|
42
74
|
|
|
43
|
-
func start() {
|
|
44
|
-
guard
|
|
75
|
+
func start() async {
|
|
76
|
+
guard !isActive else { return }
|
|
45
77
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
).requestAuthorization()
|
|
78
|
+
let granted = await requestPermissions()
|
|
79
|
+
if granted {
|
|
80
|
+
await startSession()
|
|
81
|
+
}
|
|
51
82
|
}
|
|
52
83
|
|
|
53
84
|
func stop() {
|
|
@@ -56,6 +87,55 @@ class RecognizerEngine {
|
|
|
56
87
|
HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
|
|
57
88
|
}
|
|
58
89
|
|
|
90
|
+
func updateSession(
|
|
91
|
+
newConfig: MutableSpeechRecognitionConfig? = nil,
|
|
92
|
+
addMsToTimer: Double? = nil,
|
|
93
|
+
resetTimer: Bool? = nil
|
|
94
|
+
) {
|
|
95
|
+
guard let recognizerDelegate, isActive, !isStopping else { return }
|
|
96
|
+
let currentConfig = recognizerDelegate.config
|
|
97
|
+
// Update AutoFinish time
|
|
98
|
+
if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
|
|
99
|
+
newAutoFinish != currentConfig?.autoFinishRecognitionMs {
|
|
100
|
+
autoStopper?.updateThreshold(
|
|
101
|
+
newAutoFinish,
|
|
102
|
+
from: "updateSession"
|
|
103
|
+
)
|
|
104
|
+
}
|
|
105
|
+
// Update AutoFinish progress interval
|
|
106
|
+
if let newInterval = newConfig?.autoFinishProgressIntervalMs,
|
|
107
|
+
newInterval != currentConfig?.autoFinishProgressIntervalMs {
|
|
108
|
+
autoStopper?.updateProgressInterval(
|
|
109
|
+
newInterval,
|
|
110
|
+
from: "updateSession"
|
|
111
|
+
)
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
if let addMsToTimer {
|
|
115
|
+
// Add time to the timer once
|
|
116
|
+
autoStopper?.addMsOnce(
|
|
117
|
+
addMsToTimer,
|
|
118
|
+
from: "updateSession"
|
|
119
|
+
)
|
|
120
|
+
} else if resetTimer == true {
|
|
121
|
+
// Reset to current baseline threshold.
|
|
122
|
+
autoStopper?.resetTimer(from: "updateSession")
|
|
123
|
+
}
|
|
124
|
+
// Only update new non-nil values in the config
|
|
125
|
+
recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
func getVoiceInputVolume() -> VolumeChangeEvent? {
|
|
129
|
+
guard let currentSample = audioLevelTracker.currentSample else { return nil }
|
|
130
|
+
return VolumeChangeEvent(
|
|
131
|
+
smoothedVolume: currentSample.smoothed,
|
|
132
|
+
rawVolume: currentSample.raw,
|
|
133
|
+
db: currentSample.db
|
|
134
|
+
)
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// MARK: Helpers
|
|
138
|
+
|
|
59
139
|
func startSession() async {
|
|
60
140
|
lg.log("[startSession.startSession]")
|
|
61
141
|
// Init everything
|
|
@@ -66,15 +146,13 @@ class RecognizerEngine {
|
|
|
66
146
|
lg.log("[startSession.initAutoStop]")
|
|
67
147
|
startAppStateObserver()
|
|
68
148
|
lg.log("[startSession.startAppStateObserver]")
|
|
69
|
-
startAudioSession()
|
|
70
|
-
lg.log("[startSession.startAudioSession]")
|
|
71
149
|
}
|
|
72
150
|
|
|
73
151
|
func startAudioEngine(
|
|
74
152
|
onBuffer: @escaping (AVAudioPCMBuffer) -> Void
|
|
75
153
|
) {
|
|
76
154
|
lg.log("[startAudioEngine]")
|
|
77
|
-
guard let audioEngine, let hardwareFormat else { return }
|
|
155
|
+
guard let audioEngine, let hardwareFormat = self.recognizerDelegate?.hardwareFormat else { return }
|
|
78
156
|
audioEngine.inputNode.installTap(
|
|
79
157
|
onBus: 0,
|
|
80
158
|
bufferSize: 1024,
|
|
@@ -124,53 +202,6 @@ class RecognizerEngine {
|
|
|
124
202
|
recognizerDelegate.readyForSpeech()
|
|
125
203
|
recognizerDelegate.result(batches: [])
|
|
126
204
|
}
|
|
127
|
-
|
|
128
|
-
func updateSession(
|
|
129
|
-
newConfig: MutableSpeechRecognitionConfig? = nil,
|
|
130
|
-
addMsToTimer: Double? = nil,
|
|
131
|
-
resetTimer: Bool? = nil
|
|
132
|
-
) {
|
|
133
|
-
guard let recognizerDelegate, isActive, !isStopping else { return }
|
|
134
|
-
let currentConfig = recognizerDelegate.config
|
|
135
|
-
// Update AutoFinish time
|
|
136
|
-
if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
|
|
137
|
-
newAutoFinish != currentConfig?.autoFinishRecognitionMs {
|
|
138
|
-
autoStopper?.updateThreshold(
|
|
139
|
-
newAutoFinish,
|
|
140
|
-
from: "updateSession"
|
|
141
|
-
)
|
|
142
|
-
}
|
|
143
|
-
// Update AutoFinish progress interval
|
|
144
|
-
if let newInterval = newConfig?.autoFinishProgressIntervalMs,
|
|
145
|
-
newInterval != currentConfig?.autoFinishProgressIntervalMs {
|
|
146
|
-
autoStopper?.updateProgressInterval(
|
|
147
|
-
newInterval,
|
|
148
|
-
from: "updateSession"
|
|
149
|
-
)
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
if let addMsToTimer {
|
|
153
|
-
// Add time to the timer once
|
|
154
|
-
autoStopper?.addMsOnce(
|
|
155
|
-
addMsToTimer,
|
|
156
|
-
from: "updateSession"
|
|
157
|
-
)
|
|
158
|
-
} else if resetTimer == true {
|
|
159
|
-
// Reset to current baseline threshold.
|
|
160
|
-
autoStopper?.resetTimer(from: "updateSession")
|
|
161
|
-
}
|
|
162
|
-
// Only update new non-nil values in the config
|
|
163
|
-
recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
func getVoiceInputVolume() -> VolumeChangeEvent? {
|
|
167
|
-
guard let currentSample = audioLevelTracker.currentSample else { return nil }
|
|
168
|
-
return VolumeChangeEvent(
|
|
169
|
-
smoothedVolume: currentSample.smoothed,
|
|
170
|
-
rawVolume: currentSample.raw,
|
|
171
|
-
db: currentSample.db
|
|
172
|
-
)
|
|
173
|
-
}
|
|
174
205
|
|
|
175
206
|
func cleanup(from: String) {
|
|
176
207
|
lg.log("[cleanup]: \(from)")
|
|
@@ -226,29 +257,32 @@ class RecognizerEngine {
|
|
|
226
257
|
}
|
|
227
258
|
}
|
|
228
259
|
|
|
229
|
-
// MARK:
|
|
260
|
+
// MARK: Permissions
|
|
230
261
|
|
|
231
|
-
private func
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
message: "Audio Engine failed to initiate",
|
|
238
|
-
// RecognizerEngine-agnostic Error
|
|
239
|
-
type: .system
|
|
240
|
-
)
|
|
241
|
-
return
|
|
262
|
+
private func requestPermissions() async -> Bool {
|
|
263
|
+
guard let recognizerDelegate else { return false }
|
|
264
|
+
let authStatus = await Permissions.requestAuthorization()
|
|
265
|
+
if authStatus == .denied || authStatus == .restricted {
|
|
266
|
+
recognizerDelegate.permissionDenied()
|
|
267
|
+
return false
|
|
242
268
|
}
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
269
|
+
|
|
270
|
+
if authStatus != .authorized {
|
|
271
|
+
// .notDetermined or unknown issue
|
|
272
|
+
recognizerDelegate.error(message: "Speech recognition permission is not determined")
|
|
273
|
+
return false
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if !(await Permissions.requestMicrophonePermission()) {
|
|
277
|
+
recognizerDelegate.permissionDenied()
|
|
278
|
+
return false
|
|
248
279
|
}
|
|
280
|
+
|
|
281
|
+
// Granted
|
|
282
|
+
return true
|
|
249
283
|
}
|
|
250
284
|
|
|
251
|
-
// MARK:
|
|
285
|
+
// MARK: Auto Stopper
|
|
252
286
|
|
|
253
287
|
private func initAutoStop() {
|
|
254
288
|
let config = self.recognizerDelegate?.config
|
|
@@ -271,7 +305,7 @@ class RecognizerEngine {
|
|
|
271
305
|
autoStopper = nil
|
|
272
306
|
}
|
|
273
307
|
|
|
274
|
-
// MARK:
|
|
308
|
+
// MARK: App State Observer
|
|
275
309
|
|
|
276
310
|
private func startAppStateObserver() {
|
|
277
311
|
appStateObserver = AppStateObserver { [weak self] in
|
|
@@ -285,12 +319,37 @@ class RecognizerEngine {
|
|
|
285
319
|
appStateObserver = nil
|
|
286
320
|
}
|
|
287
321
|
|
|
288
|
-
// MARK:
|
|
322
|
+
// MARK: Audio Session
|
|
323
|
+
|
|
324
|
+
private func prewarmAudioSession(for type: PrewarmType) {
|
|
325
|
+
guard let audioEngine else {
|
|
326
|
+
self.reportFailure(
|
|
327
|
+
from: "Audio Engine",
|
|
328
|
+
message: "Audio Engine failed to initiate",
|
|
329
|
+
// RecognizerEngine-agnostic Error
|
|
330
|
+
type: .system
|
|
331
|
+
)
|
|
332
|
+
return
|
|
333
|
+
}
|
|
334
|
+
startAudioSession()
|
|
335
|
+
lg.log("[prewarmAudioSession.audioSession]")
|
|
336
|
+
// heavy first hardwareFormat retrieval
|
|
337
|
+
if let recognizerDelegate, recognizerDelegate.hardwareFormat == nil {
|
|
338
|
+
let format = audioEngine.inputNode.outputFormat(forBus: 0)
|
|
339
|
+
recognizerDelegate.setHardwareFormat(format: format)
|
|
340
|
+
lg.log("[prewarmAudioSession.hardwareFormat]")
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if type == .prewarm {
|
|
344
|
+
stopAudioSession()
|
|
345
|
+
lg.log("[prewarmAudioSession.stopAudioSession]")
|
|
346
|
+
}
|
|
347
|
+
}
|
|
289
348
|
|
|
290
349
|
private func startAudioSession() {
|
|
291
350
|
do {
|
|
292
351
|
let audioSession = AVAudioSession.sharedInstance()
|
|
293
|
-
try audioSession.setCategory(.
|
|
352
|
+
try audioSession.setCategory(.playAndRecord, mode: .default, options: .duckOthers)
|
|
294
353
|
// Required for haptic feedback
|
|
295
354
|
try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
|
|
296
355
|
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
|
|
@@ -305,7 +364,6 @@ class RecognizerEngine {
|
|
|
305
364
|
}
|
|
306
365
|
private func stopAudioSession() {
|
|
307
366
|
do {
|
|
308
|
-
// TODO: check unduck
|
|
309
367
|
try AVAudioSession.sharedInstance().setActive(false)
|
|
310
368
|
} catch {
|
|
311
369
|
// Just log and no-op - not critical
|
|
@@ -15,18 +15,22 @@ final class SFSpeechEngine: RecognizerEngine {
|
|
|
15
15
|
recognitionTask?.finish()
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
override func prewarm(for type:
|
|
18
|
+
override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
|
|
19
19
|
speechRecognizer = SFSpeechRecognizer(
|
|
20
20
|
locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
|
|
21
21
|
)
|
|
22
22
|
if speechRecognizer?.isAvailable != true {
|
|
23
|
+
let failureType: FailureType = switch type {
|
|
24
|
+
case .prewarm: .prewarm
|
|
25
|
+
case .start: .start
|
|
26
|
+
}
|
|
23
27
|
self.reportFailure(
|
|
24
28
|
from: "prewarm",
|
|
25
29
|
message: "SFSpeechRecognizer is not available",
|
|
26
|
-
type:
|
|
30
|
+
type: failureType
|
|
27
31
|
)
|
|
28
32
|
}
|
|
29
|
-
await super.prewarm(for: type)
|
|
33
|
+
await super.prewarm(for: type, options)
|
|
30
34
|
}
|
|
31
35
|
|
|
32
36
|
override func startSession() async {
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import Foundation
|
|
2
2
|
import NitroModules
|
|
3
|
+
import AVFoundation
|
|
3
4
|
|
|
4
5
|
class HybridRecognizer: HybridRecognizerSpec {
|
|
6
|
+
var prewarmOptions: SpeechRecognitionPrewarm?
|
|
5
7
|
var config: SpeechRecognitionConfig?
|
|
8
|
+
var hardwareFormat: AVAudioFormat?
|
|
6
9
|
|
|
7
10
|
var onReadyForSpeech: (() -> Void)?
|
|
8
11
|
var onRecordingStopped: (() -> Void)?
|
|
@@ -27,12 +30,18 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
27
30
|
private let lg = Lg(prefix: "HybridRecognizer")
|
|
28
31
|
|
|
29
32
|
@discardableResult
|
|
30
|
-
func prewarm(
|
|
33
|
+
func prewarm(
|
|
34
|
+
defaultParams: SpeechRecognitionConfig?,
|
|
35
|
+
options: SpeechRecognitionPrewarm?
|
|
36
|
+
) -> Promise<Void> {
|
|
37
|
+
prewarmOptions = options
|
|
31
38
|
return Promise.async(.userInitiated) { [weak self] in
|
|
39
|
+
// Ignore when standalone prewarm triggered for active session
|
|
40
|
+
guard self?.engine?.isActive != true else { return }
|
|
32
41
|
// Ensure correct engine is selected based on params and ios version
|
|
33
42
|
await self?.ensureEngine(params: defaultParams)
|
|
34
43
|
// try to preload assets and check if speech engine is available on OS level
|
|
35
|
-
await self?.engine?.prewarm(for: .prewarm)
|
|
44
|
+
await self?.engine?.prewarm(for: .prewarm, options)
|
|
36
45
|
}
|
|
37
46
|
}
|
|
38
47
|
|
|
@@ -40,7 +49,7 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
40
49
|
Task {
|
|
41
50
|
// Ensure correct engine is selected based on params and ios version
|
|
42
51
|
await ensureEngine(params: params)
|
|
43
|
-
engine?.start()
|
|
52
|
+
await engine?.start()
|
|
44
53
|
}
|
|
45
54
|
}
|
|
46
55
|
|
|
@@ -111,6 +120,8 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
111
120
|
|
|
112
121
|
protocol RecognizerDelegate: AnyObject {
|
|
113
122
|
var config: SpeechRecognitionConfig? { get }
|
|
123
|
+
var hardwareFormat: AVAudioFormat? { get }
|
|
124
|
+
func setHardwareFormat(format: AVAudioFormat)
|
|
114
125
|
func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?)
|
|
115
126
|
func reselectEngine(forPrewarm: Bool)
|
|
116
127
|
func readyForSpeech()
|
|
@@ -123,6 +134,9 @@ protocol RecognizerDelegate: AnyObject {
|
|
|
123
134
|
}
|
|
124
135
|
|
|
125
136
|
extension HybridRecognizer: RecognizerDelegate {
|
|
137
|
+
func setHardwareFormat(format: AVAudioFormat) {
|
|
138
|
+
hardwareFormat = format
|
|
139
|
+
}
|
|
126
140
|
func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?) {
|
|
127
141
|
if let newConfig {
|
|
128
142
|
config = SpeechRecognitionConfig(
|
|
@@ -193,7 +207,6 @@ extension HybridRecognizer: RecognizerDelegate {
|
|
|
193
207
|
}
|
|
194
208
|
|
|
195
209
|
func volumeChange(event: VolumeChangeEvent) {
|
|
196
|
-
// self.lg.log("[onVolumeChange] \(event.rawVolume)")
|
|
197
210
|
if onVolumeChange != nil {
|
|
198
211
|
onVolumeChangeFallback = onVolumeChange
|
|
199
212
|
}
|
|
@@ -212,7 +225,7 @@ extension HybridRecognizer: RecognizerDelegate {
|
|
|
212
225
|
engine = nil
|
|
213
226
|
// Try to prewarm with another candidate
|
|
214
227
|
if forPrewarm {
|
|
215
|
-
self.prewarm(defaultParams: config)
|
|
228
|
+
self.prewarm(defaultParams: config, options: prewarmOptions)
|
|
216
229
|
} else {
|
|
217
230
|
// Try to start with another candidate
|
|
218
231
|
self.startListening(params: config)
|