@gmessier/nitro-speech 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -7
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AudioPermissionRequester.kt +12 -6
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +57 -39
- package/ios/Audio/AudioLevelTracker.swift +1 -1
- package/ios/Coordinator.swift +1 -0
- package/ios/Engines/AnalyzerEngine.swift +11 -7
- package/ios/Engines/RecognizerEngine.swift +140 -82
- package/ios/Engines/SFSpeechEngine.swift +7 -3
- package/ios/HybridRecognizer.swift +49 -8
- package/ios/Shared/AutoStopper.swift +1 -1
- package/ios/Shared/Permissions.swift +12 -47
- package/ios/Shared/Utils.swift +2 -1
- package/lib/Recognizer/methods.d.ts +10 -10
- package/lib/Recognizer/methods.js +2 -2
- package/lib/Recognizer/types.d.ts +2 -1
- package/lib/index.d.ts +1 -1
- package/lib/specs/Recognizer.nitro.d.ts +2 -1
- package/lib/specs/SpeechRecognitionConfig.d.ts +7 -3
- package/lib/specs/SpeechRecognitionPrewarm.d.ts +10 -0
- package/lib/specs/SpeechRecognitionPrewarm.js +1 -0
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +7 -3
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +1 -1
- package/nitrogen/generated/android/c++/JIosPreset.hpp +3 -0
- package/nitrogen/generated/android/c++/JSpeechRecognitionPrewarm.hpp +57 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +2 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionPrewarm.kt +51 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +18 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +3 -0
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +5 -2
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +1 -1
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +2 -2
- package/nitrogen/generated/ios/swift/IosPreset.swift +4 -0
- package/nitrogen/generated/ios/swift/SpeechRecognitionPrewarm.swift +42 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +4 -1
- package/nitrogen/generated/shared/c++/IosPreset.hpp +4 -0
- package/nitrogen/generated/shared/c++/SpeechRecognitionPrewarm.hpp +83 -0
- package/package.json +1 -1
- package/src/Recognizer/methods.ts +38 -31
- package/src/Recognizer/types.ts +2 -0
- package/src/index.ts +1 -0
- package/src/specs/Recognizer.nitro.ts +5 -1
- package/src/specs/SpeechRecognitionConfig.ts +7 -3
- package/src/specs/SpeechRecognitionPrewarm.ts +10 -0
package/README.md
CHANGED
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
- 👆 Configurable Haptic Feedback on start and finish
|
|
35
35
|
- 🎚️ Speech-quality configurations:
|
|
36
36
|
- Result is grouped by speech segments into Batches.
|
|
37
|
-
- Param `iosPreset` -
|
|
37
|
+
- Param `iosPreset` - enables best transcriber for your situation
|
|
38
38
|
- Param `disableRepeatingFilter` - filters out consecutive duplicate words.
|
|
39
39
|
- Param `androidDisableBatchHandling` - disables empty partial results
|
|
40
40
|
- Many more, see `SpeechRecognitionConfig`
|
|
@@ -126,7 +126,7 @@ Both permissions are required for speech recognition to work on iOS.
|
|
|
126
126
|
| **Reset Auto-finish Time** | Resets the Timer to the threshold | ✅ | ✅ |
|
|
127
127
|
| **Voice input volume** | `useVoiceInputVolume`, `getVoiceInputVolume()`, `onVolumeChange` | ✅ | ✅ |
|
|
128
128
|
| **Reset Auto-finish Sensitivity** | The voice detector sensitivity to reset the Auto-finish time | ✅ | ✅ |
|
|
129
|
-
| **Prewarm** | Prepares resources, downloads assets, confirms locale availability | ✅ | ✅ |
|
|
129
|
+
| **Prewarm** | Prepares resources, downloads assets, confirms locale availability, requests permissions | ✅ | ✅ |
|
|
130
130
|
| **Update config** | Static method `updateConfig` allows updating the config on the fly | ✅ | ✅ |
|
|
131
131
|
| **Is Active** | Static method `getIsActive()` | ✅ | ✅ |
|
|
132
132
|
| **Haptic feedback** | Haptic feedback on recording start/stop | ✅ | ✅ |
|
|
@@ -138,7 +138,7 @@ Both permissions are required for speech recognition to work on iOS.
|
|
|
138
138
|
| **Language model selection** | Choose between web search vs free-form models | Auto | ✅ |
|
|
139
139
|
| **Batch handling** | Filters out empty or repeated results | Auto | ✅ |
|
|
140
140
|
| **Formatting quality** | Prefer quality vs speed in formatting | Auto | ✅ |
|
|
141
|
-
| **Transcription preset** | `iosPreset` adapts for
|
|
141
|
+
| **Transcription preset** | `iosPreset` adapts for different scenarios | ✅ | Auto |
|
|
142
142
|
| **Automatic punctuation** | Adds punctuation to transcription (iOS 16+) | ✅ | Auto |
|
|
143
143
|
| **Atypical speech hint** | Hint iOS that speech may include accent, lisp, or other confounding traits | ✅ | Auto |
|
|
144
144
|
| **getSupportedLocalesIOS** | Supported locales for iOS (No available API for Android) | ✅ | X |
|
|
@@ -230,12 +230,23 @@ function MyComponent() {
|
|
|
230
230
|
)>
|
|
231
231
|
<Text>Update Timer to 12s, 500ms interval, 0.65 sensitivity, with reset</Text>
|
|
232
232
|
</TouchableOpacity>
|
|
233
|
+
<TouchableOpacity
|
|
234
|
+
onPress={() => {
|
|
235
|
+
scheduleOnRuntime(workletRuntime, () => {
|
|
236
|
+
RecognizerRef.prewarm({
|
|
237
|
+
iosPreset: 'speed',
|
|
238
|
+
}, { requestPermission: true });
|
|
239
|
+
});
|
|
240
|
+
}}
|
|
241
|
+
>
|
|
242
|
+
<Text>Prewarm from worklet with permission request (default behavior)</Text>
|
|
243
|
+
</TouchableOpacity>
|
|
233
244
|
</View>
|
|
234
245
|
);
|
|
235
246
|
}
|
|
236
247
|
```
|
|
237
248
|
|
|
238
|
-
On iOS 26+, the recognizer prefers the newer `SpeechTranscriber` path for general cases. Setting `iosPreset: 'shortForm'`, `iosAddPunctuation: false`, or `iosAtypicalSpeech: true` switches priority to `DictationTranscriber` that is better suited for short utterances or non-standard speech patterns.
|
|
249
|
+
On iOS 26+, the recognizer prefers the newer `SpeechTranscriber` path for general cases. Setting `iosPreset: 'shortForm' OR 'speed'`, `iosAddPunctuation: false`, or `iosAtypicalSpeech: true` switches priority to `DictationTranscriber` that is better suited for short utterances or non-standard speech patterns.
|
|
239
250
|
|
|
240
251
|
### With React Navigation (important)
|
|
241
252
|
|
|
@@ -262,7 +273,7 @@ If you need to call recognizer methods from other components without prop drilli
|
|
|
262
273
|
```typescript
|
|
263
274
|
import { RecognizerRef } from '@gmessier/nitro-speech';
|
|
264
275
|
|
|
265
|
-
RecognizerRef.prewarm({ locale: 'en-US' });
|
|
276
|
+
RecognizerRef.prewarm({ locale: 'en-US' }, { requestPermission: true });
|
|
266
277
|
RecognizerRef.startListening({ locale: 'en-US' });
|
|
267
278
|
RecognizerRef.addAutoFinishTime(5000);
|
|
268
279
|
RecognizerRef.resetAutoFinishTime();
|
|
@@ -407,7 +418,8 @@ SpeechRecognizer.onVolumeChange = (volume) => {
|
|
|
407
418
|
SpeechRecognizer.prewarm({
|
|
408
419
|
locale: 'en-US',
|
|
409
420
|
// ... your config to prepare
|
|
410
|
-
});
|
|
421
|
+
}, { requestPermission: true });
|
|
422
|
+
);
|
|
411
423
|
// OR `await` if you want to react to the success
|
|
412
424
|
await SpeechRecognizer.prewarm({
|
|
413
425
|
locale: 'en-US',
|
|
@@ -418,7 +430,7 @@ scheduleOnRuntime(workletRuntime, () => {
|
|
|
418
430
|
SpeechRecognizer.prewarm({
|
|
419
431
|
locale: 'en-US',
|
|
420
432
|
// ... your config to prepare
|
|
421
|
-
});
|
|
433
|
+
}, { requestPermission: false });
|
|
422
434
|
});
|
|
423
435
|
|
|
424
436
|
// Start listening
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AudioPermissionRequester.kt
CHANGED
|
@@ -6,6 +6,8 @@ import android.content.pm.PackageManager
|
|
|
6
6
|
import androidx.activity.ComponentActivity
|
|
7
7
|
import androidx.activity.result.contract.ActivityResultContracts
|
|
8
8
|
import androidx.core.content.ContextCompat
|
|
9
|
+
import kotlinx.coroutines.suspendCancellableCoroutine
|
|
10
|
+
import kotlin.coroutines.resume
|
|
9
11
|
|
|
10
12
|
class AudioPermissionRequester (
|
|
11
13
|
private val activity: Activity
|
|
@@ -16,12 +18,13 @@ class AudioPermissionRequester (
|
|
|
16
18
|
private var callback: ((Boolean) -> Unit)? = null
|
|
17
19
|
|
|
18
20
|
private val launcher = componentActivity.activityResultRegistry.register(
|
|
19
|
-
"record_audio_key",
|
|
21
|
+
"record_audio_key",
|
|
22
|
+
ActivityResultContracts.RequestPermission()
|
|
20
23
|
) { granted ->
|
|
21
24
|
callback?.invoke(granted)
|
|
22
25
|
}
|
|
23
26
|
|
|
24
|
-
fun checkAndRequest(
|
|
27
|
+
suspend fun checkAndRequest(): Boolean {
|
|
25
28
|
val audioGranted =
|
|
26
29
|
ContextCompat.checkSelfPermission(
|
|
27
30
|
activity,
|
|
@@ -29,11 +32,14 @@ class AudioPermissionRequester (
|
|
|
29
32
|
) == PackageManager.PERMISSION_GRANTED
|
|
30
33
|
|
|
31
34
|
if (audioGranted) {
|
|
32
|
-
|
|
33
|
-
return
|
|
35
|
+
return true
|
|
34
36
|
}
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
return suspendCancellableCoroutine { cont ->
|
|
39
|
+
callback = { granted ->
|
|
40
|
+
if (cont.isActive) cont.resume(granted)
|
|
41
|
+
}
|
|
42
|
+
launcher.launch(recordAudioPermission)
|
|
43
|
+
}
|
|
38
44
|
}
|
|
39
45
|
}
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt
CHANGED
|
@@ -14,6 +14,7 @@ import com.margelo.nitro.core.Promise
|
|
|
14
14
|
import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
|
|
15
15
|
import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
|
|
16
16
|
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
17
|
+
import com.margelo.nitro.nitrospeech.SpeechRecognitionPrewarm
|
|
17
18
|
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
18
19
|
|
|
19
20
|
@DoNotStrip
|
|
@@ -43,52 +44,27 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
43
44
|
|
|
44
45
|
@DoNotStrip
|
|
45
46
|
@Keep
|
|
46
|
-
override fun prewarm(
|
|
47
|
-
|
|
47
|
+
override fun prewarm(
|
|
48
|
+
defaultParams: SpeechRecognitionConfig?,
|
|
49
|
+
options: SpeechRecognitionPrewarm?
|
|
50
|
+
): Promise<Unit> {
|
|
51
|
+
logger.log("prewarm called")
|
|
48
52
|
// nothing to prewarm
|
|
49
|
-
|
|
53
|
+
// only request permissions
|
|
54
|
+
return Promise.async {
|
|
55
|
+
// Enabled by default for user
|
|
56
|
+
if (options?.requestPermission != false) {
|
|
57
|
+
preparePermissions(null, isPrewarm = true)
|
|
58
|
+
}
|
|
59
|
+
}
|
|
50
60
|
}
|
|
51
61
|
|
|
52
62
|
@DoNotStrip
|
|
53
63
|
@Keep
|
|
54
64
|
override fun startListening(params: SpeechRecognitionConfig?) {
|
|
55
65
|
logger.log("startListening: $params")
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
null,
|
|
59
|
-
"Error at startListening: Previous SpeechRecognizer is still active",
|
|
60
|
-
false
|
|
61
|
-
)
|
|
62
|
-
return
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
val context = NitroModules.applicationContext
|
|
66
|
-
if (context == null) {
|
|
67
|
-
onFinishRecognition(
|
|
68
|
-
null,
|
|
69
|
-
"Error at startListening: Context not available",
|
|
70
|
-
true
|
|
71
|
-
)
|
|
72
|
-
return
|
|
73
|
-
}
|
|
74
|
-
val activity = context.currentActivity
|
|
75
|
-
if (activity == null) {
|
|
76
|
-
onFinishRecognition(
|
|
77
|
-
null,
|
|
78
|
-
"Error at startListening: Activity not found",
|
|
79
|
-
true
|
|
80
|
-
)
|
|
81
|
-
return
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
val permissionRequester = AudioPermissionRequester(activity)
|
|
85
|
-
permissionRequester.checkAndRequest { granted ->
|
|
86
|
-
if (!granted) {
|
|
87
|
-
onPermissionDenied?.invoke()
|
|
88
|
-
return@checkAndRequest
|
|
89
|
-
}
|
|
90
|
-
config = params
|
|
91
|
-
start(context)
|
|
66
|
+
Promise.async {
|
|
67
|
+
preparePermissions(params, isPrewarm = false)
|
|
92
68
|
}
|
|
93
69
|
}
|
|
94
70
|
|
|
@@ -196,6 +172,48 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
196
172
|
stopListening()
|
|
197
173
|
}
|
|
198
174
|
|
|
175
|
+
private suspend fun preparePermissions(params: SpeechRecognitionConfig?, isPrewarm: Boolean) {
|
|
176
|
+
if (isActive) {
|
|
177
|
+
onFinishRecognition(
|
|
178
|
+
null,
|
|
179
|
+
"Error: SpeechRecognizer is already active",
|
|
180
|
+
false
|
|
181
|
+
)
|
|
182
|
+
return
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
val context = NitroModules.applicationContext
|
|
186
|
+
if (context == null) {
|
|
187
|
+
onFinishRecognition(
|
|
188
|
+
null,
|
|
189
|
+
"Error: Context not available",
|
|
190
|
+
true
|
|
191
|
+
)
|
|
192
|
+
return
|
|
193
|
+
}
|
|
194
|
+
val activity = context.currentActivity
|
|
195
|
+
if (activity == null) {
|
|
196
|
+
onFinishRecognition(
|
|
197
|
+
null,
|
|
198
|
+
"Error: Activity not found",
|
|
199
|
+
true
|
|
200
|
+
)
|
|
201
|
+
return
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
val permissionRequester = AudioPermissionRequester(activity)
|
|
205
|
+
val granted = permissionRequester.checkAndRequest()
|
|
206
|
+
if (!granted) {
|
|
207
|
+
onPermissionDenied?.invoke()
|
|
208
|
+
return
|
|
209
|
+
}
|
|
210
|
+
if (isPrewarm) {
|
|
211
|
+
return
|
|
212
|
+
}
|
|
213
|
+
config = params
|
|
214
|
+
start(context)
|
|
215
|
+
}
|
|
216
|
+
|
|
199
217
|
private fun start(context: Context) {
|
|
200
218
|
mainHandler.post {
|
|
201
219
|
try {
|
package/ios/Coordinator.swift
CHANGED
|
package/ios/Engines/AnalyzerEngine.swift
CHANGED
|
@@ -46,17 +46,21 @@ final class AnalyzerEngine: RecognizerEngine {
|
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
override func prewarm(for type:
|
|
50
|
-
await super.prewarm(for: type)
|
|
49
|
+
override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
|
|
50
|
+
await super.prewarm(for: type, options)
|
|
51
51
|
do {
|
|
52
52
|
// Create transcriber and install assets
|
|
53
53
|
try await transcriber.create(config: self.recognizerDelegate?.config)
|
|
54
54
|
}
|
|
55
55
|
catch {
|
|
56
|
+
let failureType: FailureType = switch type {
|
|
57
|
+
case .prewarm: .prewarm
|
|
58
|
+
case .start: .start
|
|
59
|
+
}
|
|
56
60
|
self.reportFailure(
|
|
57
61
|
from: "prewarm.assets",
|
|
58
62
|
message: "Failed to create transcriber",
|
|
59
|
-
type:
|
|
63
|
+
type: failureType
|
|
60
64
|
)
|
|
61
65
|
}
|
|
62
66
|
}
|
|
@@ -93,7 +97,7 @@ final class AnalyzerEngine: RecognizerEngine {
|
|
|
93
97
|
self?.outputContinuation?.yield(buffer)
|
|
94
98
|
}
|
|
95
99
|
)
|
|
96
|
-
guard let hardwareFormat else { return }
|
|
100
|
+
guard let hardwareFormat = recognizerDelegate?.hardwareFormat else { return }
|
|
97
101
|
let stream = AsyncStream(
|
|
98
102
|
AVAudioPCMBuffer.self,
|
|
99
103
|
bufferingPolicy: .unbounded
|
|
@@ -225,14 +229,14 @@ final class AnalyzerEngine: RecognizerEngine {
|
|
|
225
229
|
if !disableRepeatingFilter {
|
|
226
230
|
newBatch = Utils.repeatingFilter(newBatch)
|
|
227
231
|
}
|
|
228
|
-
Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
|
|
232
|
+
// Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
|
|
229
233
|
if self.resultBatches.isEmpty {
|
|
230
234
|
self.resultBatches.append(newBatch)
|
|
231
235
|
} else if CMTimeGetSeconds(rangeStart) == self.lastBatchStartTime || isFinal {
|
|
232
|
-
Log.log("[2] replace, isFinal: \(isFinal)")
|
|
236
|
+
// Log.log("[2] replace, isFinal: \(isFinal)")
|
|
233
237
|
self.resultBatches[self.resultBatches.count - 1] = newBatch
|
|
234
238
|
} else {
|
|
235
|
-
Log.log("[2] add new batch")
|
|
239
|
+
// Log.log("[2] add new batch")
|
|
236
240
|
self.resultBatches.append(newBatch)
|
|
237
241
|
}
|
|
238
242
|
self.lastBatchStartTime = CMTimeGetSeconds(rangeStart)
|
package/ios/Engines/RecognizerEngine.swift
CHANGED
|
@@ -12,10 +12,14 @@ enum FailureType {
|
|
|
12
12
|
case onSession
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
enum PrewarmType {
|
|
16
|
+
case start
|
|
17
|
+
case prewarm
|
|
18
|
+
}
|
|
19
|
+
|
|
15
20
|
class RecognizerEngine {
|
|
16
21
|
var isActive = false
|
|
17
22
|
var isStopping = false
|
|
18
|
-
var hardwareFormat: AVAudioFormat?
|
|
19
23
|
weak var recognizerDelegate: RecognizerDelegate?
|
|
20
24
|
|
|
21
25
|
private let audioLevelTracker = AudioLevelTracker()
|
|
@@ -33,21 +37,48 @@ class RecognizerEngine {
|
|
|
33
37
|
|
|
34
38
|
// MARK: - Recognizer Methods
|
|
35
39
|
|
|
36
|
-
func prewarm(for:
|
|
37
|
-
|
|
40
|
+
func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
|
|
41
|
+
// Prepare audioEngine
|
|
42
|
+
audioEngine = AVAudioEngine()
|
|
43
|
+
lg.log("[prewarm.audioEngine]")
|
|
44
|
+
|
|
45
|
+
guard let recognizerDelegate else { return }
|
|
46
|
+
|
|
47
|
+
// Everything is set, return early
|
|
48
|
+
if type == .prewarm, recognizerDelegate.hardwareFormat != nil {
|
|
49
|
+
lg.log("[prewarm.return]: Everything set")
|
|
50
|
+
return
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// User explicitly asked for prewarm without requesting permissions, return early
|
|
54
|
+
if type == .prewarm, options?.requestPermission == false {
|
|
55
|
+
lg.log("[prewarm.return]: requestPermission: false")
|
|
56
|
+
return
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
if type == .prewarm {
|
|
60
|
+
// options.requestPermission is true by default
|
|
61
|
+
// Start Permission sequence
|
|
62
|
+
let granted = await requestPermissions()
|
|
63
|
+
if granted {
|
|
64
|
+
self.prewarmAudioSession(for: type)
|
|
65
|
+
}
|
|
66
|
+
} else {
|
|
67
|
+
self.prewarmAudioSession(for: type)
|
|
68
|
+
}
|
|
69
|
+
|
|
38
70
|
// for SpeechTranscriber: .isAvailable and async assets
|
|
39
71
|
// for Dictation: only async assets
|
|
40
72
|
// for legacy SF: only sync .isAvailable
|
|
41
73
|
}
|
|
42
74
|
|
|
43
|
-
func start() {
|
|
44
|
-
guard
|
|
75
|
+
func start() async {
|
|
76
|
+
guard !isActive else { return }
|
|
45
77
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
).requestAuthorization()
|
|
78
|
+
let granted = await requestPermissions()
|
|
79
|
+
if granted {
|
|
80
|
+
await startSession()
|
|
81
|
+
}
|
|
51
82
|
}
|
|
52
83
|
|
|
53
84
|
func stop() {
|
|
@@ -56,6 +87,55 @@ class RecognizerEngine {
|
|
|
56
87
|
HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
|
|
57
88
|
}
|
|
58
89
|
|
|
90
|
+
func updateSession(
|
|
91
|
+
newConfig: MutableSpeechRecognitionConfig? = nil,
|
|
92
|
+
addMsToTimer: Double? = nil,
|
|
93
|
+
resetTimer: Bool? = nil
|
|
94
|
+
) {
|
|
95
|
+
guard let recognizerDelegate, isActive, !isStopping else { return }
|
|
96
|
+
let currentConfig = recognizerDelegate.config
|
|
97
|
+
// Update AutoFinish time
|
|
98
|
+
if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
|
|
99
|
+
newAutoFinish != currentConfig?.autoFinishRecognitionMs {
|
|
100
|
+
autoStopper?.updateThreshold(
|
|
101
|
+
newAutoFinish,
|
|
102
|
+
from: "updateSession"
|
|
103
|
+
)
|
|
104
|
+
}
|
|
105
|
+
// Update AutoFinish progress interval
|
|
106
|
+
if let newInterval = newConfig?.autoFinishProgressIntervalMs,
|
|
107
|
+
newInterval != currentConfig?.autoFinishProgressIntervalMs {
|
|
108
|
+
autoStopper?.updateProgressInterval(
|
|
109
|
+
newInterval,
|
|
110
|
+
from: "updateSession"
|
|
111
|
+
)
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
if let addMsToTimer {
|
|
115
|
+
// Add time to the timer once
|
|
116
|
+
autoStopper?.addMsOnce(
|
|
117
|
+
addMsToTimer,
|
|
118
|
+
from: "updateSession"
|
|
119
|
+
)
|
|
120
|
+
} else if resetTimer == true {
|
|
121
|
+
// Reset to current baseline threshold.
|
|
122
|
+
autoStopper?.resetTimer(from: "updateSession")
|
|
123
|
+
}
|
|
124
|
+
// Only update new non-nil values in the config
|
|
125
|
+
recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
func getVoiceInputVolume() -> VolumeChangeEvent? {
|
|
129
|
+
guard let currentSample = audioLevelTracker.currentSample else { return nil }
|
|
130
|
+
return VolumeChangeEvent(
|
|
131
|
+
smoothedVolume: currentSample.smoothed,
|
|
132
|
+
rawVolume: currentSample.raw,
|
|
133
|
+
db: currentSample.db
|
|
134
|
+
)
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// MARK: Helpers
|
|
138
|
+
|
|
59
139
|
func startSession() async {
|
|
60
140
|
lg.log("[startSession.startSession]")
|
|
61
141
|
// Init everything
|
|
@@ -66,15 +146,13 @@ class RecognizerEngine {
|
|
|
66
146
|
lg.log("[startSession.initAutoStop]")
|
|
67
147
|
startAppStateObserver()
|
|
68
148
|
lg.log("[startSession.startAppStateObserver]")
|
|
69
|
-
startAudioSession()
|
|
70
|
-
lg.log("[startSession.startAudioSession]")
|
|
71
149
|
}
|
|
72
150
|
|
|
73
151
|
func startAudioEngine(
|
|
74
152
|
onBuffer: @escaping (AVAudioPCMBuffer) -> Void
|
|
75
153
|
) {
|
|
76
154
|
lg.log("[startAudioEngine]")
|
|
77
|
-
guard let audioEngine, let hardwareFormat else { return }
|
|
155
|
+
guard let audioEngine, let hardwareFormat = self.recognizerDelegate?.hardwareFormat else { return }
|
|
78
156
|
audioEngine.inputNode.installTap(
|
|
79
157
|
onBus: 0,
|
|
80
158
|
bufferSize: 1024,
|
|
@@ -124,53 +202,6 @@ class RecognizerEngine {
|
|
|
124
202
|
recognizerDelegate.readyForSpeech()
|
|
125
203
|
recognizerDelegate.result(batches: [])
|
|
126
204
|
}
|
|
127
|
-
|
|
128
|
-
func updateSession(
|
|
129
|
-
newConfig: MutableSpeechRecognitionConfig? = nil,
|
|
130
|
-
addMsToTimer: Double? = nil,
|
|
131
|
-
resetTimer: Bool? = nil
|
|
132
|
-
) {
|
|
133
|
-
guard let recognizerDelegate, isActive, !isStopping else { return }
|
|
134
|
-
let currentConfig = recognizerDelegate.config
|
|
135
|
-
// Update AutoFinish time
|
|
136
|
-
if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
|
|
137
|
-
newAutoFinish != currentConfig?.autoFinishRecognitionMs {
|
|
138
|
-
autoStopper?.updateThreshold(
|
|
139
|
-
newAutoFinish,
|
|
140
|
-
from: "updateSession"
|
|
141
|
-
)
|
|
142
|
-
}
|
|
143
|
-
// Update AutoFinish progress interval
|
|
144
|
-
if let newInterval = newConfig?.autoFinishProgressIntervalMs,
|
|
145
|
-
newInterval != currentConfig?.autoFinishProgressIntervalMs {
|
|
146
|
-
autoStopper?.updateProgressInterval(
|
|
147
|
-
newInterval,
|
|
148
|
-
from: "updateSession"
|
|
149
|
-
)
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
if let addMsToTimer {
|
|
153
|
-
// Add time to the timer once
|
|
154
|
-
autoStopper?.addMsOnce(
|
|
155
|
-
addMsToTimer,
|
|
156
|
-
from: "updateSession"
|
|
157
|
-
)
|
|
158
|
-
} else if resetTimer == true {
|
|
159
|
-
// Reset to current baseline threshold.
|
|
160
|
-
autoStopper?.resetTimer(from: "updateSession")
|
|
161
|
-
}
|
|
162
|
-
// Only update new non-nil values in the config
|
|
163
|
-
recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
func getVoiceInputVolume() -> VolumeChangeEvent? {
|
|
167
|
-
guard let currentSample = audioLevelTracker.currentSample else { return nil }
|
|
168
|
-
return VolumeChangeEvent(
|
|
169
|
-
smoothedVolume: currentSample.smoothed,
|
|
170
|
-
rawVolume: currentSample.raw,
|
|
171
|
-
db: currentSample.db
|
|
172
|
-
)
|
|
173
|
-
}
|
|
174
205
|
|
|
175
206
|
func cleanup(from: String) {
|
|
176
207
|
lg.log("[cleanup]: \(from)")
|
|
@@ -226,29 +257,32 @@ class RecognizerEngine {
|
|
|
226
257
|
}
|
|
227
258
|
}
|
|
228
259
|
|
|
229
|
-
// MARK:
|
|
260
|
+
// MARK: Permissions
|
|
230
261
|
|
|
231
|
-
private func
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
message: "Audio Engine failed to initiate",
|
|
238
|
-
// RecognizerEngine-agnostic Error
|
|
239
|
-
type: .system
|
|
240
|
-
)
|
|
241
|
-
return
|
|
262
|
+
private func requestPermissions() async -> Bool {
|
|
263
|
+
guard let recognizerDelegate else { return false }
|
|
264
|
+
let authStatus = await Permissions.requestAuthorization()
|
|
265
|
+
if authStatus == .denied || authStatus == .restricted {
|
|
266
|
+
recognizerDelegate.permissionDenied()
|
|
267
|
+
return false
|
|
242
268
|
}
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
269
|
+
|
|
270
|
+
if authStatus != .authorized {
|
|
271
|
+
// .notDetermined or unknown issue
|
|
272
|
+
recognizerDelegate.error(message: "Speech recognition permission is not determined")
|
|
273
|
+
return false
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if !(await Permissions.requestMicrophonePermission()) {
|
|
277
|
+
recognizerDelegate.permissionDenied()
|
|
278
|
+
return false
|
|
248
279
|
}
|
|
280
|
+
|
|
281
|
+
// Granted
|
|
282
|
+
return true
|
|
249
283
|
}
|
|
250
284
|
|
|
251
|
-
// MARK:
|
|
285
|
+
// MARK: Auto Stopper
|
|
252
286
|
|
|
253
287
|
private func initAutoStop() {
|
|
254
288
|
let config = self.recognizerDelegate?.config
|
|
@@ -271,7 +305,7 @@ class RecognizerEngine {
|
|
|
271
305
|
autoStopper = nil
|
|
272
306
|
}
|
|
273
307
|
|
|
274
|
-
// MARK:
|
|
308
|
+
// MARK: App State Observer
|
|
275
309
|
|
|
276
310
|
private func startAppStateObserver() {
|
|
277
311
|
appStateObserver = AppStateObserver { [weak self] in
|
|
@@ -285,12 +319,37 @@ class RecognizerEngine {
|
|
|
285
319
|
appStateObserver = nil
|
|
286
320
|
}
|
|
287
321
|
|
|
288
|
-
// MARK:
|
|
322
|
+
// MARK: Audio Session
|
|
323
|
+
|
|
324
|
+
private func prewarmAudioSession(for type: PrewarmType) {
|
|
325
|
+
guard let audioEngine else {
|
|
326
|
+
self.reportFailure(
|
|
327
|
+
from: "Audio Engine",
|
|
328
|
+
message: "Audio Engine failed to initiate",
|
|
329
|
+
// RecognizerEngine-agnostic Error
|
|
330
|
+
type: .system
|
|
331
|
+
)
|
|
332
|
+
return
|
|
333
|
+
}
|
|
334
|
+
startAudioSession()
|
|
335
|
+
lg.log("[prewarmAudioSession.audioSession]")
|
|
336
|
+
// heavy first hardwareFormat retrieval
|
|
337
|
+
if let recognizerDelegate, recognizerDelegate.hardwareFormat == nil {
|
|
338
|
+
let format = audioEngine.inputNode.outputFormat(forBus: 0)
|
|
339
|
+
recognizerDelegate.setHardwareFormat(format: format)
|
|
340
|
+
lg.log("[prewarmAudioSession.hardwareFormat]")
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if type == .prewarm {
|
|
344
|
+
stopAudioSession()
|
|
345
|
+
lg.log("[prewarmAudioSession.stopAudioSession]")
|
|
346
|
+
}
|
|
347
|
+
}
|
|
289
348
|
|
|
290
349
|
private func startAudioSession() {
|
|
291
350
|
do {
|
|
292
351
|
let audioSession = AVAudioSession.sharedInstance()
|
|
293
|
-
try audioSession.setCategory(.
|
|
352
|
+
try audioSession.setCategory(.playAndRecord, mode: .default, options: .duckOthers)
|
|
294
353
|
// Required for haptic feedback
|
|
295
354
|
try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
|
|
296
355
|
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
|
|
@@ -305,7 +364,6 @@ class RecognizerEngine {
|
|
|
305
364
|
}
|
|
306
365
|
private func stopAudioSession() {
|
|
307
366
|
do {
|
|
308
|
-
// TODO: check unduck
|
|
309
367
|
try AVAudioSession.sharedInstance().setActive(false)
|
|
310
368
|
} catch {
|
|
311
369
|
// Just log and no-op - not critical
|
package/ios/Engines/SFSpeechEngine.swift
CHANGED
|
@@ -15,18 +15,22 @@ final class SFSpeechEngine: RecognizerEngine {
|
|
|
15
15
|
recognitionTask?.finish()
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
override func prewarm(for type:
|
|
18
|
+
override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
|
|
19
19
|
speechRecognizer = SFSpeechRecognizer(
|
|
20
20
|
locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
|
|
21
21
|
)
|
|
22
22
|
if speechRecognizer?.isAvailable != true {
|
|
23
|
+
let failureType: FailureType = switch type {
|
|
24
|
+
case .prewarm: .prewarm
|
|
25
|
+
case .start: .start
|
|
26
|
+
}
|
|
23
27
|
self.reportFailure(
|
|
24
28
|
from: "prewarm",
|
|
25
29
|
message: "SFSpeechRecognizer is not available",
|
|
26
|
-
type:
|
|
30
|
+
type: failureType
|
|
27
31
|
)
|
|
28
32
|
}
|
|
29
|
-
await super.prewarm(for: type)
|
|
33
|
+
await super.prewarm(for: type, options)
|
|
30
34
|
}
|
|
31
35
|
|
|
32
36
|
override func startSession() async {
|