@gmessier/nitro-speech 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +176 -148
- package/android/build.gradle +0 -1
- package/android/src/main/cpp/cpp-adapter.cpp +5 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +82 -18
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +118 -30
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +35 -24
- package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
- package/ios/Audio/AudioLevelTracker.swift +60 -0
- package/ios/Coordinator.swift +105 -0
- package/ios/Engines/AnalyzerEngine.swift +241 -0
- package/ios/Engines/DictationRuntime.swift +67 -0
- package/ios/Engines/RecognizerEngine.swift +315 -0
- package/ios/Engines/SFSpeechEngine.swift +119 -0
- package/ios/Engines/SpeechRuntime.swift +58 -0
- package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
- package/ios/HybridNitroSpeech.swift +1 -10
- package/ios/HybridRecognizer.swift +142 -191
- package/ios/LocaleManager.swift +73 -0
- package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
- package/ios/Shared/AutoStopper.swift +147 -0
- package/ios/Shared/HapticImpact.swift +24 -0
- package/ios/Shared/Log.swift +41 -0
- package/ios/Shared/Permissions.swift +59 -0
- package/ios/Shared/Utils.swift +58 -0
- package/lib/NitroSpeech.d.ts +2 -0
- package/lib/NitroSpeech.js +2 -0
- package/lib/Recognizer/RecognizerRef.d.ts +7 -0
- package/lib/Recognizer/RecognizerRef.js +16 -0
- package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
- package/lib/Recognizer/SpeechRecognizer.js +9 -0
- package/lib/Recognizer/methods.d.ts +9 -0
- package/lib/Recognizer/methods.js +33 -0
- package/lib/Recognizer/types.d.ts +6 -0
- package/lib/Recognizer/types.js +1 -0
- package/lib/Recognizer/useRecognizer.d.ts +16 -0
- package/lib/Recognizer/useRecognizer.js +71 -0
- package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
- package/lib/Recognizer/useRecognizerIsActive.js +40 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
- package/lib/Recognizer/useVoiceInputVolume.js +52 -0
- package/lib/index.d.ts +7 -0
- package/lib/index.js +7 -0
- package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
- package/lib/specs/NitroSpeech.nitro.js +1 -0
- package/lib/specs/Recognizer.nitro.d.ts +97 -0
- package/lib/specs/Recognizer.nitro.js +1 -0
- package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
- package/lib/specs/SpeechRecognitionConfig.js +1 -0
- package/lib/specs/VolumeChangeEvent.d.ts +31 -0
- package/lib/specs/VolumeChangeEvent.js +1 -0
- package/nitro.json +0 -4
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
- package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +73 -19
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +8 -4
- package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
- package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
- package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
- package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +22 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +211 -69
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +49 -9
- package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +7 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +78 -18
- package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
- package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
- package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
- package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +5 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +18 -7
- package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
- package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
- package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
- package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
- package/package.json +15 -16
- package/src/NitroSpeech.ts +5 -0
- package/src/Recognizer/RecognizerRef.ts +27 -0
- package/src/Recognizer/SpeechRecognizer.ts +10 -0
- package/src/Recognizer/methods.ts +45 -0
- package/src/Recognizer/types.ts +34 -0
- package/src/Recognizer/useRecognizer.ts +87 -0
- package/src/Recognizer/useRecognizerIsActive.ts +49 -0
- package/src/Recognizer/useVoiceInputVolume.ts +65 -0
- package/src/index.ts +13 -182
- package/src/specs/NitroSpeech.nitro.ts +2 -163
- package/src/specs/Recognizer.nitro.ts +113 -0
- package/src/specs/SpeechRecognitionConfig.ts +167 -0
- package/src/specs/VolumeChangeEvent.ts +31 -0
- package/android/proguard-rules.pro +0 -1
- package/ios/AnylyzerTranscriber.swift +0 -331
- package/ios/AutoStopper.swift +0 -69
- package/ios/HapticImpact.swift +0 -32
- package/ios/LegacySpeechRecognizer.swift +0 -161
- package/lib/commonjs/index.js +0 -145
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
- package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/module/index.js +0 -138
- package/lib/module/index.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/specs/NitroSpeech.nitro.js +0 -4
- package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/typescript/index.d.ts +0 -50
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
import AVFoundation
|
|
4
|
+
|
|
5
|
+
// No practical diff between "system" and "onSession" for now.
// For future: send the level of error to RN
// "onSession" is less critical level, since the session has been started successfully
/// Classifies where in the recognition lifecycle a failure occurred, so
/// `RecognizerEngine.reportFailure(from:message:type:)` can decide between
/// retrying with a different engine (`.prewarm`/`.start`) and surfacing an
/// error to the delegate (`.system`/`.onSession`).
enum FailureType {
  // Engine-agnostic platform failure (audio session / audio engine).
  case system
  // Failure while starting a recognition session.
  case start
  // Failure during prewarm (asset download / availability check).
  case prewarm
  // Failure after the session successfully started.
  case onSession
}
|
|
14
|
+
|
|
15
|
+
/// Base class for all speech-recognition engines. Owns the pieces every
/// engine shares: the `AVAudioEngine` input tap, the audio session, volume
/// tracking (`AudioLevelTracker`), silence-based auto-stop (`AutoStopper`),
/// background/foreground observation, and failure reporting. Concrete
/// engines (e.g. `SFSpeechEngine`) override `prewarm`, `startSession`,
/// `stop`, and `cleanup` to plug in their recognition backend.
class RecognizerEngine {
  // True while a recognition session is running (set in startSession, cleared in cleanup).
  var isActive = false
  // True once stop() has been requested but cleanup has not finished yet.
  var isStopping = false
  // Cached input-node format; first retrieval is expensive, so it is done in prewarm.
  var hardwareFormat: AVAudioFormat?
  // Receives results, volume events, errors, and lifecycle callbacks. Weak: the
  // delegate owns this engine.
  weak var recognizerDelegate: RecognizerDelegate?

  private let audioLevelTracker = AudioLevelTracker()
  private var appStateObserver: AppStateObserver?
  private var audioEngine: AVAudioEngine?
  private var autoStopper: AutoStopper?
  private let lg = Lg(prefix: "RecognizerEngine")

  /// Locale this engine was created for.
  let locale: Locale

  init(locale: Locale, delegate: RecognizerDelegate) {
    self.locale = locale
    self.recognizerDelegate = delegate
  }

  // MARK: - Recognizer Methods

  /// Performs the expensive one-time setup (audio engine + hardware format).
  /// Subclasses extend this with backend-specific availability checks and
  /// asset downloads; `type` tells reportFailure how to classify any error.
  func prewarm(for: FailureType) async {
    self.prepareAudioEngine()
    // for SpeechTranscriber: .isAvailable and async assets
    // for Dictation: only async assets
    // for legacy SF: only sync .isAvailable
  }

  /// Entry point: requests mic/speech permission, then starts the session
  /// on grant. No-op when already active or when no delegate is set.
  func start() {
    guard let recognizerDelegate, !isActive else { return }

    // NOTE(review): passing `self.startSession` captures self strongly in
    // Permissions; presumably Permissions is short-lived — confirm it does not
    // retain the callback beyond the authorization request.
    Permissions(
      onGranted: self.startSession,
      onDenied: recognizerDelegate.permissionDenied,
      onError: recognizerDelegate.error
    ).requestAuthorization()
  }

  /// Requests a stop. Only flips `isStopping` and fires the stop haptic;
  /// subclasses do the actual backend shutdown, and cleanup(from:) finishes
  /// the teardown. No-op when inactive or already stopping.
  func stop() {
    guard isActive, !isStopping else { return }
    isStopping = true
    HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
  }

  /// Starts the shared session state: auto-stop timer, app-state observer,
  /// and the AVAudioSession. Subclasses call super first, then start their
  /// recognition backend.
  func startSession() async {
    lg.log("[startSession.startSession]")
    // Init everything
    isStopping = false
    isActive = true

    initAutoStop()
    lg.log("[startSession.initAutoStop]")
    startAppStateObserver()
    lg.log("[startSession.startAppStateObserver]")
    startAudioSession()
    lg.log("[startSession.startAudioSession]")
  }

  /// Installs the input tap and starts the audio engine.
  /// - Parameter onBuffer: called for every captured PCM buffer, after the
  ///   volume-tracking side effects (volumeChange event, auto-stop reset).
  /// Silently returns when prewarm did not run (no engine / no format).
  func startAudioEngine(
    onBuffer: @escaping (AVAudioPCMBuffer) -> Void
  ) {
    lg.log("[startAudioEngine]")
    guard let audioEngine, let hardwareFormat else { return }
    audioEngine.inputNode.installTap(
      onBus: 0,
      bufferSize: 1024,
      format: hardwareFormat
    ) { [weak self] buffer, _ in
      guard let self, let recognizerDelegate = self.recognizerDelegate else { return }
      // Tracker returns a sample only when it has something new to report.
      if let sample = self.audioLevelTracker.process(
        buffer,
        recognizerDelegate.config?.resetAutoFinishVoiceSensitivity
      ) {
        // Send buffer volume data
        recognizerDelegate.volumeChange(
          event:
            VolumeChangeEvent(
              smoothedVolume: sample.smoothed,
              rawVolume: sample.raw,
              db: sample.db
            )
        )
        // Voice above the RMS threshold counts as activity: postpone auto-stop.
        if sample.resetTimer {
          self.autoStopper?.resetTimer(from: "rms threshold")
        }
      }
      onBuffer(buffer)
    }
    lg.log("[startAudioEngine.installTap]")
    do {
      audioEngine.prepare()
      lg.log("[startAudioEngine.prepare]")
      try audioEngine.start()
      lg.log("[startAudioEngine.start]")
    } catch {
      self.reportFailure(
        from: "Audio Engine",
        message: "Audio Engine failed to start",
        // RecognizerEngine-agnostic Error
        type: .system
      )
    }
  }

  /// Signals "ready" to the app: start haptic, fresh auto-stop baseline,
  /// readyForSpeech callback, and an empty result batch to clear old text.
  func sendFeedbackOnStart() {
    guard let recognizerDelegate else { return }
    lg.log("[sendFeedbackOnStart]")
    HapticImpact.trigger(with: recognizerDelegate.config?.startHapticFeedbackStyle)
    autoStopper?.resetTimer(from: "startListening.sendFeedbackOnStart")
    recognizerDelegate.readyForSpeech()
    recognizerDelegate.result(batches: [])
  }

  /// Applies runtime config changes to a live session.
  /// - Parameters:
  ///   - newConfig: partial config; only changed auto-finish values are pushed
  ///     to the AutoStopper, then the delegate merges the non-nil fields.
  ///   - addMsToTimer: one-shot extension of the auto-stop deadline.
  ///   - resetTimer: reset the auto-stop timer to its baseline (ignored when
  ///     addMsToTimer is provided — the two are mutually exclusive here).
  /// No-op when there is no live session.
  func updateSession(
    newConfig: MutableSpeechRecognitionConfig? = nil,
    addMsToTimer: Double? = nil,
    resetTimer: Bool? = nil
  ) {
    guard let recognizerDelegate, isActive, !isStopping else { return }
    let currentConfig = recognizerDelegate.config
    // Update AutoFinish time
    if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
       newAutoFinish != currentConfig?.autoFinishRecognitionMs {
      autoStopper?.updateThreshold(
        newAutoFinish,
        from: "updateSession"
      )
    }
    // Update AutoFinish progress interval
    if let newInterval = newConfig?.autoFinishProgressIntervalMs,
       newInterval != currentConfig?.autoFinishProgressIntervalMs {
      autoStopper?.updateProgressInterval(
        newInterval,
        from: "updateSession"
      )
    }

    if let addMsToTimer {
      // Add time to the timer once
      autoStopper?.addMsOnce(
        addMsToTimer,
        from: "updateSession"
      )
    } else if resetTimer == true {
      // Reset to current baseline threshold.
      autoStopper?.resetTimer(from: "updateSession")
    }
    // Only update new non-nil values in the config
    recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
  }

  /// Returns the most recent volume sample as an event, or nil when the
  /// tracker has not processed any buffer yet (e.g. before/after a session).
  func getVoiceInputVolume() -> VolumeChangeEvent? {
    guard let currentSample = audioLevelTracker.currentSample else { return nil }
    return VolumeChangeEvent(
      smoothedVolume: currentSample.smoothed,
      rawVolume: currentSample.raw,
      db: currentSample.db
    )
  }

  /// Tears down everything the base class owns: auto-stop, app-state
  /// observer, audio session, level tracker, tap, and engine. Emits a final
  /// zero-volume event, and fires recordingStopped() only if a session was
  /// actually active. Safe to call repeatedly. `from` is a breadcrumb for logs.
  func cleanup(from: String) {
    lg.log("[cleanup]: \(from)")
    let wasActive = isActive
    deinitAutoStop()
    stopAppStateObserver()
    stopAudioSession()
    audioLevelTracker.reset()

    if let audioEngine, audioEngine.isRunning {
      audioEngine.stop()
    }
    audioEngine?.inputNode.removeTap(onBus: 0)

    audioEngine = nil
    isActive = false
    isStopping = false
    // Zero out the UI volume indicator.
    self.recognizerDelegate?.volumeChange(
      event:
        VolumeChangeEvent(
          smoothedVolume: 0,
          rawVolume: 0,
          db: nil
        )
    )
    if wasActive {
      self.recognizerDelegate?.recordingStopped()
    }
  }

  /// Central failure path: logs, tears down the session, then either asks the
  /// delegate to pick another engine (.prewarm/.start) or forwards the error
  /// message (.system/.onSession). See FailureType for the classification.
  func reportFailure(from: String, message: String, type: FailureType) {
    // Log message
    lg.log("[Failure] type: \(type), message: \(message)")

    // Cleanup on engine level anyway
    self.cleanup(from: from)

    switch type {
    // Try to reselect engine and try again
    case .prewarm, .start:
      let isPrewarm = type == .prewarm
      self.recognizerDelegate?.reselectEngine(forPrewarm: isPrewarm)
    // System level issue: send onError with description and clean
    // Session has already started: send onError and cleanup
    case .system, .onSession:
      self.recognizerDelegate?.error(message: message)
    }
  }

  /// Partial transcription arrived: treat it as user activity and postpone
  /// auto-stop — unless a stop is already in progress.
  func trackPartialActivity() {
    if !self.isStopping {
      self.autoStopper?.resetTimer(from: "Partial results")
    }
  }

  // MARK: - AudioEngine heavy prepare

  /// Creates the AVAudioEngine and caches the hardware input format (the
  /// first outputFormat(forBus:) call is slow, hence "prewarm").
  private func prepareAudioEngine() {
    lg.log("[prewarm.start]")
    audioEngine = AVAudioEngine()
    // NOTE(review): AVAudioEngine() is a non-failable initializer, so this
    // guard's failure branch looks unreachable — confirm whether it can be
    // simplified away.
    guard let audioEngine else {
      self.reportFailure(
        from: "Audio Engine",
        message: "Audio Engine failed to initiate",
        // RecognizerEngine-agnostic Error
        type: .system
      )
      return
    }
    lg.log("[prewarm.audioEngine]")
    // heavy first hardwareFormat retrieval
    if hardwareFormat == nil {
      hardwareFormat = audioEngine.inputNode.outputFormat(forBus: 0)
      lg.log("[prewarm.hardwareFormat]")
    }
  }

  // MARK: - AutoStopper

  /// Creates the silence timer from the current config; onTimeout triggers a
  /// normal stop(), onProgress forwards the countdown to the delegate.
  private func initAutoStop() {
    let config = self.recognizerDelegate?.config
    autoStopper = AutoStopper(
      silenceThresholdMs: config?.autoFinishRecognitionMs,
      progressIntervalMs: config?.autoFinishProgressIntervalMs,
      onProgress: { [weak self] timeLeftMs in
        guard let self else { return }
        self.recognizerDelegate?.autoFinishProgress(
          timeLeftMs: timeLeftMs
        )
      },
      onTimeout: { [weak self] in
        self?.stop()
      }
    )
  }
  /// Stops and releases the silence timer.
  private func deinitAutoStop() {
    autoStopper?.stop()
    autoStopper = nil
  }

  // MARK: - App State Observer

  /// Stops recognition when the app leaves the foreground (the observer's
  /// callback fires on the state change AppStateObserver watches).
  private func startAppStateObserver() {
    appStateObserver = AppStateObserver { [weak self] in
      guard let self, self.isActive else { return }
      self.stop()
    }
  }

  private func stopAppStateObserver() {
    appStateObserver?.stop()
    appStateObserver = nil
  }

  // MARK: - Audio Session

  /// Activates a record/measurement session that ducks other audio and keeps
  /// haptics enabled while recording. Failure is engine-agnostic (.system).
  private func startAudioSession() {
    do {
      let audioSession = AVAudioSession.sharedInstance()
      try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
      // Required for haptic feedback
      try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
      try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
    } catch {
      self.reportFailure(
        from: "startAudioSession",
        message: "Failed to activate audio session: \(error.localizedDescription)",
        // RecognizerEngine-agnostic Error
        type: .system
      )
    }
  }
  /// Best-effort deactivation; a failure here is logged and ignored.
  private func stopAudioSession() {
    do {
      // TODO: check unduck
      try AVAudioSession.sharedInstance().setActive(false)
    } catch {
      // Just log and no-op - not critical
      lg.log("Failed to deactivate audio session: \(error.localizedDescription)")
    }
  }
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
import AVFoundation
|
|
4
|
+
|
|
5
|
+
/// Legacy recognition engine backed by `SFSpeechRecognizer` (pre-iOS 26
/// fallback). Streams tap buffers into an `SFSpeechAudioBufferRecognitionRequest`
/// and forwards partial transcriptions to the delegate as a single batch.
final class SFSpeechEngine: RecognizerEngine {
  private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
  private var recognitionTask: SFSpeechRecognitionTask?
  private var speechRecognizer: SFSpeechRecognizer?

  private let lg = Lg(prefix: "SFSpeechEngine")

  /// Marks the session as stopping (super), then ends the audio stream and
  /// asks the task to finish — the task callback performs the final cleanup.
  override func stop() {
    super.stop()
    recognitionRequest?.endAudio()
    recognitionTask?.finish()
  }

  /// Creates the SFSpeechRecognizer for the configured locale (default
  /// "en-US") and verifies availability synchronously; on unavailability
  /// reports a failure of the given `type` (which tears down and may trigger
  /// engine reselection), then runs the base prewarm (audio engine + format).
  override func prewarm(for type: FailureType) async {
    speechRecognizer = SFSpeechRecognizer(
      locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
    )
    if speechRecognizer?.isAvailable != true {
      self.reportFailure(
        from: "prewarm",
        message: "SFSpeechRecognizer is not available",
        type: type
      )
    }
    await super.prewarm(for: type)
  }

  /// Full session start: base session state, (re)prewarm, recognition task
  /// wiring, audio engine tap, and the "ready" feedback. If prewarm failed,
  /// its reportFailure→cleanup nils `speechRecognizer`, so the guard below
  /// aborts the start.
  override func startSession() async {
    await super.startSession()
    lg.log("[startSession.startSession]")

    await prewarm(for: .start)
    lg.log("[startSession.prewarm]")
    guard let speechRecognizer else { return }

    recognitionRequest = createRecognitionRequest()
    lg.log("[startSession.createRecognitionRequest]")
    guard let recognitionRequest else { return }

    recognitionTask = speechRecognizer.recognitionTask(
      with: recognitionRequest
    ) { [weak self] result, error in
      guard let self else { return }

      if let result = result {
        var transcription = result.bestTranscription.formattedString
        if !transcription.isEmpty {
          // Track only when transcription is coming
          self.trackPartialActivity()

          let disableRepeatingFilter = self.recognizerDelegate?.config?.disableRepeatingFilter ?? false
          if !disableRepeatingFilter {
            transcription = Utils.repeatingFilter(transcription)
          }
          // Legacy transcriber collects everything into one batch
          self.recognizerDelegate?.result(batches: [transcription])
        }

        if result.isFinal {
          self.cleanup(from: "startRecognition.recognitionTask.final")
        }
      }

      if let error = error {
        // A user-initiated stop also surfaces as an error from the task;
        // only treat it as a failure when we were not stopping on purpose.
        if !self.isStopping {
          self.reportFailure(
            from: "startSession.recognitionTask.error",
            message: "Recognition Error: \(error.localizedDescription)",
            type: .onSession
          )
        } else {
          self.cleanup(from: "startRecognition.recognitionTask.manualStop")
        }
      }
    }
    lg.log("[startSession.recognitionTask]")

    // Feed every captured buffer into the recognition request.
    self.startAudioEngine(
      onBuffer: { [weak self] buffer in
        self?.recognitionRequest?.append(buffer)
      }
    )
    lg.log("[startSession.startAudioEngine]")

    self.sendFeedbackOnStart()
    lg.log("[startSession.sendFeedbackOnStart]")
  }

  /// Base teardown plus releasing the SF-specific request/task/recognizer.
  override func cleanup(from: String) {
    super.cleanup(from: "overridden.\(from)")
    recognitionRequest = nil
    recognitionTask = nil
    speechRecognizer = nil
  }

  /// Builds the buffer-based request: partial results on, optional
  /// contextual-strings hinting, and (iOS 16+) automatic punctuation that is
  /// enabled unless the config explicitly opts out.
  private func createRecognitionRequest() -> SFSpeechAudioBufferRecognitionRequest {
    let request = SFSpeechAudioBufferRecognitionRequest()
    request.shouldReportPartialResults = true

    if let contextualStrings = self.recognizerDelegate?.config?.contextualStrings,
       !contextualStrings.isEmpty {
      request.contextualStrings = contextualStrings
    }

    if #available(iOS 16, *) {
      // Punctuation defaults to on; only an explicit `false` disables it.
      if self.recognizerDelegate?.config?.iosAddPunctuation == false {
        request.addsPunctuation = false
      } else {
        request.addsPunctuation = true
      }
    }

    return request
  }
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
|
|
4
|
+
@available(iOS 26.0, *)
/// `TranscriberRuntime` implementation backed by the iOS 26 `SpeechTranscriber`
/// (on-device speech-to-text with volatile/fast partial results).
final class SpeechRuntime: TranscriberRuntime {
  /// Thrown by `create(config:)` when `SpeechTranscriber` is not available
  /// on this device. Replaces the previous bare `NSError()` (empty domain,
  /// no description) so callers/logs get a meaningful message.
  struct UnavailableError: LocalizedError {
    var errorDescription: String? { "SpeechTranscriber is not available on this device" }
  }

  /// Locale the transcriber is created for.
  let locale: Locale
  private var transcriber: SpeechTranscriber?

  init(with locale: Locale) {
    self.locale = locale
  }

  /// Creates the transcriber and downloads/installs its language assets if
  /// needed.
  /// - Parameter config: only `maskOffensiveWords` is consulted here; when
  ///   true, offensive words are masked via `.etiquetteReplacements`.
  /// - Throws: `UnavailableError` when the transcriber is unavailable, or
  ///   any error from the asset installation request.
  func create(config: SpeechRecognitionConfig?) async throws {
    guard SpeechTranscriber.isAvailable else {
      throw UnavailableError()
    }
    var speechTranscriptionOptions: Set<SpeechTranscriber.TranscriptionOption> = []
    if config?.maskOffensiveWords == true {
      speechTranscriptionOptions.insert(.etiquetteReplacements)
    }
    transcriber = SpeechTranscriber(
      locale: locale,
      transcriptionOptions: speechTranscriptionOptions,
      // Volatile + fast results: stream partials as quickly as possible.
      reportingOptions: [.volatileResults, .fastResults],
      // Audio time ranges are needed to order/segment result batches.
      attributeOptions: [.audioTimeRange]
    )

    // Install language assets when the inventory says they are missing;
    // a nil request means everything is already available locally.
    if let transcriber,
       let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
      try await installationRequest.downloadAndInstall()
    }
  }

  /// Modules to attach to the analyzer; empty until `create` succeeds.
  func getModules() -> [any SpeechModule] {
    guard let transcriber else { return [] }
    return [transcriber]
  }

  /// Consumes the transcriber's async result stream, mapping each result to
  /// a `TranscriberResult` for `onResult`. Returns when the stream ends;
  /// rethrows stream errors. No-op when `create` has not run.
  func handleResults(
    onResult: @escaping (TranscriberResult) -> Void
  ) async throws {
    if let transcriber {
      for try await result in transcriber.results {
        onResult(
          TranscriberResult(
            text: result.text,
            rangeStart: result.range.start,
            isFinal: result.isFinal)
        )
      }
    }
  }

  /// Releases the transcriber so a fresh one is built on the next `create`.
  func clean() {
    transcriber = nil
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
|
|
4
|
+
/// A single transcription result emitted by a `TranscriberRuntime`,
/// normalized from the backend-specific result type.
struct TranscriberResult {
  // Transcribed text; attributed (may carry audio-time-range attributes,
  // per the runtime's attributeOptions).
  let text: AttributedString
  // Start time of the audio range this text covers.
  let rangeStart: CMTime
  // True when the result is finalized (no longer volatile/partial).
  let isFinal: Bool
}
|
|
9
|
+
|
|
10
|
+
@available(iOS 26.0, *)
/// Abstraction over an iOS 26 speech-analysis backend (e.g. `SpeechTranscriber`
/// or dictation). Lifecycle: `create` → `getModules` → `handleResults` → `clean`.
protocol TranscriberRuntime {
  /// Locale this runtime transcribes in.
  var locale: Locale { get }

  /// Builds the underlying transcriber and ensures any required assets are
  /// installed. Throws when the backend is unavailable or asset setup fails.
  func create(config: SpeechRecognitionConfig?) async throws

  /// Speech modules to attach to the analyzer session (empty before `create`).
  func getModules() -> [any SpeechModule]

  /// Consumes the backend's result stream, invoking `onResult` for each
  /// normalized result until the stream ends; rethrows stream errors.
  func handleResults(onResult: @escaping (TranscriberResult) -> Void) async throws

  /// Releases the underlying transcriber/resources.
  func clean()
}
|
|
@@ -2,14 +2,5 @@ import Foundation
|
|
|
2
2
|
import NitroModules
|
|
3
3
|
|
|
4
4
|
class HybridNitroSpeech : HybridNitroSpeechSpec {
|
|
5
|
-
var recognizer:
|
|
6
|
-
|
|
7
|
-
override init() {
|
|
8
|
-
if #available(iOS 26.0, *) {
|
|
9
|
-
recognizer = AnalyzerTranscriber()
|
|
10
|
-
} else {
|
|
11
|
-
recognizer = LegacySpeechRecognizer()
|
|
12
|
-
}
|
|
13
|
-
super.init()
|
|
14
|
-
}
|
|
5
|
+
var recognizer: HybridRecognizerSpec = HybridRecognizer()
|
|
15
6
|
}
|