@gmessier/nitro-speech 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +165 -148
  3. package/android/build.gradle +0 -1
  4. package/android/src/main/cpp/cpp-adapter.cpp +5 -1
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
  6. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +80 -16
  7. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +93 -20
  8. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +27 -15
  9. package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
  10. package/ios/Audio/AudioLevelTracker.swift +66 -0
  11. package/ios/Coordinator.swift +105 -0
  12. package/ios/Engines/AnalyzerEngine.swift +241 -0
  13. package/ios/Engines/DictationRuntime.swift +67 -0
  14. package/ios/Engines/RecognizerEngine.swift +312 -0
  15. package/ios/Engines/SFSpeechEngine.swift +119 -0
  16. package/ios/Engines/SpeechRuntime.swift +58 -0
  17. package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
  18. package/ios/HybridNitroSpeech.swift +1 -10
  19. package/ios/HybridRecognizer.swift +135 -192
  20. package/ios/LocaleManager.swift +73 -0
  21. package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
  22. package/ios/Shared/AutoStopper.swift +147 -0
  23. package/ios/Shared/HapticImpact.swift +24 -0
  24. package/ios/Shared/Log.swift +41 -0
  25. package/ios/Shared/Permissions.swift +59 -0
  26. package/ios/Shared/Utils.swift +58 -0
  27. package/lib/NitroSpeech.d.ts +2 -0
  28. package/lib/NitroSpeech.js +2 -0
  29. package/lib/Recognizer/RecognizerRef.d.ts +5 -0
  30. package/lib/Recognizer/RecognizerRef.js +13 -0
  31. package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
  32. package/lib/Recognizer/SpeechRecognizer.js +9 -0
  33. package/lib/Recognizer/methods.d.ts +8 -0
  34. package/lib/Recognizer/methods.js +29 -0
  35. package/lib/Recognizer/types.d.ts +6 -0
  36. package/lib/Recognizer/types.js +1 -0
  37. package/lib/Recognizer/useRecognizer.d.ts +16 -0
  38. package/lib/Recognizer/useRecognizer.js +71 -0
  39. package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
  40. package/lib/Recognizer/useVoiceInputVolume.js +52 -0
  41. package/lib/index.d.ts +6 -0
  42. package/lib/index.js +6 -0
  43. package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
  44. package/lib/specs/NitroSpeech.nitro.js +1 -0
  45. package/lib/specs/Recognizer.nitro.d.ts +95 -0
  46. package/lib/specs/Recognizer.nitro.js +1 -0
  47. package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
  48. package/lib/specs/SpeechRecognitionConfig.js +1 -0
  49. package/lib/specs/VolumeChangeEvent.d.ts +31 -0
  50. package/lib/specs/VolumeChangeEvent.js +1 -0
  51. package/nitro.json +2 -6
  52. package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
  53. package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +5 -3
  54. package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
  55. package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
  56. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +68 -19
  57. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +7 -4
  58. package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
  59. package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
  60. package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
  61. package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
  62. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
  63. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -5
  64. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
  65. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
  66. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
  67. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
  68. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
  69. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +203 -70
  70. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
  71. package/nitrogen/generated/ios/NitroSpeechAutolinking.swift +2 -2
  72. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +41 -9
  73. package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
  74. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  75. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +6 -3
  76. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +66 -18
  77. package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
  78. package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
  79. package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
  80. package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
  81. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
  82. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +17 -7
  83. package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
  84. package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
  85. package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
  86. package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
  87. package/package.json +15 -16
  88. package/src/NitroSpeech.ts +5 -0
  89. package/src/Recognizer/RecognizerRef.ts +23 -0
  90. package/src/Recognizer/SpeechRecognizer.ts +10 -0
  91. package/src/Recognizer/methods.ts +40 -0
  92. package/src/Recognizer/types.ts +33 -0
  93. package/src/Recognizer/useRecognizer.ts +85 -0
  94. package/src/Recognizer/useVoiceInputVolume.ts +65 -0
  95. package/src/index.ts +6 -182
  96. package/src/specs/NitroSpeech.nitro.ts +2 -163
  97. package/src/specs/Recognizer.nitro.ts +110 -0
  98. package/src/specs/SpeechRecognitionConfig.ts +167 -0
  99. package/src/specs/VolumeChangeEvent.ts +31 -0
  100. package/android/proguard-rules.pro +0 -1
  101. package/ios/AnylyzerTranscriber.swift +0 -331
  102. package/ios/AutoStopper.swift +0 -69
  103. package/ios/HapticImpact.swift +0 -32
  104. package/ios/LegacySpeechRecognizer.swift +0 -161
  105. package/lib/commonjs/index.js +0 -145
  106. package/lib/commonjs/index.js.map +0 -1
  107. package/lib/commonjs/package.json +0 -1
  108. package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
  109. package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
  110. package/lib/module/index.js +0 -138
  111. package/lib/module/index.js.map +0 -1
  112. package/lib/module/package.json +0 -1
  113. package/lib/module/specs/NitroSpeech.nitro.js +0 -4
  114. package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
  115. package/lib/tsconfig.tsbuildinfo +0 -1
  116. package/lib/typescript/index.d.ts +0 -50
  117. package/lib/typescript/index.d.ts.map +0 -1
  118. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
  119. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
  120. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
@@ -0,0 +1,241 @@
1
+ import Foundation
2
+ import Speech
3
+ import AVFoundation
4
+
5
@available(iOS 26.0, *)
final class AnalyzerEngine: RecognizerEngine {
  /// Thrown when an `AVAudioConverter` cannot be created for the
  /// hardware → analyzer format pair. A bare `NSError()` (as before) has an
  /// empty domain, which Foundation documents as invalid and which can crash
  /// when the error is bridged — so we throw a typed error instead.
  private struct ConverterCreationError: Error {}

  // Stream of audio handed to the SpeechAnalyzer, and its producer side.
  private var inputSequence: AsyncStream<AnalyzerInput>?
  private var inputBuilder: AsyncStream<AnalyzerInput>.Continuation?
  // Producer side of the raw mic-buffer stream fed by the audio-engine tap.
  private var outputContinuation: AsyncStream<AVAudioPCMBuffer>.Continuation?
  private var analyzer: SpeechAnalyzer?
  // Concrete backend: SpeechRuntime or DictationRuntime (chosen in init).
  private let transcriber: TranscriberRuntime

  private var audioProducerTask: Task<Void, Never>?
  private var recognizerTask: Task<(), Error>?
  // Start time (seconds) of the most recent result batch; a result whose
  // range starts at the same time replaces the last batch instead of
  // appending a new one (see handleBatch).
  private var lastBatchStartTime: Float64? = nil
  private var resultBatches: [String] = []

  init(backend: RecognizerBackend, locale: Locale, delegate: RecognizerDelegate) {
    if backend == .speechTranscriber {
      transcriber = SpeechRuntime(with: locale)
    } else {
      transcriber = DictationRuntime(with: locale)
    }
    super.init(locale: locale, delegate: delegate)
  }

  /// Stops the session: closes the analyzer input stream, lets the analyzer
  /// finalize pending audio (falling back to an immediate cancel on error),
  /// then tears everything down via cleanup.
  override func stop() {
    super.stop()
    // Close the analyzer input stream so buffered audio drains.
    inputBuilder?.finish()

    Task { [weak self] in
      guard let self = self else { return }

      do {
        try await self.analyzer?.finalizeAndFinishThroughEndOfInput()
      } catch {
        self.reportFailure(
          from: "stop.finalizeAndFinishThroughEndOfInput",
          message: "Failed to finalize the end of input",
          type: .onSession
        )
        await self.analyzer?.cancelAndFinishNow()
      }

      self.cleanup(from: "stopListening")
    }
  }

  /// Prepares the audio engine (super) and creates the transcriber,
  /// downloading its on-device assets. Failures are routed through
  /// reportFailure with the caller-supplied failure type.
  override func prewarm(for type: FailureType) async {
    await super.prewarm(for: type)
    do {
      // Create transcriber and install assets
      try await transcriber.create(config: self.recognizerDelegate?.config)
    }
    catch {
      self.reportFailure(
        from: "prewarm.assets",
        message: "Failed to create transcriber",
        type: type
      )
    }
  }

  /// Starts a full recognition session: prewarm → input stream → analyzer →
  /// audio producer task → result-handling task → analyzer start.
  override func startSession() async {
    await super.startSession()

    // Prepares transcriber and handles errors.
    // On failure, reportFailure triggers cleanup + engine reselection.
    await prewarm(for: .start)

    // Input sequence feeding the SpeechAnalyzer.
    (inputSequence, inputBuilder) = AsyncStream.makeStream(of: AnalyzerInput.self)

    let modules = transcriber.getModules()
    // Pick an audio format the analyzer modules can consume.
    guard let audioFormat = await SpeechAnalyzer.bestAvailableAudioFormat(
      compatibleWith: modules
    ) else {
      self.reportFailure(
        from: "startRecognition.SpeechAnalyzer.bestAvailableAudioFormat",
        message: "Failed to find SpeechAnalyzer audio format",
        type: .start
      )
      return
    }

    analyzer = SpeechAnalyzer(modules: modules)

    // Supply audio.
    audioProducerTask = Task {
      // hardwareFormat is cached by the base class's prewarm; without it the
      // engine cannot tap the mic (startAudioEngine would no-op anyway).
      guard let hardwareFormat else { return }

      // FIX: create the buffer stream (and assign its continuation) BEFORE
      // the audio engine starts tapping the microphone. Previously the
      // continuation was assigned only after startAudioEngine(), so buffers
      // delivered in between were silently dropped (outputContinuation was
      // still nil inside the tap callback).
      let stream = AsyncStream(
        AVAudioPCMBuffer.self,
        bufferingPolicy: .unbounded
      ) { continuation in
        outputContinuation = continuation
      }

      self.startAudioEngine(
        onBuffer: { [weak self] buffer in
          self?.outputContinuation?.yield(buffer)
        }
      )

      // Convert only when the tap format differs from the analyzer format.
      let needsConversion =
        hardwareFormat.commonFormat != audioFormat.commonFormat ||
        hardwareFormat.sampleRate != audioFormat.sampleRate ||
        hardwareFormat.channelCount != audioFormat.channelCount
      do {
        guard let converter = AVAudioConverter(
          from: hardwareFormat,
          to: audioFormat
        ) else {
          throw ConverterCreationError()
        }
        for await pcmBuffer in stream {
          if Task.isCancelled { break }

          let bufferForAnalyzer: AVAudioPCMBuffer
          if needsConversion {
            // Skip empty buffers (converter returns nil);
            // throws if buffers are inconvertible.
            guard let convertedBuffer = try AudioBufferConverter.convertBuffer(
              converter: converter,
              audioFormat: audioFormat,
              pcmBuffer: pcmBuffer
            ) else {
              continue
            }
            bufferForAnalyzer = convertedBuffer
          } else {
            bufferForAnalyzer = pcmBuffer
          }

          let input = AnalyzerInput(buffer: bufferForAnalyzer)
          inputBuilder?.yield(input)
        }
      } catch {
        // A deliberate stop/cancel is not a failure.
        if Task.isCancelled || self.isStopping {
          return
        }
        self.reportFailure(
          from: "startRecognition.audioProducerTask",
          message: "Failed to convert audio format",
          type: .start
        )
        return
      }
    }

    // Handle the results coming back from the transcriber.
    recognizerTask = Task {
      do {
        try await transcriber.handleResults(
          onResult: { [weak self] result in
            guard let self else { return }
            self.handleBatch(
              attrString: result.text,
              rangeStart: result.rangeStart,
              isFinal: result.isFinal
            )
          }
        )
      } catch {
        // A deliberate stop/cancel is not a failure.
        if self.isStopping || error is CancellationError {
          return
        }
        self.reportFailure(
          from: "startRecognition.recognizerTask",
          message: "Failed to retrieve transcriber result",
          type: .onSession
        )
      }
    }

    do {
      if let inputSequence, let analyzer {
        // Bias recognition toward caller-provided vocabulary, if any.
        if let contextualStrings = self.recognizerDelegate?.config?.contextualStrings {
          let context = AnalysisContext()
          context.contextualStrings = [
            AnalysisContext.ContextualStringsTag.general: contextualStrings
          ]
          try await analyzer.setContext(context)
        }
        try await analyzer.start(inputSequence: inputSequence)
      }
    } catch {
      self.reportFailure(
        from: "startRecognition.analyzerStart",
        message: "Failed to start analyze input sequence",
        type: .start
      )
      return
    }

    self.sendFeedbackOnStart()
  }

  /// Tears down analyzer-specific state on top of the base-class cleanup.
  override func cleanup(from: String) {
    super.cleanup(from: "overridden.\(from)")

    inputSequence = nil
    inputBuilder = nil
    outputContinuation?.finish()
    outputContinuation = nil
    analyzer = nil
    transcriber.clean()
    audioProducerTask?.cancel()
    audioProducerTask = nil
    recognizerTask?.cancel()
    recognizerTask = nil
    lastBatchStartTime = nil
    resultBatches = []
  }

  /// Folds one transcriber result into `resultBatches` and notifies the
  /// delegate. A result whose audio range starts at the same time as the
  /// previous one (or a final result) replaces the last batch; otherwise a
  /// new batch is appended.
  private func handleBatch(attrString: AttributedString, rangeStart: CMTime, isFinal: Bool) {
    var newBatch = String(attrString.characters)
    // Ignore batches with no word characters (letters, digits, underscore).
    if !newBatch.contains(/\w+/) {
      return
    }
    // Track only when transcription is coming
    self.trackPartialActivity()

    // Optionally collapse immediately repeated words (see Utils).
    let disableRepeatingFilter = self.recognizerDelegate?.config?.disableRepeatingFilter ?? false
    if !disableRepeatingFilter {
      newBatch = Utils.repeatingFilter(newBatch)
    }
    Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
    if self.resultBatches.isEmpty {
      self.resultBatches.append(newBatch)
    } else if CMTimeGetSeconds(rangeStart) == self.lastBatchStartTime || isFinal {
      Log.log("[2] replace, isFinal: \(isFinal)")
      self.resultBatches[self.resultBatches.count - 1] = newBatch
    } else {
      Log.log("[2] add new batch")
      self.resultBatches.append(newBatch)
    }
    self.lastBatchStartTime = CMTimeGetSeconds(rangeStart)
    self.recognizerDelegate?.result(batches: self.resultBatches)
  }
}
@@ -0,0 +1,67 @@
1
+ import Foundation
2
+ import Speech
3
+
4
@available(iOS 26.0, *)
final class DictationRuntime: TranscriberRuntime {
  let locale: Locale
  private var transcriber: DictationTranscriber?

  init(with locale: Locale) {
    self.locale = locale
  }

  /// Builds the `DictationTranscriber` for `locale` from the given config
  /// and downloads any on-device assets it still needs.
  func create(config: SpeechRecognitionConfig?) async throws {
    // Punctuation is on by default; the caller can switch it off either
    // explicitly or by choosing the short-form preset.
    let wantsPunctuation = !(config?.iosAddPunctuation == false
      || config?.iosPreset == IosPreset.shortform)

    var options: Set<DictationTranscriber.TranscriptionOption> = []
    if wantsPunctuation {
      options.insert(.punctuation)
    }
    if config?.maskOffensiveWords == true {
      options.insert(.etiquetteReplacements)
    }

    var hints: Set<DictationTranscriber.ContentHint> = [
      .shortForm,
      .farField,
    ]
    if config?.iosAtypicalSpeech == true {
      hints.insert(.atypicalSpeech)
    }

    let dictation = DictationTranscriber(
      locale: locale,
      contentHints: hints,
      transcriptionOptions: options,
      reportingOptions: [.frequentFinalization, .volatileResults],
      attributeOptions: [.audioTimeRange]
    )
    transcriber = dictation

    // Fetch locale assets on first use; the request is nil when everything
    // is already installed.
    if let request = try await AssetInventory.assetInstallationRequest(supporting: [dictation]) {
      try await request.downloadAndInstall()
    }
  }

  /// Modules to hand to `SpeechAnalyzer`; empty before `create` has run.
  func getModules() -> [any SpeechModule] {
    guard let transcriber else { return [] }
    return [transcriber]
  }

  /// Streams transcription results, mapping each into a `TranscriberResult`
  /// for the caller's callback. Returns immediately if no transcriber exists.
  func handleResults(
    onResult: @escaping (TranscriberResult) -> Void
  ) async throws {
    guard let transcriber else { return }
    for try await result in transcriber.results {
      onResult(
        TranscriberResult(
          text: result.text,
          rangeStart: result.range.start,
          isFinal: result.isFinal)
      )
    }
  }

  /// Drops the transcriber so a later `create` starts from scratch.
  func clean() {
    transcriber = nil
  }
}
@@ -0,0 +1,312 @@
1
+ import Foundation
2
+ import Speech
3
+ import AVFoundation
4
+
5
// No practical diff between "system" and "onSession" for now.
// For future: send the level of error to RN.
// "onSession" is the less critical level, since by then the session has
// already started successfully.
enum FailureType {
  case system, start, prewarm, onSession
}
14
+
15
/// Base class shared by the concrete recognition engines. Owns everything
/// that is backend-agnostic: the AVAudioEngine mic tap, the AVAudioSession,
/// the silence-based AutoStopper, the app-state observer, volume reporting,
/// and the failure-routing logic. Subclasses override prewarm/startSession/
/// stop/cleanup and call back into these shared pieces.
class RecognizerEngine {
  // True from startSession() until cleanup() runs.
  var isActive = false
  // True while a stop is in flight; suppresses late failures and timer resets.
  var isStopping = false
  // Cached input-node format; the first retrieval is expensive, so it is
  // fetched once in prepareAudioEngine() and kept across sessions.
  var hardwareFormat: AVAudioFormat?
  weak var recognizerDelegate: RecognizerDelegate?

  private let audioLevelTracker: AudioLevelTracker
  private var appStateObserver: AppStateObserver?
  private var audioEngine: AVAudioEngine?
  private var autoStopper: AutoStopper?
  private let lg = Lg(prefix: "RecognizerEngine")

  let locale: Locale

  init(locale: Locale, delegate: RecognizerDelegate) {
    self.locale = locale
    self.recognizerDelegate = delegate
    self.audioLevelTracker = AudioLevelTracker(
      resetAutoFinishVoiceSensitivity: delegate.config?.resetAutoFinishVoiceSensitivity
    )
  }

  // MARK: - Recognizer Methods

  /// Base prewarm only prepares the audio engine; subclasses add their
  /// backend-specific warm-up on top (see notes below).
  func prewarm(for: FailureType) async {
    self.prepareAudioEngine()
    // for SpeechTranscriber: .isAvailable and async assets
    // for Dictation: only async assets
    // for legacy SF: only sync .isAvailable
  }

  /// Entry point for a new session: requests mic/speech permission and, when
  /// granted, runs startSession.
  // NOTE(review): Permissions is defined elsewhere; assumes its onGranted
  // accepts an async closure — confirm against Shared/Permissions.swift.
  func start() {
    guard let recognizerDelegate, !isActive else { return }

    Permissions(
      onGranted: self.startSession,
      onDenied: recognizerDelegate.permissionDenied,
      onError: recognizerDelegate.error
    ).requestAuthorization()
  }

  /// Marks the session as stopping and fires the stop haptic. Subclasses are
  /// responsible for the actual teardown (they override stop and eventually
  /// call cleanup).
  func stop() {
    guard isActive, !isStopping else { return }
    isStopping = true
    HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
  }

  /// Flips the session flags and brings up the shared infrastructure:
  /// auto-stop timer, app-state observer, audio session.
  func startSession() async {
    lg.log("[startSession.startSession]")
    // Init everything
    isStopping = false
    isActive = true

    initAutoStop()
    lg.log("[startSession.initAutoStop]")
    startAppStateObserver()
    lg.log("[startSession.startAppStateObserver]")
    startAudioSession()
    lg.log("[startSession.startAudioSession]")
  }

  /// Installs a tap on the input node and starts the engine. Every buffer is
  /// run through the level tracker (for volume events and silence-timer
  /// resets) and then handed to `onBuffer`. No-op when the engine or the
  /// hardware format has not been prepared yet.
  func startAudioEngine(
    onBuffer: @escaping (AVAudioPCMBuffer) -> Void
  ) {
    lg.log("[startAudioEngine]")
    guard let audioEngine, let hardwareFormat else { return }
    audioEngine.inputNode.installTap(
      onBus: 0,
      bufferSize: 1024,
      format: hardwareFormat
    ) { [weak self] buffer, _ in
      guard let self, let recognizerDelegate = self.recognizerDelegate else { return }
      if let sample = self.audioLevelTracker.process(buffer) {
        // Send buffer volume data
        recognizerDelegate.volumeChange(
          event:
            VolumeChangeEvent(
              smoothedVolume: sample.smoothed,
              rawVolume: sample.raw,
              db: sample.db
            )
        )
        // Voice above the sensitivity threshold keeps the session alive.
        if sample.resetTimer {
          self.autoStopper?.resetTimer(from: "rms threshold")
        }
      }
      onBuffer(buffer)
    }
    lg.log("[startAudioEngine.installTap]")
    do {
      audioEngine.prepare()
      lg.log("[startAudioEngine.prepare]")
      try audioEngine.start()
      lg.log("[startAudioEngine.start]")
    } catch {
      self.reportFailure(
        from: "Audio Engine",
        message: "Audio Engine failed to start",
        // RecognizerEngine-agnostic Error
        type: .system
      )
    }
  }

  /// Signals to the JS side that listening has begun: start haptic, timer
  /// reset, readyForSpeech event, and an initial empty result batch.
  func sendFeedbackOnStart() {
    guard let recognizerDelegate else { return }
    lg.log("[sendFeedbackOnStart]")
    HapticImpact.trigger(with: recognizerDelegate.config?.startHapticFeedbackStyle)
    autoStopper?.resetTimer(from: "startListening.sendFeedbackOnStart")
    recognizerDelegate.readyForSpeech()
    recognizerDelegate.result(batches: [])
  }

  /// Applies live config changes to a running session. Only values that
  /// actually differ from the current config are pushed to the AutoStopper /
  /// level tracker; the delegate then merges the non-nil fields into its
  /// stored config. `addMsToTimer` takes precedence over `resetTimer`.
  func updateSession(
    newConfig: MutableSpeechRecognitionConfig? = nil,
    addMsToTimer: Double? = nil,
    resetTimer: Bool? = nil
  ) {
    guard let recognizerDelegate, isActive, !isStopping else { return }
    let currentConfig = recognizerDelegate.config
    // Update AutoFinish time
    if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
       newAutoFinish != currentConfig?.autoFinishRecognitionMs {
      autoStopper?.updateThreshold(
        newAutoFinish,
        from: "updateSession"
      )
    }
    // Update AutoFinish progress interval
    if let newInterval = newConfig?.autoFinishProgressIntervalMs,
       newInterval != currentConfig?.autoFinishProgressIntervalMs {
      autoStopper?.updateProgressInterval(
        newInterval,
        from: "updateSession"
      )
    }
    // Update AutoFinish reset voice sensitivity interval
    if let newSensitivity = newConfig?.resetAutoFinishVoiceSensitivity,
       newSensitivity != currentConfig?.resetAutoFinishVoiceSensitivity {
      audioLevelTracker.updateResetAutoFinishVoiceSensitivity(
        newValue: newSensitivity
      )
    }
    if let addMsToTimer {
      // Add time to the timer once
      autoStopper?.addMsOnce(
        addMsToTimer,
        from: "updateSession"
      )
    } else if resetTimer == true {
      // Reset to current baseline threshold.
      autoStopper?.resetTimer(from: "updateSession")
    }
    // Only update new non-nil values in the config
    recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
  }

  /// Tears down the shared infrastructure and resets the session flags.
  /// Emits a final zero-volume event, and a recordingStopped event only if a
  /// session was actually active (so repeated cleanups stay silent).
  /// Note: hardwareFormat is intentionally kept for faster re-prewarm.
  func cleanup(from: String) {
    lg.log("[cleanup]: \(from)")
    let wasActive = isActive
    deinitAutoStop()
    stopAppStateObserver()
    stopAudioSession()
    audioLevelTracker.reset()

    if let audioEngine, audioEngine.isRunning {
      audioEngine.stop()
    }
    audioEngine?.inputNode.removeTap(onBus: 0)

    audioEngine = nil
    isActive = false
    isStopping = false
    self.recognizerDelegate?.volumeChange(
      event:
        VolumeChangeEvent(
          smoothedVolume: 0,
          rawVolume: 0,
          db: nil
        )
    )
    if wasActive {
      self.recognizerDelegate?.recordingStopped()
    }
  }

  /// Central failure path: always cleans up this engine, then either asks
  /// the delegate to reselect an engine (prewarm/start failures) or reports
  /// the error to JS (system/on-session failures).
  func reportFailure(from: String, message: String, type: FailureType) {
    // Log message
    lg.log("[Failure] type: \(type), message: \(message)")

    // Cleanup on engine level anyway
    self.cleanup(from: from)

    switch type {
    // Try to reselect engine and try again
    case .prewarm, .start:
      let isPrewarm = type == .prewarm
      self.recognizerDelegate?.reselectEngine(forPrewarm: isPrewarm)
    // System level issue: send onError with description and clean
    // Session has already started: send onError and cleanup
    case .system, .onSession:
      self.recognizerDelegate?.error(message: message)
    }
  }

  /// Called by subclasses on every partial result; keeps the auto-stop
  /// timer alive while transcription is still flowing (unless stopping).
  func trackPartialActivity() {
    if !self.isStopping {
      self.autoStopper?.resetTimer(from: "Partial results")
    }
  }

  // MARK: - AudioEngine heavy prepare

  /// Creates the AVAudioEngine and caches the input hardware format (the
  /// first outputFormat(forBus:) call is the expensive part being prewarmed).
  // NOTE(review): AVAudioEngine() is a non-failable initializer, so the
  // guard below can never fail in practice; kept as defensive code.
  private func prepareAudioEngine() {
    lg.log("[prewarm.start]")
    audioEngine = AVAudioEngine()
    guard let audioEngine else {
      self.reportFailure(
        from: "Audio Engine",
        message: "Audio Engine failed to initiate",
        // RecognizerEngine-agnostic Error
        type: .system
      )
      return
    }
    lg.log("[prewarm.audioEngine]")
    // heavy first hardwareFormat retrieval
    if hardwareFormat == nil {
      hardwareFormat = audioEngine.inputNode.outputFormat(forBus: 0)
      lg.log("[prewarm.hardwareFormat]")
    }
  }

  // MARK: - AutoStopper

  /// Builds the silence watchdog from the current config; it reports
  /// countdown progress to the delegate and stops the session on timeout.
  private func initAutoStop() {
    let config = self.recognizerDelegate?.config
    autoStopper = AutoStopper(
      silenceThresholdMs: config?.autoFinishRecognitionMs,
      progressIntervalMs: config?.autoFinishProgressIntervalMs,
      onProgress: { [weak self] timeLeftMs in
        guard let self else { return }
        self.recognizerDelegate?.autoFinishProgress(
          timeLeftMs: timeLeftMs
        )
      },
      onTimeout: { [weak self] in
        self?.stop()
      }
    )
  }
  private func deinitAutoStop() {
    autoStopper?.stop()
    autoStopper = nil
  }

  // MARK: - App State Observer

  /// Stops the session when the app leaves the foreground (the observer's
  /// callback fires on the app-state change tracked by AppStateObserver).
  private func startAppStateObserver() {
    appStateObserver = AppStateObserver { [weak self] in
      guard let self, self.isActive else { return }
      self.stop()
    }
  }

  private func stopAppStateObserver() {
    appStateObserver?.stop()
    appStateObserver = nil
  }

  // MARK: - Audio Session

  /// Activates a record-only, measurement-mode audio session that ducks
  /// other audio; haptics are explicitly allowed during recording.
  private func startAudioSession() {
    do {
      let audioSession = AVAudioSession.sharedInstance()
      try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
      // Required for haptic feedback
      try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
      try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
    } catch {
      self.reportFailure(
        from: "startAudioSession",
        message: "Failed to activate audio session: \(error.localizedDescription)",
        // RecognizerEngine-agnostic Error
        type: .system
      )
    }
  }
  /// Deactivates the audio session; failures are logged only, since a stuck
  /// session is not fatal to the app.
  private func stopAudioSession() {
    do {
      // TODO: check unduck
      try AVAudioSession.sharedInstance().setActive(false)
    } catch {
      // Just log and no-op - not critical
      lg.log("Failed to deactivate audio session: \(error.localizedDescription)")
    }
  }
}