react-native-davoice-tts 1.0.218 → 1.0.219

This diff represents the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two published versions.
Files changed (30)
  1. package/TTSRNBridge.podspec +1 -1
  2. package/ios/SpeechBridge/SpeechBridge.m +153 -0
  3. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DavoiceTTS +0 -0
  4. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios.abi.json +3388 -3388
  5. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios.private.swiftinterface +20 -20
  6. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios.swiftinterface +20 -20
  7. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DavoiceTTS +0 -0
  8. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios-simulator.abi.json +3316 -3316
  9. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios-simulator.private.swiftinterface +32 -32
  10. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios-simulator.swiftinterface +32 -32
  11. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/x86_64-apple-ios-simulator.abi.json +3316 -3316
  12. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/x86_64-apple-ios-simulator.private.swiftinterface +32 -32
  13. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/x86_64-apple-ios-simulator.swiftinterface +32 -32
  14. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/_CodeSignature/CodeDirectory +0 -0
  15. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/_CodeSignature/CodeRequirements-1 +0 -0
  16. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/_CodeSignature/CodeResources +24 -99
  17. package/package.json +1 -1
  18. package/speech/index.ts +106 -0
  19. package/android/src/main/java/com/davoice/tts/rn/DaVoiceTTSPackage.java_old_using_new_for_both_stt_and_tts +0 -26
  20. package/ios/STTRNBridge/STTBridge.m_wtf +0 -109
  21. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT copy.swift____ +0 -1202
  22. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift.bkup +0 -1000
  23. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift.latest +0 -1359
  24. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift1.swift__ +0 -1134
  25. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift__ +0 -1329
  26. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT copy.swift____ +0 -1202
  27. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift.bkup +0 -1000
  28. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift.latest +0 -1359
  29. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift1.swift__ +0 -1134
  30. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift__ +0 -1329
@@ -1,1134 +0,0 @@
1
- // STT.swift
2
- // Native iOS Swift version (AEC flow preserved 1:1)
3
-
4
- import Foundation
5
- import UIKit
6
- import Speech
7
- import Accelerate
8
- import AVFAudio // or import AVFoundation
9
-
10
- @objc public protocol STTDelegate: AnyObject {
11
- @objc func stt(_ stt: STT, didEmitEvent name: String, body: [String: Any]?)
12
- }
13
-
14
- @objcMembers
15
- public final class STT: NSObject, SFSpeechRecognizerDelegate {
16
- public weak var delegate: STTDelegate?
17
- public var continuous: Bool = true
18
-
19
- // MARK: - Private
20
- private var speechRecognizer: SFSpeechRecognizer?
21
- private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
22
- private var audioEngine: AVAudioEngine?
23
- private var recognitionTask: SFSpeechRecognitionTask?
24
- private var audioSession: AVAudioSession?
25
- private var isTearingDown: Bool = false
26
- private var sessionId: String?
27
- private var priorAudioCategory: AVAudioSession.Category?
28
- private var averagePowerForChannel0: Float = 0
29
- private var averagePowerForChannel1: Float = 0
30
-
31
- private var playbackNode: AVAudioPlayerNode?
32
- private var seenRealSpeech = false // flips true after first non-blank token
33
- private var engineHotAt: CFTimeInterval = 0 // when engine actually started
34
- private let warmupKeepAlive: CFTimeInterval = 4.0 // seconds we’ll keep re-arming in silence
35
-
36
- // Keep-engine-alive helpers
37
- private var lastReclaimAttempt: CFAbsoluteTime = 0
38
- private let reclaimCooldown: CFTimeInterval = 1.0
39
-
40
- // --- Task health ---
41
- private var lastBufferAt: CFTimeInterval = 0 // updated from tap
42
- private var lastResultAt: CFTimeInterval = 0 // updated from recognition callback
43
- private var lastTaskStartAt: CFTimeInterval = 0
44
- private var stallWatchdog: Timer?
45
- private var consecutiveStallCount = 0
46
- private let stallThreshold: CFTimeInterval = 8.0 // seconds w/o results while engine is hot
47
- private let rearmCooldownTask: CFTimeInterval = 2.0
48
- private var lastRearmAt: CFTimeInterval = 0
49
- private var engineHot = false
50
- private var hotAt: CFTimeInterval = 0
51
-
52
- // --- Recovery & diagnostics ---
53
- private var recoverySeq = 0
54
- private var lastRecoveryAt: CFTimeInterval = 0
55
- private var lastTaskOrigin: String = "cold"
56
-
57
- private(set) var sttActive = false
58
-
59
- // partial cadence monitor
60
- private var emaPartialGap: Double = 0 // exponential moving average of time between partials
61
- private let emaAlpha: Double = 0.3
62
-
63
- // MARK: - Event names (unchanged)
64
- public static let supportedEvents: [String] = [
65
- "onSpeechResults",
66
- "onSpeechStart",
67
- "onSpeechPartialResults",
68
- "onSpeechError",
69
- "onSpeechEnd",
70
- "onSpeechRecognized",
71
- "onSpeechVolumeChanged"
72
- ]
73
-
74
- // MARK: - Public API (native replacements for the former RCT methods)
75
-
76
- public func isSpeechAvailable(_ completion: @escaping (Bool) -> Void) {
77
- SFSpeechRecognizer.requestAuthorization { status in
78
- switch status {
79
- case .authorized: completion(true)
80
- default: completion(false)
81
- }
82
- }
83
- }
84
-
85
- public func isRecognizing() -> Bool {
86
- guard let task = recognitionTask else { return false }
87
- return task.state == .running
88
- }
89
-
90
- private func ensurePlaybackNode(in engine: AVAudioEngine) -> AVAudioPlayerNode {
91
- // If we have a node but it's tied to a different engine or got disconnected, recreate it.
92
- if let p = playbackNode, p.engine === engine {
93
- return p
94
- }
95
- let p = AVAudioPlayerNode()
96
- playbackNode = p
97
- engine.attach(p)
98
- // Connect with nil format so the mixer does SRC if needed
99
- engine.connect(p, to: engine.mainMixerNode, format: nil)
100
- return p
101
- }
102
-
103
- private func startWatchdog() {
104
- stallWatchdog?.invalidate()
105
- stallWatchdog = Timer.scheduledTimer(withTimeInterval: 2.0, repeats: true) { [weak self] _ in
106
- self?.checkTaskHealth()
107
- }
108
- RunLoop.main.add(stallWatchdog!, forMode: .common)
109
- }
110
-
111
- private func stopWatchdog() {
112
- stallWatchdog?.invalidate()
113
- stallWatchdog = nil
114
- }
115
-
116
- private func rearmTask(reason: String) {
117
- // Cancel old task only — keep the engine and tap running.
118
- recognitionTask?.cancel()
119
- recognitionTask = nil
120
-
121
- seenRealSpeech = false
122
- lastTaskStartAt = CACurrentMediaTime()
123
- startTask(makeFreshRequest())
124
- NSLog("[STT] rearmTask(\(reason)) -> new task started")
125
- }
126
-
127
- private func checkTaskHealth() {
128
- guard let engine = audioEngine else { return }
129
- let now = CACurrentMediaTime()
130
-
131
- // Engine down? Let your existing logic handle it; just bail.
132
- if !engine.isRunning { return }
133
-
134
- // If recognizer is globally unavailable, don’t thrash — wait until it flips back.
135
- if let rec = speechRecognizer, rec.isAvailable == false {
136
- NSLog("[STT] watchdog: recognizer unavailable; waiting…")
137
- return
138
- }
139
-
140
- // No task at all? Spin one up.
141
- if recognitionTask == nil {
142
- if now - lastRearmAt > rearmCooldownTask {
143
- NSLog("[STT] watchdog: no task -> start fresh request")
144
- lastRearmAt = now
145
- startTask(makeFreshRequest())
146
- }
147
- return
148
- }
149
-
150
- // If we’ve had buffers recently but no results for a while, assume the task is stuck.
151
- let noResultsFor = now - lastResultAt
152
- let hadRecentAudio = (now - lastBufferAt) < max(2.0, stallThreshold) // tap is alive
153
-
154
- if hadRecentAudio && noResultsFor > stallThreshold {
155
- if now - lastRearmAt > rearmCooldownTask {
156
- consecutiveStallCount += 1
157
- NSLog("[STT] watchdog: stall detected (no results for \(Int(noResultsFor))s, audio flowing). rearm #\(consecutiveStallCount)")
158
-
159
- rearmTask(reason: "watchdog-stall")
160
- lastRearmAt = now
161
-
162
- // If we stall repeatedly, recreate the recognizer itself (server/session could be hosed)
163
- if consecutiveStallCount >= 3 {
164
- recreateSpeechRecognizerPreservingLocale()
165
- consecutiveStallCount = 0
166
- }
167
- }
168
- } else if hadRecentAudio {
169
- // Healthy path: audio & results are flowing; reset stall counter
170
- consecutiveStallCount = 0
171
- }
172
- }
173
-
174
- public func startSpeech(localeStr: String?) {
175
- NSLog("[STT] startSpeech(locale=\(localeStr ?? "nil"))")
176
-
177
- if recognitionTask != nil {
178
- sendResult(error: ["code": "already_started", "message": "Speech recognition already started!"],
179
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
180
- return
181
- }
182
-
183
- SFSpeechRecognizer.requestAuthorization { [weak self] status in
184
- guard let self = self else { return }
185
- switch status {
186
- case .notDetermined:
187
- self.sendResult(error: ["message": "Speech recognition not yet authorized"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
188
- case .denied:
189
- self.sendResult(error: ["message": "User denied access to speech recognition"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
190
- case .restricted:
191
- self.sendResult(error: ["message": "Speech recognition restricted on this device"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
192
- case .authorized:
193
- self.setupAndStartRecognizing(localeStr: localeStr)
194
- @unknown default:
195
- self.sendResult(error: ["message": "Unknown authorization status"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
196
- }
197
- }
198
- }
199
-
200
- public func stopSpeech(_ completion: ((Bool) -> Void)? = nil) {
201
- NSLog("[STT] stopSpeech() requested by app")
202
- recognitionTask?.finish()
203
- completion?(false)
204
- }
205
-
206
- public func cancelSpeech(_ completion: ((Bool) -> Void)? = nil) {
207
- NSLog("[STT] cancelSpeech() requested by app")
208
-
209
- recognitionTask?.cancel()
210
- completion?(false)
211
- }
212
-
213
- public func destroySpeech(_ completion: ((Bool) -> Void)? = nil) {
214
- NSLog("[STT] **** destroySpeech!!!")
215
- teardown()
216
- completion?(false)
217
- }
218
-
219
- private func updateSessionRouting(selectBestInput: Bool = true) {
220
- NSLog("[STT] ⚠️ updateSessionRouting??? why???")
221
-
222
- let s = AVAudioSession.sharedInstance()
223
-
224
- // fast checks & logs can run on main
225
- let inputs = s.currentRoute.inputs
226
- guard !inputs.isEmpty else {
227
- NSLog("[STT] ⚠️ No capture route (likely A2DP). Deferring engine start.")
228
- return
229
- }
230
-
231
- DispatchQueue.global(qos: .userInitiated).async { [weak self] in
232
- guard let self = self else { return }
233
- do { try s.setActive(false, options: [.notifyOthersOnDeactivation]) }
234
- catch { NSLog("[STT] setActive false failed: \(error.localizedDescription)") }
235
-
236
- let hasWiredOrCar = s.currentRoute.outputs.contains {
237
- $0.portType == .headphones || $0.portType == .carAudio || $0.portType == .usbAudio
238
- }
239
- if selectBestInput, let all = s.availableInputs {
240
- let btHFP = all.first { $0.portType == .bluetoothHFP }
241
- let wired = all.first { $0.portType == .headsetMic }
242
- let built = all.first { $0.portType == .builtInMic }
243
- let best = btHFP ?? wired ?? built
244
- do {
245
- if s.preferredInput?.uid != best?.uid { try s.setPreferredInput(best) }
246
- if let builtIn = best, builtIn.portType == .builtInMic,
247
- let ds = builtIn.dataSources?.first(where: { $0.orientation == .bottom || $0.orientation == .back }) {
248
- try? builtIn.setPreferredDataSource(ds)
249
- }
250
- } catch {
251
- NSLog("[STT] setPreferredInput failed: \(error.localizedDescription)")
252
- }
253
- }
254
-
255
- var opts: AVAudioSession.CategoryOptions = [.allowBluetooth]
256
- if !hasWiredOrCar { opts.insert(.defaultToSpeaker) }
257
-
258
- if s.category != .playAndRecord || s.mode != .voiceChat || s.categoryOptions != opts {
259
- do { try s.setCategory(.playAndRecord, mode: .voiceChat, options: opts) }
260
- catch { NSLog("[STT] setCategory failed: \(error.localizedDescription)") }
261
- }
262
-
263
- do { try s.setActive(true, options: []) }
264
- catch { NSLog("[STT] setActive failed: \(error.localizedDescription)") }
265
-
266
- // Optional: force 16k after activation
267
- self.force16kIfPossible(s)
268
-
269
- // Log route back on main so logs stay ordered
270
- DispatchQueue.main.async {
271
- let inPorts = s.currentRoute.inputs.map { "\($0.portType.rawValue):\($0.portName)" }.joined(separator:", ")
272
- let outPorts = s.currentRoute.outputs.map { "\($0.portType.rawValue):\($0.portName)" }.joined(separator:", ")
273
- NSLog("[STT] route in=[\(inPorts)] out=[\(outPorts)]")
274
- }
275
- }
276
- }
277
-
278
- // ↓↓↓ preferred settings helper
279
- private func force16kIfPossible(_ session: AVAudioSession) {
280
- try? session.setPreferredSampleRate(16_000)
281
- if session.isInputAvailable { try? session.setPreferredInputNumberOfChannels(1) }
282
- try? session.setPreferredOutputNumberOfChannels(1)
283
- try? session.setPreferredIOBufferDuration(0.02) // ~20 ms frames
284
- }
285
-
286
- // MARK: - Core logic (kept intact, including AEC order/steps)
287
-
288
- /// Returns true if no errors occurred (identical flow & calls as ObjC).
289
- /// Returns true if no errors occurred (identical flow & calls as ObjC) + keep-alive opts.
290
- /// Returns true if no errors occurred (identical flow & calls as ObjC) + keep-alive opts.
291
- private func setupAudioSession() -> Bool {
292
- var err: NSError?
293
- let session = AVAudioSession.sharedInstance()
294
- self.audioSession = session
295
-
296
- do { try session.setActive(false, options: [.notifyOthersOnDeactivation]) }
297
- catch { NSLog("[STT] setActive false failed: \(error.localizedDescription)") }
298
-
299
- // Build options to match our routing rules
300
- // (defaultToSpeaker only when no external output is active)
301
- let hasExternalOutput: Bool = session.currentRoute.outputs.contains {
302
- switch $0.portType {
303
- case .headphones, .bluetoothA2DP, .bluetoothHFP, .bluetoothLE, .airPlay, .carAudio, .usbAudio:
304
- return true
305
- default:
306
- return false
307
- }
308
- }
309
-
310
- var opts: AVAudioSession.CategoryOptions = [.allowBluetooth]
311
- if !hasExternalOutput { opts.insert(.defaultToSpeaker) }
312
- if #available(iOS 14.5, *) {
313
- // Prevent muted switch / mic mute from killing our capture pipeline
314
- opts.insert(.overrideMutedMicrophoneInterruption)
315
- }
316
-
317
- do {
318
- try session.setCategory(.playAndRecord, mode: .voiceChat, options: opts)
319
- } catch { err = error as NSError }
320
-
321
- do { try session.setActive(false, options: [.notifyOthersOnDeactivation]) }
322
- catch { NSLog("[STT] setActive false failed: \(error.localizedDescription)") }
323
-
324
- // Force 16k before and after activation (some routes settle only after setActive)
325
- force16kIfPossible(session)
326
- do { try session.setActive(true) } catch { err = error as NSError }
327
- NSLog("[STT] session SR=%.1f inCh=%d outCh=%d (wanted 16000)",
328
- session.sampleRate,
329
- Int(session.inputNumberOfChannels),
330
- Int(session.outputNumberOfChannels))
331
- force16kIfPossible(session)
332
-
333
- if let e = err {
334
- NSLog("[STT] setupAudioSession error: \(e.localizedDescription)")
335
- sendResult(error: ["code": "audio", "message": e.localizedDescription],
336
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
337
- return false
338
- }
339
- return true
340
- }
341
-
342
- private func currentInputFormat(_ engine: AVAudioEngine) -> AVAudioFormat? {
343
- // Prefer whatever CoreAudio currently provides; avoid cached formats.
344
- let fmt = engine.inputNode.outputFormat(forBus: 0)
345
- if fmt.sampleRate > 0 && fmt.channelCount > 0 { return fmt }
346
- // Fallback: build a sane mono format from session if ever needed.
347
- let sr = max(8000, AVAudioSession.sharedInstance().sampleRate)
348
- return AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: sr, channels: 1, interleaved: false)
349
- }
350
-
351
- private func isHeadsetPluggedIn() -> Bool {
352
- let route = AVAudioSession.sharedInstance().currentRoute
353
- for out in route.outputs {
354
- if out.portType == .headphones || out.portType == .bluetoothA2DP {
355
- return true
356
- }
357
- }
358
- return false
359
- }
360
-
361
- private func isHeadSetBluetooth() -> Bool {
362
- for port in AVAudioSession.sharedInstance().availableInputs ?? [] {
363
- if port.portType == .bluetoothHFP { return true }
364
- }
365
- return false
366
- }
367
-
368
- private func loadContextualStrings() -> [String] {
369
- guard let filePath = Bundle.main.path(forResource: "words_flattened", ofType: "txt") else {
370
- NSLog("words_flattened.txt not found in bundle")
371
- return []
372
- }
373
- do {
374
- let contents = try String(contentsOfFile: filePath, encoding: .utf8)
375
- let rawItems = contents.components(separatedBy: ",")
376
- var cleaned: [String] = []
377
- cleaned.reserveCapacity(rawItems.count)
378
- for item in rawItems {
379
- var t = item.trimmingCharacters(in: .whitespacesAndNewlines)
380
- t = t.replacingOccurrences(of: "\"", with: "")
381
- if !t.isEmpty { cleaned.append(t) }
382
- }
383
- return cleaned
384
- } catch {
385
- NSLog("Error reading contextualStrings: \(error)")
386
- return []
387
- }
388
- }
389
-
390
- // Add helpers
391
- private func makeFreshRequest() -> SFSpeechAudioBufferRecognitionRequest {
392
- let req = SFSpeechAudioBufferRecognitionRequest()
393
- if #available(iOS 16, *) { req.addsPunctuation = true }
394
- req.shouldReportPartialResults = true
395
- //if #available(iOS 13.0, *) { req.taskHint = .dictation }
396
- req.contextualStrings = loadContextualStrings()
397
- self.recognitionRequest = req
398
- NSLog("makeFreshRequest()")
399
- return req
400
- }
401
-
402
- private func startTask(_ req: SFSpeechAudioBufferRecognitionRequest) {
403
- NSLog("starting recognitionTask")
404
- lastTaskStartAt = CACurrentMediaTime()
405
- lastResultAt = lastTaskStartAt
406
- let taskSessionId = self.sessionId
407
- self.recognitionTask = self.speechRecognizer?.recognitionTask(with: req) { [weak self] result, error in
408
- guard let self = self else { return }
409
- if taskSessionId != self.sessionId { NSLog("task session mismatch -> ignore"); return }
410
- self.lastResultAt = CACurrentMediaTime()
411
-
412
- func markIfReal(_ r: SFSpeechRecognitionResult?) {
413
- guard let r = r else { return }
414
- let best = r.bestTranscription.formattedString.trimmingCharacters(in: .whitespacesAndNewlines)
415
- if !best.isEmpty ||
416
- r.transcriptions.contains(where: { !$0.formattedString.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }) {
417
- if !self.seenRealSpeech {
418
- self.seenRealSpeech = true
419
- NSLog("first real speech detected -> onSpeechStart to JS")
420
- self.sendEvent(name: "onSpeechStart", body: nil)
421
- }
422
- }
423
- }
424
- markIfReal(result)
425
-
426
- func rearm(_ why: String, delay: TimeInterval = 0.05) {
427
- guard self.continuous else { return }
428
- NSLog("REARM (\(why))")
429
- self.recognitionTask?.cancel()
430
- self.recognitionTask = nil
431
- DispatchQueue.main.asyncAfter(deadline: .now() + delay) {
432
- self.startTask(self.makeFreshRequest())
433
- }
434
- }
435
-
436
- if let error = error {
437
- NSLog("task error \(error._code): \(error.localizedDescription)")
438
- // treat as transient for continuous mode
439
- rearmTask(reason: "error")
440
- return
441
- }
442
-
443
- guard let result = result else {
444
- NSLog("task nil result")
445
- rearmTask(reason: "nil-result")
446
- return
447
- }
448
-
449
- let isFinal = result.isFinal
450
- let parts = result.transcriptions.map { $0.formattedString }
451
- self.sendResult(error: nil,
452
- bestTranscription: result.bestTranscription.formattedString,
453
- transcriptions: parts,
454
- isFinal: isFinal)
455
-
456
- if isFinal {
457
- NSLog("task final -> onSpeechEnd")
458
- self.sendEvent(name: "onSpeechEnd", body: nil)
459
- if self.continuous {
460
- self.rearmTask(reason: "final")
461
- } else {
462
- NSLog("non-continuous final -> teardown")
463
- self.teardown()
464
- }
465
- }
466
- }
467
- }
468
-
469
- public func teardown() {
470
- NSLog("[STT] teardown() begin")
471
- isTearingDown = true
472
- stopWatchdog()
473
- consecutiveStallCount = 0
474
-
475
- if let task = recognitionTask {
476
- task.cancel()
477
- recognitionTask = nil
478
- }
479
- AudioPlaybackHook.engineScheduleFile = nil
480
- AudioPlaybackHook.isEngineReady = nil
481
- AudioPlaybackHook.useOnlyEnginePlayback = nil
482
- AudioPlaybackHook.stopEnginePlayback = nil // ← NEW
483
- sttActive = false
484
-
485
- if let p = playbackNode {
486
- p.stop()
487
- }
488
- playbackNode = nil
489
-
490
- if let req = recognitionRequest {
491
- req.endAudio()
492
- recognitionRequest = nil
493
- }
494
-
495
- if let engine = audioEngine {
496
- if engine.inputNode != nil {
497
- engine.inputNode.removeTap(onBus: 0)
498
- engine.inputNode.reset()
499
- }
500
- if engine.isRunning {
501
- engine.stop()
502
- }
503
- engine.reset()
504
- audioEngine = nil // Crucial step!
505
- }
506
-
507
- resetAudioSession()
508
-
509
- sessionId = nil
510
- isTearingDown = false
511
- }
512
-
513
- public func teardown2() {
514
- isTearingDown = true
515
- recognitionTask?.cancel()
516
- recognitionTask = nil
517
-
518
- resetAudioSession()
519
-
520
- recognitionRequest?.endAudio()
521
- recognitionRequest = nil
522
-
523
- if let engine = audioEngine {
524
- engine.inputNode.removeTap(onBus: 0)
525
- engine.inputNode.reset()
526
-
527
- if engine.isRunning {
528
- engine.stop()
529
- engine.reset()
530
- audioEngine = nil
531
- }
532
- }
533
-
534
- sessionId = nil
535
- isTearingDown = false
536
- }
537
-
538
- private func resetAudioSession() {
539
- if audioSession == nil {
540
- audioSession = AVAudioSession.sharedInstance()
541
- }
542
- guard let session = audioSession else { return }
543
-
544
- // Preserve & compare category exactly as original logic
545
- let current = session.category
546
- if priorAudioCategory == current { return }
547
-
548
- // do {
549
- // try session.setCategory(priorAudioCategory ?? .soloAmbient,
550
- // mode: .default,
551
- // options: [.allowBluetooth,
552
- // .defaultToSpeaker,
553
- // .allowAirPlay,
554
- // .mixWithOthers])
555
- // } catch {
556
- // // Silent, matching original behavior (no error propagation here)
557
- // }
558
- audioSession = nil
559
- }
560
-
561
- private func isPlayerConnected(_ player: AVAudioPlayerNode?, to engine: AVAudioEngine?) -> Bool {
562
- guard let p = player, let e = engine else { return false }
563
- // If the node is attached and has a non-zero channel count on its output, it’s effectively connected.
564
- let fmt = p.outputFormat(forBus: 0)
565
- return (p.engine === e) && (fmt.channelCount > 0) && (fmt.sampleRate > 0)
566
- }
567
-
568
- /// Try to keep the capture alive without tearing down recognition.
569
- /// 1) If engine exists but not running → try start()
570
- /// 2) If start fails or graph became invalid → rebuild graph and start
571
- /// 3) If we don’t have a task yet, start one.
572
- private func ensureEngineRunning(reason: String) {
573
- let now = CFAbsoluteTimeGetCurrent()
574
- if (now - lastReclaimAttempt) < reclaimCooldown {
575
- NSLog("[STT] ensureEngineRunning(\(reason)) skipped (cooldown)")
576
- return
577
- }
578
- lastReclaimAttempt = now
579
-
580
- if (audioEngine != nil) && !audioEngine!.isRunning {
581
- do {
582
- playbackNode?.stop()
583
- playbackNode = nil
584
- // Possibly re-apply your format or re-install taps if the hardware changed sample rates
585
- try audioEngine!.start()
586
- print("🔄 AVAudioEngine restarted after config change. isRunning=%@",
587
- audioEngine!.isRunning ? "YES":"NO")
588
- } catch {
589
- print("❌ Could not re-start after config change: \(error)")
590
- }
591
- }
592
-
593
- guard let engine = audioEngine else {
594
- NSLog("[STT] ensureEngineRunning(\(reason)): no engine → rebuild")
595
- rebuildEngineGraphAndRestart(reason: reason)
596
- return
597
- }
598
-
599
- if !engine.isRunning {
600
- do {
601
- try engine.start()
602
- NSLog("[STT] ensureEngineRunning(\(reason)): engine.start() -> running=\(engine.isRunning)")
603
- } catch {
604
- NSLog("[STT] ensureEngineRunning(\(reason)): engine.start() failed: \(error) → rebuild")
605
- rebuildEngineGraphAndRestart(reason: reason)
606
- return
607
- }
608
- }
609
-
610
- // If we have no active task, spin one up against the current request
611
- if recognitionTask == nil {
612
- if let req = recognitionRequest {
613
- NSLog("[STT] ensureEngineRunning(\(reason)): no task -> startTask(existing req)")
614
- startTask(req)
615
- } else {
616
- NSLog("[STT] ensureEngineRunning(\(reason)): no req -> makeFreshRequest + startTask")
617
- startTask(makeFreshRequest())
618
- }
619
- }
620
- }
621
-
622
- /// Rebuilds AVAudioEngine graph (mic→mute mixer, player→mainMixer), reinstalls tap,
623
- /// and restarts the engine. Does NOT nuke the current recognitionRequest/task unless required.
624
- private func rebuildEngineGraphAndRestart(reason: String) {
625
- NSLog("[STT] 🔄 rebuildEngineGraphAndRestart (\(reason))")
626
-
627
- // Keep current request if present; we'll keep appending into it
628
- let existingReq = self.recognitionRequest
629
-
630
- // Tear down engine ONLY (keep session, request)
631
- if let engine = audioEngine {
632
- if engine.inputNode != nil {
633
- engine.inputNode.removeTap(onBus: 0)
634
- engine.inputNode.reset()
635
- }
636
- if engine.isRunning { engine.stop() }
637
- engine.reset()
638
- }
639
-
640
- // Recreate engine and graph
641
- let newEngine = AVAudioEngine()
642
- self.audioEngine = newEngine
643
-
644
- let inputNode = newEngine.inputNode
645
- do {
646
- try inputNode.setVoiceProcessingEnabled(true)
647
- } catch {
648
- NSLog("[STT] rebuild: failed to enable voice processing: \(error)")
649
- }
650
- if #available(iOS 17.0, *) {
651
- var duck = AVAudioVoiceProcessingOtherAudioDuckingConfiguration()
652
- duck.enableAdvancedDucking = false
653
- duck.duckingLevel = .min
654
- inputNode.voiceProcessingOtherAudioDuckingConfiguration = duck
655
- }
656
-
657
- // --- FIXED WIRING: use live format on first hop, nil downstream, nil for tap ---
658
- let inFmt = newEngine.inputNode.outputFormat(forBus: 0)
659
-
660
- // mic → mute mixer → mainMixer
661
- let micMixer = AVAudioMixerNode()
662
- newEngine.attach(micMixer)
663
- newEngine.connect(inputNode, to: micMixer, format: inFmt) // live input format
664
- newEngine.connect(micMixer, to: newEngine.mainMixerNode, format: nil) // let mixer choose
665
- micMixer.outputVolume = 0.0
666
-
667
- // TTS player → mainMixer (keep same player if possible, else recreate)
668
- if playbackNode == nil { playbackNode = AVAudioPlayerNode() }
669
- if let player = playbackNode {
670
- if player.engine == nil { newEngine.attach(player) }
671
- newEngine.connect(player, to: newEngine.mainMixerNode, format: nil)
672
- }
673
-
674
- do {
675
- // if there was a previous tap, remove it first
676
- try? inputNode.removeTap(onBus: 0)
677
- } catch {
678
- NSLog("[STT] removeTap error: \(error)")
679
- }
680
- let format = inputNode.outputFormat(forBus: 0) // <- prefer explicit format
681
-
682
- // Tap uses nil to follow the node’s current output format
683
- inputNode.installTap(onBus: 0, bufferSize: 1024, format: format) { [weak self] buffer, _ in
684
- guard let self = self else { return }
685
-
686
- // (same level metering as your current code)
687
- let frames: vDSP_Length = vDSP_Length(buffer.frameLength)
688
- let LP: Float = 0.5
689
-
690
- if buffer.format.channelCount > 0, let ch0 = buffer.floatChannelData?[0] {
691
- var peak0: Float = 0
692
- vDSP_maxmgv(ch0, 1, &peak0, frames)
693
- let db0: Float = (peak0 == 0) ? -100 : 20.0 * log10f(peak0)
694
- let sm0 = LP * db0 + (1 - LP) * self.averagePowerForChannel0
695
- self.averagePowerForChannel0 = sm0
696
- self.averagePowerForChannel1 = sm0
697
- }
698
- if buffer.format.channelCount > 1, let ch1 = buffer.floatChannelData?[1] {
699
- var peak1: Float = 0
700
- vDSP_maxmgv(ch1, 1, &peak1, frames)
701
- let db1: Float = (peak1 == 0) ? -100 : 20.0 * log10f(peak1)
702
- let sm1 = LP * db1 + (1 - LP) * self.averagePowerForChannel1
703
- self.averagePowerForChannel1 = sm1
704
- }
705
- self.averagePowerForChannel1 = Float(self._normalizedPowerLevelFromDecibels(CGFloat(self.averagePowerForChannel1)) * 10.0)
706
- self.sendEvent(name: "onSpeechVolumeChanged", body: ["value": self.averagePowerForChannel1])
707
-
708
- self.recognitionRequest?.append(buffer)
709
- self.lastBufferAt = CACurrentMediaTime()
710
- }
711
-
712
- newEngine.prepare()
713
- do {
714
- try newEngine.start()
715
- NSLog("[STT] rebuild: engine.start() ok, running=\(newEngine.isRunning)")
716
- } catch {
717
- NSLog("[STT] rebuild: engine.start() failed: \(error)")
718
- }
719
-
720
- // If we lost the request during rebuild, recreate + start task.
721
- if self.recognitionRequest == nil {
722
- if let old = existingReq {
723
- self.recognitionRequest = old
724
- } else {
725
- self.recognitionRequest = makeFreshRequest()
726
- }
727
- }
728
- if self.recognitionTask == nil {
729
- startTask(self.recognitionRequest!)
730
- }
731
- }
732
-
733
- @objc private func handleEngineConfigChange(_ note: Notification) {
734
- NSLog("[STT] ⚙️ AVAudioEngineConfigurationChange: ensuring engine running")
735
- if (audioEngine != nil) && !audioEngine!.isRunning {
736
- playbackNode?.stop()
737
- playbackNode = nil
738
- }
739
- ensureEngineRunning(reason: "engine-config-change")
740
- }
741
-
742
- @objc private func handleMediaServicesReset(_ note: Notification) {
743
- NSLog("[STT] 📺 Media services were RESET: reclaiming mic & session")
744
- // Re-apply audio session and try to rebuild graph if needed
745
- _ = setupAudioSession()
746
- ensureEngineRunning(reason: "media-services-reset")
747
- }
748
-
749
- @objc private func handleRouteChange(_ note: Notification) {
750
- let info = note.userInfo ?? [:]
751
- NSLog("[STT] 🔀 route change: \(info)")
752
-
753
- guard let reasonVal = info[AVAudioSessionRouteChangeReasonKey] as? UInt,
754
- let reason = AVAudioSession.RouteChangeReason(rawValue: reasonVal) else {
755
- ensureEngineRunning(reason: "route-change-unknown")
756
- return
757
- }
758
-
759
- // On any meaningful route change, reclaim mic
760
- switch reason {
761
- case .oldDeviceUnavailable, .newDeviceAvailable, .categoryChange, .routeConfigurationChange, .override:
762
- ensureEngineRunning(reason: "route-change-\(reason.rawValue)")
763
- default:
764
- break
765
- }
766
- }
767
-
768
- // Call once, right after you create the engine (or inside setupAudioSession)
769
- // Call once after engine is created
770
- private func installEngineObservers() {
771
- let nc = NotificationCenter.default
772
-
773
- if let engine = audioEngine {
774
- nc.addObserver(self,
775
- selector: #selector(handleEngineConfigChange(_:)),
776
- name: .AVAudioEngineConfigurationChange,
777
- object: engine)
778
- }
779
-
780
- nc.addObserver(self,
781
- selector: #selector(handleSessionInterruption(_:)),
782
- name: AVAudioSession.interruptionNotification,
783
- object: AVAudioSession.sharedInstance())
784
-
785
- nc.addObserver(self,
786
- selector: #selector(handleRouteChange(_:)),
787
- name: AVAudioSession.routeChangeNotification,
788
- object: AVAudioSession.sharedInstance())
789
-
790
- nc.addObserver(self,
791
- selector: #selector(handleMediaServicesReset(_:)),
792
- name: AVAudioSession.mediaServicesWereResetNotification,
793
- object: nil)
794
- }
795
-
796
- @objc private func handleSessionInterruption(_ note: Notification) {
797
- guard
798
- let info = note.userInfo,
799
- let typeVal = info[AVAudioSessionInterruptionTypeKey] as? UInt,
800
- let type = AVAudioSession.InterruptionType(rawValue: typeVal)
801
- else { return }
802
-
803
- if type == .ended {
804
- // On real “render err” Core Audio posts an interruption END
805
- NSLog("Session interruption ended (possible render err):")
806
- }
807
- }
808
-
809
- private func setupAndStartRecognizing(localeStr: String?) {
810
- NSLog("[STT] setupAndStartRecognizing begin")
811
- sttActive = true
812
-
813
- audioSession = AVAudioSession.sharedInstance()
814
- guard let session = audioSession else { return }
815
- var err: NSError?
816
-
817
- priorAudioCategory = session.category
818
-
819
- // Tear down resources before starting speech recognition..
820
- NSLog("[STT] pre-teardown")
821
- teardown()
822
- // ** IMPORTANT ** Call this again as teardown marks this false
823
- sttActive = true
824
-
825
- sessionId = UUID().uuidString
826
-
827
- let locale: Locale? = {
828
- if let s = localeStr, !s.isEmpty { return Locale(identifier: s) }
829
- sttActive = false
830
- return nil
831
- }()
832
-
833
- if let loc = locale {
834
- speechRecognizer = SFSpeechRecognizer(locale: loc)
835
- } else {
836
- speechRecognizer = SFSpeechRecognizer()
837
- }
838
- speechRecognizer?.delegate = self
839
-
840
- // Start audio session...
841
- NSLog("[STT] setupAudioSession()")
842
- guard setupAudioSession() else {
843
- NSLog("[STT] ERROR ERROR ******** setupAudioSession()")
844
- teardown()
845
- sttActive = false
846
- return
847
- }
848
- installEngineObservers()
849
-
850
- let request = SFSpeechAudioBufferRecognitionRequest()
851
- recognitionRequest = request
852
-
853
- if #available(iOS 16, *) {
854
- request.addsPunctuation = true
855
- } else {
856
- // Fallback on earlier versions
857
- }
858
- request.shouldReportPartialResults = true
859
- //if #available(iOS 13.0, *) { request.taskHint = .dictation }
860
- request.contextualStrings = loadContextualStrings()
861
-
862
- guard recognitionRequest != nil else {
863
- sendResult(error: ["code": "recognition_init"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
864
- teardown()
865
- return
866
- }
867
-
868
- if audioEngine == nil {
869
- audioEngine = AVAudioEngine()
870
- }
871
- do {
872
- guard let engine = audioEngine else { throw NSError(domain: "voice.audio", code: -1) }
873
- let inputNode = engine.inputNode
874
- let _ = inputNode // presence check
875
-
876
- // Enable voice processing (AEC)
877
- do {
878
- try inputNode.setVoiceProcessingEnabled(true)
879
- } catch {
880
- NSLog("Failed to enable voice processing for AEC on input node: \(error)")
881
- }
882
-
883
- if #available(iOS 17.0, *) {
884
- var duck = AVAudioVoiceProcessingOtherAudioDuckingConfiguration()
885
- duck.enableAdvancedDucking = false // disable advanced (VAD-based) ducking
886
- duck.duckingLevel = .min // “as loud as possible” for other audio
887
- inputNode.voiceProcessingOtherAudioDuckingConfiguration = duck
888
- }
889
-
890
- // if output node voice processing is ever needed, keep commented as in original:
891
- // do { try engine.outputNode.setVoiceProcessingEnabled(true) } catch { ... }
892
-
893
- NSLog("[STT] AEC enable done")
894
-
895
- // --- FIXED WIRING: use live format on first hop, nil downstream, nil for tap ---
896
- let inFmt = engine.inputNode.outputFormat(forBus: 0)
897
-
898
- // 1) Mute only the mic path, not the whole main mixer
899
- let micMixer = AVAudioMixerNode()
900
- engine.attach(micMixer)
901
- // Use the live input format for input → micMixer
902
- engine.connect(inputNode, to: micMixer, format: inFmt)
903
- // Let main mixer pick downstream format
904
- engine.connect(micMixer, to: engine.mainMixerNode, format: nil)
905
- micMixer.outputVolume = 0.0 // ← you won't hear your own mic
906
-
907
- // 2) Prepare a player node for TTS inside the SAME engine/graph
908
- let player = AVAudioPlayerNode()
909
- self.playbackNode = player
910
- engine.attach(player)
911
- // Let the mixer choose the format for TTS
912
- engine.connect(player, to: engine.mainMixerNode, format: nil)
913
-
914
- NSLog("[STT] graph connected (mic->mute mixer, player->mainMixer)")
915
-
916
- var tapFrames: UInt64 = 0
917
- // Tap uses nil so it follows the node’s current output format (survives route SR changes)
918
-
919
- do {
920
- // if there was a previous tap, remove it first
921
- try? inputNode.removeTap(onBus: 0)
922
- } catch {
923
- NSLog("[STT] removeTap error: \(error)")
924
- }
925
- let format = inputNode.outputFormat(forBus: 0) // <- prefer explicit format
926
-
927
- inputNode.installTap(onBus: 0, bufferSize: 1024, format: format) { [weak self] buffer, _ in
928
- // Strongify self once
929
- guard let self = self else { return }
930
- tapFrames &+= UInt64(buffer.frameLength)
931
- if tapFrames % (44100 * 2) < 1024 { // ~every ~2s at 44.1k
932
- NSLog("[STT] tap alive, totalFrames=\(tapFrames)")
933
- }
934
-
935
- let frames: vDSP_Length = vDSP_Length(buffer.frameLength)
936
- let LEVEL_LOWPASS_TRIG: Float = 0.5
937
-
938
- // CH0
939
- if buffer.format.channelCount > 0, let ch0 = buffer.floatChannelData?[0] {
940
- var peak0: Float = 0
941
- vDSP_maxmgv(ch0, 1, &peak0, frames)
942
- let db0: Float = (peak0 == 0) ? -100 : 20.0 * log10f(peak0)
943
-
944
- let smoothed0 = LEVEL_LOWPASS_TRIG * db0
945
- + (1 - LEVEL_LOWPASS_TRIG) * self.averagePowerForChannel0
946
- self.averagePowerForChannel0 = smoothed0
947
- self.averagePowerForChannel1 = smoothed0
948
- }
949
-
950
- // CH1
951
- if buffer.format.channelCount > 1, let ch1 = buffer.floatChannelData?[1] {
952
- var peak1: Float = 0
953
- vDSP_maxmgv(ch1, 1, &peak1, frames)
954
- let db1: Float = (peak1 == 0) ? -100 : 20.0 * log10f(peak1)
955
-
956
- let smoothed1 = LEVEL_LOWPASS_TRIG * db1
957
- + (1 - LEVEL_LOWPASS_TRIG) * self.averagePowerForChannel1
958
- self.averagePowerForChannel1 = smoothed1
959
- }
960
-
961
- // Normalize 0–10 and emit
962
- self.averagePowerForChannel1 = Float(self._normalizedPowerLevelFromDecibels(CGFloat(self.averagePowerForChannel1)) * 10.0)
963
- let value = self.averagePowerForChannel1
964
- self.sendEvent(name: "onSpeechVolumeChanged", body: ["value": value])
965
-
966
- // Append to recognition
967
- self.recognitionRequest?.append(buffer)
968
-
969
- // inside inputNode.installTap { buffer, _ in
970
- self.lastBufferAt = CACurrentMediaTime()
971
- }
972
-
973
- engine.prepare()
974
- NSLog("[STT] audioEngine prepare")
975
- var audioSessionError: NSError?
976
- do {
977
- try engine.start()
978
- } catch {
979
- audioSessionError = error as NSError
980
- }
981
-
982
- // after engine.start() success:
983
- engineHotAt = CACurrentMediaTime()
984
- seenRealSpeech = false
985
- NSLog("engine HOT at \(engineHotAt)")
986
- sendEvent(name: "onSpeechStart", body: nil) // engine hot signal (keep if you want)
987
- startTask(makeFreshRequest())
988
-
989
- // Engine is up; expose readiness
990
- AudioPlaybackHook.isEngineReady = { [weak self] in
991
- guard let eng = self?.audioEngine else { return false }
992
- return eng.isRunning
993
- }
994
-
995
- // Tell TTS layer: do NOT use AVAudioPlayer fallback while STT is active
996
- AudioPlaybackHook.useOnlyEnginePlayback = { [weak self] in
997
- return self?.sttActive == true
998
- }
999
-
1000
- startWatchdog()
1001
-
1002
- // After engine.start() succeeds:
1003
- AudioPlaybackHook.engineScheduleFile = { [weak self] url, done in
1004
- // Always run on main because AVAudioEngine/Nodes are main-thread-y for our usage
1005
- DispatchQueue.main.async {
1006
- guard let self = self else { return }
1007
- if self.isTearingDown { return } // guard against teardown races
1008
-
1009
- guard let engine = self.audioEngine else { return }
1010
-
1011
- do {
1012
-
1013
- // If the graph changed or the node isn't tied to this engine, recreate it.
1014
- if self.playbackNode?.engine !== engine || !self.isPlayerConnected(self.playbackNode, to: engine) {
1015
- self.playbackNode?.stop()
1016
- self.playbackNode = nil
1017
- }
1018
-
1019
- // Ensure the player node is attached/connected to THIS engine
1020
- let player = self.ensurePlaybackNode(in: engine)
1021
-
1022
- // Make sure engine is running before we play
1023
- if !engine.isRunning {
1024
- do { try engine.start() } catch {
1025
- NSLog("[STT] TTS: engine.start() failed: \(error)")
1026
- return
1027
- }
1028
- }
1029
-
1030
- let file = try AVAudioFile(forReading: url)
1031
-
1032
- // Start player after we know it's attached and engine runs
1033
- if !player.isPlaying { player.play() }
1034
-
1035
- player.scheduleFile(file, at: nil) {
1036
- DispatchQueue.main.async { done() }
1037
- }
1038
- } catch {
1039
- NSLog("[STT] TTS schedule error: \(error)")
1040
- }
1041
- }
1042
- return true
1043
- }
1044
-
1045
- AudioPlaybackHook.stopEnginePlayback = { [weak self] in
1046
- DispatchQueue.main.async {
1047
- guard let self = self else { return }
1048
- // Stop only the TTS playback node; keep the engine running for STT
1049
- self.playbackNode?.stop()
1050
- }
1051
- }
1052
-
1053
- NSLog("audioEngine startAndReturnError")
1054
- if let audioSessionError = audioSessionError {
1055
- NotificationCenter.default.addObserver(self,
1056
- selector: #selector(self.handleEngineConfigChange(_:)),
1057
- name: .AVAudioEngineConfigurationChange,
1058
- object: engine)
1059
- NSLog("audioEngine audioSessionError!=nil")
1060
- self.sendResult(error: ["code": "audio", "message": audioSessionError.localizedDescription],
1061
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
1062
- NSLog("[STT] self sendResult")
1063
- // self.teardown()
1064
- NSLog("[STT] Removed self teardown")
1065
- return
1066
- }
1067
- NSLog("After Start recording and append recording")
1068
- DispatchQueue.main.asyncAfter(deadline: .now() + 3.0) { [weak self] in
1069
- guard let self = self else { return }
1070
- let running = self.audioEngine?.isRunning ?? false
1071
- let taskState = self.recognitionTask?.state.rawValue ?? -1
1072
- NSLog("[STT] health: engineRunning=\(running) taskState=\(taskState)")
1073
- }
1074
-
1075
- NSLog("After if audioSessionError != nil")
1076
- } catch let e as NSError {
1077
- sendResult(error: ["code": "start_recording", "message": e.localizedDescription],
1078
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
1079
- NSLog("End of init...")
1080
- return
1081
- }
1082
- }
1083
-
1084
- // MARK: - Helpers
1085
-
1086
- private func _normalizedPowerLevelFromDecibels(_ decibels: CGFloat) -> CGFloat {
1087
- if decibels < -80.0 || decibels == 0.0 { return 0.0 }
1088
- let minDb: Float = -80.0
1089
- let pow10_min = powf(10.0, 0.05 * minDb)
1090
- let pow10_db = powf(10.0, 0.05 * Float(decibels))
1091
- let power = powf((pow10_db - pow10_min) * (1.0 / (1.0 - pow10_min)), 1.0 / 2.0)
1092
- if power < 1.0 { return CGFloat(power) } else { return 1.0 }
1093
- }
1094
-
1095
- private func sendEvent(name: String, body: [String: Any]?) {
1096
- delegate?.stt(self, didEmitEvent: name, body: body)
1097
- }
1098
-
1099
- /// Exact event behavior preserved from ObjC `sendResult`.
1100
- private func sendResult(error: [String: Any]?,
1101
- bestTranscription: String?,
1102
- transcriptions: [String]?,
1103
- isFinal: Bool?) {
1104
- if let error = error {
1105
- sendEvent(name: "onSpeechError", body: ["error": error])
1106
- }
1107
- if let best = bestTranscription {
1108
- sendEvent(name: "onSpeechResults", body: ["value": [best]])
1109
- }
1110
- if let trans = transcriptions {
1111
- sendEvent(name: "onSpeechPartialResults", body: ["value": trans])
1112
- }
1113
- if let isFinal = isFinal {
1114
- sendEvent(name: "onSpeechRecognized", body: ["isFinal": isFinal])
1115
- }
1116
- }
1117
-
1118
- // MARK: - SFSpeechRecognizerDelegate
1119
-
1120
- public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
1121
- if available == false {
1122
- sendResult(error: ["message": "Speech recognition is not available now"],
1123
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
1124
- }
1125
- }
1126
-
1127
- // MARK: - Small helper to recreate recognizer (used by watchdog)
1128
- private func recreateSpeechRecognizerPreservingLocale() {
1129
- let loc = speechRecognizer?.locale
1130
- speechRecognizer = loc != nil ? SFSpeechRecognizer(locale: loc!) : SFSpeechRecognizer()
1131
- speechRecognizer?.delegate = self
1132
- NSLog("[STT] recreated SFSpeechRecognizer (locale preserved: \(loc?.identifier ?? "default"))")
1133
- }
1134
- }