react-native-davoice-tts 1.0.218 → 1.0.219

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/TTSRNBridge.podspec +1 -1
  2. package/ios/SpeechBridge/SpeechBridge.m +153 -0
  3. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DavoiceTTS +0 -0
  4. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios.abi.json +3388 -3388
  5. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios.private.swiftinterface +20 -20
  6. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios.swiftinterface +20 -20
  7. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DavoiceTTS +0 -0
  8. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios-simulator.abi.json +3316 -3316
  9. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios-simulator.private.swiftinterface +32 -32
  10. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/arm64-apple-ios-simulator.swiftinterface +32 -32
  11. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/x86_64-apple-ios-simulator.abi.json +3316 -3316
  12. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/x86_64-apple-ios-simulator.private.swiftinterface +32 -32
  13. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/Modules/DavoiceTTS.swiftmodule/x86_64-apple-ios-simulator.swiftinterface +32 -32
  14. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/_CodeSignature/CodeDirectory +0 -0
  15. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/_CodeSignature/CodeRequirements-1 +0 -0
  16. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/_CodeSignature/CodeResources +24 -99
  17. package/package.json +1 -1
  18. package/speech/index.ts +106 -0
  19. package/android/src/main/java/com/davoice/tts/rn/DaVoiceTTSPackage.java_old_using_new_for_both_stt_and_tts +0 -26
  20. package/ios/STTRNBridge/STTBridge.m_wtf +0 -109
  21. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT copy.swift____ +0 -1202
  22. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift.bkup +0 -1000
  23. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift.latest +0 -1359
  24. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift1.swift__ +0 -1134
  25. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64/DavoiceTTS.framework/DaVoiceSTT.swift__ +0 -1329
  26. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT copy.swift____ +0 -1202
  27. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift.bkup +0 -1000
  28. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift.latest +0 -1359
  29. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift1.swift__ +0 -1134
  30. package/ios/TTSRNBridge/DavoiceTTS.xcframework/ios-arm64_x86_64-simulator/DavoiceTTS.framework/DaVoiceSTT.swift__ +0 -1329
@@ -1,1000 +0,0 @@
- // STT.swift
- // Native iOS Swift version (AEC flow preserved 1:1)
-
- import Foundation
- import UIKit
- import Speech
- import Accelerate
- import AVFAudio // or import AVFoundation
-
-
- @objc public protocol STTDelegate: AnyObject {
- @objc func stt(_ stt: STT, didEmitEvent name: String, body: [String: Any]?)
- }
-
- @objcMembers
- public final class STT: NSObject, SFSpeechRecognizerDelegate {
- public weak var delegate: STTDelegate?
- public var continuous: Bool = true
-
- // MARK: - Private
- private var speechRecognizer: SFSpeechRecognizer?
- private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
- private var audioEngine: AVAudioEngine?
- private var recognitionTask: SFSpeechRecognitionTask?
- private var audioSession: AVAudioSession?
- private var isTearingDown: Bool = false
- private var sessionId: String?
- private var priorAudioCategory: AVAudioSession.Category?
- private var averagePowerForChannel0: Float = 0
- private var averagePowerForChannel1: Float = 0
-
- private var playbackNode: AVAudioPlayerNode?
- private var seenRealSpeech = false // flips true after first non-blank token
- private var engineHotAt: CFTimeInterval = 0 // when engine actually started
- private let warmupKeepAlive: CFTimeInterval = 4.0 // seconds we’ll keep re-arming in silence
-
- // Keep-engine-alive helpers
- private var lastReclaimAttempt: CFAbsoluteTime = 0
- private let reclaimCooldown: CFTimeInterval = 1.0
-
- // --- Task health ---
- private var lastBufferAt: CFTimeInterval = 0 // updated from tap
- private var lastResultAt: CFTimeInterval = 0 // updated from recognition callback
- private var lastTaskStartAt: CFTimeInterval = 0
- private var stallWatchdog: Timer?
- private var consecutiveStallCount = 0
- private let stallThreshold: CFTimeInterval = 8.0 // seconds w/o results while engine is hot
- private let rearmCooldownTask: CFTimeInterval = 2.0
- private var lastRearmAt: CFTimeInterval = 0
-
- private var engineHot = false
- private var hotAt: CFTimeInterval = 0
-
- // MARK: - Event names (unchanged)
- public static let supportedEvents: [String] = [
- "onSpeechResults",
- "onSpeechStart",
- "onSpeechPartialResults",
- "onSpeechError",
- "onSpeechEnd",
- "onSpeechRecognized",
- "onSpeechVolumeChanged"
- ]
-
- // MARK: - Public API (native replacements for the former RCT methods)
-
- public func isSpeechAvailable(_ completion: @escaping (Bool) -> Void) {
- SFSpeechRecognizer.requestAuthorization { status in
- switch status {
- case .authorized: completion(true)
- default: completion(false)
- }
- }
- }
-
- public func isRecognizing() -> Bool {
- guard let task = recognitionTask else { return false }
- return task.state == .running
- }
-
- private func startWatchdog() {
- stallWatchdog?.invalidate()
- stallWatchdog = Timer.scheduledTimer(withTimeInterval: 2.0, repeats: true) { [weak self] _ in
- self?.checkTaskHealth()
- }
- RunLoop.main.add(stallWatchdog!, forMode: .common)
- }
-
- private func stopWatchdog() {
- stallWatchdog?.invalidate()
- stallWatchdog = nil
- }
-
- private func rearmTask(reason: String) {
- // Cancel old task only — keep the engine and tap running.
- recognitionTask?.cancel()
- recognitionTask = nil
-
- seenRealSpeech = false
- lastTaskStartAt = CACurrentMediaTime()
- startTask(makeFreshRequest())
- NSLog("[STT] rearmTask(\(reason)) -> new task started")
- }
-
- private func checkTaskHealth() {
- guard let engine = audioEngine else { return }
- let now = CACurrentMediaTime()
-
- // Engine down? Let your existing logic handle it; just bail.
- if !engine.isRunning { return }
-
- // If recognizer is globally unavailable, don’t thrash — wait until it flips back.
- if let rec = speechRecognizer, rec.isAvailable == false {
- NSLog("[STT] watchdog: recognizer unavailable; waiting…")
- return
- }
-
- // No task at all? Spin one up.
- if recognitionTask == nil {
- if now - lastRearmAt > rearmCooldownTask {
- NSLog("[STT] watchdog: no task -> start fresh request")
- lastRearmAt = now
- startTask(makeFreshRequest())
- }
- return
- }
-
- // If we’ve had buffers recently but no results for a while, assume the task is stuck.
- let noResultsFor = now - lastResultAt
- let hadRecentAudio = (now - lastBufferAt) < max(2.0, stallThreshold) // tap is alive
-
- if hadRecentAudio && noResultsFor > stallThreshold {
- if now - lastRearmAt > rearmCooldownTask {
- consecutiveStallCount += 1
- NSLog("[STT] watchdog: stall detected (no results for \(Int(noResultsFor))s, audio flowing). rearm #\(consecutiveStallCount)")
-
- rearmTask(reason: "watchdog-stall")
- lastRearmAt = now
-
- // If we stall repeatedly, recreate the recognizer itself (server/session could be hosed)
- if consecutiveStallCount >= 3 {
- recreateSpeechRecognizerPreservingLocale()
- consecutiveStallCount = 0
- }
- }
- } else if hadRecentAudio {
- // Healthy path: audio & results are flowing; reset stall counter
- consecutiveStallCount = 0
- }
- }
-
- public func startSpeech(localeStr: String?) {
- NSLog("[STT] startSpeech(locale=\(localeStr ?? "nil"))")
-
- if recognitionTask != nil {
- sendResult(error: ["code": "already_started", "message": "Speech recognition already started!"],
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
- return
- }
-
- SFSpeechRecognizer.requestAuthorization { [weak self] status in
- guard let self = self else { return }
- switch status {
- case .notDetermined:
- self.sendResult(error: ["message": "Speech recognition not yet authorized"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
- case .denied:
- self.sendResult(error: ["message": "User denied access to speech recognition"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
- case .restricted:
- self.sendResult(error: ["message": "Speech recognition restricted on this device"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
- case .authorized:
- self.setupAndStartRecognizing(localeStr: localeStr)
- @unknown default:
- self.sendResult(error: ["message": "Unknown authorization status"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
- }
- }
- }
-
- public func stopSpeech(_ completion: ((Bool) -> Void)? = nil) {
- NSLog("[STT] stopSpeech() requested by app")
- recognitionTask?.finish()
- completion?(false)
- }
-
- public func cancelSpeech(_ completion: ((Bool) -> Void)? = nil) {
- NSLog("[STT] cancelSpeech() requested by app")
-
- recognitionTask?.cancel()
- completion?(false)
- }
-
- public func destroySpeech(_ completion: ((Bool) -> Void)? = nil) {
- NSLog("[STT] **** destroySpeech!!!")
- teardown()
- completion?(false)
- }
-
- /// Try to avoide this!!!
- // Pick the best input and (optionally) prefer speaker only when nothing external is present.
- private func updateSessionRouting(selectBestInput: Bool = true) {
- let s = AVAudioSession.sharedInstance()
-
- let hasExternalOutput = s.currentRoute.outputs.contains {
- switch $0.portType {
- case .headphones, .bluetoothA2DP, .bluetoothHFP, .bluetoothLE, .airPlay, .carAudio, .usbAudio:
- return true
- default:
- return false
- }
- }
-
- if selectBestInput, let inputs = s.availableInputs {
- let btHFP = inputs.first { $0.portType == .bluetoothHFP }
- let btLE = inputs.first { $0.portType == .bluetoothLE }
- let wired = inputs.first { $0.portType == .headsetMic }
- let built = inputs.first { $0.portType == .builtInMic }
- let best = btHFP ?? btLE ?? wired ?? built
- if s.preferredInput?.uid != best?.uid { try? s.setPreferredInput(best) }
-
- if let builtIn = best, builtIn.portType == .builtInMic,
- let ds = builtIn.dataSources?.first(where: { $0.orientation == .bottom || $0.orientation == .back }) {
- try? builtIn.setPreferredDataSource(ds)
- }
- }
-
- var opts: AVAudioSession.CategoryOptions = [.allowBluetooth, .allowBluetoothA2DP, .allowAirPlay, .mixWithOthers]
- if !hasExternalOutput { opts.insert(.defaultToSpeaker) }
-
- // ✅ keep mode CONSISTENT with setupAudioSession(): .videoChat
- if s.category != .playAndRecord || s.mode != .videoChat || s.categoryOptions != opts {
- NSLog("[STT] reapply category: .playAndRecord / .videoChat (opts=\(opts))")
- try? s.setCategory(.playAndRecord, mode: .videoChat, options: opts)
- }
- }
-
- // MARK: - Core logic (kept intact, including AEC order/steps)
-
- /// Returns true if no errors occurred (identical flow & calls as ObjC).
- /// Returns true if no errors occurred (identical flow & calls as ObjC) + keep-alive opts.
- private func setupAudioSession() -> Bool {
- var err: NSError?
- let session = AVAudioSession.sharedInstance()
- self.audioSession = session
-
- // Build options to match our routing rules
- // (defaultToSpeaker only when no external output is active)
- let hasExternalOutput: Bool = session.currentRoute.outputs.contains {
- switch $0.portType {
- case .headphones, .bluetoothA2DP, .bluetoothHFP, .bluetoothLE, .airPlay, .carAudio, .usbAudio:
- return true
- default:
- return false
- }
- }
-
- var opts: AVAudioSession.CategoryOptions = [.allowBluetooth, .allowBluetoothA2DP, .allowAirPlay, .mixWithOthers]
- if !hasExternalOutput { opts.insert(.defaultToSpeaker) }
- if #available(iOS 14.5, *) {
- // Prevent muted switch / mic mute from killing our capture pipeline
- opts.insert(.overrideMutedMicrophoneInterruption)
- }
-
- do {
- try session.setCategory(.playAndRecord, mode: .videoChat, options: opts)
- } catch { err = error as NSError }
-
- do { try session.setActive(true) } catch { err = error as NSError }
-
- if let e = err {
- NSLog("[STT] setupAudioSession error: \(e.localizedDescription)")
- sendResult(error: ["code": "audio", "message": e.localizedDescription], bestTranscription: nil, transcriptions: nil, isFinal: nil)
- return false
- }
- return true
- }
-
- private func currentInputFormat(_ engine: AVAudioEngine) -> AVAudioFormat? {
- // Prefer whatever CoreAudio currently provides; avoid cached formats.
- let fmt = engine.inputNode.outputFormat(forBus: 0)
- if fmt.sampleRate > 0 && fmt.channelCount > 0 { return fmt }
- // Fallback: build a sane mono format from session if ever needed.
- let sr = max(8000, AVAudioSession.sharedInstance().sampleRate)
- return AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: sr, channels: 1, interleaved: false)
- }
-
- private func isHeadsetPluggedIn() -> Bool {
- let route = AVAudioSession.sharedInstance().currentRoute
- for out in route.outputs {
- if out.portType == .headphones || out.portType == .bluetoothA2DP {
- return true
- }
- }
- return false
- }
-
- private func isHeadSetBluetooth() -> Bool {
- for port in AVAudioSession.sharedInstance().availableInputs ?? [] {
- if port.portType == .bluetoothHFP { return true }
- }
- return false
- }
-
- private func loadContextualStrings() -> [String] {
- guard let filePath = Bundle.main.path(forResource: "words_flattened", ofType: "txt") else {
- NSLog("words_flattened.txt not found in bundle")
- return []
- }
- do {
- let contents = try String(contentsOfFile: filePath, encoding: .utf8)
- let rawItems = contents.components(separatedBy: ",")
- var cleaned: [String] = []
- cleaned.reserveCapacity(rawItems.count)
- for item in rawItems {
- var t = item.trimmingCharacters(in: .whitespacesAndNewlines)
- t = t.replacingOccurrences(of: "\"", with: "")
- if !t.isEmpty { cleaned.append(t) }
- }
- return cleaned
- } catch {
- NSLog("Error reading contextualStrings: \(error)")
- return []
- }
- }
-
- private func recreateSpeechRecognizerPreservingLocale() {
- let locale = speechRecognizer?.locale
- speechRecognizer?.delegate = nil
- speechRecognizer = nil
- if let loc = locale { speechRecognizer = SFSpeechRecognizer(locale: loc) }
- else { speechRecognizer = SFSpeechRecognizer() }
- speechRecognizer?.delegate = self
- NSLog("[STT] recreated SFSpeechRecognizer (locale=\(speechRecognizer?.locale.identifier ?? "default"))")
- }
-
- // Add helpers
- private func makeFreshRequest() -> SFSpeechAudioBufferRecognitionRequest {
- let req = SFSpeechAudioBufferRecognitionRequest()
- if #available(iOS 16, *) { req.addsPunctuation = true }
- req.shouldReportPartialResults = true
- if #available(iOS 13.0, *) { req.taskHint = .dictation }
- req.contextualStrings = loadContextualStrings()
- self.recognitionRequest = req
- NSLog("makeFreshRequest()")
- return req
- }
-
- private func startTask(_ req: SFSpeechAudioBufferRecognitionRequest) {
- NSLog("starting recognitionTask")
- lastTaskStartAt = CACurrentMediaTime()
- lastResultAt = lastTaskStartAt
- let taskSessionId = self.sessionId
- self.recognitionTask = self.speechRecognizer?.recognitionTask(with: req) { [weak self] result, error in
- guard let self = self else { return }
- if taskSessionId != self.sessionId { NSLog("task session mismatch -> ignore"); return }
- self.lastResultAt = CACurrentMediaTime()
-
- func markIfReal(_ r: SFSpeechRecognitionResult?) {
- guard let r = r else { return }
- let best = r.bestTranscription.formattedString.trimmingCharacters(in: .whitespacesAndNewlines)
- if !best.isEmpty ||
- r.transcriptions.contains(where: { !$0.formattedString.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }) {
- if !self.seenRealSpeech {
- self.seenRealSpeech = true
- NSLog("first real speech detected -> onSpeechStart to JS")
- self.sendEvent(name: "onSpeechStart", body: nil)
- }
- }
- }
- markIfReal(result)
-
- func rearm(_ why: String, delay: TimeInterval = 0.05) {
- guard self.continuous else { return }
- NSLog("REARM (\(why))")
- self.recognitionTask?.cancel()
- self.recognitionTask = nil
- DispatchQueue.main.asyncAfter(deadline: .now() + delay) {
- self.startTask(self.makeFreshRequest())
- }
- }
-
- if let error = error {
- NSLog("task error \(error._code): \(error.localizedDescription)")
- // treat as transient for continuous mode
- rearmTask(reason: "error")
- return
- }
-
- guard let result = result else {
- NSLog("task nil result")
- rearmTask(reason: "nil-result")
- return
- }
-
- let isFinal = result.isFinal
- let parts = result.transcriptions.map { $0.formattedString }
- self.sendResult(error: nil,
- bestTranscription: result.bestTranscription.formattedString,
- transcriptions: parts,
- isFinal: isFinal)
-
- if isFinal {
- NSLog("task final -> onSpeechEnd")
- self.sendEvent(name: "onSpeechEnd", body: nil)
- if self.continuous {
- self.rearmTask(reason: "final")
- } else {
- NSLog("non-continuous final -> teardown")
- self.teardown()
- }
- }
- }
- }
-
- public func teardown() {
- NSLog("[STT] teardown() begin")
- isTearingDown = true
- stopWatchdog()
- consecutiveStallCount = 0
-
- if let task = recognitionTask {
- task.cancel()
- recognitionTask = nil
- }
- AudioPlaybackHook.engineScheduleFile = nil
- AudioPlaybackHook.isEngineReady = nil
-
- if let p = playbackNode {
- p.stop()
- }
- playbackNode = nil
-
- if let req = recognitionRequest {
- req.endAudio()
- recognitionRequest = nil
- }
-
- if let engine = audioEngine {
- if engine.inputNode != nil {
- engine.inputNode.removeTap(onBus: 0)
- engine.inputNode.reset()
- }
- if engine.isRunning {
- engine.stop()
- }
- engine.reset()
- audioEngine = nil // Crucial step!
- }
-
- resetAudioSession()
-
- sessionId = nil
- isTearingDown = false
- }
-
- public func teardown2() {
- isTearingDown = true
- recognitionTask?.cancel()
- recognitionTask = nil
-
- resetAudioSession()
-
- recognitionRequest?.endAudio()
- recognitionRequest = nil
-
- if let engine = audioEngine {
- engine.inputNode.removeTap(onBus: 0)
- engine.inputNode.reset()
-
- if engine.isRunning {
- engine.stop()
- engine.reset()
- audioEngine = nil
- }
- }
-
- sessionId = nil
- isTearingDown = false
- }
-
- private func resetAudioSession() {
- if audioSession == nil {
- audioSession = AVAudioSession.sharedInstance()
- }
- guard let session = audioSession else { return }
-
- // Preserve & compare category exactly as original logic
- let current = session.category
- if priorAudioCategory == current { return }
-
- // do {
- // try session.setCategory(priorAudioCategory ?? .soloAmbient,
- // mode: .default,
- // options: [.allowBluetooth,
- // .allowBluetoothA2DP,
- // .defaultToSpeaker,
- // .allowAirPlay,
- // .mixWithOthers])
- // } catch {
- // // Silent, matching original behavior (no error propagation here)
- // }
- audioSession = nil
- }
-
- /// Try to keep the capture alive without tearing down recognition.
- /// 1) If engine exists but not running → try start()
- /// 2) If start fails or graph became invalid → rebuild graph and start
- /// 3) If we don’t have a task yet, start one.
- private func ensureEngineRunning(reason: String) {
- let now = CFAbsoluteTimeGetCurrent()
- if (now - lastReclaimAttempt) < reclaimCooldown {
- NSLog("[STT] ensureEngineRunning(\(reason)) skipped (cooldown)")
- return
- }
- lastReclaimAttempt = now
-
- if (audioEngine != nil) && !audioEngine!.isRunning {
- do {
- // Possibly re-apply your format or re-install taps if the hardware changed sample rates
- try audioEngine!.start()
- print("🔄 AVAudioEngine restarted after config change. isRunning=%@",
- audioEngine!.isRunning ? "YES":"NO")
- } catch {
- print("❌ Could not re-start after config change: \(error)")
- }
- }
- return
-
- // BELOW CHATGPT SHIT CODE!!
-
- guard let engine = audioEngine else {
- NSLog("[STT] ensureEngineRunning(\(reason)): no engine → rebuild")
- rebuildEngineGraphAndRestart(reason: reason)
- return
- }
-
- if !engine.isRunning {
- do {
- try engine.start()
- NSLog("[STT] ensureEngineRunning(\(reason)): engine.start() -> running=\(engine.isRunning)")
- } catch {
- NSLog("[STT] ensureEngineRunning(\(reason)): engine.start() failed: \(error) → rebuild")
- rebuildEngineGraphAndRestart(reason: reason)
- return
- }
- }
-
- // If we have no active task, spin one up against the current request
- if recognitionTask == nil {
- if let req = recognitionRequest {
- NSLog("[STT] ensureEngineRunning(\(reason)): no task -> startTask(existing req)")
- startTask(req)
- } else {
- NSLog("[STT] ensureEngineRunning(\(reason)): no req -> makeFreshRequest + startTask")
- startTask(makeFreshRequest())
- }
- }
- }
-
- /// Rebuilds AVAudioEngine graph (mic→mute mixer, player→mainMixer), reinstalls tap,
- /// and restarts the engine. Does NOT nuke the current recognitionRequest/task unless required.
- private func rebuildEngineGraphAndRestart(reason: String) {
- NSLog("[STT] 🔄 rebuildEngineGraphAndRestart (\(reason))")
-
- // Keep current request if present; we'll keep appending into it
- let existingReq = self.recognitionRequest
-
- // Tear down engine ONLY (keep session, request)
- if let engine = audioEngine {
- if engine.inputNode != nil {
- engine.inputNode.removeTap(onBus: 0)
- engine.inputNode.reset()
- }
- if engine.isRunning { engine.stop() }
- engine.reset()
- }
-
- // Recreate engine and graph
- let newEngine = AVAudioEngine()
- self.audioEngine = newEngine
-
- let inputNode = newEngine.inputNode
- do {
- try inputNode.setVoiceProcessingEnabled(true)
- } catch {
- NSLog("[STT] rebuild: failed to enable voice processing: \(error)")
- }
- if #available(iOS 17.0, *) {
- var duck = AVAudioVoiceProcessingOtherAudioDuckingConfiguration()
- duck.enableAdvancedDucking = false
- duck.duckingLevel = .min
- inputNode.voiceProcessingOtherAudioDuckingConfiguration = duck
- }
-
- let recordingFormat = inputNode.outputFormat(forBus: 0)
-
- // mic → mute mixer → mainMixer
- let micMixer = AVAudioMixerNode()
- newEngine.attach(micMixer)
- newEngine.connect(inputNode, to: micMixer, format: recordingFormat)
- newEngine.connect(micMixer, to: newEngine.mainMixerNode, format: recordingFormat)
- micMixer.outputVolume = 0.0
-
- // TTS player → mainMixer (keep same player if possible, else recreate)
- if playbackNode == nil { playbackNode = AVAudioPlayerNode() }
- if let player = playbackNode {
- if player.engine == nil { newEngine.attach(player) }
- newEngine.connect(player, to: newEngine.mainMixerNode, format: nil)
- }
-
- // Reinstall tap to feed (existing or new) recognition request
- inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
- guard let self = self else { return }
-
- // (same level metering as your current code)
- let frames: vDSP_Length = vDSP_Length(buffer.frameLength)
- let LP: Float = 0.5
-
- if buffer.format.channelCount > 0, let ch0 = buffer.floatChannelData?[0] {
- var peak0: Float = 0
- vDSP_maxmgv(ch0, 1, &peak0, frames)
- let db0: Float = (peak0 == 0) ? -100 : 20.0 * log10f(peak0)
- let sm0 = LP * db0 + (1 - LP) * self.averagePowerForChannel0
- self.averagePowerForChannel0 = sm0
- self.averagePowerForChannel1 = sm0
- }
- if buffer.format.channelCount > 1, let ch1 = buffer.floatChannelData?[1] {
- var peak1: Float = 0
- vDSP_maxmgv(ch1, 1, &peak1, frames)
- let db1: Float = (peak1 == 0) ? -100 : 20.0 * log10f(peak1)
- let sm1 = LP * db1 + (1 - LP) * self.averagePowerForChannel1
- self.averagePowerForChannel1 = sm1
- }
- self.averagePowerForChannel1 = Float(self._normalizedPowerLevelFromDecibels(CGFloat(self.averagePowerForChannel1)) * 10.0)
- self.sendEvent(name: "onSpeechVolumeChanged", body: ["value": self.averagePowerForChannel1])
-
- self.recognitionRequest?.append(buffer)
- }
-
- newEngine.prepare()
- do {
- try newEngine.start()
- NSLog("[STT] rebuild: engine.start() ok, running=\(newEngine.isRunning)")
- } catch {
- NSLog("[STT] rebuild: engine.start() failed: \(error)")
- }
-
- // If we lost the request during rebuild, recreate + start task.
- if self.recognitionRequest == nil {
- if let old = existingReq {
- self.recognitionRequest = old
- } else {
- self.recognitionRequest = makeFreshRequest()
- }
- }
- if self.recognitionTask == nil {
- startTask(self.recognitionRequest!)
- }
- }
-
- @objc private func handleEngineConfigChange(_ note: Notification) {
- NSLog("[STT] ⚙️ AVAudioEngineConfigurationChange: ensuring engine running")
- ensureEngineRunning(reason: "engine-config-change")
- }
-
- @objc private func handleMediaServicesReset(_ note: Notification) {
- NSLog("[STT] 📺 Media services were RESET: reclaiming mic & session")
- // Re-apply audio session and try to rebuild graph if needed
- _ = setupAudioSession()
- ensureEngineRunning(reason: "media-services-reset")
- }
-
- @objc private func handleRouteChange(_ note: Notification) {
- let info = note.userInfo ?? [:]
- NSLog("[STT] 🔀 route change: \(info)")
-
- guard let reasonVal = info[AVAudioSessionRouteChangeReasonKey] as? UInt,
- let reason = AVAudioSession.RouteChangeReason(rawValue: reasonVal) else {
- ensureEngineRunning(reason: "route-change-unknown")
- return
- }
-
- // On any meaningful route change, reclaim mic
- switch reason {
- case .oldDeviceUnavailable, .newDeviceAvailable, .categoryChange, .routeConfigurationChange, .override:
- ensureEngineRunning(reason: "route-change-\(reason.rawValue)")
- default:
- break
- }
- }
-
- // Call once, right after you create the engine (or inside setupAudioSession)
- // Call once after engine is created
- private func installEngineObservers() {
- let nc = NotificationCenter.default
-
- if let engine = audioEngine {
- nc.addObserver(self,
- selector: #selector(handleEngineConfigChange(_:)),
- name: .AVAudioEngineConfigurationChange,
- object: engine)
- }
-
- nc.addObserver(self,
- selector: #selector(handleSessionInterruption(_:)),
- name: AVAudioSession.interruptionNotification,
- object: AVAudioSession.sharedInstance())
-
- nc.addObserver(self,
- selector: #selector(handleRouteChange(_:)),
- name: AVAudioSession.routeChangeNotification,
- object: AVAudioSession.sharedInstance())
-
- nc.addObserver(self,
- selector: #selector(handleMediaServicesReset(_:)),
- name: AVAudioSession.mediaServicesWereResetNotification,
- object: nil)
- }
-
- @objc private func handleSessionInterruption(_ note: Notification) {
- guard
- let info = note.userInfo,
- let typeVal = info[AVAudioSessionInterruptionTypeKey] as? UInt,
- let type = AVAudioSession.InterruptionType(rawValue: typeVal)
- else { return }
-
- if type == .ended {
- // On real “render err” Core Audio posts an interruption END
- NSLog("Session interruption ended (possible render err):")
- }
- }
-
- private func setupAndStartRecognizing(localeStr: String?) {
- NSLog("[STT] setupAndStartRecognizing begin")
-
- audioSession = AVAudioSession.sharedInstance()
- guard let session = audioSession else { return }
- var err: NSError?
-
- priorAudioCategory = session.category
-
- // Tear down resources before starting speech recognition..
- NSLog("[STT] pre-teardown")
- teardown()
-
- sessionId = UUID().uuidString
-
- let locale: Locale? = {
- if let s = localeStr, !s.isEmpty { return Locale(identifier: s) }
- return nil
- }()
-
- if let loc = locale {
- speechRecognizer = SFSpeechRecognizer(locale: loc)
- } else {
- speechRecognizer = SFSpeechRecognizer()
- }
- speechRecognizer?.delegate = self
-
- // Start audio session...
- NSLog("[STT] setupAudioSession()")
- guard setupAudioSession() else {
- NSLog("[STT] ERROR ERROR ******** setupAudioSession()")
- teardown()
- return
- }
- installEngineObservers()
-
- let request = SFSpeechAudioBufferRecognitionRequest()
- recognitionRequest = request
-
- if #available(iOS 16, *) {
- request.addsPunctuation = true
- } else {
- // Fallback on earlier versions
- }
- request.shouldReportPartialResults = true
- if #available(iOS 13.0, *) { request.taskHint = .dictation }
- request.contextualStrings = loadContextualStrings()
-
- guard recognitionRequest != nil else {
- sendResult(error: ["code": "recognition_init"], bestTranscription: nil, transcriptions: nil, isFinal: nil)
- teardown()
- return
- }
-
- if audioEngine == nil {
- audioEngine = AVAudioEngine()
- }
- do {
- guard let engine = audioEngine else { throw NSError(domain: "voice.audio", code: -1) }
- let inputNode = engine.inputNode
- let _ = inputNode // presence check
-
- // Enable voice processing (AEC)
- do {
- try inputNode.setVoiceProcessingEnabled(true)
- } catch {
- NSLog("Failed to enable voice processing for AEC on input node: \(error)")
- }
-
- if #available(iOS 17.0, *) {
- var duck = AVAudioVoiceProcessingOtherAudioDuckingConfiguration()
- duck.enableAdvancedDucking = false // disable advanced (VAD-based) ducking
- duck.duckingLevel = .min // “as loud as possible” for other audio
- inputNode.voiceProcessingOtherAudioDuckingConfiguration = duck
- }
-
- // if output node voice processing is ever needed, keep commented as in original:
- // do { try engine.outputNode.setVoiceProcessingEnabled(true) } catch { ... }
-
- NSLog("[STT] AEC enable done")
-
- let recordingFormat = inputNode.outputFormat(forBus: 0)
-
- // engine.connect(inputNode, to: engine.mainMixerNode, format: recordingFormat)
- // engine.connect(engine.mainMixerNode, to: engine.outputNode, format: recordingFormat)
- // engine.mainMixerNode.outputVolume = 0.0
-
- // 1) Mute only the mic path, not the whole main mixer
- let micMixer = AVAudioMixerNode()
- engine.attach(micMixer)
- engine.connect(inputNode, to: micMixer, format: recordingFormat)
- engine.connect(micMixer, to: engine.mainMixerNode, format: recordingFormat)
- micMixer.outputVolume = 0.0 // ← you won't hear your own mic
-
- // 2) Prepare a player node for TTS inside the SAME engine/graph
- let player = AVAudioPlayerNode()
- self.playbackNode = player
- engine.attach(player)
- // Use nil format so the mixer handles sample-rate conversion (e.g., 48k TTS)
- engine.connect(player, to: engine.mainMixerNode, format: nil)
-
- NSLog("[STT] graph connected (mic->mute mixer, player->mainMixer)")
-
- var tapFrames: UInt64 = 0
- // Start recording and append recording buffer to speech recognizer
- inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
- // Strongify self once
- guard let self = self else { return }
- tapFrames &+= UInt64(buffer.frameLength)
- if tapFrames % (44100 * 2) < 1024 { // ~every ~2s at 44.1k
- NSLog("[STT] tap alive, totalFrames=\(tapFrames)")
- }
-
- let frames: vDSP_Length = vDSP_Length(buffer.frameLength)
- let LEVEL_LOWPASS_TRIG: Float = 0.5
-
- // CH0
- if buffer.format.channelCount > 0, let ch0 = buffer.floatChannelData?[0] {
- var peak0: Float = 0
- vDSP_maxmgv(ch0, 1, &peak0, frames)
- let db0: Float = (peak0 == 0) ? -100 : 20.0 * log10f(peak0)
-
- let smoothed0 = LEVEL_LOWPASS_TRIG * db0
- + (1 - LEVEL_LOWPASS_TRIG) * self.averagePowerForChannel0
- self.averagePowerForChannel0 = smoothed0
- self.averagePowerForChannel1 = smoothed0
- }
-
- // CH1
- if buffer.format.channelCount > 1, let ch1 = buffer.floatChannelData?[1] {
- var peak1: Float = 0
- vDSP_maxmgv(ch1, 1, &peak1, frames)
- let db1: Float = (peak1 == 0) ? -100 : 20.0 * log10f(peak1)
-
- let smoothed1 = LEVEL_LOWPASS_TRIG * db1
- + (1 - LEVEL_LOWPASS_TRIG) * self.averagePowerForChannel1
- self.averagePowerForChannel1 = smoothed1
- }
-
- // Normalize 0–10 and emit
- self.averagePowerForChannel1 = Float(self._normalizedPowerLevelFromDecibels(CGFloat(self.averagePowerForChannel1)) * 10.0)
- let value = self.averagePowerForChannel1
- self.sendEvent(name: "onSpeechVolumeChanged", body: ["value": value])
-
- // Append to recognition
- self.recognitionRequest?.append(buffer)
-
- // inside inputNode.installTap { buffer, _ in
- self.lastBufferAt = CACurrentMediaTime()
- }
-
- engine.prepare()
- NSLog("[STT] audioEngine prepare")
- var audioSessionError: NSError?
- do {
- try engine.start()
- } catch {
- audioSessionError = error as NSError
- }
-
- // after engine.start() success:
- engineHotAt = CACurrentMediaTime()
- seenRealSpeech = false
- NSLog("engine HOT at \(engineHotAt)")
- sendEvent(name: "onSpeechStart", body: nil) // engine hot signal (keep if you want)
- startTask(makeFreshRequest())
-
- // Engine is up; expose readiness
- AudioPlaybackHook.isEngineReady = { [weak self] in
- guard let eng = self?.audioEngine else { return false }
- return eng.isRunning
- }
-
- startWatchdog()
-
- // After engine.start() succeeds:
- AudioPlaybackHook.engineScheduleFile = { [weak self] url, done in
- guard
- let self = self,
- let engine = self.audioEngine,
- let player = self.playbackNode
- else { return false }
-
- do {
- let file = try AVAudioFile(forReading: url)
- if !player.isPlaying { player.play() } // engine is already running
- player.scheduleFile(file, at: nil) {
- DispatchQueue.main.async { done() }
- }
- return true
- } catch {
- print("engineScheduleFile error:", error)
- return false
- }
- }
-
- NSLog("audioEngine startAndReturnError")
- if let audioSessionError = audioSessionError {
- NotificationCenter.default.addObserver(self,
- selector: #selector(self.handleEngineConfigChange(_:)),
- name: .AVAudioEngineConfigurationChange,
- object: engine)
- NSLog("audioEngine audioSessionError!=nil")
- self.sendResult(error: ["code": "audio", "message": audioSessionError.localizedDescription],
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
- NSLog("[STT] self sendResult")
- // self.teardown()
- NSLog("[STT] Removed self teardown")
- return
- }
- NSLog("After Start recording and append recording")
- DispatchQueue.main.asyncAfter(deadline: .now() + 3.0) { [weak self] in
- guard let self = self else { return }
- let running = self.audioEngine?.isRunning ?? false
- let taskState = self.recognitionTask?.state.rawValue ?? -1
- NSLog("[STT] health: engineRunning=\(running) taskState=\(taskState)")
- }
-
- NSLog("After if audioSessionError != nil")
- } catch let e as NSError {
- sendResult(error: ["code": "start_recording", "message": e.localizedDescription],
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
- NSLog("End of init...")
- return
- }
- }
-
- // MARK: - Helpers
-
- private func _normalizedPowerLevelFromDecibels(_ decibels: CGFloat) -> CGFloat {
- if decibels < -80.0 || decibels == 0.0 { return 0.0 }
- let minDb: Float = -80.0
- let pow10_min = powf(10.0, 0.05 * minDb)
- let pow10_db = powf(10.0, 0.05 * Float(decibels))
- let power = powf((pow10_db - pow10_min) * (1.0 / (1.0 - pow10_min)), 1.0 / 2.0)
- if power < 1.0 { return CGFloat(power) } else { return 1.0 }
- }
-
- private func sendEvent(name: String, body: [String: Any]?) {
- delegate?.stt(self, didEmitEvent: name, body: body)
- }
-
- /// Exact event behavior preserved from ObjC `sendResult`.
- private func sendResult(error: [String: Any]?,
- bestTranscription: String?,
- transcriptions: [String]?,
- isFinal: Bool?) {
- if let error = error {
- sendEvent(name: "onSpeechError", body: ["error": error])
- }
- if let best = bestTranscription {
- sendEvent(name: "onSpeechResults", body: ["value": [best]])
- }
- if let trans = transcriptions {
- sendEvent(name: "onSpeechPartialResults", body: ["value": trans])
- }
- if let isFinal = isFinal {
- sendEvent(name: "onSpeechRecognized", body: ["isFinal": isFinal])
- }
- }
-
- // MARK: - SFSpeechRecognizerDelegate
-
- public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
- if available == false {
- sendResult(error: ["message": "Speech recognition is not available now"],
- bestTranscription: nil, transcriptions: nil, isFinal: nil)
- }
- }
- }
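
For context only: the removed class exposes its results through the STTDelegate protocol shown at the top of the diff. The following is a minimal illustrative sketch (not part of the package) of how a host app might observe those events; only the STT/STTDelegate names and the event strings come from the diff, the wiring is an assumption.

import Foundation

// Hypothetical consumer: logs every event the STT class emits via its delegate.
final class LoggingSTTDelegate: NSObject, STTDelegate {
    func stt(_ stt: STT, didEmitEvent name: String, body: [String: Any]?) {
        // `name` is one of STT.supportedEvents, e.g. "onSpeechResults",
        // "onSpeechPartialResults", "onSpeechVolumeChanged", "onSpeechError".
        NSLog("[STT event] \(name): \(String(describing: body))")
    }
}

// Hypothetical usage (STT.delegate is weak, so keep a strong reference to the delegate):
// let sttDelegate = LoggingSTTDelegate()
// let stt = STT()
// stt.delegate = sttDelegate
// stt.startSpeech(localeStr: "en-US")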