@gmessier/nitro-speech 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/LICENSE +21 -0
  2. package/README.md +165 -148
  3. package/android/build.gradle +0 -1
  4. package/android/src/main/cpp/cpp-adapter.cpp +5 -1
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
  6. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +80 -16
  7. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +93 -20
  8. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +27 -15
  9. package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
  10. package/ios/Audio/AudioLevelTracker.swift +66 -0
  11. package/ios/Coordinator.swift +105 -0
  12. package/ios/Engines/AnalyzerEngine.swift +241 -0
  13. package/ios/Engines/DictationRuntime.swift +67 -0
  14. package/ios/Engines/RecognizerEngine.swift +312 -0
  15. package/ios/Engines/SFSpeechEngine.swift +119 -0
  16. package/ios/Engines/SpeechRuntime.swift +58 -0
  17. package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
  18. package/ios/HybridNitroSpeech.swift +1 -10
  19. package/ios/HybridRecognizer.swift +135 -192
  20. package/ios/LocaleManager.swift +73 -0
  21. package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
  22. package/ios/Shared/AutoStopper.swift +147 -0
  23. package/ios/Shared/HapticImpact.swift +24 -0
  24. package/ios/Shared/Log.swift +41 -0
  25. package/ios/Shared/Permissions.swift +59 -0
  26. package/ios/Shared/Utils.swift +58 -0
  27. package/lib/NitroSpeech.d.ts +2 -0
  28. package/lib/NitroSpeech.js +2 -0
  29. package/lib/Recognizer/RecognizerRef.d.ts +5 -0
  30. package/lib/Recognizer/RecognizerRef.js +13 -0
  31. package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
  32. package/lib/Recognizer/SpeechRecognizer.js +9 -0
  33. package/lib/Recognizer/methods.d.ts +8 -0
  34. package/lib/Recognizer/methods.js +29 -0
  35. package/lib/Recognizer/types.d.ts +6 -0
  36. package/lib/Recognizer/types.js +1 -0
  37. package/lib/Recognizer/useRecognizer.d.ts +16 -0
  38. package/lib/Recognizer/useRecognizer.js +71 -0
  39. package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
  40. package/lib/Recognizer/useVoiceInputVolume.js +52 -0
  41. package/lib/index.d.ts +6 -0
  42. package/lib/index.js +6 -0
  43. package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
  44. package/lib/specs/NitroSpeech.nitro.js +1 -0
  45. package/lib/specs/Recognizer.nitro.d.ts +95 -0
  46. package/lib/specs/Recognizer.nitro.js +1 -0
  47. package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
  48. package/lib/specs/SpeechRecognitionConfig.js +1 -0
  49. package/lib/specs/VolumeChangeEvent.d.ts +31 -0
  50. package/lib/specs/VolumeChangeEvent.js +1 -0
  51. package/nitro.json +0 -4
  52. package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
  53. package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
  54. package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
  55. package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
  56. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +68 -19
  57. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +7 -4
  58. package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
  59. package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
  60. package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
  61. package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
  62. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
  63. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -5
  64. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
  65. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
  66. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
  67. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
  68. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
  69. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +203 -70
  70. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
  71. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +41 -9
  72. package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
  73. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  74. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +6 -3
  75. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +66 -18
  76. package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
  77. package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
  78. package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
  79. package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
  80. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
  81. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +17 -7
  82. package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
  83. package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
  84. package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
  85. package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
  86. package/package.json +15 -16
  87. package/src/NitroSpeech.ts +5 -0
  88. package/src/Recognizer/RecognizerRef.ts +23 -0
  89. package/src/Recognizer/SpeechRecognizer.ts +10 -0
  90. package/src/Recognizer/methods.ts +40 -0
  91. package/src/Recognizer/types.ts +33 -0
  92. package/src/Recognizer/useRecognizer.ts +85 -0
  93. package/src/Recognizer/useVoiceInputVolume.ts +65 -0
  94. package/src/index.ts +6 -182
  95. package/src/specs/NitroSpeech.nitro.ts +2 -163
  96. package/src/specs/Recognizer.nitro.ts +110 -0
  97. package/src/specs/SpeechRecognitionConfig.ts +167 -0
  98. package/src/specs/VolumeChangeEvent.ts +31 -0
  99. package/android/proguard-rules.pro +0 -1
  100. package/ios/AnylyzerTranscriber.swift +0 -331
  101. package/ios/AutoStopper.swift +0 -69
  102. package/ios/HapticImpact.swift +0 -32
  103. package/ios/LegacySpeechRecognizer.swift +0 -161
  104. package/lib/commonjs/index.js +0 -145
  105. package/lib/commonjs/index.js.map +0 -1
  106. package/lib/commonjs/package.json +0 -1
  107. package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
  108. package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
  109. package/lib/module/index.js +0 -138
  110. package/lib/module/index.js.map +0 -1
  111. package/lib/module/package.json +0 -1
  112. package/lib/module/specs/NitroSpeech.nitro.js +0 -4
  113. package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
  114. package/lib/tsconfig.tsbuildinfo +0 -1
  115. package/lib/typescript/index.d.ts +0 -50
  116. package/lib/typescript/index.d.ts.map +0 -1
  117. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
  118. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
  119. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
@@ -0,0 +1,119 @@
1
+ import Foundation
2
+ import Speech
3
+ import AVFoundation
4
+
5
/// Recognition engine backed by the classic `SFSpeechRecognizer` API — the
/// fallback path for devices that cannot use the iOS 26 transcriber stack.
/// Microphone buffers are streamed into an
/// `SFSpeechAudioBufferRecognitionRequest`; partial transcriptions are
/// forwarded to the recognizer delegate as a single batch.
final class SFSpeechEngine: RecognizerEngine {
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private var speechRecognizer: SFSpeechRecognizer?

    private let lg = Lg(prefix: "SFSpeechEngine")

    /// Stops the session: end the audio stream first so the task can emit
    /// its final result, then ask the task to finish.
    override func stop() {
        super.stop()
        recognitionRequest?.endAudio()
        recognitionTask?.finish()
    }

    /// Creates the `SFSpeechRecognizer` for the configured locale
    /// (defaulting to "en-US") and reports a failure of `type` when the
    /// recognizer says it is unavailable.
    override func prewarm(for type: FailureType) async {
        let localeIdentifier = self.recognizerDelegate?.config?.locale ?? "en-US"
        speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: localeIdentifier))
        if speechRecognizer?.isAvailable != true {
            self.reportFailure(
                from: "prewarm",
                message: "SFSpeechRecognizer is not available",
                type: type
            )
        }
        await super.prewarm(for: type)
    }

    /// Builds the request/task pair and begins feeding it audio from the
    /// shared audio engine tap.
    override func startSession() async {
        await super.startSession()
        lg.log("[startSession.startSession]")

        await prewarm(for: .start)
        lg.log("[startSession.prewarm]")
        guard let speechRecognizer else { return }

        recognitionRequest = createRecognitionRequest()
        lg.log("[startSession.createRecognitionRequest]")
        guard let recognitionRequest else { return }

        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { [weak self] result, error in
            guard let self else { return }

            if let result {
                var text = result.bestTranscription.formattedString
                if !text.isEmpty {
                    // Only activity that yields text resets the auto-stop timer.
                    self.trackPartialActivity()

                    let filterDisabled = self.recognizerDelegate?.config?.disableRepeatingFilter ?? false
                    if !filterDisabled {
                        text = Utils.repeatingFilter(text)
                    }
                    // Legacy transcriber collects everything into one batch
                    self.recognizerDelegate?.result(batches: [text])
                }

                if result.isFinal {
                    self.cleanup(from: "startRecognition.recognitionTask.final")
                }
            }

            if let error {
                if self.isStopping {
                    // A stop requested by us also surfaces as an error; treat
                    // it as a normal teardown rather than a failure.
                    self.cleanup(from: "startRecognition.recognitionTask.manualStop")
                } else {
                    self.reportFailure(
                        from: "startSession.recognitionTask.error",
                        message: "Recognition Error: \(error.localizedDescription)",
                        type: .onSession
                    )
                }
            }
        }
        lg.log("[startSession.recognitionTask]")

        self.startAudioEngine(
            onBuffer: { [weak self] buffer in
                self?.recognitionRequest?.append(buffer)
            }
        )
        lg.log("[startSession.startAudioEngine]")

        self.sendFeedbackOnStart()
        lg.log("[startSession.sendFeedbackOnStart]")
    }

    /// Releases the request/task/recognizer after the base class tears down
    /// shared session state.
    override func cleanup(from: String) {
        super.cleanup(from: "overridden.\(from)")
        recognitionRequest = nil
        recognitionTask = nil
        speechRecognizer = nil
    }

    /// Assembles a buffer-based request honoring the delegate's config:
    /// partial results, contextual strings, and (iOS 16+) punctuation.
    private func createRecognitionRequest() -> SFSpeechAudioBufferRecognitionRequest {
        let req = SFSpeechAudioBufferRecognitionRequest()
        req.shouldReportPartialResults = true

        if let hints = self.recognizerDelegate?.config?.contextualStrings, !hints.isEmpty {
            req.contextualStrings = hints
        }

        if #available(iOS 16, *) {
            // Punctuation stays on unless the config explicitly disables it.
            req.addsPunctuation = self.recognizerDelegate?.config?.iosAddPunctuation ?? true
        }

        return req
    }
}
@@ -0,0 +1,58 @@
1
+ import Foundation
2
+ import Speech
3
+
4
@available(iOS 26.0, *)
/// `TranscriberRuntime` implementation wrapping the iOS 26 `SpeechTranscriber`
/// module: creates the transcriber for a locale, installs on-device assets,
/// and streams its results back to the caller.
final class SpeechRuntime: TranscriberRuntime {
    let locale: Locale
    private var transcriber: SpeechTranscriber?

    init(with locale: Locale) {
        self.locale = locale
    }

    /// Creates the `SpeechTranscriber` honoring `config` and downloads any
    /// missing on-device model assets.
    /// - Throws: an `NSError` when `SpeechTranscriber` is unavailable, or
    ///   whatever the asset installation request throws.
    func create(config: SpeechRecognitionConfig?) async throws {
        guard SpeechTranscriber.isAvailable else {
            // Previously `throw NSError()`: a domain-less NSError is invalid
            // (`-[NSError init]` is unsupported). Throw a well-formed error
            // so callers get a meaningful description.
            throw NSError(
                domain: "NitroSpeech.SpeechRuntime",
                code: 1,
                userInfo: [NSLocalizedDescriptionKey: "SpeechTranscriber is not available on this device"]
            )
        }

        var transcriptionOptions: Set<SpeechTranscriber.TranscriptionOption> = []
        if config?.maskOffensiveWords == true {
            transcriptionOptions.insert(.etiquetteReplacements)
        }

        transcriber = SpeechTranscriber(
            locale: locale,
            transcriptionOptions: transcriptionOptions,
            reportingOptions: [.volatileResults, .fastResults],
            attributeOptions: [.audioTimeRange]
        )

        // Install the locale's on-device model assets if they are missing.
        if let transcriber,
           let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
            try await installationRequest.downloadAndInstall()
        }
    }

    /// Returns the active transcriber as the runtime's only speech module
    /// (empty until `create` succeeds).
    func getModules() -> [any SpeechModule] {
        guard let transcriber else { return [] }
        return [transcriber]
    }

    /// Consumes the transcriber's async result stream, mapping each result
    /// into a `TranscriberResult` for `onResult`. Returns when the stream
    /// ends; rethrows stream errors.
    func handleResults(
        onResult: @escaping (TranscriberResult) -> Void
    ) async throws {
        if let transcriber {
            for try await result in transcriber.results {
                onResult(
                    TranscriberResult(
                        text: result.text,
                        rangeStart: result.range.start,
                        isFinal: result.isFinal)
                )
            }
        }
    }

    /// Drops the transcriber so a later `create` starts fresh.
    func clean() {
        transcriber = nil
    }
}
@@ -0,0 +1,21 @@
1
+ import Foundation
2
+ import Speech
3
+
4
/// One transcription result emitted by a `TranscriberRuntime`.
/// Field order is part of the memberwise initializer — do not reorder.
struct TranscriberResult {
    /// Transcribed text, including any attributes the transcriber attached.
    let text: AttributedString
    /// Start timestamp of the audio range this text covers.
    let rangeStart: CMTime
    /// True when this is the final (non-volatile) result for its range.
    let isFinal: Bool
}
9
+
10
@available(iOS 26.0, *)
/// Abstraction over the iOS 26 transcriber variants (speech vs. dictation)
/// so the analyzer engine can drive either interchangeably.
protocol TranscriberRuntime {
    /// Locale this runtime transcribes.
    var locale: Locale { get }

    /// Builds the underlying transcriber for `config`; may download assets.
    func create(config: SpeechRecognitionConfig?) async throws

    /// Speech modules to register with the analyzer (empty before `create`).
    func getModules() -> [any SpeechModule]

    /// Streams results to `onResult` until the result sequence finishes.
    func handleResults(onResult: @escaping (TranscriberResult) -> Void) async throws

    /// Releases the underlying transcriber.
    func clean()
}
@@ -2,14 +2,5 @@ import Foundation
2
2
  import NitroModules
3
3
 
4
4
/// Root Nitro module. Exposes a single recognizer to JS; engine selection
/// (legacy SFSpeechRecognizer vs. iOS 26 analyzer) now happens inside
/// HybridRecognizer rather than here.
class HybridNitroSpeech: HybridNitroSpeechSpec {
    var recognizer: HybridRecognizerSpec = HybridRecognizer()
}
@@ -1,13 +1,8 @@
1
1
  import Foundation
2
- import Speech
3
2
  import NitroModules
4
- import os.log
5
- import AVFoundation
6
3
 
7
- class HybridRecognizer: HybridRecognizerSpec {
8
- internal let logger = Logger(subsystem: "com.margelo.nitro.nitrospeech", category: "Recognizer")
9
- internal static let defaultAutoFinishRecognitionMs = 8000.0
10
- internal static let speechRmsThreshold: Float = 0.005623
4
+ class HybridRecognizer: HybridRecognizerSpec {
5
+ var config: SpeechRecognitionConfig?
11
6
 
12
7
  var onReadyForSpeech: (() -> Void)?
13
8
  var onRecordingStopped: (() -> Void)?
@@ -15,228 +10,176 @@ class HybridRecognizer: HybridRecognizerSpec {
15
10
  var onAutoFinishProgress: ((Double) -> Void)?
16
11
  var onError: ((String) -> Void)?
17
12
  var onPermissionDenied: (() -> Void)?
18
- var onVolumeChange: ((Double) -> Void)?
13
+ var onVolumeChange: ((VolumeChangeEvent) -> Void)?
19
14
 
20
- internal var audioEngine: AVAudioEngine?
15
+ private let coordinator = Coordinator()
16
+ private var paramsHash: String?
17
+ private var engine: RecognizerEngine?
21
18
 
22
- internal var autoStopper: AutoStopper?
23
- internal var appStateObserver: AppStateObserver?
24
- internal var isActive: Bool = false
25
- internal var isStopping: Bool = false
26
- internal var config: SpeechToTextParams?
27
- internal var levelSmoothed: Float = 0
28
-
29
- func getIsActive() -> Bool {
30
- return self.isActive
19
+ override init() {
20
+ super.init()
21
+ self.coordinator.recognizerDelegate = self
31
22
  }
32
23
 
33
- func startListening(params: SpeechToTextParams) {
34
- if isActive {
35
- return
36
- }
37
-
38
- SFSpeechRecognizer.requestAuthorization { [weak self] authStatus in
39
- Task { @MainActor in
40
- guard let self = self else { return }
41
-
42
- self.config = params
43
-
44
- switch authStatus {
45
- case .authorized:
46
- self.requestMicrophonePermission()
47
- case .denied, .restricted:
48
- self.onPermissionDenied?()
49
- case .notDetermined:
50
- self.onError?("Speech recognition not determined")
51
- @unknown default:
52
- self.onError?("Unknown authorization status")
53
- }
54
- }
24
+ private let lg = Lg(prefix: "HybridRecognizer")
25
+
26
+ @discardableResult
27
+ func prewarm(defaultParams: SpeechRecognitionConfig?) -> Promise<Void> {
28
+ return Promise.async(.userInitiated) { [weak self] in
29
+ // Ensure correct engine is selected based on params and ios version
30
+ await self?.ensureEngine(params: defaultParams)
31
+ // try to preload assets and check if speech engine is available on OS level
32
+ await self?.engine?.prewarm(for: .prewarm)
55
33
  }
56
34
  }
57
-
58
- func dispose() {
59
- stopListening()
35
+
36
+ func startListening(params: SpeechRecognitionConfig?) {
37
+ Task {
38
+ // Ensure correct engine is selected based on params and ios version
39
+ await ensureEngine(params: params)
40
+ engine?.start()
41
+ }
60
42
  }
61
43
 
62
44
  func stopListening() {
63
- guard isActive, !isStopping else { return }
64
- isStopping = true
65
-
66
- self.stopHapticFeedback()
45
+ engine?.stop()
67
46
  }
68
47
 
69
- internal func handleInternalStopTrigger() {
70
- self.stopListening()
48
+ func resetAutoFinishTime() {
49
+ engine?.updateSession(resetTimer: true)
71
50
  }
72
51
 
73
52
  func addAutoFinishTime(additionalTimeMs: Double?) {
74
- guard isActive, !isStopping else { return }
75
-
76
- autoStopper?.indicateRecordingActivity(
77
- from: "refreshAutoFinish",
78
- addMsToThreshold: additionalTimeMs
53
+ if let additionalTimeMs {
54
+ engine?.updateSession(addMsToTimer: additionalTimeMs)
55
+ } else {
56
+ // Reset timer to original baseline.
57
+ engine?.updateSession(resetTimer: true)
58
+ }
59
+ }
60
+
61
+ func updateConfig(newConfig: MutableSpeechRecognitionConfig?, resetAutoFinishTime: Bool?) {
62
+ engine?.updateSession(
63
+ newConfig: newConfig,
64
+ resetTimer: resetAutoFinishTime
79
65
  )
80
66
  }
67
+
68
+ func getIsActive() -> Bool {
69
+ engine?.isActive ?? false
70
+ }
81
71
 
82
- func updateAutoFinishTime(newTimeMs: Double, withRefresh: Bool?) {
83
- guard isActive, !isStopping else { return }
84
-
85
- autoStopper?.updateSilenceThreshold(newThresholdMs: newTimeMs)
86
-
87
- if withRefresh == true {
88
- autoStopper?.indicateRecordingActivity(
89
- from: "updateAutoFinishTime",
90
- addMsToThreshold: nil
91
- )
72
+ func getSupportedLocalesIOS() -> [String] {
73
+ return self.coordinator.getSupportedLocales()
74
+ }
75
+
76
+ private func ensureEngine(params: SpeechRecognitionConfig?) async {
77
+ // Remember new params
78
+ config = params
79
+ let hash = Utils.hashParams(params)
80
+ if engine != nil && hash == paramsHash {
81
+ lg.log("Reuse Engine")
82
+ // Engine is already correct
83
+ return
84
+ }
85
+ if hash != paramsHash {
86
+ // Initialize when trying to select new engine with new params
87
+ await coordinator.initialize()
88
+ paramsHash = hash
89
+ }
90
+ lg.log("hash: \(hash)")
91
+ // Try to select new engine
92
+ engine = coordinator.getEngine()
93
+ if engine == nil {
94
+ // Only wrong locale can wipe out all candidates
95
+ self.onError?("No recognition engine available for the requested locale")
96
+ return
92
97
  }
93
98
  }
99
+ }
94
100
 
95
- internal func requestMicrophonePermission() {}
96
-
97
- internal func startRecognitionSetup() -> Bool {
98
- isStopping = false
99
- isActive = true
100
-
101
- initAutoStop()
102
- monitorAppState()
103
- guard startAudioSession() else {
104
- cleanup(from: "startRecognitionSetup")
105
- return false
101
/// Callback surface the active engine uses to talk back to the recognizer:
/// config access, engine reselection, and all user-facing events.
protocol RecognizerDelegate: AnyObject {
    /// Current recognition config (may be nil before the first start).
    var config: SpeechRecognitionConfig? { get }
    /// Merges a partial config update into the stored config.
    func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?)
    /// Asks the owner to drop the failed engine and retry with another.
    func reselectEngine(forPrewarm: Bool)
    func readyForSpeech()
    func recordingStopped()
    func result(batches: [String])
    func autoFinishProgress(timeLeftMs: Double)
    func error(message: String)
    func permissionDenied()
    func volumeChange(event: VolumeChangeEvent)
}
113
+
114
// MARK: - RecognizerDelegate

extension HybridRecognizer: RecognizerDelegate {
    /// Rebuilds `config`, overlaying only the fields a `MutableSpeechRecognitionConfig`
    /// may change; all other fields are carried over unchanged. No-op when
    /// `newConfig` is nil.
    func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?) {
        guard let newConfig else { return }
        config = SpeechRecognitionConfig(
            locale: config?.locale,
            contextualStrings: config?.contextualStrings,
            maskOffensiveWords: config?.maskOffensiveWords,
            autoFinishRecognitionMs: newConfig.autoFinishRecognitionMs ?? config?.autoFinishRecognitionMs,
            autoFinishProgressIntervalMs: newConfig.autoFinishProgressIntervalMs ?? config?.autoFinishProgressIntervalMs,
            resetAutoFinishVoiceSensitivity: newConfig.resetAutoFinishVoiceSensitivity ?? config?.resetAutoFinishVoiceSensitivity,
            disableRepeatingFilter: newConfig.disableRepeatingFilter ?? config?.disableRepeatingFilter,
            startHapticFeedbackStyle: newConfig.startHapticFeedbackStyle ?? config?.startHapticFeedbackStyle,
            stopHapticFeedbackStyle: newConfig.stopHapticFeedbackStyle ?? config?.stopHapticFeedbackStyle,
            androidFormattingPreferQuality: config?.androidFormattingPreferQuality,
            androidUseWebSearchModel: config?.androidUseWebSearchModel,
            androidDisableBatchHandling: config?.androidDisableBatchHandling,
            iosAddPunctuation: config?.iosAddPunctuation,
            iosPreset: config?.iosPreset,
            iosAtypicalSpeech: config?.iosAtypicalSpeech
        )
    }

    /// Forwards engine events to the JS-facing callbacks, logging each hop.
    func readyForSpeech() {
        lg.log("[HR -> onReadyForSpeech]")
        onReadyForSpeech?()
    }

    func recordingStopped() {
        lg.log("[onRecordingStopped]")
        onRecordingStopped?()
    }

    func result(batches: [String]) {
        lg.log("[onResult] \(batches)")
        onResult?(batches)
    }

    func autoFinishProgress(timeLeftMs: Double) {
        lg.log("[onAutoFinishProgress] \(timeLeftMs)ms")
        onAutoFinishProgress?(timeLeftMs)
    }

    func error(message: String) {
        lg.log("[onError]")
        onError?(message)
    }

    func permissionDenied() {
        lg.log("[onPermissionDenied]")
        onPermissionDenied?()
    }

    func volumeChange(event: VolumeChangeEvent) {
        // Intentionally unlogged: fires too frequently for the log.
        onVolumeChange?(event)
    }

    /// Drops the failed engine from the coordinator's candidates and retries
    /// the interrupted operation (prewarm or start) with the next candidate.
    func reselectEngine(forPrewarm: Bool) {
        // Remove failed engine from candidates
        coordinator.reportEngineFailure()
        // Reset active engine
        engine = nil
        if forPrewarm {
            // Try to prewarm with another candidate
            prewarm(defaultParams: config)
        } else {
            // Try to start with another candidate
            startListening(params: config)
        }
    }
}
@@ -0,0 +1,73 @@
1
+ import Foundation
2
+ import Speech
3
+
4
/// Resolves which of the available speech stacks (SFSpeechRecognizer, and on
/// iOS 26+ SpeechTranscriber / DictationTranscriber) can serve a requested
/// locale, caching the result per locale identifier.
final class LocaleManager {
    private let sfSpeechLocales = SFSpeechRecognizer.supportedLocales().map { $0.identifier }
    private var speechLocales: [String] = []
    private var dictationLocales: [String] = []
    /// Union of all locales any stack supports (SF-only before iOS 26).
    var supportedLocales: [String]
    /// Per-stack resolved locale; nil means that stack cannot serve it.
    var SFLocale: Locale?
    var speechLocale: Locale?
    var dictationLocale: Locale?

    // Identifier the per-stack locales were last resolved for (memo key).
    private var equivalentsCountedFor: String?

    init() async {
        supportedLocales = sfSpeechLocales

        guard #available(iOS 26.0, *) else { return }
        speechLocales = await SpeechTranscriber.supportedLocales.map { $0.identifier }
        dictationLocales = await DictationTranscriber.supportedLocales.map { $0.identifier }
        Log.log("[Coordinator] sfSpeechLocales: \(self.sfSpeechLocales)")
        Log.log("[Coordinator] speechLocales: \(self.speechLocales)")
        Log.log("[Coordinator] dictationLocales: \(self.dictationLocales)")
        supportedLocales = Array(
            Set(sfSpeechLocales)
                .union(Set(speechLocales))
                .union(Set(dictationLocales))
        )
    }

    /// Computes `SFLocale`/`speechLocale`/`dictationLocale` for `localeString`
    /// (default "en-US"). Memoized: repeated calls with the same identifier
    /// keep the previously computed values, even if some are nil.
    func ensureLocale(localeString: String?) async {
        let identifier = localeString ?? "en-US"
        guard equivalentsCountedFor != identifier else {
            // All locales has been counted already, might be nil, but use them
            Log.log("[Coordinator] ensureLocale: \(identifier) -> Already counted ")
            return
        }

        if #available(iOS 26.0, *) {
            let speechEquivalent = await SpeechTranscriber.supportedLocale(
                equivalentTo: Locale(identifier: identifier)
            )?.identifier
            if let speechEquivalent, speechLocales.contains(speechEquivalent) {
                speechLocale = Locale(identifier: speechEquivalent)
            } else {
                speechLocale = nil
            }

            let dictationEquivalent = await DictationTranscriber.supportedLocale(
                equivalentTo: Locale(identifier: identifier)
            )?.identifier
            if let dictationEquivalent, dictationLocales.contains(dictationEquivalent) {
                dictationLocale = Locale(identifier: dictationEquivalent)
            } else {
                dictationLocale = nil
            }
        }

        SFLocale = sfSpeechLocales.contains(identifier) ? Locale(identifier: identifier) : nil

        equivalentsCountedFor = identifier
        Log.log("[Coordinator] equivalents: speechLocale: \(self.speechLocale?.identifier), dictationLocale: \(self.dictationLocale?.identifier), SFLocale: \(self.SFLocale?.identifier)")
        Log.log("[Coordinator] ensureLocale: \(identifier) -> New")
    }
}
@@ -1,7 +1,7 @@
1
1
  import Foundation
2
2
  import UIKit
3
3
 
4
- class AppStateObserver {
4
+ final class AppStateObserver {
5
5
  private var observer: NSObjectProtocol?
6
6
  private let onResignActive: () -> Void
7
7
 
@@ -28,4 +28,3 @@ class AppStateObserver {
28
28
  stop()
29
29
  }
30
30
  }
31
-