@gmessier/nitro-speech 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/LICENSE +21 -0
  2. package/README.md +176 -148
  3. package/android/build.gradle +0 -1
  4. package/android/src/main/cpp/cpp-adapter.cpp +5 -1
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
  6. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +82 -18
  7. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +118 -30
  8. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
  9. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +35 -24
  10. package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
  11. package/ios/Audio/AudioLevelTracker.swift +60 -0
  12. package/ios/Coordinator.swift +105 -0
  13. package/ios/Engines/AnalyzerEngine.swift +241 -0
  14. package/ios/Engines/DictationRuntime.swift +67 -0
  15. package/ios/Engines/RecognizerEngine.swift +315 -0
  16. package/ios/Engines/SFSpeechEngine.swift +119 -0
  17. package/ios/Engines/SpeechRuntime.swift +58 -0
  18. package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
  19. package/ios/HybridNitroSpeech.swift +1 -10
  20. package/ios/HybridRecognizer.swift +142 -191
  21. package/ios/LocaleManager.swift +73 -0
  22. package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
  23. package/ios/Shared/AutoStopper.swift +147 -0
  24. package/ios/Shared/HapticImpact.swift +24 -0
  25. package/ios/Shared/Log.swift +41 -0
  26. package/ios/Shared/Permissions.swift +59 -0
  27. package/ios/Shared/Utils.swift +58 -0
  28. package/lib/NitroSpeech.d.ts +2 -0
  29. package/lib/NitroSpeech.js +2 -0
  30. package/lib/Recognizer/RecognizerRef.d.ts +7 -0
  31. package/lib/Recognizer/RecognizerRef.js +16 -0
  32. package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
  33. package/lib/Recognizer/SpeechRecognizer.js +9 -0
  34. package/lib/Recognizer/methods.d.ts +9 -0
  35. package/lib/Recognizer/methods.js +33 -0
  36. package/lib/Recognizer/types.d.ts +6 -0
  37. package/lib/Recognizer/types.js +1 -0
  38. package/lib/Recognizer/useRecognizer.d.ts +16 -0
  39. package/lib/Recognizer/useRecognizer.js +71 -0
  40. package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
  41. package/lib/Recognizer/useRecognizerIsActive.js +40 -0
  42. package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
  43. package/lib/Recognizer/useVoiceInputVolume.js +52 -0
  44. package/lib/index.d.ts +7 -0
  45. package/lib/index.js +7 -0
  46. package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
  47. package/lib/specs/NitroSpeech.nitro.js +1 -0
  48. package/lib/specs/Recognizer.nitro.d.ts +97 -0
  49. package/lib/specs/Recognizer.nitro.js +1 -0
  50. package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
  51. package/lib/specs/SpeechRecognitionConfig.js +1 -0
  52. package/lib/specs/VolumeChangeEvent.d.ts +31 -0
  53. package/lib/specs/VolumeChangeEvent.js +1 -0
  54. package/nitro.json +0 -4
  55. package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
  56. package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
  57. package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
  58. package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
  59. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +73 -19
  60. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +8 -4
  61. package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
  62. package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
  63. package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
  64. package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
  65. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
  66. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +22 -5
  67. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
  68. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
  69. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
  70. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
  71. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
  72. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +211 -69
  73. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
  74. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +49 -9
  75. package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
  76. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  77. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +7 -3
  78. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +78 -18
  79. package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
  80. package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
  81. package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
  82. package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
  83. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +5 -1
  84. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +18 -7
  85. package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
  86. package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
  87. package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
  88. package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
  89. package/package.json +15 -16
  90. package/src/NitroSpeech.ts +5 -0
  91. package/src/Recognizer/RecognizerRef.ts +27 -0
  92. package/src/Recognizer/SpeechRecognizer.ts +10 -0
  93. package/src/Recognizer/methods.ts +45 -0
  94. package/src/Recognizer/types.ts +34 -0
  95. package/src/Recognizer/useRecognizer.ts +87 -0
  96. package/src/Recognizer/useRecognizerIsActive.ts +49 -0
  97. package/src/Recognizer/useVoiceInputVolume.ts +65 -0
  98. package/src/index.ts +13 -182
  99. package/src/specs/NitroSpeech.nitro.ts +2 -163
  100. package/src/specs/Recognizer.nitro.ts +113 -0
  101. package/src/specs/SpeechRecognitionConfig.ts +167 -0
  102. package/src/specs/VolumeChangeEvent.ts +31 -0
  103. package/android/proguard-rules.pro +0 -1
  104. package/ios/AnylyzerTranscriber.swift +0 -331
  105. package/ios/AutoStopper.swift +0 -69
  106. package/ios/HapticImpact.swift +0 -32
  107. package/ios/LegacySpeechRecognizer.swift +0 -161
  108. package/lib/commonjs/index.js +0 -145
  109. package/lib/commonjs/index.js.map +0 -1
  110. package/lib/commonjs/package.json +0 -1
  111. package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
  112. package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
  113. package/lib/module/index.js +0 -138
  114. package/lib/module/index.js.map +0 -1
  115. package/lib/module/package.json +0 -1
  116. package/lib/module/specs/NitroSpeech.nitro.js +0 -4
  117. package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
  118. package/lib/tsconfig.tsbuildinfo +0 -1
  119. package/lib/typescript/index.d.ts +0 -50
  120. package/lib/typescript/index.d.ts.map +0 -1
  121. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
  122. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
  123. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
@@ -0,0 +1,315 @@
1
+ import Foundation
2
+ import Speech
3
+ import AVFoundation
4
+
5
// Classifies where in the recognition lifecycle a failure occurred.
// No practical diff between "system" and "onSession" for now.
// For future: send the level of error to RN.
// "onSession" is less critical level, since the session has been started successfully.
enum FailureType {
    case system     // engine-agnostic / OS-level failure (audio session, audio engine)
    case start      // failure while starting a session
    case prewarm    // failure while prewarming (availability checks, asset downloads)
    case onSession  // failure raised after the session already started successfully
}
14
+
15
/// Base class for the iOS speech-recognition engines. Owns everything that is
/// common to all back ends: the AVAudioEngine input tap, the shared
/// AVAudioSession, audio-level tracking (volume events), the silence-based
/// AutoStopper, the app-state observer, and failure reporting/cleanup.
/// Subclasses (e.g. SFSpeechEngine) plug their recognizer on top via
/// `startSession()` / `startAudioEngine(onBuffer:)`.
class RecognizerEngine {
    // True while a recognition session is live (set in startSession, cleared in cleanup).
    var isActive = false
    // True once stop() has been requested, until cleanup completes.
    var isStopping = false
    // Input-node output format; cached by prepareAudioEngine() because the
    // first retrieval is heavy.
    var hardwareFormat: AVAudioFormat?
    // Receives session events (results, volume, errors); weak to avoid a retain cycle.
    weak var recognizerDelegate: RecognizerDelegate?

    private let audioLevelTracker = AudioLevelTracker()
    private var appStateObserver: AppStateObserver?
    private var audioEngine: AVAudioEngine?
    private var autoStopper: AutoStopper?
    private let lg = Lg(prefix: "RecognizerEngine")

    let locale: Locale

    init(locale: Locale, delegate: RecognizerDelegate) {
        self.locale = locale
        self.recognizerDelegate = delegate
    }

    // MARK: - Recognizer Methods

    /// Performs heavy up-front work so a later start is fast. The base class
    /// only prepares the audio engine; subclasses add engine-specific checks:
    /// for SpeechTranscriber: .isAvailable and async assets
    /// for Dictation: only async assets
    /// for legacy SF: only sync .isAvailable
    func prewarm(for: FailureType) async {
        self.prepareAudioEngine()
    }

    /// Entry point: requests authorization and, once granted, starts a session.
    /// No-op when a session is already active or there is no delegate.
    func start() {
        guard let recognizerDelegate, !isActive else { return }

        Permissions(
            onGranted: self.startSession,
            onDenied: recognizerDelegate.permissionDenied,
            onError: recognizerDelegate.error
        ).requestAuthorization()
    }

    /// Requests a stop: flips `isStopping` and fires the stop haptic.
    /// Subclasses finish their recognition work on top of this; actual
    /// teardown happens later in cleanup(from:).
    func stop() {
        guard isActive, !isStopping else { return }
        isStopping = true
        HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
    }

    /// Brings up the shared session infrastructure, in order: state flags,
    /// auto-stop timer, app-state observer, audio session. Subclasses call
    /// this via `super` before starting their recognizer.
    func startSession() async {
        lg.log("[startSession.startSession]")
        // Init everything
        isStopping = false
        isActive = true

        initAutoStop()
        lg.log("[startSession.initAutoStop]")
        startAppStateObserver()
        lg.log("[startSession.startAppStateObserver]")
        startAudioSession()
        lg.log("[startSession.startAudioSession]")
    }

    /// Installs the input tap (volume tracking + buffer forwarding) and starts
    /// the audio engine. `onBuffer` receives every captured PCM buffer so the
    /// subclass can feed its recognizer.
    /// NOTE(review): installTap(onBus:) does not replace an existing tap; this
    /// relies on cleanup(from:) having removed the previous one — confirm
    /// start/cleanup are always paired.
    func startAudioEngine(
        onBuffer: @escaping (AVAudioPCMBuffer) -> Void
    ) {
        lg.log("[startAudioEngine]")
        guard let audioEngine, let hardwareFormat else { return }
        audioEngine.inputNode.installTap(
            onBus: 0,
            bufferSize: 1024,
            format: hardwareFormat
        ) { [weak self] buffer, _ in
            guard let self, let recognizerDelegate = self.recognizerDelegate else { return }
            if let sample = self.audioLevelTracker.process(
                buffer,
                recognizerDelegate.config?.resetAutoFinishVoiceSensitivity
            ) {
                // Send buffer volume data
                recognizerDelegate.volumeChange(
                    event:
                        VolumeChangeEvent(
                            smoothedVolume: sample.smoothed,
                            rawVolume: sample.raw,
                            db: sample.db
                        )
                )
                // Voice detected above the RMS threshold: keep the session alive.
                if sample.resetTimer {
                    self.autoStopper?.resetTimer(from: "rms threshold")
                }
            }
            onBuffer(buffer)
        }
        lg.log("[startAudioEngine.installTap]")
        do {
            audioEngine.prepare()
            lg.log("[startAudioEngine.prepare]")
            try audioEngine.start()
            lg.log("[startAudioEngine.start]")
        } catch {
            self.reportFailure(
                from: "Audio Engine",
                message: "Audio Engine failed to start",
                // RecognizerEngine-agnostic Error
                type: .system
            )
        }
    }

    /// Signals "listening" to the user and the JS side: start haptic, fresh
    /// auto-stop timer, readyForSpeech event, and an empty initial result.
    func sendFeedbackOnStart() {
        guard let recognizerDelegate else { return }
        lg.log("[sendFeedbackOnStart]")
        HapticImpact.trigger(with: recognizerDelegate.config?.startHapticFeedbackStyle)
        autoStopper?.resetTimer(from: "startListening.sendFeedbackOnStart")
        recognizerDelegate.readyForSpeech()
        recognizerDelegate.result(batches: [])
    }

    /// Applies mid-session tweaks: auto-finish threshold / progress interval
    /// changes, a one-off timer extension, or a plain timer reset. Finally
    /// merges the non-nil fields of `newConfig` into the delegate's config.
    /// No-op unless a session is active and not already stopping.
    func updateSession(
        newConfig: MutableSpeechRecognitionConfig? = nil,
        addMsToTimer: Double? = nil,
        resetTimer: Bool? = nil
    ) {
        guard let recognizerDelegate, isActive, !isStopping else { return }
        let currentConfig = recognizerDelegate.config
        // Update AutoFinish time
        if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
           newAutoFinish != currentConfig?.autoFinishRecognitionMs {
            autoStopper?.updateThreshold(
                newAutoFinish,
                from: "updateSession"
            )
        }
        // Update AutoFinish progress interval
        if let newInterval = newConfig?.autoFinishProgressIntervalMs,
           newInterval != currentConfig?.autoFinishProgressIntervalMs {
            autoStopper?.updateProgressInterval(
                newInterval,
                from: "updateSession"
            )
        }

        if let addMsToTimer {
            // Add time to the timer once
            autoStopper?.addMsOnce(
                addMsToTimer,
                from: "updateSession"
            )
        } else if resetTimer == true {
            // Reset to current baseline threshold.
            autoStopper?.resetTimer(from: "updateSession")
        }
        // Only update new non-nil values in the config
        recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
    }

    /// Returns the latest tracked volume sample as a VolumeChangeEvent, or nil
    /// when no sample has been captured yet.
    func getVoiceInputVolume() -> VolumeChangeEvent? {
        guard let currentSample = audioLevelTracker.currentSample else { return nil }
        return VolumeChangeEvent(
            smoothedVolume: currentSample.smoothed,
            rawVolume: currentSample.raw,
            db: currentSample.db
        )
    }

    /// Tears everything down (idempotent): auto-stop, app-state observer,
    /// audio session, level tracker, engine + tap; resets flags, emits a
    /// zero-volume event, and fires recordingStopped only if a session was
    /// actually active. `from` is a label for logging.
    func cleanup(from: String) {
        lg.log("[cleanup]: \(from)")
        let wasActive = isActive
        deinitAutoStop()
        stopAppStateObserver()
        stopAudioSession()
        audioLevelTracker.reset()

        if let audioEngine, audioEngine.isRunning {
            audioEngine.stop()
        }
        audioEngine?.inputNode.removeTap(onBus: 0)

        audioEngine = nil
        isActive = false
        isStopping = false
        self.recognizerDelegate?.volumeChange(
            event:
                VolumeChangeEvent(
                    smoothedVolume: 0,
                    rawVolume: 0,
                    db: nil
                )
        )
        if wasActive {
            self.recognizerDelegate?.recordingStopped()
        }
    }

    /// Central failure path: logs, always cleans up, then either asks the
    /// coordinator to reselect an engine (prewarm/start failures) or surfaces
    /// the error to JS (system/on-session failures).
    func reportFailure(from: String, message: String, type: FailureType) {
        // Log message
        lg.log("[Failure] type: \(type), message: \(message)")

        // Cleanup on engine level anyway
        self.cleanup(from: from)

        switch type {
        // Try to reselect engine and try again
        case .prewarm, .start:
            let isPrewarm = type == .prewarm
            self.recognizerDelegate?.reselectEngine(forPrewarm: isPrewarm)
        // System level issue: send onError with description and clean
        // Session has already started: send onError and cleanup
        case .system, .onSession:
            self.recognizerDelegate?.error(message: message)
        }
    }

    /// Called by subclasses when partial transcription arrives; keeps the
    /// auto-stop timer alive unless we are already stopping.
    func trackPartialActivity() {
        if !self.isStopping {
            self.autoStopper?.resetTimer(from: "Partial results")
        }
    }

    // MARK: - AudioEngine heavy prepare

    /// Creates the AVAudioEngine and caches the hardware input format.
    /// Reports a .system failure if the engine cannot be created.
    private func prepareAudioEngine() {
        lg.log("[prewarm.start]")
        audioEngine = AVAudioEngine()
        guard let audioEngine else {
            self.reportFailure(
                from: "Audio Engine",
                message: "Audio Engine failed to initiate",
                // RecognizerEngine-agnostic Error
                type: .system
            )
            return
        }
        lg.log("[prewarm.audioEngine]")
        // heavy first hardwareFormat retrieval
        if hardwareFormat == nil {
            hardwareFormat = audioEngine.inputNode.outputFormat(forBus: 0)
            lg.log("[prewarm.hardwareFormat]")
        }
    }

    // MARK: - AutoStopper

    /// Builds the silence timer from the current config; progress ticks go to
    /// the delegate, timeout triggers stop().
    private func initAutoStop() {
        let config = self.recognizerDelegate?.config
        autoStopper = AutoStopper(
            silenceThresholdMs: config?.autoFinishRecognitionMs,
            progressIntervalMs: config?.autoFinishProgressIntervalMs,
            onProgress: { [weak self] timeLeftMs in
                guard let self else { return }
                self.recognizerDelegate?.autoFinishProgress(
                    timeLeftMs: timeLeftMs
                )
            },
            onTimeout: { [weak self] in
                self?.stop()
            }
        )
    }
    private func deinitAutoStop() {
        autoStopper?.stop()
        autoStopper = nil
    }

    // MARK: - App State Observer

    /// Stops the session when the app leaves the foreground while active.
    private func startAppStateObserver() {
        appStateObserver = AppStateObserver { [weak self] in
            guard let self, self.isActive else { return }
            self.stop()
        }
    }

    private func stopAppStateObserver() {
        appStateObserver?.stop()
        appStateObserver = nil
    }

    // MARK: - Audio Session

    /// Activates the shared audio session for recording (measurement mode,
    /// ducking others). Reports a .system failure on error.
    private func startAudioSession() {
        do {
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
            // Required for haptic feedback
            try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            self.reportFailure(
                from: "startAudioSession",
                message: "Failed to activate audio session: \(error.localizedDescription)",
                // RecognizerEngine-agnostic Error
                type: .system
            )
        }
    }
    /// Deactivates the shared audio session; failures are logged only, since
    /// this runs during teardown and is not critical.
    private func stopAudioSession() {
        do {
            // TODO: check unduck
            try AVAudioSession.sharedInstance().setActive(false)
        } catch {
            // Just log and no-op - not critical
            lg.log("Failed to deactivate audio session: \(error.localizedDescription)")
        }
    }
}
@@ -0,0 +1,119 @@
1
+ import Foundation
2
+ import Speech
3
+ import AVFoundation
4
+
5
/// Legacy (pre-iOS 26) engine built on SFSpeechRecognizer. Feeds the shared
/// audio-engine buffers into an SFSpeechAudioBufferRecognitionRequest and
/// forwards transcriptions to the delegate as a single batch.
final class SFSpeechEngine: RecognizerEngine {
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private var speechRecognizer: SFSpeechRecognizer?

    private let lg = Lg(prefix: "SFSpeechEngine")

    /// Ends the audio stream so the recognizer can produce its final result,
    /// then asks the task to finish. Teardown happens when the final result
    /// (or the post-stop error) arrives in the task callback.
    override func stop() {
        super.stop()
        recognitionRequest?.endAudio()
        recognitionTask?.finish()
    }

    /// Creates the SFSpeechRecognizer for the configured locale and verifies
    /// availability before the (heavy) audio-engine preparation in `super`.
    override func prewarm(for type: FailureType) async {
        speechRecognizer = SFSpeechRecognizer(
            locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
        )
        guard speechRecognizer?.isAvailable == true else {
            self.reportFailure(
                from: "prewarm",
                message: "SFSpeechRecognizer is not available",
                type: type
            )
            // FIX: previously execution fell through to super.prewarm(for:)
            // even after reportFailure had already run cleanup; bail out so we
            // don't prepare the audio engine for a failed/replaced engine.
            return
        }
        await super.prewarm(for: type)
    }

    /// Starts a full session: shared infra (super), recognizer prewarm,
    /// recognition request + task, audio engine tap, then start feedback.
    /// The guards return silently because prewarm's failure path has already
    /// reported and cleaned up (speechRecognizer is nil after cleanup).
    override func startSession() async {
        await super.startSession()
        lg.log("[startSession.startSession]")

        await prewarm(for: .start)
        lg.log("[startSession.prewarm]")
        guard let speechRecognizer else { return }

        recognitionRequest = createRecognitionRequest()
        lg.log("[startSession.createRecognitionRequest]")
        guard let recognitionRequest else { return }

        recognitionTask = speechRecognizer.recognitionTask(
            with: recognitionRequest
        ) { [weak self] result, error in
            guard let self else { return }

            if let result = result {
                var transcription = result.bestTranscription.formattedString
                if !transcription.isEmpty {
                    // Track only when transcription is coming
                    self.trackPartialActivity()

                    let disableRepeatingFilter = self.recognizerDelegate?.config?.disableRepeatingFilter ?? false
                    if !disableRepeatingFilter {
                        transcription = Utils.repeatingFilter(transcription)
                    }
                    // Legacy transcriber collects everything into one batch
                    self.recognizerDelegate?.result(batches: [transcription])
                }

                if result.isFinal {
                    self.cleanup(from: "startRecognition.recognitionTask.final")
                }
            }

            if let error = error {
                if !self.isStopping {
                    // Unexpected mid-session error: surface it to JS.
                    self.reportFailure(
                        from: "startSession.recognitionTask.error",
                        message: "Recognition Error: \(error.localizedDescription)",
                        type: .onSession
                    )
                } else {
                    // Error produced by our own stop(); just tear down quietly.
                    self.cleanup(from: "startRecognition.recognitionTask.manualStop")
                }
            }
        }
        lg.log("[startSession.recognitionTask]")

        // Every captured buffer is appended to the recognition request.
        self.startAudioEngine(
            onBuffer: { [weak self] buffer in
                self?.recognitionRequest?.append(buffer)
            }
        )
        lg.log("[startSession.startAudioEngine]")

        self.sendFeedbackOnStart()
        lg.log("[startSession.sendFeedbackOnStart]")
    }

    /// Releases the SF-specific objects on top of the base cleanup.
    override func cleanup(from: String) {
        super.cleanup(from: "overridden.\(from)")
        recognitionRequest = nil
        recognitionTask = nil
        speechRecognizer = nil
    }

    /// Builds the buffer-based recognition request: partial results on,
    /// optional contextual strings, and punctuation (iOS 16+) enabled unless
    /// the config explicitly turns it off.
    private func createRecognitionRequest() -> SFSpeechAudioBufferRecognitionRequest {
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        if let contextualStrings = self.recognizerDelegate?.config?.contextualStrings,
           !contextualStrings.isEmpty {
            request.contextualStrings = contextualStrings
        }

        if #available(iOS 16, *) {
            // Punctuation defaults to on; only an explicit `false` disables it.
            request.addsPunctuation = self.recognizerDelegate?.config?.iosAddPunctuation != false
        }

        return request
    }
}
@@ -0,0 +1,58 @@
1
+ import Foundation
2
+ import Speech
3
+
4
/// TranscriberRuntime backed by the iOS 26 SpeechTranscriber module:
/// general-purpose transcription with volatile (partial) and fast results.
@available(iOS 26.0, *)
final class SpeechRuntime: TranscriberRuntime {
    let locale: Locale
    private var transcriber: SpeechTranscriber?

    init(with locale: Locale) {
        self.locale = locale
    }

    /// Creates the SpeechTranscriber for `locale` and downloads any missing
    /// model assets.
    /// - Parameter config: optional recognition config; only
    ///   `maskOffensiveWords` is consulted here (maps to etiquette replacements).
    /// - Throws: when SpeechTranscriber is unavailable on this device, or when
    ///   the asset installation fails.
    func create(config: SpeechRecognitionConfig?) async throws {
        if !SpeechTranscriber.isAvailable {
            // FIX: was `throw NSError()` — a bare NSError has no domain/code
            // and misbehaves when bridged or described. Throw a descriptive
            // error so the caller's failure path can report something useful.
            throw NSError(
                domain: "NitroSpeech.SpeechRuntime",
                code: 1,
                userInfo: [
                    NSLocalizedDescriptionKey: "SpeechTranscriber is not available on this device"
                ]
            )
        }
        var speechTranscriptionOptions: Set<SpeechTranscriber.TranscriptionOption> = []
        if config?.maskOffensiveWords == true {
            speechTranscriptionOptions.insert(.etiquetteReplacements)
        }
        transcriber = SpeechTranscriber(
            locale: locale,
            transcriptionOptions: speechTranscriptionOptions,
            reportingOptions: [.volatileResults, .fastResults],
            attributeOptions: [.audioTimeRange]
        )

        // Download and install model assets for this transcriber if needed
        // (assetInstallationRequest returns nil when everything is installed).
        if let transcriber,
           let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
            try await installationRequest.downloadAndInstall()
        }
    }

    /// The modules the SpeechAnalyzer should run; empty before create().
    func getModules() -> [any SpeechModule] {
        guard let transcriber else { return [] }
        return [transcriber]
    }

    /// Consumes the transcriber's async results stream, mapping each update
    /// into a TranscriberResult for the engine. Returns when the stream ends;
    /// rethrows stream errors.
    func handleResults(
        onResult: @escaping (TranscriberResult) -> Void
    ) async throws {
        if let transcriber {
            for try await result in transcriber.results {
                onResult(
                    TranscriberResult(
                        text: result.text,
                        rangeStart: result.range.start,
                        isFinal: result.isFinal)
                )
            }
        }
    }

    /// Drops the transcriber so the runtime can be recreated.
    func clean() {
        transcriber = nil
    }
}
@@ -0,0 +1,21 @@
1
+ import Foundation
2
+ import Speech
3
+
4
/// One transcription update emitted by a TranscriberRuntime.
struct TranscriberResult {
    // Transcribed text; an AttributedString because the runtimes request
    // audio-time-range attributes (see SpeechRuntime's attributeOptions).
    let text: AttributedString
    // Start time of the audio range this text covers.
    let rangeStart: CMTime
    // True when this result is final and will no longer change.
    let isFinal: Bool
}

/// Abstraction over the iOS 26 SpeechAnalyzer-based transcription back ends
/// (presumably SpeechRuntime and DictationRuntime — see the Engines folder),
/// covering their lifecycle: create, expose modules, stream results, clean up.
@available(iOS 26.0, *)
protocol TranscriberRuntime {
    // Locale the runtime transcribes in.
    var locale: Locale { get }

    // Builds the underlying transcriber and installs any required assets.
    func create(config: SpeechRecognitionConfig?) async throws

    // Speech modules to attach to the analyzer; empty before create().
    func getModules() -> [any SpeechModule]

    // Streams transcription updates to `onResult` until the stream ends.
    func handleResults(onResult: @escaping (TranscriberResult) -> Void) async throws

    // Releases the underlying transcriber.
    func clean() -> Void
}
@@ -2,14 +2,5 @@ import Foundation
2
2
  import NitroModules
3
3
 
4
4
  class HybridNitroSpeech : HybridNitroSpeechSpec {
5
- var recognizer: any HybridRecognizerSpec
6
-
7
- override init() {
8
- if #available(iOS 26.0, *) {
9
- recognizer = AnalyzerTranscriber()
10
- } else {
11
- recognizer = LegacySpeechRecognizer()
12
- }
13
- super.init()
14
- }
5
+ var recognizer: HybridRecognizerSpec = HybridRecognizer()
15
6
  }