@gmessier/nitro-speech 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -11
- package/android/build.gradle +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HapticImpact.kt +11 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +12 -6
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +73 -7
- package/ios/AnylyzerTranscriber.swift +331 -0
- package/ios/AutoStopper.swift +9 -10
- package/ios/BufferUtil.swift +80 -0
- package/ios/HapticImpact.swift +12 -3
- package/ios/HybridNitroSpeech.swift +10 -1
- package/ios/HybridRecognizer.swift +139 -167
- package/ios/LegacySpeechRecognizer.swift +161 -0
- package/lib/commonjs/index.js +54 -5
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +52 -3
- package/lib/module/index.js.map +1 -1
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/typescript/index.d.ts +25 -8
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +24 -12
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +1 -1
- package/nitrogen/generated/android/c++/JHapticFeedbackStyle.hpp +3 -0
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +22 -0
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +3 -0
- package/nitrogen/generated/android/c++/JSpeechToTextParams.hpp +4 -4
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HapticFeedbackStyle.kt +2 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +3 -3
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +24 -0
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +15 -0
- package/nitrogen/generated/ios/swift/HapticFeedbackStyle.swift +4 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +2 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +44 -0
- package/nitrogen/generated/ios/swift/SpeechToTextParams.swift +6 -6
- package/nitrogen/generated/shared/c++/HapticFeedbackStyle.hpp +4 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +3 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +3 -0
- package/nitrogen/generated/shared/c++/SpeechToTextParams.hpp +5 -5
- package/package.json +7 -7
- package/src/index.ts +59 -2
- package/src/specs/NitroSpeech.nitro.ts +25 -12
|
@@ -2,10 +2,12 @@ import Foundation
|
|
|
2
2
|
import Speech
|
|
3
3
|
import NitroModules
|
|
4
4
|
import os.log
|
|
5
|
+
import AVFoundation
|
|
5
6
|
|
|
6
7
|
class HybridRecognizer: HybridRecognizerSpec {
|
|
7
|
-
|
|
8
|
-
|
|
8
|
+
internal let logger = Logger(subsystem: "com.margelo.nitro.nitrospeech", category: "Recognizer")
|
|
9
|
+
internal static let defaultAutoFinishRecognitionMs = 8000.0
|
|
10
|
+
internal static let speechRmsThreshold: Float = 0.005623
|
|
9
11
|
|
|
10
12
|
var onReadyForSpeech: (() -> Void)?
|
|
11
13
|
var onRecordingStopped: (() -> Void)?
|
|
@@ -13,24 +15,28 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
13
15
|
var onAutoFinishProgress: ((Double) -> Void)?
|
|
14
16
|
var onError: ((String) -> Void)?
|
|
15
17
|
var onPermissionDenied: (() -> Void)?
|
|
18
|
+
var onVolumeChange: ((Double) -> Void)?
|
|
16
19
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
internal var audioEngine: AVAudioEngine?
|
|
21
|
+
|
|
22
|
+
internal var autoStopper: AutoStopper?
|
|
23
|
+
internal var appStateObserver: AppStateObserver?
|
|
24
|
+
internal var isActive: Bool = false
|
|
25
|
+
internal var isStopping: Bool = false
|
|
26
|
+
internal var config: SpeechToTextParams?
|
|
27
|
+
internal var levelSmoothed: Float = 0
|
|
28
|
+
|
|
29
|
+
/// Reports the current value of the `isActive` flag.
///
/// - Returns: `true` while the recognizer is marked active, otherwise `false`.
func getIsActive() -> Bool {
    isActive
}
|
|
25
32
|
|
|
26
33
|
func startListening(params: SpeechToTextParams) {
|
|
27
34
|
if isActive {
|
|
28
|
-
// Previous recognition session is still active
|
|
29
35
|
return
|
|
30
36
|
}
|
|
31
37
|
|
|
32
38
|
SFSpeechRecognizer.requestAuthorization { [weak self] authStatus in
|
|
33
|
-
|
|
39
|
+
Task { @MainActor in
|
|
34
40
|
guard let self = self else { return }
|
|
35
41
|
|
|
36
42
|
self.config = params
|
|
@@ -48,18 +54,20 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
48
54
|
}
|
|
49
55
|
}
|
|
50
56
|
}
|
|
57
|
+
|
|
58
|
+
/// Disposes the recognizer by requesting a stop of the current session.
func dispose() {
    // Disposal has no teardown of its own here; it only forwards to `stopListening()`.
    self.stopListening()
}
|
|
51
61
|
|
|
52
62
|
/// Marks the session as stopping and fires the stop haptic.
///
/// Does nothing when the recognizer is not active or a stop is already in progress.
func stopListening() {
    if !isActive || isStopping {
        return
    }

    isStopping = true
    stopHapticFeedback()
}
|
|
68
|
+
|
|
69
|
+
/// Entry point for stops that originate inside the recognizer (it is called by the
/// auto-stop timeout and the app-state observer callbacks in this class).
internal func handleInternalStopTrigger() {
    stopListening()
}
|
|
64
72
|
|
|
65
73
|
func addAutoFinishTime(additionalTimeMs: Double?) {
|
|
@@ -83,183 +91,147 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
83
91
|
)
|
|
84
92
|
}
|
|
85
93
|
}
|
|
86
|
-
|
|
87
|
-
func dispose() {
|
|
88
|
-
stopListening()
|
|
89
|
-
}
|
|
90
94
|
|
|
91
|
-
|
|
92
|
-
AVAudioSession.sharedInstance().requestRecordPermission { [weak self] granted in
|
|
93
|
-
DispatchQueue.main.async {
|
|
94
|
-
guard let self = self else { return }
|
|
95
|
-
|
|
96
|
-
if granted {
|
|
97
|
-
self.startRecognition()
|
|
98
|
-
} else {
|
|
99
|
-
self.onPermissionDenied?()
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
}
|
|
95
|
+
/// Hook for requesting microphone permission. The base implementation is intentionally
/// empty; subclasses override it to perform the actual request.
internal func requestMicrophonePermission() {}
|
|
104
96
|
|
|
105
|
-
|
|
97
|
+
/// Prepares shared session state before a recognizer implementation starts capturing.
///
/// Resets the stopping flag, marks the recognizer active, creates the auto-stopper,
/// starts app-state monitoring and activates the audio session.
///
/// - Returns: `true` when the audio session was activated; `false` after the
///   partially prepared state has been torn down via `cleanup(from:)`.
internal func startRecognitionSetup() -> Bool {
    isStopping = false
    isActive = true

    initAutoStop()
    monitorAppState()

    if startAudioSession() {
        return true
    }

    // Activation failed: undo everything set up above.
    cleanup(from: "startRecognitionSetup")
    return false
}
|
|
110
|
+
|
|
111
|
+
/// Signals that recognition has started.
///
/// In order: triggers the start haptic, reports recording activity to the
/// auto-stopper, calls `onReadyForSpeech`, then calls `onResult` with an empty batch.
internal func startRecognitionFeedback() {
    startHapticFeedback()
    autoStopper?.indicateRecordingActivity(from: "startListening", addMsToThreshold: nil)

    onReadyForSpeech?()
    onResult?([])
}
|
|
120
|
+
|
|
121
|
+
/// Synchronous recognition entry point. Empty in the base class; subclasses that
/// start recognition synchronously override this overload.
internal func startRecognition() {}
|
|
122
|
+
/// Asynchronous recognition entry point. Empty in the base class; subclasses that
/// start recognition asynchronously override this overload.
internal func startRecognition() async {}
|
|
123
|
+
|
|
124
|
+
/// Releases every per-session resource and returns the recognizer to its idle state.
///
/// Stops the auto-stopper and app-state observer, stops the audio engine, deactivates
/// the audio session, resets the level/flag state and finally reports a volume of 0.
///
/// - Parameter from: Label identifying the call site; it is only written to the log.
internal func cleanup(from: String) {
    logger.info("cleanup called from: \(from)")

    deinitAutoStop()
    stopMonitorAppState()

    // Stop audio I/O *before* deactivating the session: deactivating a session that
    // still has running I/O reports AVAudioSession.ErrorCode.isBusy.
    stopAudioEngine()
    stopAudioSession()

    levelSmoothed = 0
    isActive = false
    isStopping = false
    onVolumeChange?(0)
}
|
|
135
|
+
|
|
136
|
+
/// Stops the audio engine if it is running, removes the input tap on bus 0 and
/// releases the engine. Does nothing when no engine exists.
internal func stopAudioEngine() {
    guard let engine = audioEngine else { return }

    if engine.isRunning {
        engine.stop()
    }
    engine.inputNode.removeTap(onBus: 0)

    audioEngine = nil
}
|
|
143
|
+
|
|
144
|
+
/// Installs an `AppStateObserver` whose callback stops an active session.
///
/// When the observer invokes its callback is decided by `AppStateObserver`
/// (not shown here); the callback is ignored unless the recognizer is active.
internal func monitorAppState() {
    appStateObserver = AppStateObserver { [weak self] in
        if let recognizer = self, recognizer.isActive {
            recognizer.handleInternalStopTrigger()
        }
    }
}
|
|
150
|
+
/// Stops the app-state observer, if any, and releases it.
internal func stopMonitorAppState() {
    if let observer = appStateObserver {
        observer.stop()
    }
    appStateObserver = nil
}
|
|
154
|
+
|
|
155
|
+
/// Creates the auto-stopper for the current session.
///
/// The silence threshold comes from `config.autoFinishRecognitionMs`, falling back to
/// `defaultAutoFinishRecognitionMs`. Progress is forwarded to `onAutoFinishProgress`
/// and a timeout triggers `handleInternalStopTrigger()`.
internal func initAutoStop() {
    let thresholdMs = config?.autoFinishRecognitionMs ?? Self.defaultAutoFinishRecognitionMs

    autoStopper = AutoStopper(
        silenceThresholdMs: thresholdMs,
        onProgress: { [weak self] remainingMs in
            self?.onAutoFinishProgress?(remainingMs)
        },
        onTimeout: { [weak self] in
            self?.handleInternalStopTrigger()
        }
    )
}
|
|
166
|
+
/// Stops the auto-stopper, if any, and releases it.
internal func deinitAutoStop() {
    if let stopper = autoStopper {
        stopper.stop()
    }
    autoStopper = nil
}
|
|
170
|
+
|
|
171
|
+
/// Configures and activates the shared audio session for recording.
///
/// - Returns: `true` on success. On failure the error is reported through `onError`
///   and `false` is returned.
internal func startAudioSession() -> Bool {
    let session = AVAudioSession.sharedInstance()

    do {
        try session.setCategory(.record, mode: .measurement, options: .duckOthers)
        // Without this, iOS may suppress haptics while recording.
        try session.setAllowHapticsAndSystemSoundsDuringRecording(true)
        try session.setActive(true, options: .notifyOthersOnDeactivation)
    } catch {
        onError?("Failed to activate audio session: \(error.localizedDescription)")
        return false
    }

    return true
}
|
|
184
|
+
/// Deactivates the shared audio session.
///
/// A failure is only logged; it is not reported through `onError`.
internal func stopAudioSession() {
    do {
        // `.notifyOthersOnDeactivation` tells other apps' audio sessions (ducked via
        // `.duckOthers` while recording) that they may resume.
        try AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    } catch {
        logger.info("Failed to deactivate audio session: \(error.localizedDescription)")
    }
}
|
|
229
192
|
|
|
230
|
-
|
|
231
|
-
let
|
|
232
|
-
|
|
233
|
-
autoStopper?.stop()
|
|
234
|
-
autoStopper = nil
|
|
235
|
-
|
|
236
|
-
appStateObserver?.stop()
|
|
237
|
-
appStateObserver = nil
|
|
238
|
-
|
|
239
|
-
if let audioEngine = audioEngine, audioEngine.isRunning {
|
|
240
|
-
audioEngine.stop()
|
|
193
|
+
/// Triggers a `HapticImpact` with the configured start style; no-op when the
/// configuration has no `startHapticFeedbackStyle`.
internal func startHapticFeedback() {
    guard let style = config?.startHapticFeedbackStyle else { return }
    HapticImpact(style: style).trigger()
}
|
|
198
|
+
/// Triggers a `HapticImpact` with the configured stop style; no-op when the
/// configuration has no `stopHapticFeedbackStyle`.
internal func stopHapticFeedback() {
    guard let style = config?.stopHapticFeedbackStyle else { return }
    HapticImpact(style: style).trigger()
}
|
|
203
|
+
|
|
204
|
+
/// Reports recognition-result activity to the auto-stopper, unless a stop is
/// already in progress.
internal func trackPartialActivity() {
    guard !isStopping else { return }

    autoStopper?.indicateRecordingActivity(from: "partial results", addMsToThreshold: nil)
}
|
|
256
212
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
var joiner =
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
213
|
+
/// Collapses consecutive duplicate words in the still-unstable tail of a transcription.
///
/// The text is split on whitespace. All words except the last nine are kept verbatim
/// (the first word is always kept). Each remaining word is dropped when it equals the
/// word directly before it, unless it contains a numeric character, so repeated
/// numbers such as "5 5 5" survive.
///
/// - Parameter text: Transcription to filter.
/// - Returns: The kept words joined by single spaces; `""` when `text` contains no words.
internal func repeatingFilter(text: String) -> String {
    let words = text.split(whereSeparator: \.isWhitespace).map(String.init)
    guard !words.isEmpty else { return "" }

    // 10 - arbitrary number of last words that is still unstable and needs to be
    // filtered. Earlier words were handled by previous calls. The first word of that
    // window only serves as the comparison anchor, so filtering starts one past it.
    let unstableWindow = 10
    let firstFiltered = max(words.count - unstableWindow + 1, 1)

    var kept = Array(words[..<firstFiltered])
    for index in firstFiltered..<words.count {
        let word = words[index]
        // Always keep number-containing words. `Character.isNumber` needs no OS
        // availability check, so the rule also holds on systems older than iOS 16.
        let containsNumber = word.contains(where: \.isNumber)
        if containsNumber || word != words[index - 1] {
            kept.append(word)
        }
    }

    return kept.joined(separator: " ")
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
import NitroModules
|
|
4
|
+
import AVFoundation
|
|
5
|
+
|
|
6
|
+
/// Recognizer built on `SFSpeechRecognizer`: microphone buffers from an
/// `AVAudioEngine` input tap are appended to an `SFSpeechAudioBufferRecognitionRequest`.
class LegacySpeechRecognizer: HybridRecognizer {
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Disposes the recognizer.
    ///
    /// `super.dispose()` already calls `stopListening()`, which dispatches to the
    /// override below, so no second stop request is issued here.
    override func dispose() {
        super.dispose()
    }

    /// Requests a graceful stop: runs the base-class stop logic, then signals end of
    /// audio and asks the recognition task to finish.
    override func stopListening() {
        // Mirror the base-class guard so repeated stop requests (or a stop while idle)
        // do not signal endAudio()/finish() again.
        let willStop = isActive && !isStopping
        super.stopListening()
        guard willStop else { return }

        // Signal end of audio and request graceful finish
        recognitionRequest?.endAudio()
        recognitionTask?.finish()
    }

    /// Internal stop triggers are handled exactly like a manual stop.
    override func handleInternalStopTrigger() {
        stopListening()
    }

    /// Asks for record permission; on the main queue it then either starts recognition
    /// (granted) or calls `onPermissionDenied` (denied).
    override func requestMicrophonePermission() {
        AVAudioSession.sharedInstance().requestRecordPermission { [weak self] granted in
            DispatchQueue.main.async {
                guard let self else { return }

                if granted {
                    self.startRecognition()
                } else {
                    self.onPermissionDenied?()
                }
            }
        }
    }

    /// Starts a recognition session: shared setup, speech recognizer, recognition
    /// task, audio tap and engine, then start feedback.
    ///
    /// Every failure path reports through `onError` and calls `cleanup(from:)`.
    override func startRecognition() {
        guard startRecognitionSetup() else { return }

        let locale = Locale(identifier: config?.locale ?? "en-US")
        guard let speechRecognizer = SFSpeechRecognizer(locale: locale), speechRecognizer.isAvailable
        else {
            onError?("Speech recognizer is not available")
            cleanup(from: "startRecognition.speechRecognizer")
            return
        }

        // The request initializer is non-failable, so no nil check is needed.
        let request = createRecognitionRequest()
        recognitionRequest = request

        recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak self] result, error in
            guard let self else { return }

            if let result {
                self.trackPartialActivity()

                var transcription = result.bestTranscription.formattedString
                if !transcription.isEmpty {
                    let disableRepeatingFilter = self.config?.disableRepeatingFilter ?? false
                    if !disableRepeatingFilter {
                        transcription = self.repeatingFilter(text: transcription)
                    }
                    self.onResult?([transcription])
                }

                // Task completed - cleanup whether natural or manual stop
                if result.isFinal {
                    self.cleanup(from: "startRecognition.recognitionTask.final")
                }
            }

            if let error {
                // Only report error if not intentionally stopping
                if !self.isStopping {
                    self.onError?("Recognition error: \(error.localizedDescription)")
                }
                self.cleanup(from: "startRecognition.recognitionTask.error")
            }
        }

        // `AVAudioEngine()` is non-failable, so no nil check is needed.
        let engine = AVAudioEngine()
        audioEngine = engine

        let hardwareFormat = engine.inputNode.outputFormat(forBus: 0)
        engine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: hardwareFormat) { [weak self] buffer, _ in
            guard let self else { return }

            let (rms, nextLevelSmoothed) = BufferUtil().calcRmsVolume(
                levelSmoothed: self.levelSmoothed,
                buffer: buffer
            ) ?? (nil, nil)

            if let nextLevelSmoothed {
                self.levelSmoothed = nextLevelSmoothed
                // Round the reported volume to six decimal places.
                let volume = Double(nextLevelSmoothed * 1_000_000).rounded() / 1_000_000
                self.onVolumeChange?(volume)
            }

            // Input above the speech threshold counts as activity for the auto-stopper.
            if let rms, rms > Self.speechRmsThreshold {
                self.autoStopper?.indicateRecordingActivity(
                    from: "rms change",
                    addMsToThreshold: nil
                )
            }
            self.recognitionRequest?.append(buffer)
        }

        do {
            engine.prepare()
            try engine.start()
        } catch {
            // NOTE(review): the recognition task created above is only released by
            // cleanup(from:), not cancelled — confirm its handler cannot fire later
            // against a newer session.
            onError?("Failed to start audio engine: \(error.localizedDescription)")
            cleanup(from: "startRecognition.startAudioEngine")
            return
        }

        startRecognitionFeedback()
    }

    /// Runs the base cleanup, releases the request and task, and calls
    /// `onRecordingStopped` if the recognizer was active when cleanup began.
    ///
    /// - Parameter from: Call-site label; forwarded to the base class with an
    ///   `overrider.` prefix.
    override func cleanup(from: String) {
        // Captured first because the base cleanup resets `isActive`.
        let wasActive = isActive

        super.cleanup(from: "overrider.\(from)")

        recognitionRequest = nil
        recognitionTask = nil

        if wasActive {
            onRecordingStopped?()
        }
    }

    /// Builds the buffer recognition request from the current configuration:
    /// partial results are always on, contextual strings are applied when non-empty,
    /// and on iOS 16+ punctuation is added unless `iosAddPunctuation` is `false`.
    private func createRecognitionRequest() -> SFSpeechAudioBufferRecognitionRequest {
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        if let contextualStrings = config?.contextualStrings, !contextualStrings.isEmpty {
            request.contextualStrings = contextualStrings
        }

        if #available(iOS 16, *) {
            // Only an explicit `false` disables punctuation; nil keeps it enabled.
            request.addsPunctuation = config?.iosAddPunctuation != false
        }

        return request
    }
}
|
package/lib/commonjs/index.js
CHANGED
|
@@ -3,12 +3,10 @@
|
|
|
3
3
|
Object.defineProperty(exports, "__esModule", {
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
|
-
exports.useRecognizer = exports.RecognizerSession = exports.RecognizerRef = void 0;
|
|
6
|
+
exports.useVoiceInputVolume = exports.useRecognizer = exports.unsafe_onVolumeChange = exports.RecognizerSession = exports.RecognizerRef = void 0;
|
|
7
7
|
var _react = _interopRequireDefault(require("react"));
|
|
8
8
|
var _reactNativeNitroModules = require("react-native-nitro-modules");
|
|
9
9
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
10
|
-
/* eslint-disable react-hooks/exhaustive-deps */
|
|
11
|
-
|
|
12
10
|
const NitroSpeech = _reactNativeNitroModules.NitroModules.createHybridObject('NitroSpeech');
|
|
13
11
|
|
|
14
12
|
/**
|
|
@@ -27,6 +25,46 @@ const recognizerAddAutoFinishTime = additionalTimeMs => {
|
|
|
27
25
|
const recognizerUpdateAutoFinishTime = (newTimeMs, withRefresh) => {
|
|
28
26
|
RecognizerSession.updateAutoFinishTime(newTimeMs, withRefresh);
|
|
29
27
|
};
|
|
28
|
+
// Thin wrapper returning whatever RecognizerSession.getIsActive() returns.
const recognizerGetIsActive = () => RecognizerSession.getIsActive();
|
|
31
|
+
// Callbacks registered through the subscribe function of useVoiceInputVolume.
const subscribers = new Set();
|
|
32
|
+
// Last volume stored by handleVolumeChange; starts at 0.
let currentVolume = 0;
|
|
33
|
+
|
|
34
|
+
/**
 * React hook that subscribes the calling component to voice input volume changes.
 *
 * The value updates with arbitrary frequency (many times per second) while audio recording is active.
 *
 * @returns The current voice input volume normalized to a range of 0 to 1.
 */
const useVoiceInputVolume = () => {
  // Register the store-change callback and hand back its unsubscribe function.
  const subscribe = onStoreChange => {
    subscribers.add(onStoreChange);
    return () => subscribers.delete(onStoreChange);
  };
  const getSnapshot = () => currentVolume;
  return _react.default.useSyncExternalStore(subscribe, getSnapshot);
};
exports.useVoiceInputVolume = useVoiceInputVolume;
|
|
48
|
+
// Stores a new volume and notifies every subscriber; identical values are ignored.
const handleVolumeChange = normVolume => {
  if (normVolume !== currentVolume) {
    currentVolume = normVolume;
    for (const notify of subscribers) {
      notify?.(normVolume);
    }
  }
};
|
|
53
|
+
|
|
54
|
+
/**
 * Unsafe access to the default Recognizer Session's volume change handler.
 *
 * Assign it yourself when you use the static Recognizer Session:
 *
 * ```typescript
 * import { unsafe_onVolumeChange } from '@gmessier/nitro-speech'
 *
 * RecognizerSession.onVolumeChange = unsafe_onVolumeChange
 * ... // do something
 * RecognizerSession.startListening({ locale: 'en-US' })
 * ```
 */
const unsafe_onVolumeChange = handleVolumeChange;
exports.unsafe_onVolumeChange = unsafe_onVolumeChange;
|
|
30
68
|
|
|
31
69
|
/**
|
|
32
70
|
* Safe, lifecycle-aware hook to use the recognizer.
|
|
@@ -43,6 +81,13 @@ const recognizerUpdateAutoFinishTime = (newTimeMs, withRefresh) => {
|
|
|
43
81
|
*/
|
|
44
82
|
const useRecognizer = (callbacks, destroyDeps = []) => {
|
|
45
83
|
_react.default.useEffect(() => {
|
|
84
|
+
if (callbacks.onVolumeChange) {
|
|
85
|
+
RecognizerSession.onVolumeChange = normVolume => {
|
|
86
|
+
callbacks.onVolumeChange?.(normVolume);
|
|
87
|
+
};
|
|
88
|
+
} else {
|
|
89
|
+
RecognizerSession.onVolumeChange = handleVolumeChange;
|
|
90
|
+
}
|
|
46
91
|
RecognizerSession.onReadyForSpeech = () => {
|
|
47
92
|
callbacks.onReadyForSpeech?.();
|
|
48
93
|
};
|
|
@@ -68,18 +113,21 @@ const useRecognizer = (callbacks, destroyDeps = []) => {
|
|
|
68
113
|
RecognizerSession.onAutoFinishProgress = undefined;
|
|
69
114
|
RecognizerSession.onError = undefined;
|
|
70
115
|
RecognizerSession.onPermissionDenied = undefined;
|
|
116
|
+
RecognizerSession.onVolumeChange = undefined;
|
|
71
117
|
};
|
|
72
118
|
}, [callbacks]);
|
|
73
119
|
_react.default.useEffect(() => {
|
|
74
120
|
return () => {
|
|
75
121
|
RecognizerSession.stopListening();
|
|
76
122
|
};
|
|
123
|
+
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
77
124
|
}, [...destroyDeps]);
|
|
78
125
|
return {
|
|
79
126
|
startListening: recognizerStartListening,
|
|
80
127
|
stopListening: recognizerStopListening,
|
|
81
128
|
addAutoFinishTime: recognizerAddAutoFinishTime,
|
|
82
|
-
updateAutoFinishTime: recognizerUpdateAutoFinishTime
|
|
129
|
+
updateAutoFinishTime: recognizerUpdateAutoFinishTime,
|
|
130
|
+
getIsActive: recognizerGetIsActive
|
|
83
131
|
};
|
|
84
132
|
};
|
|
85
133
|
|
|
@@ -91,6 +139,7 @@ const RecognizerRef = exports.RecognizerRef = {
|
|
|
91
139
|
startListening: recognizerStartListening,
|
|
92
140
|
stopListening: recognizerStopListening,
|
|
93
141
|
addAutoFinishTime: recognizerAddAutoFinishTime,
|
|
94
|
-
updateAutoFinishTime: recognizerUpdateAutoFinishTime
|
|
142
|
+
updateAutoFinishTime: recognizerUpdateAutoFinishTime,
|
|
143
|
+
getIsActive: recognizerGetIsActive
|
|
95
144
|
};
|
|
96
145
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["_react","_interopRequireDefault","require","_reactNativeNitroModules","e","__esModule","default","NitroSpeech","NitroModules","createHybridObject","RecognizerSession","exports","recognizer","recognizerStartListening","params","startListening","recognizerStopListening","stopListening","recognizerAddAutoFinishTime","additionalTimeMs","addAutoFinishTime","recognizerUpdateAutoFinishTime","newTimeMs","withRefresh","updateAutoFinishTime","useRecognizer","callbacks","destroyDeps","
|
|
1
|
+
{"version":3,"names":["_react","_interopRequireDefault","require","_reactNativeNitroModules","e","__esModule","default","NitroSpeech","NitroModules","createHybridObject","RecognizerSession","exports","recognizer","recognizerStartListening","params","startListening","recognizerStopListening","stopListening","recognizerAddAutoFinishTime","additionalTimeMs","addAutoFinishTime","recognizerUpdateAutoFinishTime","newTimeMs","withRefresh","updateAutoFinishTime","recognizerGetIsActive","getIsActive","subscribers","Set","currentVolume","useVoiceInputVolume","React","useSyncExternalStore","subscriber","add","delete","handleVolumeChange","normVolume","forEach","unsafe_onVolumeChange","useRecognizer","callbacks","destroyDeps","useEffect","onVolumeChange","onReadyForSpeech","onRecordingStopped","onResult","resultBatches","onAutoFinishProgress","timeLeftMs","onError","message","onPermissionDenied","undefined","RecognizerRef"],"sourceRoot":"../../src","sources":["index.ts"],"mappings":";;;;;;AAAA,IAAAA,MAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,wBAAA,GAAAD,OAAA;AAAyD,SAAAD,uBAAAG,CAAA,WAAAA,CAAA,IAAAA,CAAA,CAAAC,UAAA,GAAAD,CAAA,KAAAE,OAAA,EAAAF,CAAA;AAOzD,MAAMG,WAAW,GACfC,qCAAY,CAACC,kBAAkB,CAAkB,aAAa,CAAC;;AAEjE;AACA;AACA;AACO,MAAMC,iBAAiB,GAAAC,OAAA,CAAAD,iBAAA,GAAGH,WAAW,CAACK,UAAU;AAsBvD,MAAMC,wBAAwB,GAAIC,MAA0B,IAAK;EAC/DJ,iBAAiB,CAACK,cAAc,CAACD,MAAM,CAAC;AAC1C,CAAC;AAED,MAAME,uBAAuB,GAAGA,CAAA,KAAM;EACpCN,iBAAiB,CAACO,aAAa,CAAC,CAAC;AACnC,CAAC;AAED,MAAMC,2BAA2B,GAAIC,gBAAyB,IAAK;EACjET,iBAAiB,CAACU,iBAAiB,CAACD,gBAAgB,CAAC;AACvD,CAAC;AAED,MAAME,8BAA8B,GAAGA,CACrCC,SAAiB,EACjBC,WAAqB,KAClB;EACHb,iBAAiB,CAACc,oBAAoB,CAACF,SAAS,EAAEC,WAAW,CAAC;AAChE,CAAC;AAED,MAAME,qBAAqB,GAAGA,CAAA,KAAM;EAClC,OAAOf,iBAAiB,CAACgB,WAAW,CAAC,CAAC;AACxC,CAAC;AAED,MAAMC,WAAW,GAAG,IAAIC,GAAG,CAAmC,CAAC;AAC/D,IAAIC,aAAa,GAAG,CAAC;;AAErB;AACA;AACA;AACA;AACA;AACA;AACA;AACO,MAAMC,mBAAmB,GAAGA,CAAA,KAAM;EACvC,OAAOC,cAAK,CAACC,oBAAoB,CAC9BC,UAAU,IAAK;IACdN,WAAW,CAACO,GAAG,CAACD,UAAU,CAAC;IAC3B,OAAO,MAAMN,WAA
W,CAACQ,MAAM,CAACF,UAAU,CAAC;EAC7C,CAAC,EACD,MAAMJ,aACR,CAAC;AACH,CAAC;AAAAlB,OAAA,CAAAmB,mBAAA,GAAAA,mBAAA;AAED,MAAMM,kBAAoD,GAAIC,UAAU,IAAK;EAC3E,IAAIA,UAAU,KAAKR,aAAa,EAAE;EAClCA,aAAa,GAAGQ,UAAU;EAC1BV,WAAW,CAACW,OAAO,CAAEL,UAAU,IAAKA,UAAU,GAAGI,UAAU,CAAC,CAAC;AAC/D,CAAC;;AAED;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACO,MAAME,qBAAqB,GAAA5B,OAAA,CAAA4B,qBAAA,GAAGH,kBAAkB;;AAEvD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACO,MAAMI,aAAa,GAAGA,CAC3BC,SAA8B,EAC9BC,WAAiC,GAAG,EAAE,KACf;EACvBX,cAAK,CAACY,SAAS,CAAC,MAAM;IACpB,IAAIF,SAAS,CAACG,cAAc,EAAE;MAC5BlC,iBAAiB,CAACkC,cAAc,GAAIP,UAAkB,IAAK;QACzDI,SAAS,CAACG,cAAc,GAAGP,UAAU,CAAC;MACxC,CAAC;IACH,CAAC,MAAM;MACL3B,iBAAiB,CAACkC,cAAc,GAAGR,kBAAkB;IACvD;IACA1B,iBAAiB,CAACmC,gBAAgB,GAAG,MAAM;MACzCJ,SAAS,CAACI,gBAAgB,GAAG,CAAC;IAChC,CAAC;IACDnC,iBAAiB,CAACoC,kBAAkB,GAAG,MAAM;MAC3CL,SAAS,CAACK,kBAAkB,GAAG,CAAC;IAClC,CAAC;IACDpC,iBAAiB,CAACqC,QAAQ,GAAIC,aAAuB,IAAK;MACxDP,SAAS,CAACM,QAAQ,GAAGC,aAAa,CAAC;IACrC,CAAC;IACDtC,iBAAiB,CAACuC,oBAAoB,GAAIC,UAAkB,IAAK;MAC/DT,SAAS,CAACQ,oBAAoB,GAAGC,UAAU,CAAC;IAC9C,CAAC;IACDxC,iBAAiB,CAACyC,OAAO,GAAIC,OAAe,IAAK;MAC/CX,SAAS,CAACU,OAAO,GAAGC,OAAO,CAAC;IAC9B,CAAC;IACD1C,iBAAiB,CAAC2C,kBAAkB,GAAG,MAAM;MAC3CZ,SAAS,CAACY,kBAAkB,GAAG,CAAC;IAClC,CAAC;IACD,OAAO,MAAM;MACX3C,iBAAiB,CAACmC,gBAAgB,GAAGS,SAAS;MAC9C5C,iBAAiB,CAACoC,kBAAkB,GAAGQ,SAAS;MAChD5C,iBAAiB,CAACqC,QAAQ,GAAGO,SAAS;MACtC5C,iBAAiB,CAACuC,oBAAoB,GAAGK,SAAS;MAClD5C,iBAAiB,CAACyC,OAAO,GAAGG,SAAS;MACrC5C,iBAAiB,CAAC2C,kBAAkB,GAAGC,SAAS;MAChD5C,iBAAiB,CAACkC,cAAc,GAAGU,SAAS;IAC9C,CAAC;EACH,CAAC,EAAE,CAACb,SAAS,CAAC,CAAC;EAEfV,cAAK,CAACY,SAAS,CAAC,MAAM;IACpB,OAAO,MAAM;MACXjC,iBAAiB,CAACO,aAAa,CAAC,CAAC;IACnC,CAAC;IACD;EACF,CAAC,EAAE,CAAC,GAAGyB,WAAW,CAAC,CAAC;EAEpB,OAAO;IACL3B,cAAc,EAAEF,wBAAwB;IACxCI,aAAa,EAAED,uBAAuB;IACtCI,iBAAiB,EAAEF,2BAA2B;IAC9CM,oBAAoB,EAAEH,8BAA8B;IACpDK,WAAW,EAAED;EACf,CAAC;AACH,CAAC;;AAED;AACA;AACA;AAFAd,OAAA,CAAA6B,aAAA,GAAAA,aAAA;AAGO,MAAMe,aAAiC,GAAA5C,OAAA,CAAA4C,aAAA
,GAAG;EAC/CxC,cAAc,EAAEF,wBAAwB;EACxCI,aAAa,EAAED,uBAAuB;EACtCI,iBAAiB,EAAEF,2BAA2B;EAC9CM,oBAAoB,EAAEH,8BAA8B;EACpDK,WAAW,EAAED;AACf,CAAC","ignoreList":[]}
|