@gmessier/nitro-speech 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +165 -148
- package/android/build.gradle +0 -1
- package/android/src/main/cpp/cpp-adapter.cpp +5 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +80 -16
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +93 -20
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +27 -15
- package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
- package/ios/Audio/AudioLevelTracker.swift +66 -0
- package/ios/Coordinator.swift +105 -0
- package/ios/Engines/AnalyzerEngine.swift +241 -0
- package/ios/Engines/DictationRuntime.swift +67 -0
- package/ios/Engines/RecognizerEngine.swift +312 -0
- package/ios/Engines/SFSpeechEngine.swift +119 -0
- package/ios/Engines/SpeechRuntime.swift +58 -0
- package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
- package/ios/HybridNitroSpeech.swift +1 -10
- package/ios/HybridRecognizer.swift +135 -192
- package/ios/LocaleManager.swift +73 -0
- package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
- package/ios/Shared/AutoStopper.swift +147 -0
- package/ios/Shared/HapticImpact.swift +24 -0
- package/ios/Shared/Log.swift +41 -0
- package/ios/Shared/Permissions.swift +59 -0
- package/ios/Shared/Utils.swift +58 -0
- package/lib/NitroSpeech.d.ts +2 -0
- package/lib/NitroSpeech.js +2 -0
- package/lib/Recognizer/RecognizerRef.d.ts +5 -0
- package/lib/Recognizer/RecognizerRef.js +13 -0
- package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
- package/lib/Recognizer/SpeechRecognizer.js +9 -0
- package/lib/Recognizer/methods.d.ts +8 -0
- package/lib/Recognizer/methods.js +29 -0
- package/lib/Recognizer/types.d.ts +6 -0
- package/lib/Recognizer/types.js +1 -0
- package/lib/Recognizer/useRecognizer.d.ts +16 -0
- package/lib/Recognizer/useRecognizer.js +71 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
- package/lib/Recognizer/useVoiceInputVolume.js +52 -0
- package/lib/index.d.ts +6 -0
- package/lib/index.js +6 -0
- package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
- package/lib/specs/NitroSpeech.nitro.js +1 -0
- package/lib/specs/Recognizer.nitro.d.ts +95 -0
- package/lib/specs/Recognizer.nitro.js +1 -0
- package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
- package/lib/specs/SpeechRecognitionConfig.js +1 -0
- package/lib/specs/VolumeChangeEvent.d.ts +31 -0
- package/lib/specs/VolumeChangeEvent.js +1 -0
- package/nitro.json +2 -6
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +5 -3
- package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +68 -19
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +7 -4
- package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
- package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
- package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
- package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +203 -70
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
- package/nitrogen/generated/ios/NitroSpeechAutolinking.swift +2 -2
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +41 -9
- package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +6 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +66 -18
- package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
- package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
- package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
- package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +17 -7
- package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
- package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
- package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
- package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
- package/package.json +15 -16
- package/src/NitroSpeech.ts +5 -0
- package/src/Recognizer/RecognizerRef.ts +23 -0
- package/src/Recognizer/SpeechRecognizer.ts +10 -0
- package/src/Recognizer/methods.ts +40 -0
- package/src/Recognizer/types.ts +33 -0
- package/src/Recognizer/useRecognizer.ts +85 -0
- package/src/Recognizer/useVoiceInputVolume.ts +65 -0
- package/src/index.ts +6 -182
- package/src/specs/NitroSpeech.nitro.ts +2 -163
- package/src/specs/Recognizer.nitro.ts +110 -0
- package/src/specs/SpeechRecognitionConfig.ts +167 -0
- package/src/specs/VolumeChangeEvent.ts +31 -0
- package/android/proguard-rules.pro +0 -1
- package/ios/AnylyzerTranscriber.swift +0 -331
- package/ios/AutoStopper.swift +0 -69
- package/ios/HapticImpact.swift +0 -32
- package/ios/LegacySpeechRecognizer.swift +0 -161
- package/lib/commonjs/index.js +0 -145
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
- package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/module/index.js +0 -138
- package/lib/module/index.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/specs/NitroSpeech.nitro.js +0 -4
- package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/typescript/index.d.ts +0 -50
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
import Speech
|
|
3
|
-
import NitroModules
|
|
4
|
-
import os.log
|
|
5
|
-
import AVFoundation
|
|
6
|
-
|
|
7
|
-
/// Recognizer backed by the iOS 26 `SpeechAnalyzer` pipeline.
///
/// Microphone buffers are captured with an `AVAudioEngine` tap, converted to the
/// analyzer's preferred format when needed, and fed to a `SpeechAnalyzer` that
/// hosts either a `SpeechTranscriber` or, when that module is unavailable,
/// a `DictationTranscriber`. Transcription results are accumulated into
/// `resultBatches` and forwarded through `onResult`.
@available(iOS 26.0, *)
class AnalyzerTranscriber: HybridRecognizer {
    /// Error domain for failures raised inside the audio producer task.
    private static let audioErrorDomain = "com.margelo.nitro.nitrospeech"

    private var inputSequence: AsyncStream<AnalyzerInput>?
    private var inputBuilder: AsyncStream<AnalyzerInput>.Continuation?
    private var outputContinuation: AsyncStream<AVAudioPCMBuffer>.Continuation?
    private var analyzer: SpeechAnalyzer?
    private var speechTranscriber: SpeechTranscriber?
    private var dictationTranscriber: DictationTranscriber?
    private var audioProducerTask: Task<Void, Never>?
    private var recognizerTask: Task<(), Error>?
    /// Start time (seconds) of the audio range covered by the most recent batch.
    private var lastBatchStartTime: Float64? = nil
    /// Transcribed text batches reported so far in this session.
    private var resultBatches: [String] = []

    /// Stops any running session and releases the locales reserved in `AssetInventory`.
    override func dispose() {
        super.dispose()
        self.stopListening()
        self.deallocAssets()
    }

    /// Ends the analyzer input and asks the analyzer to finalize pending audio.
    ///
    /// Finalization runs in a detached-from-caller `Task`; if it throws, the error is
    /// reported through `onError` and the analyzer is cancelled instead. `cleanup`
    /// always runs afterwards.
    override func stopListening() {
        super.stopListening()
        inputBuilder?.finish()

        Task { [weak self] in
            guard let self = self else { return }

            do {
                try await self.analyzer?.finalizeAndFinishThroughEndOfInput()
            } catch {
                self.onError?("Analyzer finalize failed during stop: \(error.localizedDescription)")
                await self.analyzer?.cancelAndFinishNow()
            }

            self.cleanup(from: "stopListening")
        }
    }

    /// Internal stop requests are handled exactly like an explicit stop.
    override func handleInternalStopTrigger() {
        self.stopListening()
    }

    /// Requests record permission and starts recognition when granted;
    /// otherwise invokes `onPermissionDenied`.
    override func requestMicrophonePermission() {
        AVAudioApplication.requestRecordPermission { [weak self] granted in
            Task { @MainActor in
                guard let self = self else { return }

                if granted {
                    await self.startRecognition()
                } else {
                    self.onPermissionDenied?()
                }
            }
        }
    }

    /// Builds the transcriber module, installs assets, and starts the audio and
    /// result tasks. Every failure path reports through `onError` and calls `cleanup`.
    override func startRecognition() async {
        guard self.startRecognitionSetup() else { return }

        // 1. Modules
        let supportedLocale = await SpeechTranscriber.supportedLocale(
            equivalentTo: Locale(identifier: config?.locale ?? "en-US")
        )
        if supportedLocale == nil {
            onError?("Unsupported locale name: en-US is used instead as default")
        }
        let locale = supportedLocale ?? Locale(identifier: "en-US")

        // Exactly one transcriber is kept so the results task below picks the
        // module that was actually handed to the analyzer. Previously a
        // SpeechTranscriber instance was always created and always selected,
        // which made the DictationTranscriber fallback unreachable.
        speechTranscriber = nil
        dictationTranscriber = nil
        let modules: [any SpeechModule]
        if SpeechTranscriber.isAvailable {
            let transcriber = makeSpeechTranscriber(locale: locale)
            speechTranscriber = transcriber
            modules = [transcriber]
            logger.info("[SpeechTranscriber] Activated")
        } else {
            let transcriber = makeDictationTranscriber(locale: locale)
            dictationTranscriber = transcriber
            modules = [transcriber]
            logger.info("[DictationTranscriber] Activated")
        }

        // 2. Assets management
        guard await ensureAssetInventory(modules: modules) else {
            onError?("Speech assets installation failed")
            self.cleanup(from: "startRecognition.ensureAssetInventory")
            return
        }

        // 3. Input sequence
        (inputSequence, inputBuilder) = AsyncStream.makeStream(of: AnalyzerInput.self)

        // 4. Analyzer
        guard let audioFormat = await SpeechAnalyzer.bestAvailableAudioFormat(
            compatibleWith: modules
        ) else {
            onError?("Could not find SpeechAnalyzer audio format")
            self.cleanup(from: "startRecognition.SpeechAnalyzer.bestAvailableAudioFormat")
            return
        }

        analyzer = SpeechAnalyzer(modules: modules)

        // 5. Supply audio
        audioProducerTask = Task {
            do {
                let engine = AVAudioEngine()
                self.audioEngine = engine
                let hardwareFormat = engine.inputNode.outputFormat(forBus: 0)

                // Installing a tap with an empty format raises an Objective-C
                // exception, so fail through the regular error path instead.
                guard hardwareFormat.sampleRate > 0, hardwareFormat.channelCount > 0 else {
                    throw Self.audioError(code: 1, "Microphone input format is not available")
                }

                let needsConversion =
                    hardwareFormat.commonFormat != audioFormat.commonFormat ||
                    hardwareFormat.sampleRate != audioFormat.sampleRate ||
                    hardwareFormat.channelCount != audioFormat.channelCount
                guard let converter = AVAudioConverter(from: hardwareFormat, to: audioFormat) else {
                    throw Self.audioError(code: 2, "Could not create an audio converter for the analyzer format")
                }

                // The stream and its continuation exist before the tap is
                // installed so no captured buffer is dropped.
                let (stream, continuation) = AsyncStream.makeStream(
                    of: AVAudioPCMBuffer.self,
                    bufferingPolicy: .unbounded
                )
                self.outputContinuation = continuation

                engine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: hardwareFormat) { [weak self] buffer, _ in
                    guard let self else { return }
                    let (rms, nextLevelSmoothed) = BufferUtil().calcRmsVolume(levelSmoothed: self.levelSmoothed, buffer: buffer) ?? (nil, nil)

                    if let nextLevelSmoothed {
                        self.levelSmoothed = nextLevelSmoothed
                        // Round to 6 decimal places before reporting.
                        let volume = Double(nextLevelSmoothed * 1_000_000).rounded() / 1_000_000
                        self.onVolumeChange?(volume)
                    }

                    // Audio above the speech threshold resets the silence timer.
                    if let rms, rms > Self.speechRmsThreshold {
                        self.autoStopper?.indicateRecordingActivity(
                            from: "rms change",
                            addMsToThreshold: nil
                        )
                    }
                    self.outputContinuation?.yield(buffer)
                }

                engine.prepare()
                try engine.start()

                for await pcmBuffer in stream {
                    if Task.isCancelled { break }

                    let bufferForAnalyzer: AVAudioPCMBuffer
                    if needsConversion {
                        // Skip analyzing for empty buffers and
                        // throw if a buffer cannot be converted.
                        guard let convertedBuffer = try BufferUtil().convertBuffer(
                            converter: converter,
                            audioFormat: audioFormat,
                            pcmBuffer: pcmBuffer
                        ) else {
                            continue
                        }
                        bufferForAnalyzer = convertedBuffer
                    } else {
                        bufferForAnalyzer = pcmBuffer
                    }

                    let input = AnalyzerInput(buffer: bufferForAnalyzer)
                    self.inputBuilder?.yield(input)
                }
            } catch {
                // Errors raised while shutting down are expected and not reported.
                if Task.isCancelled || self.isStopping {
                    return
                }
                self.onError?("Audio producer failed while capturing microphone input: \(error.localizedDescription)")
                self.cleanup(from: "startRecognition.audioProducerTask")
                return
            }
        }

        // 6. Handle the results
        recognizerTask = Task {
            do {
                if let speechTranscriber = self.speechTranscriber {
                    for try await result in speechTranscriber.results {
                        self.trackPartialActivity()
                        self.handleBatches(
                            attrString: result.text,
                            rangeStart: result.range.start,
                            isFinal: result.isFinal
                        )
                    }
                } else if let dictationTranscriber = self.dictationTranscriber {
                    for try await result in dictationTranscriber.results {
                        self.trackPartialActivity()
                        self.handleBatches(
                            attrString: result.text,
                            rangeStart: result.range.start,
                            isFinal: result.isFinal
                        )
                    }
                }
            } catch {
                if self.isStopping || error is CancellationError {
                    return
                }
                self.onError?("Transcriber results stream failed: \(error.localizedDescription)")
                self.cleanup(from: "startRecognition.recognizerTask")
            }
        }

        // 7. Start the analyzer
        do {
            if let inputSequence, let analyzer {
                if let contextualStrings = config?.contextualStrings, !contextualStrings.isEmpty {
                    let context = AnalysisContext()
                    context.contextualStrings = [
                        AnalysisContext.ContextualStringsTag.general: contextualStrings
                    ]
                    try await analyzer.setContext(context)
                }
                try await analyzer.start(inputSequence: inputSequence)
            }
        } catch {
            onError?("Analyzer failed to start input sequence: \(error.localizedDescription)")
            self.cleanup(from: "startRecognition.analyzerStart")
            return
        }

        self.startRecognitionFeedback()
    }

    /// Releases every per-session resource and, if the session was active before
    /// the call, notifies `onRecordingStopped`.
    override func cleanup(from: String) {
        let wasActive = isActive

        super.cleanup(from: "overridden.\(from)")

        inputSequence = nil
        inputBuilder = nil
        outputContinuation?.finish()
        outputContinuation = nil
        analyzer = nil
        speechTranscriber = nil
        dictationTranscriber = nil
        audioProducerTask?.cancel()
        audioProducerTask = nil
        recognizerTask?.cancel()
        recognizerTask = nil
        lastBatchStartTime = nil
        resultBatches = []

        if wasActive {
            onRecordingStopped?()
        }
    }

    /// Creates the primary transcriber, masking offensive words when configured.
    private func makeSpeechTranscriber(locale: Locale) -> SpeechTranscriber {
        var options: Set<SpeechTranscriber.TranscriptionOption> = []
        if config?.maskOffensiveWords == true {
            options.insert(.etiquetteReplacements)
        }
        return SpeechTranscriber(
            locale: locale,
            transcriptionOptions: options,
            reportingOptions: [.volatileResults, .fastResults],
            attributeOptions: [.audioTimeRange]
        )
    }

    /// Creates the fallback transcriber. Punctuation is on unless
    /// `iosAddPunctuation` is explicitly `false`.
    private func makeDictationTranscriber(locale: Locale) -> DictationTranscriber {
        var options: Set<DictationTranscriber.TranscriptionOption> = [.punctuation]
        if config?.maskOffensiveWords == true {
            options.insert(.etiquetteReplacements)
        }
        if config?.iosAddPunctuation == false {
            options.remove(.punctuation)
        }
        return DictationTranscriber(
            locale: locale,
            contentHints: [.shortForm],
            transcriptionOptions: options,
            reportingOptions: [.frequentFinalization, .volatileResults],
            attributeOptions: [.audioTimeRange]
        )
    }

    /// Builds a valid `NSError` for audio producer failures (a bare `NSError()`
    /// is an invalid instance and must not be thrown).
    private static func audioError(code: Int, _ message: String) -> NSError {
        NSError(
            domain: audioErrorDomain,
            code: code,
            userInfo: [NSLocalizedDescriptionKey: message]
        )
    }

    /// Downloads and installs any assets the modules still need.
    /// - Returns: `true` when the assets are ready, `false` if the request or install threw.
    private func ensureAssetInventory(modules: [any SpeechModule]) async -> Bool {
        do {
            if let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: modules) {
                try await installationRequest.downloadAndInstall()
            }
            return true
        } catch {
            // Keep the underlying reason visible; the caller only sees a Bool.
            logger.error("[ensureAssetInventory] failed: \(error.localizedDescription)")
            return false
        }
    }

    /// Releases every locale currently reserved in `AssetInventory`.
    private func deallocAssets() {
        Task {
            let reserved = await AssetInventory.reservedLocales
            for l in reserved {
                await AssetInventory.release(reservedLocale: l)
            }
        }
    }

    /// Merges one transcription result into `resultBatches` and reports the full list.
    ///
    /// A result replaces the last batch when it covers the same start time as the
    /// previous one or when it is final; otherwise it is appended as a new batch.
    /// - Parameters:
    ///   - attrString: Transcribed text of the result.
    ///   - rangeStart: Start of the audio range the result covers.
    ///   - isFinal: Whether the transcriber marked the result as final.
    private func handleBatches(attrString: AttributedString, rangeStart: CMTime, isFinal: Bool) {
        var newBatch = String(attrString.characters)
        // Ignore batches that contain no word characters (e.g. only punctuation or whitespace).
        if !newBatch.contains(/\w+/) {
            return
        }
        let disableRepeatingFilter = config?.disableRepeatingFilter ?? false
        if !disableRepeatingFilter {
            newBatch = self.repeatingFilter(text: newBatch)
        }
        logger.info("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
        if resultBatches.isEmpty {
            resultBatches.append(newBatch)
        } else if CMTimeGetSeconds(rangeStart) == lastBatchStartTime || isFinal {
            logger.info("[2] replace, isFinal: \(isFinal)")
            resultBatches[resultBatches.count - 1] = newBatch
        } else {
            logger.info("[2] add new batch")
            resultBatches.append(newBatch)
        }
        lastBatchStartTime = CMTimeGetSeconds(rangeStart)
        self.onResult?(resultBatches)
    }
}
|
package/ios/AutoStopper.swift
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
import os.log
|
|
3
|
-
|
|
4
|
-
/// Silence countdown that fires `onTimeout` once no recording activity has been
/// indicated for the current threshold.
///
/// The countdown advances in one-second ticks. Each tick either reports the
/// remaining time through `onProgress` or, when none is left, calls `onTimeout`.
class AutoStopper {
    private let logger = Logger(subsystem: "com.margelo.nitro.nitrospeech", category: "AutoStopper")
    private let onTimeout: () -> Void
    private let onProgress: (Double) -> Void

    /// Threshold restored whenever activity is indicated without extra time.
    private var defaultSilenceThresholdMs: Double
    /// Threshold of the countdown currently in progress.
    private var silenceThresholdMs: Double

    private var progressTask: Task<Void, Never>?
    /// Milliseconds counted since the last indicated activity.
    private var elapsedMs: Double = 0
    private var isStopped = false

    /// - Parameters:
    ///   - silenceThresholdMs: Initial and default silence window in milliseconds.
    ///   - onProgress: Receives the remaining time in milliseconds.
    ///   - onTimeout: Called when the remaining time reaches zero.
    init(silenceThresholdMs: Double, onProgress: @escaping (Double) -> Void, onTimeout: @escaping () -> Void) {
        self.defaultSilenceThresholdMs = silenceThresholdMs
        self.silenceThresholdMs = silenceThresholdMs
        self.onProgress = onProgress
        self.onTimeout = onTimeout
    }

    /// Restarts the countdown because recording activity was observed.
    ///
    /// Does nothing beyond logging once `stop()` has been called, so a stopped
    /// instance no longer mutates its threshold or reports progress.
    /// - Parameters:
    ///   - from: Label of the activity source, used for logging only.
    ///   - addMsToThreshold: When set, the new window is the time still left in
    ///     the current window plus this amount; when `nil`, the default
    ///     threshold is restored.
    func indicateRecordingActivity(from: String, addMsToThreshold: Double?) {
        logger.info("[IndicateRecordingActivity]: \(from)")
        guard !isStopped else { return }

        if let addMsToThreshold {
            silenceThresholdMs = addMsToThreshold + silenceThresholdMs - elapsedMs
        } else {
            silenceThresholdMs = defaultSilenceThresholdMs
        }

        onProgress(silenceThresholdMs)
        progressTask?.cancel()
        elapsedMs = 0

        scheduleNextTick()
    }

    /// Schedules a single one-second tick on the main actor.
    private func scheduleNextTick() {
        progressTask = Task { @MainActor [weak self] in
            // A cancelled sleep returns early; the guard below then exits.
            try? await Task.sleep(nanoseconds: 1_000_000_000)
            guard let self = self, !self.isStopped, !Task.isCancelled else { return }

            self.elapsedMs += 1000
            let timeLeftMs = self.silenceThresholdMs - self.elapsedMs

            if timeLeftMs <= 0 {
                self.onTimeout()
            } else {
                self.onProgress(timeLeftMs)
                self.scheduleNextTick()
            }
        }
    }

    /// Changes the default threshold; it takes effect the next time activity is
    /// indicated without `addMsToThreshold`.
    func updateSilenceThreshold(newThresholdMs: Double) {
        defaultSilenceThresholdMs = newThresholdMs
    }

    /// Permanently stops the countdown and cancels the pending tick.
    func stop() {
        isStopped = true
        progressTask?.cancel()
        progressTask = nil
    }

    deinit {
        stop()
    }
}
|
package/ios/HapticImpact.swift
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
import UIKit
|
|
3
|
-
|
|
4
|
-
/// Wraps an optional `UIImpactFeedbackGenerator` chosen from a `HapticFeedbackStyle`.
class HapticImpact {
    /// `nil` when the requested style is `.none`, in which case `trigger()` does nothing.
    private let impactGenerator: UIImpactFeedbackGenerator?

    /// - Parameter style: Requested feedback strength; `.none` disables haptics.
    init(style: HapticFeedbackStyle) {
        let feedbackStyle: UIImpactFeedbackGenerator.FeedbackStyle?
        switch style {
        case .light:
            feedbackStyle = .light
        case .medium:
            feedbackStyle = .medium
        case .heavy:
            feedbackStyle = .heavy
        case .none:
            feedbackStyle = nil
        }

        guard let feedbackStyle else {
            impactGenerator = nil
            return
        }
        impactGenerator = UIImpactFeedbackGenerator(style: feedbackStyle)
    }

    /// Prepares the generator and plays one impact, if a generator exists.
    func trigger() {
        guard let impactGenerator else { return }
        impactGenerator.prepare()
        impactGenerator.impactOccurred()
    }
}
|
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
import Foundation
|
|
2
|
-
import Speech
|
|
3
|
-
import NitroModules
|
|
4
|
-
import AVFoundation
|
|
5
|
-
|
|
6
|
-
/// Recognizer built on `SFSpeechRecognizer` with a buffer-based recognition request
/// fed from an `AVAudioEngine` input tap.
class LegacySpeechRecognizer: HybridRecognizer {
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?

    /// Disposes the base recognizer, then stops any running session.
    override func dispose() {
        super.dispose()
        stopListening()
    }

    /// Signals the end of audio and asks the recognition task to finish gracefully.
    override func stopListening() {
        super.stopListening()

        recognitionRequest?.endAudio()
        recognitionTask?.finish()
    }

    /// Internal stop requests are handled exactly like an explicit stop.
    override func handleInternalStopTrigger() {
        stopListening()
    }

    /// Requests record permission; on the main queue either starts recognition
    /// or invokes `onPermissionDenied`.
    override func requestMicrophonePermission() {
        AVAudioSession.sharedInstance().requestRecordPermission { [weak self] isGranted in
            DispatchQueue.main.async {
                guard let self else { return }

                guard isGranted else {
                    self.onPermissionDenied?()
                    return
                }
                self.startRecognition()
            }
        }
    }

    /// Creates the recognizer, request and task, then starts capturing microphone audio.
    /// Every failure path reports through `onError` and calls `cleanup`.
    override func startRecognition() {
        guard startRecognitionSetup() else { return }

        let localeIdentifier = config?.locale ?? "en-US"
        guard let recognizer = SFSpeechRecognizer(locale: Locale(identifier: localeIdentifier)),
              recognizer.isAvailable
        else {
            onError?("Speech recognizer is not available")
            cleanup(from: "startRecognition.speechRecognizer")
            return
        }

        recognitionRequest = createRecognitionRequest()
        guard let request = recognitionRequest else {
            onError?("Failed to create recognition request")
            cleanup(from: "startRecognition.recognitionRequest")
            return
        }

        recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
            self?.handleRecognitionUpdate(result: result, error: error)
        }

        audioEngine = AVAudioEngine()
        guard let engine = audioEngine else {
            onError?("Failed to create audio engine")
            cleanup(from: "startRecognition.createAudioEngine")
            return
        }

        let microphone = engine.inputNode
        let hardwareFormat = microphone.outputFormat(forBus: 0)
        microphone.installTap(onBus: 0, bufferSize: 1024, format: hardwareFormat) { [weak self] buffer, _ in
            self?.processTappedBuffer(buffer)
        }

        engine.prepare()
        do {
            try engine.start()
        } catch {
            onError?("Failed to start audio engine: \(error.localizedDescription)")
            cleanup(from: "startRecognition.startAudioEngine")
            return
        }

        startRecognitionFeedback()
    }

    /// Releases the request and task and, if the session was active before the
    /// call, notifies `onRecordingStopped`.
    override func cleanup(from: String) {
        let wasActive = isActive

        super.cleanup(from: "overrider.\(from)")

        recognitionRequest = nil
        recognitionTask = nil

        guard wasActive else { return }
        onRecordingStopped?()
    }

    /// Handles one callback of the recognition task.
    ///
    /// A non-empty transcription is reported through `onResult` (after the
    /// repeating filter unless disabled). A final result or an error triggers
    /// `cleanup`; errors are only reported while not intentionally stopping.
    private func handleRecognitionUpdate(result: SFSpeechRecognitionResult?, error: Error?) {
        if let result {
            trackPartialActivity()

            let rawText = result.bestTranscription.formattedString
            if !rawText.isEmpty {
                let filterDisabled = config?.disableRepeatingFilter ?? false
                let text = filterDisabled ? rawText : repeatingFilter(text: rawText)
                onResult?([text])
            }

            // Task completed - cleanup whether natural or manual stop
            if result.isFinal {
                cleanup(from: "startRecognition.recognitionTask.final")
            }
        }

        if let error {
            if !isStopping {
                onError?("Recognition error: \(error.localizedDescription)")
            }
            cleanup(from: "startRecognition.recognitionTask.error")
        }
    }

    /// Handles one microphone buffer: updates the smoothed level, reports volume,
    /// resets the silence timer on speech-level audio, and feeds the request.
    private func processTappedBuffer(_ buffer: AVAudioPCMBuffer) {
        let (rms, nextLevelSmoothed) = BufferUtil().calcRmsVolume(levelSmoothed: levelSmoothed, buffer: buffer) ?? (nil, nil)

        if let nextLevelSmoothed {
            levelSmoothed = nextLevelSmoothed
            // Round to 6 decimal places before reporting.
            let volume = Double(nextLevelSmoothed * 1_000_000).rounded() / 1_000_000
            onVolumeChange?(volume)
        }

        if let rms, rms > Self.speechRmsThreshold {
            autoStopper?.indicateRecordingActivity(
                from: "rms change",
                addMsToThreshold: nil
            )
        }
        recognitionRequest?.append(buffer)
    }

    /// Builds a partial-results request with the configured contextual strings and,
    /// on iOS 16+, punctuation enabled unless `iosAddPunctuation` is explicitly `false`.
    private func createRecognitionRequest() -> SFSpeechAudioBufferRecognitionRequest {
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        if let hints = config?.contextualStrings, !hints.isEmpty {
            request.contextualStrings = hints
        }

        if #available(iOS 16, *) {
            request.addsPunctuation = config?.iosAddPunctuation != false
        }

        return request
    }
}
|