@gmessier/nitro-speech 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +64 -11
  2. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HapticImpact.kt +11 -1
  3. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +12 -6
  4. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +73 -7
  5. package/ios/AnylyzerTranscriber.swift +331 -0
  6. package/ios/AutoStopper.swift +9 -10
  7. package/ios/BufferUtil.swift +80 -0
  8. package/ios/HapticImpact.swift +12 -3
  9. package/ios/HybridNitroSpeech.swift +10 -1
  10. package/ios/HybridRecognizer.swift +139 -167
  11. package/ios/LegacySpeechRecognizer.swift +161 -0
  12. package/lib/commonjs/index.js +54 -5
  13. package/lib/commonjs/index.js.map +1 -1
  14. package/lib/module/index.js +52 -3
  15. package/lib/module/index.js.map +1 -1
  16. package/lib/tsconfig.tsbuildinfo +1 -1
  17. package/lib/typescript/index.d.ts +25 -8
  18. package/lib/typescript/index.d.ts.map +1 -1
  19. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +24 -12
  20. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +1 -1
  21. package/nitrogen/generated/android/c++/JHapticFeedbackStyle.hpp +3 -0
  22. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +22 -0
  23. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +3 -0
  24. package/nitrogen/generated/android/c++/JSpeechToTextParams.hpp +4 -4
  25. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HapticFeedbackStyle.kt +2 -1
  26. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -0
  27. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +3 -3
  28. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +24 -0
  29. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +15 -0
  30. package/nitrogen/generated/ios/swift/HapticFeedbackStyle.swift +4 -0
  31. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +2 -0
  32. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +44 -0
  33. package/nitrogen/generated/ios/swift/SpeechToTextParams.swift +6 -6
  34. package/nitrogen/generated/shared/c++/HapticFeedbackStyle.hpp +4 -0
  35. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +3 -0
  36. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +3 -0
  37. package/nitrogen/generated/shared/c++/SpeechToTextParams.hpp +5 -5
  38. package/package.json +7 -7
  39. package/src/index.ts +59 -2
  40. package/src/specs/NitroSpeech.nitro.ts +25 -12
@@ -0,0 +1,331 @@
1
+ import Foundation
2
+ import Speech
3
+ import NitroModules
4
+ import os.log
5
+ import AVFoundation
6
+
7
@available(iOS 26.0, *)
class AnalyzerTranscriber: HybridRecognizer {
  /// Stream of audio chunks consumed by the SpeechAnalyzer.
  private var inputSequence: AsyncStream<AnalyzerInput>?
  private var inputBuilder: AsyncStream<AnalyzerInput>.Continuation?
  /// Feeds raw microphone buffers from the engine tap into the producer task.
  private var outputContinuation: AsyncStream<AVAudioPCMBuffer>.Continuation?
  private var analyzer: SpeechAnalyzer?
  private var speechTranscriber: SpeechTranscriber?
  private var dictationTranscriber: DictationTranscriber?
  private var audioProducerTask: Task<Void, Never>?
  private var recognizerTask: Task<(), Error>?
  /// Start time (seconds) of the last result batch; an equal start time means
  /// the incoming (volatile) result replaces the previous batch.
  private var lastBatchStartTime: Float64? = nil
  private var resultBatches: [String] = []
  /// Stateless helper, hoisted so the real-time audio tap does not allocate
  /// a new instance on every callback.
  private let bufferUtil = BufferUtil()

  /// Builds an NSError with a real domain and description so that
  /// `error.localizedDescription` reported through `onError` is meaningful
  /// (a bare `NSError()` has no domain and yields a useless message).
  private func makeError(_ message: String) -> NSError {
    NSError(
      domain: "NitroSpeech.AnalyzerTranscriber",
      code: -1,
      userInfo: [NSLocalizedDescriptionKey: message]
    )
  }

  override func dispose() {
    super.dispose()
    self.stopListening()
    self.deallocAssets()
  }

  override func stopListening() {
    super.stopListening()
    // Closing the input stream lets the analyzer drain any remaining audio.
    inputBuilder?.finish()

    Task { [weak self] in
      guard let self = self else { return }

      do {
        try await self.analyzer?.finalizeAndFinishThroughEndOfInput()
      } catch {
        self.onError?("Analyzer finalize failed during stop: \(error.localizedDescription)")
        await self.analyzer?.cancelAndFinishNow()
      }

      self.cleanup(from: "stopListening")
    }
  }

  override func handleInternalStopTrigger() {
    self.stopListening()
  }

  override func requestMicrophonePermission() {
    AVAudioApplication.requestRecordPermission { [weak self] granted in
      Task { @MainActor in
        guard let self = self else { return }

        if granted {
          await self.startRecognition()
        } else {
          self.onPermissionDenied?()
        }
      }
    }
  }

  override func startRecognition() async {
    guard self.startRecognitionSetup() else { return }

    // 1. Modules
    let requestedLocale = config?.locale ?? "en-US"
    let supportedLocale = await SpeechTranscriber.supportedLocale(
      equivalentTo: Locale(identifier: requestedLocale)
    )
    if supportedLocale == nil {
      // Include the locale that failed so the JS side can report it.
      onError?("Unsupported locale name \(requestedLocale): en-US is used instead as default")
    }
    let locale = supportedLocale ?? Locale(identifier: "en-US")
    var speechTranscriptionOptions: Set<SpeechTranscriber.TranscriptionOption> = []
    if config?.maskOffensiveWords == true {
      speechTranscriptionOptions.insert(.etiquetteReplacements)
    }
    speechTranscriber = SpeechTranscriber(
      locale: locale,
      transcriptionOptions: speechTranscriptionOptions,
      reportingOptions: [.volatileResults, .fastResults],
      attributeOptions: [.audioTimeRange]
    )
    if speechTranscriber == nil || !SpeechTranscriber.isAvailable {
      // Fall back to DictationTranscriber. Nil out the (possibly created but
      // unavailable) SpeechTranscriber so module selection below — and the
      // result loop — consistently pick the dictation path.
      speechTranscriber = nil
      // Punctuation is true by default
      var dictationTranscriptionOptions: Set<DictationTranscriber.TranscriptionOption> = [
        .punctuation
      ]
      if config?.maskOffensiveWords == true {
        dictationTranscriptionOptions.insert(.etiquetteReplacements)
      }
      if config?.iosAddPunctuation == false {
        dictationTranscriptionOptions.remove(.punctuation)
      }
      dictationTranscriber = DictationTranscriber(
        locale: locale,
        contentHints: [.shortForm],
        transcriptionOptions: dictationTranscriptionOptions,
        reportingOptions: [.frequentFinalization, .volatileResults],
        attributeOptions: [.audioTimeRange]
      )
    }

    var modules: [any SpeechModule]
    if let speechTranscriber {
      modules = [speechTranscriber]
      logger.info("[SpeechTranscriber] Activated")
    } else if let dictationTranscriber {
      modules = [dictationTranscriber]
      logger.info("[DictationTranscriber] Activated")
    } else {
      onError?("Failed to create Transcriber")
      self.cleanup(from: "startRecognition.Transcriber")
      return
    }

    // 2. Assets management
    guard await ensureAssetInventory(modules: modules) else {
      onError?("Speech assets installation failed")
      self.cleanup(from: "startRecognition.ensureAssetInventory")
      return
    }

    // 3. Input sequence
    (inputSequence, inputBuilder) = AsyncStream.makeStream(of: AnalyzerInput.self)

    // 4. Analyzer
    guard let audioFormat = await SpeechAnalyzer.bestAvailableAudioFormat(
      compatibleWith: modules
    ) else {
      onError?("Could not find SpeechAnalyzer audio format")
      self.cleanup(from: "startRecognition.SpeechAnalyzer.bestAvailableAudioFormat")
      return
    }

    analyzer = SpeechAnalyzer(modules: modules)

    // 5. Supply audio
    audioProducerTask = Task {
      do {
        audioEngine = AVAudioEngine()
        guard let audioEngine = audioEngine else {
          throw makeError("Failed to create AVAudioEngine")
        }
        let hardwareFormat = audioEngine.inputNode.outputFormat(forBus: 0)

        // Create the buffer stream BEFORE installing the tap and starting the
        // engine, so the first tap callbacks never yield into a nil
        // continuation (which silently dropped the initial buffers).
        let stream = AsyncStream(AVAudioPCMBuffer.self, bufferingPolicy: .unbounded) { continuation in
          outputContinuation = continuation
        }

        audioEngine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: hardwareFormat) { [weak self] buffer, time in
          guard let self else { return }
          let (rms, nextLevelSmoothed) = self.bufferUtil.calcRmsVolume(levelSmoothed: levelSmoothed, buffer: buffer) ?? (nil, nil)

          if let nextLevelSmoothed {
            levelSmoothed = nextLevelSmoothed
            // Round to 6 decimal places before crossing the JS bridge.
            let volume = Double(nextLevelSmoothed * 1_000_000).rounded() / 1_000_000
            onVolumeChange?(volume)
          }

          // Loud-enough audio resets the silence-based auto-stop timer.
          if let rms, rms > Self.speechRmsThreshold {
            self.autoStopper?.indicateRecordingActivity(
              from: "rms change",
              addMsToThreshold: nil
            )
          }
          outputContinuation?.yield(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()

        let needsConversion =
          hardwareFormat.commonFormat != audioFormat.commonFormat ||
          hardwareFormat.sampleRate != audioFormat.sampleRate ||
          hardwareFormat.channelCount != audioFormat.channelCount

        // Only build the converter when the formats actually differ; the old
        // code created it unconditionally and threw even when no conversion
        // would ever be performed.
        var converter: AVAudioConverter? = nil
        if needsConversion {
          guard let created = AVAudioConverter(from: hardwareFormat, to: audioFormat) else {
            throw makeError("Could not create AVAudioConverter for the analyzer audio format")
          }
          converter = created
        }

        for await pcmBuffer in stream {
          if Task.isCancelled { break }

          let bufferForAnalyzer: AVAudioPCMBuffer
          if let converter {
            // Skip analyzing for empty buffers and
            // throw an error if buffers are inconvertible
            guard let convertedBuffer = try self.bufferUtil.convertBuffer(
              converter: converter,
              audioFormat: audioFormat,
              pcmBuffer: pcmBuffer
            ) else {
              continue
            }
            bufferForAnalyzer = convertedBuffer
          } else {
            bufferForAnalyzer = pcmBuffer
          }

          let input = AnalyzerInput(buffer: bufferForAnalyzer)
          inputBuilder?.yield(input)
        }
      } catch {
        // Expected teardown paths are not surfaced as errors.
        if Task.isCancelled || self.isStopping {
          return
        }
        onError?("Audio producer failed while capturing microphone input: \(error.localizedDescription)")
        self.cleanup(from: "startRecognition.audioProducerTask")
        return
      }
    }

    // 7. Handle the results
    recognizerTask = Task {
      do {
        if let speechTranscriber {
          for try await result in speechTranscriber.results {
            self.trackPartialActivity()
            self.handleBatches(
              attrString: result.text,
              rangeStart: result.range.start,
              isFinal: result.isFinal
            )
          }
        } else if let dictationTranscriber {
          for try await result in dictationTranscriber.results {
            self.trackPartialActivity()
            self.handleBatches(
              attrString: result.text,
              rangeStart: result.range.start,
              isFinal: result.isFinal
            )
          }
        }
      } catch {
        if self.isStopping || error is CancellationError {
          return
        }
        onError?("Transcriber results stream failed: \(error.localizedDescription)")
        self.cleanup(from: "startRecognition.recognizerTask")
      }
    }

    do {
      if let inputSequence, let analyzer {
        // Bias recognition toward caller-supplied vocabulary, if any.
        if let contextualStrings = config?.contextualStrings {
          let context = AnalysisContext()
          context.contextualStrings = [
            AnalysisContext.ContextualStringsTag.general: contextualStrings
          ]
          try await analyzer.setContext(context)
        }
        try await analyzer.start(inputSequence: inputSequence)
      }
    } catch {
      onError?("Analyzer failed to start input sequence: \(error.localizedDescription)")
      self.cleanup(from: "startRecognition.analyzerStart")
      return
    }

    self.startRecognitionFeedback()
  }

  /// Tears down all transcription state. Safe to call repeatedly; fires
  /// `onRecordingStopped` only when a recording was actually active.
  override func cleanup(from: String) {
    let wasActive = isActive

    super.cleanup(from: "overridden.\(from)")

    inputSequence = nil
    inputBuilder = nil
    outputContinuation?.finish()
    outputContinuation = nil
    analyzer = nil
    speechTranscriber = nil
    dictationTranscriber = nil
    audioProducerTask?.cancel()
    audioProducerTask = nil
    recognizerTask?.cancel()
    recognizerTask = nil
    lastBatchStartTime = nil
    resultBatches = []

    if wasActive {
      onRecordingStopped?()
    }
  }

  /// Downloads/install speech model assets for the given modules if needed.
  /// - Returns: `true` on success (or when nothing had to be installed).
  private func ensureAssetInventory(modules: [any SpeechModule]) async -> Bool {
    do {
      if let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: modules) {
        try await installationRequest.downloadAndInstall()
      }
      return true
    }
    catch {
      return false
    }
  }

  /// Releases every locale this process reserved in the shared asset inventory.
  private func deallocAssets() {
    Task {
      let reserved = await AssetInventory.reservedLocales
      for l in reserved {
        await AssetInventory.release(reservedLocale: l)
      }
    }
  }

  /// Merges a transcriber result into `resultBatches` and notifies `onResult`.
  /// Results sharing a start time (or marked final) replace the last batch;
  /// otherwise a new batch is appended.
  private func handleBatches(attrString: AttributedString, rangeStart: CMTime, isFinal: Bool) {
    var newBatch = String(attrString.characters)
    // Ignore batches with no word characters (regex \w: letters, digits, underscore)
    if !newBatch.contains(/\w+/) {
      return
    }
    let disableRepeatingFilter = config?.disableRepeatingFilter ?? false
    if !disableRepeatingFilter {
      newBatch = self.repeatingFilter(text: newBatch)
    }
    logger.info("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
    if resultBatches.isEmpty {
      resultBatches.append(newBatch)
    } else if CMTimeGetSeconds(rangeStart) == lastBatchStartTime || isFinal {
      logger.info("[2] replace, isFinal: \(isFinal)")
      resultBatches[resultBatches.count - 1] = newBatch
    } else {
      logger.info("[2] add new batch")
      resultBatches.append(newBatch)
    }
    lastBatchStartTime = CMTimeGetSeconds(rangeStart)
    self.onResult?(resultBatches)
  }
}
@@ -9,7 +9,7 @@ class AutoStopper {
9
9
  private var defaultSilenceThresholdMs: Double
10
10
  private var silenceThresholdMs: Double
11
11
 
12
- private var progressWorkItem: DispatchWorkItem?
12
+ private var progressTask: Task<Void, Never>?
13
13
  private var elapsedMs: Double = 0
14
14
  private var isStopped = false
15
15
 
@@ -21,7 +21,7 @@ class AutoStopper {
21
21
  }
22
22
 
23
23
  func indicateRecordingActivity(from: String, addMsToThreshold: Double?) {
24
- logger.info("indicateRecordingActivity: \(from)")
24
+ logger.info("[IndicateRecordingActivity]: \(from)")
25
25
  if let addMsToThreshold = addMsToThreshold {
26
26
  self.silenceThresholdMs = addMsToThreshold + self.silenceThresholdMs - self.elapsedMs
27
27
  } else {
@@ -29,7 +29,7 @@ class AutoStopper {
29
29
  }
30
30
 
31
31
  self.onProgress(self.silenceThresholdMs)
32
- progressWorkItem?.cancel()
32
+ progressTask?.cancel()
33
33
  self.elapsedMs = 0
34
34
  if isStopped { return }
35
35
 
@@ -37,11 +37,12 @@ class AutoStopper {
37
37
  }
38
38
 
39
39
  private func scheduleNextTick() {
40
- let item = DispatchWorkItem { [weak self] in
41
- guard let self = self, !self.isStopped else { return }
40
+ progressTask = Task { @MainActor [weak self] in
41
+ try? await Task.sleep(nanoseconds: 1_000_000_000)
42
+ guard let self = self, !self.isStopped, !Task.isCancelled else { return }
42
43
 
43
44
  self.elapsedMs += 1000
44
- let timeLeftMs = silenceThresholdMs - elapsedMs
45
+ let timeLeftMs = self.silenceThresholdMs - self.elapsedMs
45
46
 
46
47
  if timeLeftMs <= 0 {
47
48
  self.onTimeout()
@@ -50,8 +51,6 @@ class AutoStopper {
50
51
  self.scheduleNextTick()
51
52
  }
52
53
  }
53
- progressWorkItem = item
54
- DispatchQueue.main.asyncAfter(deadline: .now() + 1.0, execute: item)
55
54
  }
56
55
 
57
56
  func updateSilenceThreshold(newThresholdMs: Double) {
@@ -60,8 +59,8 @@ class AutoStopper {
60
59
 
61
60
  func stop() {
62
61
  isStopped = true
63
- progressWorkItem?.cancel()
64
- progressWorkItem = nil
62
+ progressTask?.cancel()
63
+ progressTask = nil
65
64
  }
66
65
 
67
66
  deinit {
@@ -0,0 +1,80 @@
1
+ import Foundation
2
+ import AVFoundation
3
+ import Accelerate
4
+
5
/// Box that lets an AVAudioPCMBuffer cross a `Sendable` closure boundary.
/// `@unchecked` is acceptable here because the buffer is handed over once and
/// never mutated concurrently — presumably guaranteed by the single-producer
/// converter input block; confirm if usage changes.
private final class SendablePCMBufferBox: @unchecked Sendable {
  let buffer: AVAudioPCMBuffer

  init(_ buffer: AVAudioPCMBuffer) { self.buffer = buffer }
}
12
+
13
class BufferUtil {
  private static let meterMinDb: Float = -70 // silence floor
  private static let meterMaxDb: Float = -10 // loud speech ceiling
  private static let meterAttack: Float = 0.35 // rise speed
  private static let meterRelease: Float = 0.08 // fall speed

  /// Builds a descriptive NSError; a bare `NSError()` has no domain, which is
  /// invalid per the NSError contract and makes `localizedDescription`
  /// meaningless to callers that surface the error.
  private func makeError(_ message: String) -> NSError {
    NSError(
      domain: "NitroSpeech.BufferUtil",
      code: -1,
      userInfo: [NSLocalizedDescriptionKey: message]
    )
  }

  /// Computes the RMS level of the buffer's first channel plus a smoothed
  /// 0...1 meter value (fast attack, slow release).
  /// - Parameters:
  ///   - levelSmoothed: Previous smoothed meter value to continue from.
  ///   - buffer: PCM buffer; must expose float channel data.
  /// - Returns: `(rms, nextLevelSmoothed)`, or `nil` when the buffer has no
  ///   float channel data.
  func calcRmsVolume(
    levelSmoothed: Float,
    buffer: AVAudioPCMBuffer
  ) -> (Float, Float)? {
    guard let samples = buffer.floatChannelData?[0] else { return nil }

    // 1) RMS over the frame (vDSP: root-mean-square of vector)
    let frameL = Int(buffer.frameLength)
    var rms: Float = 0

    vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameL))

    // 2) RMS -> dBFS (small epsilon avoids log10(0))
    let db = 20 * log10(rms + 0.00001)

    // 3) Normalize dB to 0...1
    let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
    let normalized = max(0, min(1, raw))

    // 4) Smooth (fast attack, slow release)
    let coeff = normalized > levelSmoothed ? Self.meterAttack : Self.meterRelease
    let nextLevelSmoothed = levelSmoothed + coeff * (normalized - levelSmoothed)

    return (rms, nextLevelSmoothed)
  }

  /// Converts one PCM buffer to `audioFormat` via a single-shot pull from the
  /// converter.
  /// - Returns: The converted buffer, or `nil` when the conversion produced no
  ///   frames (e.g. an empty input buffer) — callers should just skip it.
  /// - Throws: The converter's NSError, or a descriptive NSError when the
  ///   destination buffer cannot be allocated.
  func convertBuffer(
    converter: AVAudioConverter,
    audioFormat: AVAudioFormat,
    pcmBuffer: AVAudioPCMBuffer
  ) throws -> AVAudioPCMBuffer? {
    // Size the destination for the resampled frame count, never below the
    // source length and never zero.
    let resampledCapacity = AVAudioFrameCount(
      (Double(pcmBuffer.frameLength) * (audioFormat.sampleRate / pcmBuffer.format.sampleRate)).rounded(.up)
    )
    let convertedCapacity = max(pcmBuffer.frameLength, max(1, resampledCapacity))
    guard let convertedBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: convertedCapacity) else {
      throw makeError("Could not allocate converted PCM buffer")
    }

    let inputBufferBox = SendablePCMBufferBox(pcmBuffer)
    var didProvideInput = false
    var conversionError: NSError?
    // Provide the source buffer exactly once; subsequent pulls get .noDataNow.
    let status = converter.convert(to: convertedBuffer, error: &conversionError) { _, outStatus in
      if didProvideInput {
        outStatus.pointee = .noDataNow
        return nil
      }
      didProvideInput = true
      outStatus.pointee = .haveData
      return inputBufferBox.buffer
    }
    if let conversionError {
      throw conversionError
    }
    guard status == .haveData || status == .inputRanDry else {
      return nil
    }
    guard convertedBuffer.frameLength > 0 else {
      return nil
    }
    return convertedBuffer
  }
}
@@ -2,9 +2,13 @@ import Foundation
2
2
  import UIKit
3
3
 
4
4
class HapticImpact {
  /// Underlying UIKit generator; nil when haptics are disabled via `.none`.
  private let impactGenerator: UIImpactFeedbackGenerator?

  init(style: HapticFeedbackStyle) {
    guard style != HapticFeedbackStyle.none else {
      self.impactGenerator = nil
      return
    }
    let feedbackStyle: UIImpactFeedbackGenerator.FeedbackStyle
    switch style {
    case .light:
      feedbackStyle = .light
    case .medium:
      feedbackStyle = .medium
    case .heavy:
      feedbackStyle = .heavy
    case .none:
      // Unreachable — handled by the guard above.
      feedbackStyle = .medium
    }
    self.impactGenerator = UIImpactFeedbackGenerator(style: feedbackStyle)
  }

  /// Fires one impact haptic; no-op when the style was `.none`.
  func trigger() {
    guard let impactGenerator else { return }
    impactGenerator.prepare()
    impactGenerator.impactOccurred()
  }
}
@@ -2,5 +2,14 @@ import Foundation
2
2
  import NitroModules
3
3
 
4
4
class HybridNitroSpeech : HybridNitroSpeechSpec {
  /// Recognizer backing this module, selected once at construction time.
  var recognizer: any HybridRecognizerSpec

  override init() {
    // SpeechAnalyzer-based transcription needs iOS 26; older systems get the
    // SFSpeechRecognizer-based implementation.
    if #unavailable(iOS 26.0) {
      recognizer = LegacySpeechRecognizer()
    } else {
      recognizer = AnalyzerTranscriber()
    }
    super.init()
  }
}