@gmessier/nitro-speech 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +176 -148
- package/android/build.gradle +0 -1
- package/android/src/main/cpp/cpp-adapter.cpp +5 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +82 -18
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +118 -30
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +35 -24
- package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
- package/ios/Audio/AudioLevelTracker.swift +60 -0
- package/ios/Coordinator.swift +105 -0
- package/ios/Engines/AnalyzerEngine.swift +241 -0
- package/ios/Engines/DictationRuntime.swift +67 -0
- package/ios/Engines/RecognizerEngine.swift +315 -0
- package/ios/Engines/SFSpeechEngine.swift +119 -0
- package/ios/Engines/SpeechRuntime.swift +58 -0
- package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
- package/ios/HybridNitroSpeech.swift +1 -10
- package/ios/HybridRecognizer.swift +142 -191
- package/ios/LocaleManager.swift +73 -0
- package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
- package/ios/Shared/AutoStopper.swift +147 -0
- package/ios/Shared/HapticImpact.swift +24 -0
- package/ios/Shared/Log.swift +41 -0
- package/ios/Shared/Permissions.swift +59 -0
- package/ios/Shared/Utils.swift +58 -0
- package/lib/NitroSpeech.d.ts +2 -0
- package/lib/NitroSpeech.js +2 -0
- package/lib/Recognizer/RecognizerRef.d.ts +7 -0
- package/lib/Recognizer/RecognizerRef.js +16 -0
- package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
- package/lib/Recognizer/SpeechRecognizer.js +9 -0
- package/lib/Recognizer/methods.d.ts +9 -0
- package/lib/Recognizer/methods.js +33 -0
- package/lib/Recognizer/types.d.ts +6 -0
- package/lib/Recognizer/types.js +1 -0
- package/lib/Recognizer/useRecognizer.d.ts +16 -0
- package/lib/Recognizer/useRecognizer.js +71 -0
- package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
- package/lib/Recognizer/useRecognizerIsActive.js +40 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
- package/lib/Recognizer/useVoiceInputVolume.js +52 -0
- package/lib/index.d.ts +7 -0
- package/lib/index.js +7 -0
- package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
- package/lib/specs/NitroSpeech.nitro.js +1 -0
- package/lib/specs/Recognizer.nitro.d.ts +97 -0
- package/lib/specs/Recognizer.nitro.js +1 -0
- package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
- package/lib/specs/SpeechRecognitionConfig.js +1 -0
- package/lib/specs/VolumeChangeEvent.d.ts +31 -0
- package/lib/specs/VolumeChangeEvent.js +1 -0
- package/nitro.json +0 -4
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
- package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +73 -19
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +8 -4
- package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
- package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
- package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
- package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +22 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +211 -69
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +49 -9
- package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +7 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +78 -18
- package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
- package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
- package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
- package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +5 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +18 -7
- package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
- package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
- package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
- package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
- package/package.json +15 -16
- package/src/NitroSpeech.ts +5 -0
- package/src/Recognizer/RecognizerRef.ts +27 -0
- package/src/Recognizer/SpeechRecognizer.ts +10 -0
- package/src/Recognizer/methods.ts +45 -0
- package/src/Recognizer/types.ts +34 -0
- package/src/Recognizer/useRecognizer.ts +87 -0
- package/src/Recognizer/useRecognizerIsActive.ts +49 -0
- package/src/Recognizer/useVoiceInputVolume.ts +65 -0
- package/src/index.ts +13 -182
- package/src/specs/NitroSpeech.nitro.ts +2 -163
- package/src/specs/Recognizer.nitro.ts +113 -0
- package/src/specs/SpeechRecognitionConfig.ts +167 -0
- package/src/specs/VolumeChangeEvent.ts +31 -0
- package/android/proguard-rules.pro +0 -1
- package/ios/AnylyzerTranscriber.swift +0 -331
- package/ios/AutoStopper.swift +0 -69
- package/ios/HapticImpact.swift +0 -32
- package/ios/LegacySpeechRecognizer.swift +0 -161
- package/lib/commonjs/index.js +0 -145
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
- package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/module/index.js +0 -138
- package/lib/module/index.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/specs/NitroSpeech.nitro.js +0 -4
- package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/typescript/index.d.ts +0 -50
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import NitroModules
|
|
3
|
+
import Speech
|
|
4
|
+
|
|
/// The speech-recognition backends the Coordinator can choose between:
/// the iOS 26+ `SpeechTranscriber` and `DictationTranscriber` analyzer
/// modules, and the legacy `SFSpeechRecognizer`-based fallback.
enum RecognizerBackend {
  case speechTranscriber
  case dictationTranscriber
  case sfSpeech
}
|
|
10
|
+
|
|
/// Selects which speech-recognition engine to use for a session.
///
/// `initialize()` resolves the requested locale and builds an ordered list of
/// backend candidates; `getEngine()` instantiates an engine for the head of
/// that list, and `reportEngineFailure()` drops the head so the next
/// `getEngine()` call falls back to the following candidate.
final class Coordinator {
  weak var recognizerDelegate: RecognizerDelegate?
  private var localeManager: LocaleManager?
  /// Backends to try, in priority order; the first element is the current choice.
  private var candidates: [RecognizerBackend] = []
  /// Eagerly constructs the LocaleManager off the init path.
  private var localeTask: Task<Void, Never>?

  init() {
    self.localeTask = Task {
      self.localeManager = await LocaleManager()
    }
  }

  /// Resolves the locale from the delegate's config and computes the
  /// backend candidate list for this session.
  func initialize() async {
    let params = self.recognizerDelegate?.config
    // String(describing:) keeps the same log output while avoiding the
    // "debug description for an optional value" interpolation warning.
    Log.log("[Coordinator] LocaleManager - init (\(String(describing: params?.locale)))")
    if self.localeManager == nil {
      // Await the eager construction started in init() instead of cancelling
      // it: that Task never checks cancellation, so a cancelled copy would
      // still run to completion and could overwrite a freshly created
      // LocaleManager afterwards.
      await self.localeTask?.value
      self.localeTask = nil
      if self.localeManager == nil {
        self.localeManager = await LocaleManager()
      }
    }
    guard let localeManager else { return }
    await localeManager.ensureLocale(localeString: params?.locale)
    self.candidates = []
    guard #available(iOS 26.0, *) else {
      // Pre-iOS 26: only the SFSpeechRecognizer backend exists.
      if localeManager.SFLocale != nil {
        self.candidates = [.sfSpeech]
      }
      return
    }

    if params?.iosPreset == IosPreset.shortform
      || params?.iosAddPunctuation == false
      || params?.iosAtypicalSpeech == true {
      // DictationTranscriber priority: it supports short-form content,
      // disabled punctuation and atypical-speech hints.
      if localeManager.dictationLocale != nil {
        self.candidates.append(.dictationTranscriber)
      }
      if localeManager.speechLocale != nil {
        self.candidates.append(.speechTranscriber)
      }
    } else {
      // SpeechTranscriber priority
      if localeManager.speechLocale != nil {
        self.candidates.append(.speechTranscriber)
      }
      if localeManager.dictationLocale != nil {
        self.candidates.append(.dictationTranscriber)
      }
    }
    // SFSpeech is always the last-resort fallback.
    if localeManager.SFLocale != nil {
      self.candidates.append(.sfSpeech)
    }
    Log.log("[Coordinator] candidates: \(self.candidates)")
  }

  /// Instantiates an engine for the highest-priority remaining candidate.
  /// Returns nil when there is no delegate, no LocaleManager, or no candidate
  /// whose required locale is available.
  func getEngine() -> RecognizerEngine? {
    Log.log("[Coordinator] getEngine")
    guard let recognizerDelegate else { return nil }
    guard let localeManager else { return nil }
    guard let backend = candidates.first else { return nil }
    Log.log("[Coordinator] backend: \(backend)")
    if backend == .sfSpeech, let locale = localeManager.SFLocale {
      Log.log("[Coordinator] SFSpeechEngine Activated")
      return SFSpeechEngine(locale: locale, delegate: recognizerDelegate)
    }
    if #available(iOS 26.0, *) {
      if backend == .speechTranscriber, let locale = localeManager.speechLocale {
        Log.log("[Coordinator] SpeechTranscriber Activated")
        return AnalyzerEngine(
          backend: .speechTranscriber,
          locale: locale,
          delegate: recognizerDelegate
        )
      }
      if backend == .dictationTranscriber, let locale = localeManager.dictationLocale {
        Log.log("[Coordinator] DictationTranscriber Activated")
        return AnalyzerEngine(
          backend: .dictationTranscriber,
          locale: locale,
          delegate: recognizerDelegate
        )
      }
    }
    return nil
  }

  /// Drops the current (failed) candidate so the next getEngine() falls back.
  func reportEngineFailure() {
    // dropFirst() is safe on an empty array, unlike removeFirst().
    self.candidates = Array(self.candidates.dropFirst())
  }

  /// Locale identifiers supported by the available backends, per LocaleManager.
  func getSupportedLocales() -> [String] {
    return localeManager?.supportedLocales ?? []
  }
}
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
import AVFoundation
|
|
4
|
+
|
|
5
|
+
@available(iOS 26.0, *)
|
|
6
|
+
final class AnalyzerEngine: RecognizerEngine {
|
|
7
|
+
private var inputSequence: AsyncStream<AnalyzerInput>?
|
|
8
|
+
private var inputBuilder: AsyncStream<AnalyzerInput>.Continuation?
|
|
9
|
+
private var outputContinuation: AsyncStream<AVAudioPCMBuffer>.Continuation?
|
|
10
|
+
private var analyzer: SpeechAnalyzer?
|
|
11
|
+
private let transcriber: TranscriberRuntime
|
|
12
|
+
|
|
13
|
+
private var audioProducerTask: Task<Void, Never>?
|
|
14
|
+
private var recognizerTask: Task<(), Error>?
|
|
15
|
+
private var lastBatchStartTime: Float64? = nil
|
|
16
|
+
private var resultBatches: [String] = []
|
|
17
|
+
|
|
18
|
+
init(backend: RecognizerBackend, locale: Locale, delegate: RecognizerDelegate) {
|
|
19
|
+
if backend == .speechTranscriber {
|
|
20
|
+
transcriber = SpeechRuntime(with: locale)
|
|
21
|
+
} else {
|
|
22
|
+
transcriber = DictationRuntime(with: locale)
|
|
23
|
+
}
|
|
24
|
+
super.init(locale: locale, delegate: delegate)
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
override func stop() {
|
|
28
|
+
super.stop()
|
|
29
|
+
inputBuilder?.finish()
|
|
30
|
+
|
|
31
|
+
Task { [weak self] in
|
|
32
|
+
guard let self = self else { return }
|
|
33
|
+
|
|
34
|
+
do {
|
|
35
|
+
try await self.analyzer?.finalizeAndFinishThroughEndOfInput()
|
|
36
|
+
} catch {
|
|
37
|
+
self.reportFailure(
|
|
38
|
+
from: "stop.finalizeAndFinishThroughEndOfInput",
|
|
39
|
+
message: "Failed to finalize the end of input",
|
|
40
|
+
type: .onSession
|
|
41
|
+
)
|
|
42
|
+
await self.analyzer?.cancelAndFinishNow()
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
self.cleanup(from: "stopListening")
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
override func prewarm(for type: FailureType) async {
|
|
50
|
+
await super.prewarm(for: type)
|
|
51
|
+
do {
|
|
52
|
+
// Create transcriber and install assets
|
|
53
|
+
try await transcriber.create(config: self.recognizerDelegate?.config)
|
|
54
|
+
}
|
|
55
|
+
catch {
|
|
56
|
+
self.reportFailure(
|
|
57
|
+
from: "prewarm.assets",
|
|
58
|
+
message: "Failed to create transcriber",
|
|
59
|
+
type: type
|
|
60
|
+
)
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
override func startSession() async {
|
|
65
|
+
await super.startSession()
|
|
66
|
+
|
|
67
|
+
// Prepares transcriber and handles errors.
|
|
68
|
+
// On failure, reportFailure triggers cleanup + engine reselection.
|
|
69
|
+
await prewarm(for: .start)
|
|
70
|
+
|
|
71
|
+
// 3. Input sequence
|
|
72
|
+
(inputSequence, inputBuilder) = AsyncStream.makeStream(of: AnalyzerInput.self)
|
|
73
|
+
|
|
74
|
+
let modules = transcriber.getModules()
|
|
75
|
+
// 4. Analyzer
|
|
76
|
+
guard let audioFormat = await SpeechAnalyzer.bestAvailableAudioFormat(
|
|
77
|
+
compatibleWith: modules
|
|
78
|
+
) else {
|
|
79
|
+
self.reportFailure(
|
|
80
|
+
from: "startRecognition.SpeechAnalyzer.bestAvailableAudioFormat",
|
|
81
|
+
message: "Failed to find SpeechAnalyzer audio format",
|
|
82
|
+
type: .start
|
|
83
|
+
)
|
|
84
|
+
return
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
analyzer = SpeechAnalyzer(modules: modules)
|
|
88
|
+
|
|
89
|
+
// 5. Supply audio
|
|
90
|
+
audioProducerTask = Task {
|
|
91
|
+
self.startAudioEngine(
|
|
92
|
+
onBuffer: { [weak self] buffer in
|
|
93
|
+
self?.outputContinuation?.yield(buffer)
|
|
94
|
+
}
|
|
95
|
+
)
|
|
96
|
+
guard let hardwareFormat else { return }
|
|
97
|
+
let stream = AsyncStream(
|
|
98
|
+
AVAudioPCMBuffer.self,
|
|
99
|
+
bufferingPolicy: .unbounded
|
|
100
|
+
) { continuation in
|
|
101
|
+
outputContinuation = continuation
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
let needsConversion =
|
|
105
|
+
hardwareFormat.commonFormat != audioFormat.commonFormat ||
|
|
106
|
+
hardwareFormat.sampleRate != audioFormat.sampleRate ||
|
|
107
|
+
hardwareFormat.channelCount != audioFormat.channelCount
|
|
108
|
+
do {
|
|
109
|
+
guard let converter = AVAudioConverter(
|
|
110
|
+
from: hardwareFormat,
|
|
111
|
+
to: audioFormat
|
|
112
|
+
) else {
|
|
113
|
+
throw NSError()
|
|
114
|
+
}
|
|
115
|
+
for await pcmBuffer in stream {
|
|
116
|
+
if Task.isCancelled { break }
|
|
117
|
+
|
|
118
|
+
let bufferForAnalyzer: AVAudioPCMBuffer
|
|
119
|
+
if needsConversion {
|
|
120
|
+
// Skip analyzing for empty buffers and
|
|
121
|
+
// Throw error if buffers are inconvertable
|
|
122
|
+
guard let convertedBuffer = try AudioBufferConverter.convertBuffer(
|
|
123
|
+
converter: converter,
|
|
124
|
+
audioFormat: audioFormat,
|
|
125
|
+
pcmBuffer: pcmBuffer
|
|
126
|
+
) else {
|
|
127
|
+
continue
|
|
128
|
+
}
|
|
129
|
+
bufferForAnalyzer = convertedBuffer
|
|
130
|
+
} else {
|
|
131
|
+
bufferForAnalyzer = pcmBuffer
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
let input = AnalyzerInput(buffer: bufferForAnalyzer)
|
|
135
|
+
inputBuilder?.yield(input)
|
|
136
|
+
}
|
|
137
|
+
} catch {
|
|
138
|
+
if Task.isCancelled || self.isStopping {
|
|
139
|
+
return
|
|
140
|
+
}
|
|
141
|
+
self.reportFailure(
|
|
142
|
+
from: "startRecognition.audioProducerTask",
|
|
143
|
+
message: "Failed to convert audio format",
|
|
144
|
+
type: .start
|
|
145
|
+
)
|
|
146
|
+
return
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// 7. Handle the results
|
|
151
|
+
recognizerTask = Task {
|
|
152
|
+
do {
|
|
153
|
+
try await transcriber.handleResults(
|
|
154
|
+
onResult: { [weak self] result in
|
|
155
|
+
guard let self else { return }
|
|
156
|
+
self.handleBatch(
|
|
157
|
+
attrString: result.text,
|
|
158
|
+
rangeStart: result.rangeStart,
|
|
159
|
+
isFinal: result.isFinal
|
|
160
|
+
)
|
|
161
|
+
}
|
|
162
|
+
)
|
|
163
|
+
} catch {
|
|
164
|
+
if self.isStopping || error is CancellationError {
|
|
165
|
+
return
|
|
166
|
+
}
|
|
167
|
+
self.reportFailure(
|
|
168
|
+
from: "startRecognition.recognizerTask",
|
|
169
|
+
message: "Failed to retrieve transcriber result",
|
|
170
|
+
type: .onSession
|
|
171
|
+
)
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
do {
|
|
176
|
+
if let inputSequence, let analyzer {
|
|
177
|
+
if let contextualStrings = self.recognizerDelegate?.config?.contextualStrings {
|
|
178
|
+
let context = AnalysisContext()
|
|
179
|
+
context.contextualStrings = [
|
|
180
|
+
AnalysisContext.ContextualStringsTag.general: contextualStrings
|
|
181
|
+
]
|
|
182
|
+
try await analyzer.setContext(context)
|
|
183
|
+
}
|
|
184
|
+
try await analyzer.start(inputSequence: inputSequence)
|
|
185
|
+
}
|
|
186
|
+
} catch {
|
|
187
|
+
self.reportFailure(
|
|
188
|
+
from: "startRecognition.analyzerStart",
|
|
189
|
+
message: "Failed to start analyze input sequence",
|
|
190
|
+
type: .start
|
|
191
|
+
)
|
|
192
|
+
return
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
self.sendFeedbackOnStart()
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
override func cleanup(from: String) {
|
|
199
|
+
super.cleanup(from: "overridden.\(from)")
|
|
200
|
+
|
|
201
|
+
inputSequence = nil
|
|
202
|
+
inputBuilder = nil
|
|
203
|
+
outputContinuation?.finish()
|
|
204
|
+
outputContinuation = nil
|
|
205
|
+
analyzer = nil
|
|
206
|
+
transcriber.clean()
|
|
207
|
+
audioProducerTask?.cancel()
|
|
208
|
+
audioProducerTask = nil
|
|
209
|
+
recognizerTask?.cancel()
|
|
210
|
+
recognizerTask = nil
|
|
211
|
+
lastBatchStartTime = nil
|
|
212
|
+
resultBatches = []
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
private func handleBatch(attrString: AttributedString, rangeStart: CMTime, isFinal: Bool) {
|
|
216
|
+
var newBatch = String(attrString.characters)
|
|
217
|
+
// Ignore all batches without A-z0-9
|
|
218
|
+
if !newBatch.contains(/\w+/) {
|
|
219
|
+
return
|
|
220
|
+
}
|
|
221
|
+
// Track only when transcription is coming
|
|
222
|
+
self.trackPartialActivity()
|
|
223
|
+
|
|
224
|
+
let disableRepeatingFilter = self.recognizerDelegate?.config?.disableRepeatingFilter ?? false
|
|
225
|
+
if !disableRepeatingFilter {
|
|
226
|
+
newBatch = Utils.repeatingFilter(newBatch)
|
|
227
|
+
}
|
|
228
|
+
Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
|
|
229
|
+
if self.resultBatches.isEmpty {
|
|
230
|
+
self.resultBatches.append(newBatch)
|
|
231
|
+
} else if CMTimeGetSeconds(rangeStart) == self.lastBatchStartTime || isFinal {
|
|
232
|
+
Log.log("[2] replace, isFinal: \(isFinal)")
|
|
233
|
+
self.resultBatches[self.resultBatches.count - 1] = newBatch
|
|
234
|
+
} else {
|
|
235
|
+
Log.log("[2] add new batch")
|
|
236
|
+
self.resultBatches.append(newBatch)
|
|
237
|
+
}
|
|
238
|
+
self.lastBatchStartTime = CMTimeGetSeconds(rangeStart)
|
|
239
|
+
self.recognizerDelegate?.result(batches: self.resultBatches)
|
|
240
|
+
}
|
|
241
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Speech
|
|
3
|
+
|
|
4
|
+
@available(iOS 26.0, *)
|
|
5
|
+
final class DictationRuntime: TranscriberRuntime {
|
|
6
|
+
let locale: Locale
|
|
7
|
+
private var transcriber: DictationTranscriber?
|
|
8
|
+
|
|
9
|
+
init(with locale: Locale) {
|
|
10
|
+
self.locale = locale
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
func create(config: SpeechRecognitionConfig?) async throws {
|
|
14
|
+
var dictationTranscriptionOptions: Set<DictationTranscriber.TranscriptionOption> = [
|
|
15
|
+
.punctuation
|
|
16
|
+
]
|
|
17
|
+
if config?.maskOffensiveWords == true {
|
|
18
|
+
dictationTranscriptionOptions.insert(.etiquetteReplacements)
|
|
19
|
+
}
|
|
20
|
+
if config?.iosAddPunctuation == false
|
|
21
|
+
|| config?.iosPreset == IosPreset.shortform {
|
|
22
|
+
dictationTranscriptionOptions.remove(.punctuation)
|
|
23
|
+
}
|
|
24
|
+
var contentHints: Set<DictationTranscriber.ContentHint> = [
|
|
25
|
+
.shortForm,
|
|
26
|
+
.farField,
|
|
27
|
+
]
|
|
28
|
+
if config?.iosAtypicalSpeech == true {
|
|
29
|
+
contentHints.insert(.atypicalSpeech)
|
|
30
|
+
}
|
|
31
|
+
transcriber = DictationTranscriber(
|
|
32
|
+
locale: locale,
|
|
33
|
+
contentHints: contentHints,
|
|
34
|
+
transcriptionOptions: dictationTranscriptionOptions,
|
|
35
|
+
reportingOptions: [.frequentFinalization, .volatileResults],
|
|
36
|
+
attributeOptions: [.audioTimeRange]
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
if let transcriber, let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
|
|
40
|
+
try await installationRequest.downloadAndInstall()
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
func getModules() -> [any SpeechModule] {
|
|
45
|
+
guard let transcriber else { return [] }
|
|
46
|
+
return [transcriber]
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
func handleResults(
|
|
50
|
+
onResult: @escaping (TranscriberResult) -> Void
|
|
51
|
+
) async throws {
|
|
52
|
+
if let transcriber {
|
|
53
|
+
for try await result in transcriber.results {
|
|
54
|
+
onResult(
|
|
55
|
+
TranscriberResult(
|
|
56
|
+
text: result.text,
|
|
57
|
+
rangeStart: result.range.start,
|
|
58
|
+
isFinal: result.isFinal)
|
|
59
|
+
)
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
func clean() {
|
|
65
|
+
transcriber = nil
|
|
66
|
+
}
|
|
67
|
+
}
|