@gmessier/nitro-speech 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +165 -148
- package/android/build.gradle +0 -1
- package/android/src/main/cpp/cpp-adapter.cpp +5 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +80 -16
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +93 -20
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +27 -15
- package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
- package/ios/Audio/AudioLevelTracker.swift +66 -0
- package/ios/Coordinator.swift +105 -0
- package/ios/Engines/AnalyzerEngine.swift +241 -0
- package/ios/Engines/DictationRuntime.swift +67 -0
- package/ios/Engines/RecognizerEngine.swift +312 -0
- package/ios/Engines/SFSpeechEngine.swift +119 -0
- package/ios/Engines/SpeechRuntime.swift +58 -0
- package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
- package/ios/HybridNitroSpeech.swift +1 -10
- package/ios/HybridRecognizer.swift +135 -192
- package/ios/LocaleManager.swift +73 -0
- package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
- package/ios/Shared/AutoStopper.swift +147 -0
- package/ios/Shared/HapticImpact.swift +24 -0
- package/ios/Shared/Log.swift +41 -0
- package/ios/Shared/Permissions.swift +59 -0
- package/ios/Shared/Utils.swift +58 -0
- package/lib/NitroSpeech.d.ts +2 -0
- package/lib/NitroSpeech.js +2 -0
- package/lib/Recognizer/RecognizerRef.d.ts +5 -0
- package/lib/Recognizer/RecognizerRef.js +13 -0
- package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
- package/lib/Recognizer/SpeechRecognizer.js +9 -0
- package/lib/Recognizer/methods.d.ts +8 -0
- package/lib/Recognizer/methods.js +29 -0
- package/lib/Recognizer/types.d.ts +6 -0
- package/lib/Recognizer/types.js +1 -0
- package/lib/Recognizer/useRecognizer.d.ts +16 -0
- package/lib/Recognizer/useRecognizer.js +71 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
- package/lib/Recognizer/useVoiceInputVolume.js +52 -0
- package/lib/index.d.ts +6 -0
- package/lib/index.js +6 -0
- package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
- package/lib/specs/NitroSpeech.nitro.js +1 -0
- package/lib/specs/Recognizer.nitro.d.ts +95 -0
- package/lib/specs/Recognizer.nitro.js +1 -0
- package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
- package/lib/specs/SpeechRecognitionConfig.js +1 -0
- package/lib/specs/VolumeChangeEvent.d.ts +31 -0
- package/lib/specs/VolumeChangeEvent.js +1 -0
- package/nitro.json +2 -6
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +5 -3
- package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +68 -19
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +7 -4
- package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
- package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
- package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
- package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +203 -70
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
- package/nitrogen/generated/ios/NitroSpeechAutolinking.swift +2 -2
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +41 -9
- package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +6 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +66 -18
- package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
- package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
- package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
- package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +17 -7
- package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
- package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
- package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
- package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
- package/package.json +15 -16
- package/src/NitroSpeech.ts +5 -0
- package/src/Recognizer/RecognizerRef.ts +23 -0
- package/src/Recognizer/SpeechRecognizer.ts +10 -0
- package/src/Recognizer/methods.ts +40 -0
- package/src/Recognizer/types.ts +33 -0
- package/src/Recognizer/useRecognizer.ts +85 -0
- package/src/Recognizer/useVoiceInputVolume.ts +65 -0
- package/src/index.ts +6 -182
- package/src/specs/NitroSpeech.nitro.ts +2 -163
- package/src/specs/Recognizer.nitro.ts +110 -0
- package/src/specs/SpeechRecognitionConfig.ts +167 -0
- package/src/specs/VolumeChangeEvent.ts +31 -0
- package/android/proguard-rules.pro +0 -1
- package/ios/AnylyzerTranscriber.swift +0 -331
- package/ios/AutoStopper.swift +0 -69
- package/ios/HapticImpact.swift +0 -32
- package/ios/LegacySpeechRecognizer.swift +0 -161
- package/lib/commonjs/index.js +0 -145
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
- package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/module/index.js +0 -138
- package/lib/module/index.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/specs/NitroSpeech.nitro.js +0 -4
- package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/typescript/index.d.ts +0 -50
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
package/ios/Engines/SFSpeechEngine.swift
@@ -0,0 +1,119 @@
+import Foundation
+import Speech
+import AVFoundation
+
+final class SFSpeechEngine: RecognizerEngine {
+  private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
+  private var recognitionTask: SFSpeechRecognitionTask?
+  private var speechRecognizer: SFSpeechRecognizer?
+
+  private let lg = Lg(prefix: "SFSpeechEngine")
+
+  override func stop() {
+    super.stop()
+    recognitionRequest?.endAudio()
+    recognitionTask?.finish()
+  }
+
+  override func prewarm(for type: FailureType) async {
+    speechRecognizer = SFSpeechRecognizer(
+      locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
+    )
+    if speechRecognizer?.isAvailable != true {
+      self.reportFailure(
+        from: "prewarm",
+        message: "SFSpeechRecognizer is not available",
+        type: type
+      )
+    }
+    await super.prewarm(for: type)
+  }
+
+  override func startSession() async {
+    await super.startSession()
+    lg.log("[startSession.startSession]")
+
+    await prewarm(for: .start)
+    lg.log("[startSession.prewarm]")
+    guard let speechRecognizer else { return }
+
+    recognitionRequest = createRecognitionRequest()
+    lg.log("[startSession.createRecognitionRequest]")
+    guard let recognitionRequest else { return }
+
+    recognitionTask = speechRecognizer.recognitionTask(
+      with: recognitionRequest
+    ) { [weak self] result, error in
+      guard let self else { return }
+
+      if let result = result {
+        var transcription = result.bestTranscription.formattedString
+        if !transcription.isEmpty {
+          // Track only when transcription is coming
+          self.trackPartialActivity()
+
+          let disableRepeatingFilter = self.recognizerDelegate?.config?.disableRepeatingFilter ?? false
+          if !disableRepeatingFilter {
+            transcription = Utils.repeatingFilter(transcription)
+          }
+          // Legacy transcriber collects everything into one batch
+          self.recognizerDelegate?.result(batches: [transcription])
+        }
+
+        if result.isFinal {
+          self.cleanup(from: "startRecognition.recognitionTask.final")
+        }
+      }
+
+      if let error = error {
+        if !self.isStopping {
+          self.reportFailure(
+            from: "startSession.recognitionTask.error",
+            message: "Recognition Error: \(error.localizedDescription)",
+            type: .onSession
+          )
+        } else {
+          self.cleanup(from: "startRecognition.recognitionTask.manualStop")
+        }
+      }
+    }
+    lg.log("[startSession.recognitionTask]")
+
+    self.startAudioEngine(
+      onBuffer: { [weak self] buffer in
+        self?.recognitionRequest?.append(buffer)
+      }
+    )
+    lg.log("[startSession.startAudioEngine]")
+
+    self.sendFeedbackOnStart()
+    lg.log("[startSession.sendFeedbackOnStart]")
+  }
+
+  override func cleanup(from: String) {
+    super.cleanup(from: "overridden.\(from)")
+    recognitionRequest = nil
+    recognitionTask = nil
+    speechRecognizer = nil
+  }
+
+  private func createRecognitionRequest() -> SFSpeechAudioBufferRecognitionRequest {
+    let request = SFSpeechAudioBufferRecognitionRequest()
+    request.shouldReportPartialResults = true
+
+    if let contextualStrings = self.recognizerDelegate?.config?.contextualStrings,
+       !contextualStrings.isEmpty {
+      request.contextualStrings = contextualStrings
+    }
+
+    if #available(iOS 16, *) {
+      if self.recognizerDelegate?.config?.iosAddPunctuation == false {
+        request.addsPunctuation = false
+      } else {
+        request.addsPunctuation = true
+      }
+    }
+
+    return request
+  }
+}
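For orientation, the hunk above routes microphone buffers into an SFSpeechAudioBufferRecognitionRequest via the base engine's onBuffer callback. A minimal self-contained sketch of the same legacy flow, using only standard Apple Speech/AVFoundation APIs (permission prompts and error handling omitted; startDictation is an illustrative name, not part of the package):

import Speech
import AVFoundation

// Returns the task so the caller can keep it alive or cancel it.
func startDictation() throws -> SFSpeechRecognitionTask? {
    let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
    let request = SFSpeechAudioBufferRecognitionRequest()
    request.shouldReportPartialResults = true

    let engine = AVAudioEngine()
    let input = engine.inputNode
    // Feed microphone buffers into the recognition request,
    // like the engine's onBuffer callback in the hunk above.
    input.installTap(onBus: 0, bufferSize: 1024, format: input.outputFormat(forBus: 0)) { buffer, _ in
        request.append(buffer)
    }
    engine.prepare()
    try engine.start()

    return recognizer?.recognitionTask(with: request) { result, error in
        if let result {
            print(result.bestTranscription.formattedString)  // partial or final text
        }
        if result?.isFinal == true || error != nil {
            engine.stop()
            input.removeTap(onBus: 0)
        }
    }
}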
package/ios/Engines/SpeechRuntime.swift
@@ -0,0 +1,58 @@
+import Foundation
+import Speech
+
+@available(iOS 26.0, *)
+final class SpeechRuntime: TranscriberRuntime {
+  let locale: Locale
+  private var transcriber: SpeechTranscriber?
+
+  init(with locale: Locale) {
+    self.locale = locale
+  }
+
+  func create(config: SpeechRecognitionConfig?) async throws {
+    if !SpeechTranscriber.isAvailable {
+      throw NSError()
+    }
+    var speechTranscriptionOptions: Set<SpeechTranscriber.TranscriptionOption> = []
+    if config?.maskOffensiveWords == true {
+      speechTranscriptionOptions.insert(.etiquetteReplacements)
+    }
+    transcriber = SpeechTranscriber(
+      locale: locale,
+      transcriptionOptions: speechTranscriptionOptions,
+      reportingOptions: [.volatileResults, .fastResults],
+      attributeOptions: [.audioTimeRange]
+    )
+
+
+
+    if let transcriber, let installationRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
+      try await installationRequest.downloadAndInstall()
+    }
+  }
+
+  func getModules() -> [any SpeechModule] {
+    guard let transcriber else { return [] }
+    return [transcriber]
+  }
+
+  func handleResults(
+    onResult: @escaping (TranscriberResult) -> Void
+  ) async throws {
+    if let transcriber {
+      for try await result in transcriber.results {
+        onResult(
+          TranscriberResult(
+            text: result.text,
+            rangeStart: result.range.start,
+            isFinal: result.isFinal)
+        )
+      }
+    }
+  }
+
+  func clean() {
+    transcriber = nil
+  }
+}
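The create(config:) method above does two things before any audio flows: it builds a SpeechTranscriber with the requested options, then downloads on-device model assets if they are missing. A condensed sketch of that handshake, reusing only the calls shown in the hunk (iOS 26 Speech APIs assumed exactly as used above; makeTranscriber is an illustrative name):

import Speech

@available(iOS 26.0, *)
func makeTranscriber(locale: Locale) async throws -> SpeechTranscriber {
    let transcriber = SpeechTranscriber(
        locale: locale,
        transcriptionOptions: [],
        reportingOptions: [.volatileResults],
        attributeOptions: []
    )
    // Model assets may not be installed yet; a nil request means nothing to download.
    if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
        try await request.downloadAndInstall()
    }
    return transcriber
}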
package/ios/Engines/TranscriberRuntimeProtocol.swift
@@ -0,0 +1,21 @@
+import Foundation
+import Speech
+
+struct TranscriberResult {
+  let text: AttributedString
+  let rangeStart: CMTime
+  let isFinal: Bool
+}
+
+@available(iOS 26.0, *)
+protocol TranscriberRuntime {
+  var locale: Locale { get }
+
+  func create(config: SpeechRecognitionConfig?) async throws
+
+  func getModules() -> [any SpeechModule]
+
+  func handleResults(onResult: @escaping (TranscriberResult) -> Void) async throws
+
+  func clean() -> Void
+}
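The protocol above defines the lifecycle every iOS 26 runtime follows: create, expose modules, stream results, clean up. A do-nothing conformer makes the shape concrete (NullRuntime is illustrative, not part of the package):

import Foundation
import Speech

@available(iOS 26.0, *)
final class NullRuntime: TranscriberRuntime {
    let locale: Locale
    init(locale: Locale) { self.locale = locale }

    // No transcriber is created, so every step is a no-op.
    func create(config: SpeechRecognitionConfig?) async throws {}
    func getModules() -> [any SpeechModule] { [] }
    func handleResults(onResult: @escaping (TranscriberResult) -> Void) async throws {}
    func clean() {}
}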
package/ios/HybridNitroSpeech.swift
@@ -2,14 +2,5 @@ import Foundation
 import NitroModules
 
 class HybridNitroSpeech : HybridNitroSpeechSpec {
-  var recognizer:
-
-  override init() {
-    if #available(iOS 26.0, *) {
-      recognizer = AnalyzerTranscriber()
-    } else {
-      recognizer = LegacySpeechRecognizer()
-    }
-    super.init()
-  }
+  var recognizer: HybridRecognizerSpec = HybridRecognizer()
 }
package/ios/HybridRecognizer.swift
@@ -1,13 +1,8 @@
 import Foundation
-import Speech
 import NitroModules
-import os.log
-import AVFoundation
 
-class HybridRecognizer: HybridRecognizerSpec
-
-  internal static let defaultAutoFinishRecognitionMs = 8000.0
-  internal static let speechRmsThreshold: Float = 0.005623
+class HybridRecognizer: HybridRecognizerSpec {
+  var config: SpeechRecognitionConfig?
 
   var onReadyForSpeech: (() -> Void)?
   var onRecordingStopped: (() -> Void)?
@@ -15,228 +10,176 @@ class HybridRecognizer: HybridRecognizerSpec {
   var onAutoFinishProgress: ((Double) -> Void)?
   var onError: ((String) -> Void)?
   var onPermissionDenied: (() -> Void)?
-  var onVolumeChange: ((
+  var onVolumeChange: ((VolumeChangeEvent) -> Void)?
 
-
+  private let coordinator = Coordinator()
+  private var paramsHash: String?
+  private var engine: RecognizerEngine?
 
-
-
-
-  internal var isStopping: Bool = false
-  internal var config: SpeechToTextParams?
-  internal var levelSmoothed: Float = 0
-
-  func getIsActive() -> Bool {
-    return self.isActive
+  override init() {
+    super.init()
+    self.coordinator.recognizerDelegate = self
   }
 
-
-
-
-
-
-
-
-
-
-    self.config = params
-
-    switch authStatus {
-    case .authorized:
-      self.requestMicrophonePermission()
-    case .denied, .restricted:
-      self.onPermissionDenied?()
-    case .notDetermined:
-      self.onError?("Speech recognition not determined")
-    @unknown default:
-      self.onError?("Unknown authorization status")
-    }
-  }
+  private let lg = Lg(prefix: "HybridRecognizer")
+
+  @discardableResult
+  func prewarm(defaultParams: SpeechRecognitionConfig?) -> Promise<Void> {
+    return Promise.async(.userInitiated) { [weak self] in
+      // Ensure correct engine is selected based on params and ios version
+      await self?.ensureEngine(params: defaultParams)
+      // try to preload assets and check if speech engine is available on OS level
+      await self?.engine?.prewarm(for: .prewarm)
     }
   }
-
-  func
-
+
+  func startListening(params: SpeechRecognitionConfig?) {
+    Task {
+      // Ensure correct engine is selected based on params and ios version
+      await ensureEngine(params: params)
+      engine?.start()
+    }
   }
 
   func stopListening() {
-
-    isStopping = true
-
-    self.stopHapticFeedback()
+    engine?.stop()
   }
 
-
-
+  func resetAutoFinishTime() {
+    engine?.updateSession(resetTimer: true)
   }
 
   func addAutoFinishTime(additionalTimeMs: Double?) {
-
-
-
-
-
+    if let additionalTimeMs {
+      engine?.updateSession(addMsToTimer: additionalTimeMs)
+    } else {
+      // Reset timer to original baseline.
+      engine?.updateSession(resetTimer: true)
+    }
+  }
+
+  func updateConfig(newConfig: MutableSpeechRecognitionConfig?, resetAutoFinishTime: Bool?) {
+    engine?.updateSession(
+      newConfig: newConfig,
+      resetTimer: resetAutoFinishTime
     )
   }
+
+  func getIsActive() -> Bool {
+    engine?.isActive ?? false
+  }
 
-  func
-
-
-
-
-
-
-
-
-  )
+  func getSupportedLocalesIOS() -> [String] {
+    return self.coordinator.getSupportedLocales()
+  }
+
+  private func ensureEngine(params: SpeechRecognitionConfig?) async {
+    // Remember new params
+    config = params
+    let hash = Utils.hashParams(params)
+    if engine != nil && hash == paramsHash {
+      lg.log("Reuse Engine")
+      // Engine is already correct
+      return
+    }
+    if hash != paramsHash {
+      // Initialize when trying to select new engine with new params
+      await coordinator.initialize()
+      paramsHash = hash
+    }
+    lg.log("hash: \(hash)")
+    // Try to select new engine
+    engine = coordinator.getEngine()
+    if engine == nil {
+      // Only wrong locale can wipe out all candidates
+      self.onError?("No recognition engine available for the requested locale")
+      return
    }
  }
+}
 
-
-
-
-
-
-
-
-
-
-
-
+protocol RecognizerDelegate: AnyObject {
+  var config: SpeechRecognitionConfig? { get }
+  func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?)
+  func reselectEngine(forPrewarm: Bool)
+  func readyForSpeech()
+  func recordingStopped()
+  func result (batches: [String])
+  func autoFinishProgress (timeLeftMs: Double)
+  func error (message: String)
+  func permissionDenied ()
+  func volumeChange (event: VolumeChangeEvent)
+}
+
+extension HybridRecognizer: RecognizerDelegate {
+  func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?) {
+    if let newConfig {
+      config = SpeechRecognitionConfig(
+        locale: config?.locale,
+        contextualStrings: config?.contextualStrings,
+        maskOffensiveWords: config?.maskOffensiveWords,
+        autoFinishRecognitionMs: newConfig.autoFinishRecognitionMs ?? config?.autoFinishRecognitionMs,
+        autoFinishProgressIntervalMs: newConfig.autoFinishProgressIntervalMs ?? config?.autoFinishProgressIntervalMs,
+        resetAutoFinishVoiceSensitivity: newConfig.resetAutoFinishVoiceSensitivity ?? config?.resetAutoFinishVoiceSensitivity,
+        disableRepeatingFilter: newConfig.disableRepeatingFilter ?? config?.disableRepeatingFilter,
+        startHapticFeedbackStyle: newConfig.startHapticFeedbackStyle ?? config?.startHapticFeedbackStyle,
+        stopHapticFeedbackStyle: newConfig.stopHapticFeedbackStyle ?? config?.stopHapticFeedbackStyle,
+        androidFormattingPreferQuality: config?.androidFormattingPreferQuality,
+        androidUseWebSearchModel: config?.androidUseWebSearchModel,
+        androidDisableBatchHandling: config?.androidDisableBatchHandling,
+        iosAddPunctuation: config?.iosAddPunctuation,
+        iosPreset: config?.iosPreset,
+        iosAtypicalSpeech: config?.iosAtypicalSpeech
+      )
     }
-
-    return true
   }
 
-
-    self.
-
-      from: "startListening",
-      addMsToThreshold: nil
-    )
-    onReadyForSpeech?()
-    onResult?([])
+  func readyForSpeech() {
+    self.lg.log("[HR -> onReadyForSpeech]")
+    self.onReadyForSpeech?()
  }
 
-
-
-
-  internal func cleanup(from: String) {
-    logger.info("cleanup called from: \(from)")
-    deinitAutoStop()
-    stopMonitorAppState()
-    stopAudioSession()
-    stopAudioEngine()
-    levelSmoothed = 0
-    isActive = false
-    isStopping = false
-    onVolumeChange?(0)
+  func recordingStopped() {
+    self.lg.log("[onRecordingStopped]")
+    self.onRecordingStopped?()
  }
 
-
-
-
-    }
-    audioEngine?.inputNode.removeTap(onBus: 0)
-    audioEngine = nil
+  func result(batches: [String]) {
+    self.lg.log("[onResult] \(batches)")
+    self.onResult?(batches)
  }
 
-
-
-
-      self.handleInternalStopTrigger()
-    }
-  }
-  internal func stopMonitorAppState () {
-    appStateObserver?.stop()
-    appStateObserver = nil
+  func autoFinishProgress(timeLeftMs: Double) {
+    self.lg.log("[onAutoFinishProgress] \(timeLeftMs)ms")
+    self.onAutoFinishProgress?(timeLeftMs)
  }
 
-
-
-
-      onProgress: { [weak self] timeLeftMs in
-        self?.onAutoFinishProgress?(timeLeftMs)
-      },
-      onTimeout: { [weak self] in
-        self?.handleInternalStopTrigger()
-      }
-    )
-  }
-  internal func deinitAutoStop () {
-    autoStopper?.stop()
-    autoStopper = nil
-  }
-
-  internal func startAudioSession() -> Bool {
-    do {
-      let audioSession = AVAudioSession.sharedInstance()
-      try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
-      // Without this, iOS may suppress haptics while recording.
-      try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
-      try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
-      return true
-    } catch {
-      onError?("Failed to activate audio session: \(error.localizedDescription)")
-      return false
-    }
-  }
-  internal func stopAudioSession () {
-    do {
-      try AVAudioSession.sharedInstance().setActive(false)
-    } catch {
-      logger.info("Failed to deactivate audio session: \(error.localizedDescription)")
-      return
-    }
+  func error(message: String) {
+    self.lg.log("[onError]")
+    self.onError?(message)
  }
 
-
-
-
-    } else {
-      HapticImpact(style: .medium).trigger()
-    }
-  }
-  internal func stopHapticFeedback () {
-    if let hapticStyle = config?.stopHapticFeedbackStyle {
-      HapticImpact(style: hapticStyle).trigger()
-    } else {
-      HapticImpact(style: .medium).trigger()
-    }
+  func permissionDenied() {
+    self.lg.log("[onPermissionDenied]")
+    self.onPermissionDenied?()
  }
 
-
-
-
-      from: "partial results",
-      addMsToThreshold: nil
-    )
-  }
+  func volumeChange(event: VolumeChangeEvent) {
+    // self.lg.log("[onVolumeChange] \(event.rawVolume)")
+    self.onVolumeChange?(event)
  }
-
-
-
-
-  //
-
-
-
-
+
+  func reselectEngine(forPrewarm: Bool) {
+    // Remove failed engine from candidates
+    coordinator.reportEngineFailure()
+    // Reset active engine
+    engine = nil
+    // Try to prewarm with another candidate
+    if forPrewarm {
+      self.prewarm(defaultParams: config)
    } else {
-
-
-      for i in subStrings.indices {
-        if i == 0 { continue }
-        // Always add number-contained strings
-        if #available(iOS 16.0, *), subStrings[i].contains(/\d+/) {
-          joiner += " \(subStrings[i])"
-          continue
-        }
-
-        // Skip consecutive duplicate strings
-        if subStrings[i] == subStrings[i-1] { continue }
-        joiner += " \(subStrings[i])"
+      // Try to start with another candidate
+      self.startListening(params: config)
    }
-    return joiner
  }
 }
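ensureEngine above memoizes the active engine on a hash of the config: same hash, reuse the engine; new hash, reinitialize the coordinator and select again. The caching pattern in isolation, as a generic sketch (EngineCache is illustrative; the package computes the key with Utils.hashParams):

final class EngineCache<Engine> {
    private var cached: (hash: String, engine: Engine)?

    func engine(forHash hash: String, make: () -> Engine) -> Engine {
        if let cached, cached.hash == hash {
            return cached.engine  // config unchanged: reuse the live engine
        }
        let fresh = make()        // config changed: rebuild
        cached = (hash, fresh)
        return fresh
    }
}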
package/ios/LocaleManager.swift
@@ -0,0 +1,73 @@
+import Foundation
+import Speech
+
+final class LocaleManager {
+  private let sfSpeechLocales = SFSpeechRecognizer.supportedLocales().map { $0.identifier }
+  private var speechLocales: [String]
+  private var dictationLocales: [String]
+  var supportedLocales: [String]
+  var SFLocale: Locale?
+  var speechLocale: Locale?
+  var dictationLocale: Locale?
+
+  private var equivalentsCountedFor: String?
+
+  init() async {
+    self.speechLocales = []
+    self.dictationLocales = []
+    self.supportedLocales = sfSpeechLocales
+
+    if #available(iOS 26.0, *) {
+      self.speechLocales = await SpeechTranscriber.supportedLocales.map {
+        $0.identifier
+      }
+      self.dictationLocales = await DictationTranscriber.supportedLocales.map {
+        $0.identifier
+      }
+      Log.log("[Coordinator] sfSpeechLocales: \(self.sfSpeechLocales)")
+      Log.log("[Coordinator] speechLocales: \(self.speechLocales)")
+      Log.log("[Coordinator] dictationLocales: \(self.dictationLocales)")
+      self.supportedLocales = Array(
+        Set(sfSpeechLocales)
+          .union(Set(speechLocales))
+          .union(Set(dictationLocales))
+      )
+    }
+  }
+
+  func ensureLocale(localeString: String?) async {
+    let identifier = localeString ?? "en-US"
+    if self.equivalentsCountedFor == identifier {
+      // All locales has been counted already, might be nil, but use them
+      Log.log("[Coordinator] ensureLocale: \(identifier) -> Already counted ")
+      return
+    }
+    if #available(iOS 26.0, *) {
+      let speechEquivalent = await SpeechTranscriber.supportedLocale(
+        equivalentTo: Locale(identifier: identifier)
+      )?.identifier
+      if let speechEquivalent, speechLocales.contains(speechEquivalent) {
+        self.speechLocale = Locale(identifier: speechEquivalent)
+      } else {
+        self.speechLocale = nil
+      }
+
+      let dictationEquivalent = await DictationTranscriber.supportedLocale(
+        equivalentTo: Locale(identifier: identifier)
+      )?.identifier
+      if let dictationEquivalent, self.dictationLocales.contains(dictationEquivalent) {
+        self.dictationLocale = Locale(identifier: dictationEquivalent)
+      } else {
+        self.dictationLocale = nil
+      }
+    }
+    if sfSpeechLocales.contains(identifier) {
+      self.SFLocale = Locale(identifier: identifier)
+    } else {
+      self.SFLocale = nil
+    }
+    self.equivalentsCountedFor = identifier
+    Log.log("[Coordinator] equivalents: speechLocale: \(self.speechLocale?.identifier), dictationLocale: \(self.dictationLocale?.identifier), SFLocale: \(self.SFLocale?.identifier)")
+    Log.log("[Coordinator] ensureLocale: \(identifier) -> New")
+  }
+}
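supportedLocales above is the union of three locale sets: the legacy SFSpeechRecognizer list plus, on iOS 26, the SpeechTranscriber and DictationTranscriber lists. The same computation condensed into one function, using only the Apple APIs already called in the hunk (allSupportedLocaleIdentifiers is an illustrative name):

import Speech

@available(iOS 26.0, *)
func allSupportedLocaleIdentifiers() async -> [String] {
    // Legacy recognizer list is synchronous; the iOS 26 lists are async.
    let sf = SFSpeechRecognizer.supportedLocales().map(\.identifier)
    let speech = await SpeechTranscriber.supportedLocales.map(\.identifier)
    let dictation = await DictationTranscriber.supportedLocales.map(\.identifier)
    return Array(Set(sf).union(speech).union(dictation))
}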