@iternio/react-native-auto-play 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/margelo/nitro/swe/iternio/reactnativeautoplay/HybridAutoPlay.kt +0 -89
- package/android/src/main/java/com/margelo/nitro/swe/iternio/reactnativeautoplay/HybridVoice.kt +97 -0
- package/android/src/main/java/com/margelo/nitro/swe/iternio/reactnativeautoplay/VoiceInputManager.kt +286 -20
- package/android/src/main/java/com/margelo/nitro/swe/iternio/reactnativeautoplay/utils/ThreadUtil.kt +6 -13
- package/ios/hybrid/HybridAutoPlay.swift +2 -47
- package/ios/hybrid/HybridVoice.swift +65 -0
- package/ios/utils/VoiceInputManager.swift +144 -40
- package/lib/HybridAutoPlay.d.ts +2 -0
- package/lib/HybridAutoPlay.js +2 -0
- package/lib/hooks/useIsAutoPlayFocused.d.ts +7 -0
- package/lib/hooks/useIsAutoPlayFocused.js +20 -0
- package/lib/hybrid/HybridVoice.d.ts +52 -0
- package/lib/hybrid/HybridVoice.js +52 -0
- package/lib/hybrid.d.ts +2 -0
- package/lib/hybrid.js +2 -0
- package/lib/index.d.ts +3 -1
- package/lib/index.js +2 -1
- package/lib/specs/AutoPlay.nitro.d.ts +0 -29
- package/lib/specs/AutomotivePermissionRequestTemplate.d.ts +11 -0
- package/lib/specs/AutomotivePermissionRequestTemplate.js +1 -0
- package/lib/specs/AutomotivePermissionRequestTemplate.nitro.d.ts +11 -0
- package/lib/specs/AutomotivePermissionRequestTemplate.nitro.js +1 -0
- package/lib/specs/Voice.nitro.d.ts +11 -0
- package/lib/specs/Voice.nitro.js +1 -0
- package/lib/templates/AutomotivePermissionRequestTemplate.d.ts +23 -0
- package/lib/templates/AutomotivePermissionRequestTemplate.js +18 -0
- package/lib/templates/MapTemplate.js +1 -6
- package/lib/types/Glyphmap.d.ts +4105 -0
- package/lib/types/Glyphmap.js +4105 -0
- package/lib/types/Voice.d.ts +16 -0
- package/lib/types/Voice.js +1 -0
- package/nitro.json +10 -0
- package/nitrogen/generated/android/ReactNativeAutoPlay+autolinking.cmake +2 -0
- package/nitrogen/generated/android/ReactNativeAutoPlayOnLoad.cpp +18 -0
- package/nitrogen/generated/android/c++/JFunc_void_VoiceInputChunk.hpp +81 -0
- package/nitrogen/generated/android/c++/JHybridAutoPlaySpec.cpp +0 -43
- package/nitrogen/generated/android/c++/JHybridAutoPlaySpec.hpp +0 -4
- package/nitrogen/generated/android/c++/JHybridVoiceSpec.cpp +104 -0
- package/nitrogen/generated/android/c++/JHybridVoiceSpec.hpp +66 -0
- package/nitrogen/generated/android/c++/JVoiceInputChunk.hpp +64 -0
- package/nitrogen/generated/android/c++/JVoiceInputResult.hpp +64 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/swe/iternio/reactnativeautoplay/Func_void_VoiceInputChunk.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/swe/iternio/reactnativeautoplay/HybridAutoPlaySpec.kt +0 -17
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/swe/iternio/reactnativeautoplay/HybridVoiceSpec.kt +72 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/swe/iternio/reactnativeautoplay/VoiceInputChunk.kt +41 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/swe/iternio/reactnativeautoplay/VoiceInputResult.kt +41 -0
- package/nitrogen/generated/ios/ReactNativeAutoPlay-Swift-Cxx-Bridge.cpp +41 -16
- package/nitrogen/generated/ios/ReactNativeAutoPlay-Swift-Cxx-Bridge.hpp +201 -126
- package/nitrogen/generated/ios/ReactNativeAutoPlay-Swift-Cxx-Umbrella.hpp +11 -0
- package/nitrogen/generated/ios/ReactNativeAutoPlayAutolinking.mm +8 -0
- package/nitrogen/generated/ios/ReactNativeAutoPlayAutolinking.swift +12 -0
- package/nitrogen/generated/ios/c++/HybridAutoPlaySpecSwift.hpp +0 -34
- package/nitrogen/generated/ios/c++/HybridVoiceSpecSwift.cpp +11 -0
- package/nitrogen/generated/ios/c++/HybridVoiceSpecSwift.hpp +116 -0
- package/nitrogen/generated/ios/swift/Func_void_VoiceInputChunk.swift +46 -0
- package/nitrogen/generated/ios/swift/{Func_void_std__shared_ptr_ArrayBuffer_.swift → Func_void_VoiceInputResult.swift} +10 -10
- package/nitrogen/generated/ios/swift/Func_void_bool.swift +5 -5
- package/nitrogen/generated/ios/swift/HybridAutoPlaySpec.swift +0 -4
- package/nitrogen/generated/ios/swift/HybridAutoPlaySpec_cxx.swift +0 -82
- package/nitrogen/generated/ios/swift/HybridVoiceSpec.swift +58 -0
- package/nitrogen/generated/ios/swift/HybridVoiceSpec_cxx.swift +234 -0
- package/nitrogen/generated/ios/swift/VoiceInputChunk.swift +60 -0
- package/nitrogen/generated/ios/swift/VoiceInputResult.swift +60 -0
- package/nitrogen/generated/shared/c++/HybridAutoPlaySpec.cpp +0 -4
- package/nitrogen/generated/shared/c++/HybridAutoPlaySpec.hpp +0 -5
- package/nitrogen/generated/shared/c++/HybridVoiceSpec.cpp +24 -0
- package/nitrogen/generated/shared/c++/HybridVoiceSpec.hpp +73 -0
- package/nitrogen/generated/shared/c++/VoiceInputChunk.hpp +89 -0
- package/nitrogen/generated/shared/c++/VoiceInputResult.hpp +89 -0
- package/package.json +1 -1
- package/src/hybrid/HybridVoice.ts +79 -0
- package/src/index.ts +3 -1
- package/src/specs/AutoPlay.nitro.ts +0 -37
- package/src/specs/Voice.nitro.ts +16 -0
- package/src/templates/MapTemplate.ts +1 -6
- package/src/types/Voice.ts +18 -0
- package/src/components/OnAppearedChildRenderer.tsx +0 -37
|
@@ -21,7 +21,7 @@ class HybridAutoPlay: HybridAutoPlaySpec {
|
|
|
21
21
|
private static var listeners = [EventName: [StateListener]]()
|
|
22
22
|
private static var renderStateListeners = [String: [RenderStateListener]]()
|
|
23
23
|
private static var safeAreaInsetsListeners = [String: [SafeAreaListener]]()
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
|
|
26
26
|
override init() {
|
|
27
27
|
HybridAutoPlay.listeners.removeAll()
|
|
@@ -119,55 +119,10 @@ class HybridAutoPlay: HybridAutoPlaySpec {
|
|
|
119
119
|
func addListenerVoiceInput(
|
|
120
120
|
callback: @escaping (Location?, String?) -> Void
|
|
121
121
|
) throws -> () -> Void {
|
|
122
|
-
// iOS does not use the OS-triggered voice input path — use
|
|
122
|
+
// iOS does not use the OS-triggered voice input path — use HybridVoice instead.
|
|
123
123
|
return {}
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
-
func hasVoiceInputPermission() throws -> Bool {
|
|
127
|
-
return AVAudioSession.sharedInstance().recordPermission == .granted
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
func requestVoiceInputPermission() throws -> Promise<Bool> {
|
|
131
|
-
return Promise.async {
|
|
132
|
-
return await withCheckedContinuation { cont in
|
|
133
|
-
AVAudioSession.sharedInstance().requestRecordPermission { granted in
|
|
134
|
-
cont.resume(returning: granted)
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
func startVoiceInput(silenceThresholdMs: Double?, maxDurationMs: Double?, listeningText: String?) throws -> Promise<
|
|
141
|
-
ArrayBuffer
|
|
142
|
-
> {
|
|
143
|
-
return Promise.async {
|
|
144
|
-
let interfaceController = try? await RootModule.withInterfaceController { $0 }
|
|
145
|
-
|
|
146
|
-
let manager = VoiceInputManager()
|
|
147
|
-
HybridAutoPlay.voiceInputManager = manager
|
|
148
|
-
|
|
149
|
-
defer {
|
|
150
|
-
HybridAutoPlay.voiceInputManager = nil
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
let data = try await manager.start(
|
|
154
|
-
interfaceController: interfaceController,
|
|
155
|
-
silenceThresholdMs: silenceThresholdMs ?? 1_500,
|
|
156
|
-
maxDurationMs: maxDurationMs ?? 10_000,
|
|
157
|
-
listeningText: listeningText ?? "Listening..."
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
return try ArrayBuffer.copy(data: data)
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
func stopVoiceInput() throws {
|
|
165
|
-
Task { @MainActor in
|
|
166
|
-
let interfaceController = try? await RootModule.withInterfaceController { $0 }
|
|
167
|
-
HybridAutoPlay.voiceInputManager?.stop(interfaceController: interfaceController)
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
171
126
|
// MARK: set/push/pop templates
|
|
172
127
|
func setRootTemplate(templateId: String) throws -> Promise<Void> {
|
|
173
128
|
return Promise.async {
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import AVFoundation
|
|
2
|
+
import NitroModules
|
|
3
|
+
import Speech
|
|
4
|
+
|
|
5
|
+
class HybridVoice: HybridVoiceSpec {
|
|
6
|
+
private var voiceInputManager: VoiceInputManager?
|
|
7
|
+
|
|
8
|
+
func hasVoiceInputPermission() throws -> Bool {
|
|
9
|
+
let micGranted = AVAudioSession.sharedInstance().recordPermission == .granted
|
|
10
|
+
let speechGranted = SFSpeechRecognizer.authorizationStatus() == .authorized
|
|
11
|
+
return micGranted && speechGranted
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
func requestVoiceInputPermission() throws -> Promise<Bool> {
|
|
15
|
+
return Promise.async {
|
|
16
|
+
let micGranted = await withCheckedContinuation { cont in
|
|
17
|
+
AVAudioSession.sharedInstance().requestRecordPermission { granted in
|
|
18
|
+
cont.resume(returning: granted)
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
guard micGranted else { return false }
|
|
22
|
+
|
|
23
|
+
return await withCheckedContinuation { cont in
|
|
24
|
+
SFSpeechRecognizer.requestAuthorization { status in
|
|
25
|
+
cont.resume(returning: status == .authorized)
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
func startVoiceInput(
|
|
32
|
+
silenceThresholdMs: Double?,
|
|
33
|
+
maxDurationMs: Double?,
|
|
34
|
+
listeningText: String?,
|
|
35
|
+
preferSpeechToText: Bool?,
|
|
36
|
+
onChunk: ((_ chunk: VoiceInputChunk) -> Void)?,
|
|
37
|
+
language: String?
|
|
38
|
+
) throws -> Promise<VoiceInputResult> {
|
|
39
|
+
return Promise.async {
|
|
40
|
+
let interfaceController = try? await RootModule.withInterfaceController { $0 }
|
|
41
|
+
|
|
42
|
+
let manager = VoiceInputManager()
|
|
43
|
+
self.voiceInputManager = manager
|
|
44
|
+
|
|
45
|
+
defer { self.voiceInputManager = nil }
|
|
46
|
+
|
|
47
|
+
return try await manager.start(
|
|
48
|
+
interfaceController: interfaceController,
|
|
49
|
+
silenceThresholdMs: silenceThresholdMs ?? 1_500,
|
|
50
|
+
maxDurationMs: maxDurationMs ?? 10_000,
|
|
51
|
+
listeningText: listeningText ?? "Listening...",
|
|
52
|
+
preferSpeechToText: preferSpeechToText ?? false,
|
|
53
|
+
onChunk: onChunk,
|
|
54
|
+
language: language
|
|
55
|
+
)
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
func stopVoiceInput() throws {
|
|
60
|
+
Task { @MainActor in
|
|
61
|
+
let interfaceController = try? await RootModule.withInterfaceController { $0 }
|
|
62
|
+
self.voiceInputManager?.stop(interfaceController: interfaceController)
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
@@ -1,16 +1,47 @@
|
|
|
1
1
|
import AVFoundation
|
|
2
2
|
import CarPlay
|
|
3
|
+
import NitroModules
|
|
4
|
+
import Speech
|
|
3
5
|
|
|
4
|
-
///
|
|
5
|
-
///
|
|
6
|
+
/// Wraps CheckedContinuation so it can only be resumed once even when
|
|
7
|
+
/// shared between a stop() call and an async recognition task callback.
|
|
8
|
+
private final class ResultBox: @unchecked Sendable {
|
|
9
|
+
private var continuation: CheckedContinuation<VoiceInputResult, Error>?
|
|
10
|
+
private let lock = NSLock()
|
|
11
|
+
|
|
12
|
+
init(_ continuation: CheckedContinuation<VoiceInputResult, Error>) {
|
|
13
|
+
self.continuation = continuation
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
func resume(returning result: VoiceInputResult) {
|
|
17
|
+
lock.lock()
|
|
18
|
+
defer { lock.unlock() }
|
|
19
|
+
continuation?.resume(returning: result)
|
|
20
|
+
continuation = nil
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
func resume(throwing error: Error) {
|
|
24
|
+
lock.lock()
|
|
25
|
+
defer { lock.unlock() }
|
|
26
|
+
continuation?.resume(throwing: error)
|
|
27
|
+
continuation = nil
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/// Captures audio from the car microphone and buffers raw 16 kHz / 16-bit / mono PCM,
|
|
32
|
+
/// or transcribes it via SFSpeechRecognizer when preferSpeechToText is true.
|
|
6
33
|
class VoiceInputManager {
|
|
7
34
|
private var audioEngine: AVAudioEngine?
|
|
8
35
|
private var voiceControlTemplate: CPVoiceControlTemplate?
|
|
9
|
-
private var
|
|
36
|
+
private var resultBox: ResultBox?
|
|
10
37
|
private var samples: [Int16] = []
|
|
11
38
|
private var isStopping = false
|
|
12
39
|
private let stopLock = NSLock()
|
|
13
40
|
|
|
41
|
+
// STT
|
|
42
|
+
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
|
|
43
|
+
private var isSTTMode = false
|
|
44
|
+
|
|
14
45
|
// Timing
|
|
15
46
|
private var recordingStart: Date?
|
|
16
47
|
private var silenceStart: Date?
|
|
@@ -33,30 +64,35 @@ class VoiceInputManager {
|
|
|
33
64
|
interfaceController: AutoPlayInterfaceController?,
|
|
34
65
|
silenceThresholdMs: Double,
|
|
35
66
|
maxDurationMs: Double,
|
|
36
|
-
listeningText: String
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
67
|
+
listeningText: String,
|
|
68
|
+
preferSpeechToText: Bool,
|
|
69
|
+
onChunk: ((_ chunk: VoiceInputChunk) -> Void)?,
|
|
70
|
+
language: String?
|
|
71
|
+
) async throws -> VoiceInputResult {
|
|
72
|
+
return try await withCheckedThrowingContinuation { cont in
|
|
73
|
+
let box = ResultBox(cont)
|
|
74
|
+
self.resultBox = box
|
|
41
75
|
self.samples = []
|
|
42
76
|
self.isStopping = false
|
|
77
|
+
self.isSTTMode = preferSpeechToText
|
|
43
78
|
|
|
44
79
|
do {
|
|
45
80
|
try self.startCapture(
|
|
46
81
|
interfaceController: interfaceController,
|
|
47
82
|
silenceThresholdMs: silenceThresholdMs,
|
|
48
83
|
maxDurationMs: maxDurationMs,
|
|
49
|
-
listeningText: listeningText
|
|
84
|
+
listeningText: listeningText,
|
|
85
|
+
preferSpeechToText: preferSpeechToText,
|
|
86
|
+
onChunk: onChunk,
|
|
87
|
+
box: box,
|
|
88
|
+
language: language
|
|
50
89
|
)
|
|
51
90
|
}
|
|
52
91
|
catch {
|
|
53
|
-
self.
|
|
54
|
-
|
|
55
|
-
cont.resume(throwing: error)
|
|
92
|
+
self.cleanup(interfaceController: interfaceController)
|
|
93
|
+
box.resume(throwing: error)
|
|
56
94
|
}
|
|
57
95
|
}
|
|
58
|
-
|
|
59
|
-
return samplesAsData(samples)
|
|
60
96
|
}
|
|
61
97
|
|
|
62
98
|
func stop(interfaceController: AutoPlayInterfaceController? = nil) {
|
|
@@ -66,14 +102,22 @@ class VoiceInputManager {
|
|
|
66
102
|
return
|
|
67
103
|
}
|
|
68
104
|
isStopping = true
|
|
69
|
-
let
|
|
105
|
+
let wasSTTMode = isSTTMode
|
|
106
|
+
let box = resultBox
|
|
70
107
|
let capturedSamples = samples
|
|
71
|
-
|
|
108
|
+
resultBox = nil
|
|
72
109
|
samples = []
|
|
73
110
|
stopLock.unlock()
|
|
74
111
|
|
|
75
|
-
|
|
76
|
-
|
|
112
|
+
if wasSTTMode {
|
|
113
|
+
// endAudio() causes the recognition task to fire its final result,
|
|
114
|
+
// which resumes the box. Engine teardown happens there too.
|
|
115
|
+
recognitionRequest?.endAudio()
|
|
116
|
+
}
|
|
117
|
+
else {
|
|
118
|
+
cleanup(interfaceController: interfaceController)
|
|
119
|
+
box?.resume(returning: makePCMResult(from: capturedSamples))
|
|
120
|
+
}
|
|
77
121
|
}
|
|
78
122
|
|
|
79
123
|
// MARK: - Private
|
|
@@ -82,13 +126,16 @@ class VoiceInputManager {
|
|
|
82
126
|
interfaceController: AutoPlayInterfaceController?,
|
|
83
127
|
silenceThresholdMs: Double,
|
|
84
128
|
maxDurationMs: Double,
|
|
85
|
-
listeningText: String
|
|
129
|
+
listeningText: String,
|
|
130
|
+
preferSpeechToText: Bool,
|
|
131
|
+
onChunk: ((_ chunk: VoiceInputChunk) -> Void)?,
|
|
132
|
+
box: ResultBox,
|
|
133
|
+
language: String?
|
|
86
134
|
) throws {
|
|
87
135
|
guard AVAudioSession.sharedInstance().recordPermission == .granted else {
|
|
88
136
|
throw VoiceInputError.microphonePermissionDenied
|
|
89
137
|
}
|
|
90
138
|
|
|
91
|
-
// Activate the session first so inputNode reports the correct hardware format
|
|
92
139
|
let session = AVAudioSession.sharedInstance()
|
|
93
140
|
try session.setCategory(.playAndRecord, mode: .measurement, options: [])
|
|
94
141
|
try session.setActive(true)
|
|
@@ -97,12 +144,59 @@ class VoiceInputManager {
|
|
|
97
144
|
presentVoiceTemplate(interfaceController: interfaceController, listeningText: listeningText)
|
|
98
145
|
}
|
|
99
146
|
|
|
147
|
+
var activeRecognitionRequest: SFSpeechAudioBufferRecognitionRequest? = nil
|
|
148
|
+
|
|
149
|
+
if preferSpeechToText, SFSpeechRecognizer.authorizationStatus() == .authorized,
|
|
150
|
+
let recognizer = language != nil ? SFSpeechRecognizer(locale: Locale(identifier: language!)) : SFSpeechRecognizer(locale: Locale.current),
|
|
151
|
+
recognizer.isAvailable
|
|
152
|
+
{
|
|
153
|
+
let request = SFSpeechAudioBufferRecognitionRequest()
|
|
154
|
+
request.shouldReportPartialResults = true
|
|
155
|
+
recognitionRequest = request
|
|
156
|
+
activeRecognitionRequest = request
|
|
157
|
+
|
|
158
|
+
recognizer.recognitionTask(with: request) { [weak self] result, error in
|
|
159
|
+
guard let self else { return }
|
|
160
|
+
|
|
161
|
+
if error != nil {
|
|
162
|
+
// STT failed — fall back to whatever PCM was accumulated
|
|
163
|
+
self.stopLock.lock()
|
|
164
|
+
let capturedSamples = self.samples
|
|
165
|
+
self.samples = []
|
|
166
|
+
self.stopLock.unlock()
|
|
167
|
+
|
|
168
|
+
self.cleanup(interfaceController: interfaceController)
|
|
169
|
+
box.resume(returning: self.makePCMResult(from: capturedSamples))
|
|
170
|
+
return
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
guard let result else { return }
|
|
174
|
+
|
|
175
|
+
if result.isFinal {
|
|
176
|
+
self.stopLock.lock()
|
|
177
|
+
self.isStopping = true
|
|
178
|
+
self.samples = []
|
|
179
|
+
self.stopLock.unlock()
|
|
180
|
+
|
|
181
|
+
self.cleanup(interfaceController: interfaceController)
|
|
182
|
+
box.resume(
|
|
183
|
+
returning: VoiceInputResult(
|
|
184
|
+
transcription: result.bestTranscription.formattedString,
|
|
185
|
+
audio: nil
|
|
186
|
+
)
|
|
187
|
+
)
|
|
188
|
+
}
|
|
189
|
+
else {
|
|
190
|
+
onChunk?(VoiceInputChunk(partial: result.bestTranscription.formattedString, audio: nil))
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
100
195
|
let engine = AVAudioEngine()
|
|
101
196
|
let inputNode = engine.inputNode
|
|
102
197
|
let nativeFormat = inputNode.outputFormat(forBus: 0)
|
|
103
198
|
|
|
104
|
-
let
|
|
105
|
-
guard let converter = AVAudioConverter(from: nativeFormat, to: targetFormat) else {
|
|
199
|
+
guard let converter = AVAudioConverter(from: nativeFormat, to: VoiceInputManager.targetFormat) else {
|
|
106
200
|
throw VoiceInputError.converterUnavailable
|
|
107
201
|
}
|
|
108
202
|
|
|
@@ -116,36 +210,43 @@ class VoiceInputManager {
|
|
|
116
210
|
) { [weak self] buffer, _ in
|
|
117
211
|
guard let self, !self.isStopping else { return }
|
|
118
212
|
|
|
213
|
+
// Feed STT if active
|
|
214
|
+
activeRecognitionRequest?.append(buffer)
|
|
215
|
+
|
|
216
|
+
// Convert to 16kHz int16 for accumulation and PCM chunks
|
|
119
217
|
let outputFrameCapacity = AVAudioFrameCount(
|
|
120
|
-
Double(buffer.frameLength)
|
|
121
|
-
* VoiceInputManager.sampleRate
|
|
122
|
-
/ nativeFormat.sampleRate
|
|
218
|
+
Double(buffer.frameLength) * VoiceInputManager.sampleRate / nativeFormat.sampleRate
|
|
123
219
|
)
|
|
124
|
-
|
|
125
220
|
guard
|
|
126
221
|
let outputBuffer = AVAudioPCMBuffer(
|
|
127
|
-
pcmFormat: targetFormat,
|
|
222
|
+
pcmFormat: VoiceInputManager.targetFormat,
|
|
128
223
|
frameCapacity: outputFrameCapacity
|
|
129
224
|
)
|
|
130
225
|
else { return }
|
|
131
226
|
|
|
132
227
|
var conversionError: NSError?
|
|
133
|
-
let status = converter.convert(to: outputBuffer, error: &conversionError) {
|
|
134
|
-
_,
|
|
135
|
-
outStatus in
|
|
228
|
+
let status = converter.convert(to: outputBuffer, error: &conversionError) { _, outStatus in
|
|
136
229
|
outStatus.pointee = .haveData
|
|
137
230
|
return buffer
|
|
138
231
|
}
|
|
139
|
-
|
|
140
232
|
guard status != .error, let int16Data = outputBuffer.int16ChannelData else { return }
|
|
141
233
|
|
|
142
234
|
let frameCount = Int(outputBuffer.frameLength)
|
|
143
235
|
let newSamples = Array(UnsafeBufferPointer(start: int16Data[0], count: frameCount))
|
|
144
236
|
self.samples.append(contentsOf: newSamples)
|
|
145
237
|
|
|
238
|
+
// PCM chunk callback
|
|
239
|
+
if activeRecognitionRequest == nil, let onChunk {
|
|
240
|
+
if let chunkBuffer = try? ArrayBuffer.copy(
|
|
241
|
+
data: newSamples.withUnsafeBufferPointer { Data(buffer: $0) }
|
|
242
|
+
) {
|
|
243
|
+
onChunk(VoiceInputChunk(partial: nil, audio: chunkBuffer))
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
|
|
146
247
|
let now = Date()
|
|
147
248
|
|
|
148
|
-
// Max duration
|
|
249
|
+
// Max duration — applies in both modes
|
|
149
250
|
if let start = self.recordingStart,
|
|
150
251
|
now.timeIntervalSince(start) * 1000 >= maxDurationMs
|
|
151
252
|
{
|
|
@@ -160,7 +261,9 @@ class VoiceInputManager {
|
|
|
160
261
|
{
|
|
161
262
|
let peak = newSamples.reduce(0) { max($0, abs(Int($1))) }
|
|
162
263
|
if peak < VoiceInputManager.silenceAmplitudeThreshold {
|
|
163
|
-
if self.silenceStart == nil {
|
|
264
|
+
if self.silenceStart == nil {
|
|
265
|
+
self.silenceStart = now
|
|
266
|
+
}
|
|
164
267
|
if let silenceBegin = self.silenceStart,
|
|
165
268
|
now.timeIntervalSince(silenceBegin) * 1000 >= silenceThresholdMs
|
|
166
269
|
{
|
|
@@ -183,10 +286,11 @@ class VoiceInputManager {
|
|
|
183
286
|
}
|
|
184
287
|
}
|
|
185
288
|
|
|
186
|
-
private func
|
|
289
|
+
private func cleanup(interfaceController: AutoPlayInterfaceController?) {
|
|
187
290
|
audioEngine?.inputNode.removeTap(onBus: 0)
|
|
188
291
|
audioEngine?.stop()
|
|
189
292
|
audioEngine = nil
|
|
293
|
+
recognitionRequest = nil
|
|
190
294
|
recordingStart = nil
|
|
191
295
|
silenceStart = nil
|
|
192
296
|
try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
|
|
@@ -195,6 +299,12 @@ class VoiceInputManager {
|
|
|
195
299
|
}
|
|
196
300
|
}
|
|
197
301
|
|
|
302
|
+
private func makePCMResult(from samples: [Int16]) -> VoiceInputResult {
|
|
303
|
+
let data = samples.withUnsafeBufferPointer { Data(buffer: $0) }
|
|
304
|
+
let buffer = try? ArrayBuffer.copy(data: data)
|
|
305
|
+
return VoiceInputResult(transcription: nil, audio: buffer)
|
|
306
|
+
}
|
|
307
|
+
|
|
198
308
|
private func presentVoiceTemplate(interfaceController: AutoPlayInterfaceController, listeningText: String) {
|
|
199
309
|
let listeningState = CPVoiceControlState(
|
|
200
310
|
identifier: "listening",
|
|
@@ -218,12 +328,6 @@ class VoiceInputManager {
|
|
|
218
328
|
}
|
|
219
329
|
voiceControlTemplate = nil
|
|
220
330
|
}
|
|
221
|
-
|
|
222
|
-
private func samplesAsData(_ samples: [Int16]) -> Data {
|
|
223
|
-
samples.withUnsafeBufferPointer { ptr in
|
|
224
|
-
Data(buffer: ptr)
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
331
|
}
|
|
228
332
|
|
|
229
333
|
enum VoiceInputError: Error {
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A hook to determine if the CarPlay/Android Auto screen is currently focused (visible).
|
|
3
|
+
*
|
|
4
|
+
* @param moduleName The name of the module to listen to.
|
|
5
|
+
* @returns `true` if the screen is focused, `false` otherwise.
|
|
6
|
+
*/
|
|
7
|
+
export declare function useIsAutoPlayFocused(moduleName: string): boolean;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { useEffect, useState } from 'react';
|
|
2
|
+
import { HybridAutoPlay } from '..';
|
|
3
|
+
/**
|
|
4
|
+
* A hook to determine if the CarPlay/Android Auto screen is currently focused (visible).
|
|
5
|
+
*
|
|
6
|
+
* @param moduleName The name of the module to listen to.
|
|
7
|
+
* @returns `true` if the screen is focused, `false` otherwise.
|
|
8
|
+
*/
|
|
9
|
+
export function useIsAutoPlayFocused(moduleName) {
|
|
10
|
+
const [isFocused, setIsFocused] = useState(false);
|
|
11
|
+
useEffect(() => {
|
|
12
|
+
const remove = HybridAutoPlay.addListenerRenderState(moduleName, (state) => {
|
|
13
|
+
setIsFocused(state === 'didAppear');
|
|
14
|
+
});
|
|
15
|
+
return () => {
|
|
16
|
+
remove();
|
|
17
|
+
};
|
|
18
|
+
}, [moduleName]);
|
|
19
|
+
return isFocused;
|
|
20
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type { VoiceInputOptions, VoiceInputResult } from '../types/Voice';
|
|
2
|
+
type StartVoiceInput = {
|
|
3
|
+
(options: VoiceInputOptions & Required<Pick<VoiceInputOptions, 'onChunk'>>): Promise<VoiceInputResult>;
|
|
4
|
+
(options?: Omit<VoiceInputOptions, 'onChunk'>): Promise<VoiceInputResult>;
|
|
5
|
+
};
|
|
6
|
+
export declare const HybridVoice: {
|
|
7
|
+
/**
|
|
8
|
+
* Returns true if all permissions required for voice input are granted.
|
|
9
|
+
* On iOS: checks both microphone and speech recognition authorization.
|
|
10
|
+
* On Android: checks RECORD_AUDIO permission.
|
|
11
|
+
*/
|
|
12
|
+
hasVoiceInputPermission: () => boolean;
|
|
13
|
+
/**
|
|
14
|
+
* Request all permissions required for voice input.
|
|
15
|
+
* On iOS: requests microphone permission then speech recognition authorization.
|
|
16
|
+
* On Android: requests RECORD_AUDIO via car context when connected, otherwise
|
|
17
|
+
* via the React Native application context.
|
|
18
|
+
* Returns true only if all required permissions were granted.
|
|
19
|
+
*/
|
|
20
|
+
requestVoiceInputPermission: () => Promise<boolean>;
|
|
21
|
+
/**
|
|
22
|
+
* Start an in-app voice session.
|
|
23
|
+
*
|
|
24
|
+
* When preferSpeechToText is true:
|
|
25
|
+
* iOS — streams audio buffers into SFSpeechRecognizer during recording;
|
|
26
|
+
* onChunk fires with partial transcription results; resolves with
|
|
27
|
+
* { transcription } or falls back to { audio } if unavailable.
|
|
28
|
+
* Android — checks SpeechRecognizer availability upfront; if available it
|
|
29
|
+
* owns the mic and streams partial results via onChunk; if not
|
|
30
|
+
* available falls back to PCM recording.
|
|
31
|
+
*
|
|
32
|
+
* When preferSpeechToText is false (default):
|
|
33
|
+
* Both platforms record raw PCM; onChunk fires with audio chunks;
|
|
34
|
+
* resolves with { audio }.
|
|
35
|
+
*
|
|
36
|
+
* @param silenceThresholdMs ms of silence before auto-stop (default 1500)
|
|
37
|
+
* @param maxDurationMs hard cap on recording duration (default 10000)
|
|
38
|
+
* @param listeningText iOS only — text shown on CPVoiceControlTemplate
|
|
39
|
+
* @param preferSpeechToText request STT transcription instead of raw PCM
|
|
40
|
+
* @param onChunk optional streaming callback
|
|
41
|
+
* @param language specify the language for the SpeechRecognizer, falls back to system language if not set
|
|
42
|
+
*/
|
|
43
|
+
startVoiceInput: StartVoiceInput;
|
|
44
|
+
/**
|
|
45
|
+
* Stop the active voice session early.
|
|
46
|
+
* For PCM mode: resolves startVoiceInput with audio captured so far.
|
|
47
|
+
* For STT mode: finalises the recognition request.
|
|
48
|
+
* No-op if no session is active.
|
|
49
|
+
*/
|
|
50
|
+
stopVoiceInput: () => void;
|
|
51
|
+
};
|
|
52
|
+
export {};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { NitroModules } from 'react-native-nitro-modules';
|
|
2
|
+
const _native = NitroModules.createHybridObject('Voice');
|
|
3
|
+
const startVoiceInput = async (options) => {
|
|
4
|
+
const { onChunk, silenceThresholdMs, maxDurationMs, listeningText, preferSpeechToText, language, } = options ?? {};
|
|
5
|
+
return await _native.startVoiceInput(silenceThresholdMs, maxDurationMs, listeningText, preferSpeechToText, onChunk, language);
|
|
6
|
+
};
|
|
7
|
+
export const HybridVoice = {
|
|
8
|
+
/**
|
|
9
|
+
* Returns true if all permissions required for voice input are granted.
|
|
10
|
+
* On iOS: checks both microphone and speech recognition authorization.
|
|
11
|
+
* On Android: checks RECORD_AUDIO permission.
|
|
12
|
+
*/
|
|
13
|
+
hasVoiceInputPermission: () => _native.hasVoiceInputPermission(),
|
|
14
|
+
/**
|
|
15
|
+
* Request all permissions required for voice input.
|
|
16
|
+
* On iOS: requests microphone permission then speech recognition authorization.
|
|
17
|
+
* On Android: requests RECORD_AUDIO via car context when connected, otherwise
|
|
18
|
+
* via the React Native application context.
|
|
19
|
+
* Returns true only if all required permissions were granted.
|
|
20
|
+
*/
|
|
21
|
+
requestVoiceInputPermission: () => _native.requestVoiceInputPermission(),
|
|
22
|
+
/**
|
|
23
|
+
* Start an in-app voice session.
|
|
24
|
+
*
|
|
25
|
+
* When preferSpeechToText is true:
|
|
26
|
+
* iOS — streams audio buffers into SFSpeechRecognizer during recording;
|
|
27
|
+
* onChunk fires with partial transcription results; resolves with
|
|
28
|
+
* { transcription } or falls back to { audio } if unavailable.
|
|
29
|
+
* Android — checks SpeechRecognizer availability upfront; if available it
|
|
30
|
+
* owns the mic and streams partial results via onChunk; if not
|
|
31
|
+
* available falls back to PCM recording.
|
|
32
|
+
*
|
|
33
|
+
* When preferSpeechToText is false (default):
|
|
34
|
+
* Both platforms record raw PCM; onChunk fires with audio chunks;
|
|
35
|
+
* resolves with { audio }.
|
|
36
|
+
*
|
|
37
|
+
* @param silenceThresholdMs ms of silence before auto-stop (default 1500)
|
|
38
|
+
* @param maxDurationMs hard cap on recording duration (default 10000)
|
|
39
|
+
* @param listeningText iOS only — text shown on CPVoiceControlTemplate
|
|
40
|
+
* @param preferSpeechToText request STT transcription instead of raw PCM
|
|
41
|
+
* @param onChunk optional streaming callback
|
|
42
|
+
* @param language specify the language for the SpeechRecognizer, falls back to system language if not set
|
|
43
|
+
*/
|
|
44
|
+
startVoiceInput,
|
|
45
|
+
/**
|
|
46
|
+
* Stop the active voice session early.
|
|
47
|
+
* For PCM mode: resolves startVoiceInput with audio captured so far.
|
|
48
|
+
* For STT mode: finalises the recognition request.
|
|
49
|
+
* No-op if no session is active.
|
|
50
|
+
*/
|
|
51
|
+
stopVoiceInput: () => _native.stopVoiceInput(),
|
|
52
|
+
};
|
package/lib/hybrid.d.ts
ADDED
package/lib/hybrid.js
ADDED
package/lib/index.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { HybridAndroidAutoTelemetry } from './hybrid/HybridAndroidAutoTelemetry';
|
|
2
2
|
import { HybridAutoPlay } from './hybrid/HybridAutoPlay';
|
|
3
|
+
import { HybridVoice } from './hybrid/HybridVoice';
|
|
3
4
|
import type { AndroidAutomotive } from './specs/AndroidAutomotive.nitro';
|
|
4
|
-
export { HybridAndroidAutoTelemetry, HybridAutoPlay };
|
|
5
|
+
export { HybridAndroidAutoTelemetry, HybridAutoPlay, HybridVoice };
|
|
5
6
|
export declare const HybridAndroidAutomotive: AndroidAutomotive | null;
|
|
6
7
|
/**
|
|
7
8
|
* These are the static module names for the app running on the mobile device, head unit screen and the CarPlay dashboard.
|
|
@@ -39,6 +40,7 @@ export * from './types/SignInMethod';
|
|
|
39
40
|
export * from './types/Telemetry';
|
|
40
41
|
export * from './types/Text';
|
|
41
42
|
export * from './types/Trip';
|
|
43
|
+
export type { VoiceInputChunk, VoiceInputOptions, VoiceInputResult } from './types/Voice';
|
|
42
44
|
export type { AlertPriority, NavigationAlert as Alert, NavigationAlertAction as AlertAction, } from './utils/NitroAlert';
|
|
43
45
|
export type { ThemedColor } from './utils/NitroColor';
|
|
44
46
|
export type { GridButton } from './utils/NitroGrid';
|
package/lib/index.js
CHANGED
|
@@ -3,8 +3,9 @@ import { NitroModules } from 'react-native-nitro-modules';
|
|
|
3
3
|
import AutoPlayHeadlessJsTask from './AutoPlayHeadlessJsTask';
|
|
4
4
|
import { HybridAndroidAutoTelemetry } from './hybrid/HybridAndroidAutoTelemetry';
|
|
5
5
|
import { HybridAutoPlay } from './hybrid/HybridAutoPlay';
|
|
6
|
+
import { HybridVoice } from './hybrid/HybridVoice';
|
|
6
7
|
AutoPlayHeadlessJsTask.registerHeadlessTask(HybridAutoPlay);
|
|
7
|
-
export { HybridAndroidAutoTelemetry, HybridAutoPlay };
|
|
8
|
+
export { HybridAndroidAutoTelemetry, HybridAutoPlay, HybridVoice };
|
|
8
9
|
export const HybridAndroidAutomotive = Platform.OS === 'android'
|
|
9
10
|
? NitroModules.createHybridObject('AndroidAutomotive')
|
|
10
11
|
: null;
|
|
@@ -31,35 +31,6 @@ export interface AutoPlay extends HybridObject<{
|
|
|
31
31
|
* @namespace Android
|
|
32
32
|
*/
|
|
33
33
|
addListenerVoiceInput(callback: (coordinates: Location | undefined, query: string | undefined) => void): CleanupCallback;
|
|
34
|
-
/**
|
|
35
|
-
* Returns true if microphone permission has already been granted.
|
|
36
|
-
*/
|
|
37
|
-
hasVoiceInputPermission(): boolean;
|
|
38
|
-
/**
|
|
39
|
-
* Request microphone permission from the user.
|
|
40
|
-
* On Android: uses the car context when Android Auto is connected, otherwise
|
|
41
|
-
* falls back to the React Native application context.
|
|
42
|
-
* On iOS: uses AVAudioApplication (iOS 17+) or AVAudioSession (iOS 15–16).
|
|
43
|
-
* Returns true if permission was granted, false if denied.
|
|
44
|
-
*/
|
|
45
|
-
requestVoiceInputPermission(): Promise<boolean>;
|
|
46
|
-
/**
|
|
47
|
-
* Start an in-app voice recording session.
|
|
48
|
-
* On Android: acquires audio focus and captures via CarAudioRecord when
|
|
49
|
-
* Android Auto is connected, otherwise uses standard AudioRecord.
|
|
50
|
-
* On iOS: presents CPVoiceControlTemplate (when a car is connected) and
|
|
51
|
-
* captures audio via AVAudioEngine.
|
|
52
|
-
* Resolves with the complete raw PCM buffer (16 kHz, 16-bit, mono) when
|
|
53
|
-
* silence is detected, the max duration is reached, or stopVoiceInput() is called.
|
|
54
|
-
* Rejects if microphone permission has not been granted or recording fails to start.
|
|
55
|
-
*/
|
|
56
|
-
startVoiceInput(silenceThresholdMs?: number, maxDurationMs?: number, listeningText?: string): Promise<ArrayBuffer>;
|
|
57
|
-
/**
|
|
58
|
-
* Stop the active voice recording session early. Causes the Promise returned
|
|
59
|
-
* by startVoiceInput() to resolve with the audio captured so far.
|
|
60
|
-
* No-op if no recording is in progress.
|
|
61
|
-
*/
|
|
62
|
-
stopVoiceInput(): void;
|
|
63
34
|
/**
|
|
64
35
|
* sets the specified template as root template, initializes a new stack
|
|
65
36
|
* Promise might contain an error message in case setting root template failed
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { HybridObject } from 'react-native-nitro-modules';
|
|
2
|
+
import type { NitroAutomotivePermissionRequestTemplateConfig } from '../templates/AutomotivePermissionRequestTemplate';
|
|
3
|
+
import type { NitroTemplateConfig } from './AutoPlay.nitro';
|
|
4
|
+
interface AutomotivePermissionRequestTemplateConfig extends NitroTemplateConfig, NitroAutomotivePermissionRequestTemplateConfig {
|
|
5
|
+
}
|
|
6
|
+
export interface AutomotivePermissionRequestTemplate extends HybridObject<{
|
|
7
|
+
android: 'kotlin';
|
|
8
|
+
}> {
|
|
9
|
+
createAutomotivePermissionRequestTemplate(config: AutomotivePermissionRequestTemplateConfig): void;
|
|
10
|
+
}
|
|
11
|
+
export {};
|