@bytexbyte/nxtlinq-ai-agent-ui-react-native-development 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/NxtlinqAgentAssistant.d.ts +4 -4
- package/dist/NxtlinqAgentAssistant.d.ts.map +1 -1
- package/dist/NxtlinqAgentAssistant.js +5 -6
- package/dist/components/AgentAssistantShell.d.ts +1 -3
- package/dist/components/AgentAssistantShell.d.ts.map +1 -1
- package/dist/components/AgentAssistantShell.js +3 -7
- package/dist/components/AgentMessageList.d.ts.map +1 -1
- package/dist/components/AgentMessageList.js +7 -9
- package/dist/components/AgentVoiceBar.d.ts.map +1 -1
- package/dist/components/AgentVoiceBar.js +14 -34
- package/dist/components/MessageAttachmentPreview.d.ts +10 -0
- package/dist/components/MessageAttachmentPreview.d.ts.map +1 -0
- package/dist/components/MessageAttachmentPreview.js +15 -0
- package/dist/components/VoiceAddMediaModal.d.ts +12 -0
- package/dist/components/VoiceAddMediaModal.d.ts.map +1 -0
- package/dist/components/VoiceAddMediaModal.js +31 -0
- package/dist/components/VoiceAttachmentButton.d.ts +3 -0
- package/dist/components/VoiceAttachmentButton.d.ts.map +1 -0
- package/dist/components/VoiceAttachmentButton.js +58 -0
- package/dist/components/VoiceIcons.d.ts +1 -0
- package/dist/components/VoiceIcons.d.ts.map +1 -1
- package/dist/components/VoiceIcons.js +3 -0
- package/dist/components/VoiceWaveform.d.ts +2 -2
- package/dist/components/VoiceWaveform.d.ts.map +1 -1
- package/dist/components/VoiceWaveform.js +16 -5
- package/dist/components/useMessageListAutoScroll.d.ts +12 -0
- package/dist/components/useMessageListAutoScroll.d.ts.map +1 -0
- package/dist/components/useMessageListAutoScroll.js +42 -0
- package/dist/context/AgentAssistantContext.d.ts +3 -3
- package/dist/context/AgentAssistantContext.d.ts.map +1 -1
- package/dist/context/AgentAssistantContext.js +76 -29
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/types.d.ts +3 -8
- package/dist/types.d.ts.map +1 -1
- package/dist/voice/float32ToPcm16.d.ts +2 -0
- package/dist/voice/float32ToPcm16.d.ts.map +1 -0
- package/dist/voice/float32ToPcm16.js +8 -0
- package/dist/voice/loadImageCropPicker.d.ts +11 -0
- package/dist/voice/loadImageCropPicker.d.ts.map +1 -0
- package/dist/voice/loadImageCropPicker.js +12 -0
- package/dist/voice/sendVoiceImageAttachment.d.ts +15 -0
- package/dist/voice/sendVoiceImageAttachment.d.ts.map +1 -0
- package/dist/voice/sendVoiceImageAttachment.js +29 -0
- package/dist/voice/useVoiceImagePicker.d.ts +11 -0
- package/dist/voice/useVoiceImagePicker.d.ts.map +1 -0
- package/dist/voice/useVoiceImagePicker.js +38 -0
- package/dist/voice/useVoiceMicState.d.ts +4 -0
- package/dist/voice/useVoiceMicState.d.ts.map +1 -1
- package/dist/voice/useVoiceMicState.js +32 -3
- package/dist/voice/useVoiceSilenceCommit.d.ts +10 -0
- package/dist/voice/useVoiceSilenceCommit.d.ts.map +1 -0
- package/dist/voice/useVoiceSilenceCommit.js +76 -0
- package/dist/voice/useVoiceTranscriptMessages.d.ts +16 -0
- package/dist/voice/useVoiceTranscriptMessages.d.ts.map +1 -0
- package/dist/voice/useVoiceTranscriptMessages.js +129 -0
- package/dist/voice/useWsRealtimeAudio.d.ts +17 -0
- package/dist/voice/useWsRealtimeAudio.d.ts.map +1 -0
- package/dist/voice/useWsRealtimeAudio.js +165 -0
- package/dist/voice/voiceImagePickerOptions.d.ts +11 -0
- package/dist/voice/voiceImagePickerOptions.d.ts.map +1 -0
- package/dist/voice/voiceImagePickerOptions.js +10 -0
- package/dist/voice/voiceSilenceConstants.d.ts +8 -0
- package/dist/voice/voiceSilenceConstants.d.ts.map +1 -0
- package/dist/voice/voiceSilenceConstants.js +7 -0
- package/dist/voice/wsPcmPlayer.d.ts +24 -0
- package/dist/voice/wsPcmPlayer.d.ts.map +1 -0
- package/dist/voice/wsPcmPlayer.js +146 -0
- package/dist/voice/wsPcmRecorder.d.ts +26 -0
- package/dist/voice/wsPcmRecorder.d.ts.map +1 -0
- package/dist/voice/wsPcmRecorder.js +145 -0
- package/dist/voice/wsRealtimeConstants.d.ts +2 -0
- package/dist/voice/wsRealtimeConstants.d.ts.map +1 -0
- package/dist/voice/wsRealtimeConstants.js +1 -0
- package/package.json +8 -5
- package/src/NxtlinqAgentAssistant.tsx +3 -12
- package/src/components/AgentAssistantShell.tsx +2 -18
- package/src/components/AgentMessageList.tsx +18 -15
- package/src/components/AgentVoiceBar.tsx +35 -70
- package/src/components/MessageAttachmentPreview.tsx +43 -0
- package/src/components/VoiceAddMediaModal.tsx +69 -0
- package/src/components/VoiceAttachmentButton.tsx +100 -0
- package/src/components/VoiceIcons.tsx +4 -0
- package/src/components/VoiceWaveform.tsx +15 -5
- package/src/components/useMessageListAutoScroll.ts +57 -0
- package/src/context/AgentAssistantContext.tsx +100 -32
- package/src/index.ts +2 -2
- package/src/react-native.d.ts +18 -1
- package/src/types.ts +3 -8
- package/src/voice/float32ToPcm16.ts +8 -0
- package/src/voice/loadImageCropPicker.ts +18 -0
- package/src/voice/sendVoiceImageAttachment.ts +49 -0
- package/src/voice/useVoiceImagePicker.ts +54 -0
- package/src/voice/useVoiceMicState.ts +38 -3
- package/src/voice/useVoiceSilenceCommit.ts +94 -0
- package/src/voice/useVoiceTranscriptMessages.ts +173 -0
- package/src/voice/useWsRealtimeAudio.ts +200 -0
- package/src/voice/voiceImagePickerOptions.ts +10 -0
- package/src/voice/voiceSilenceConstants.ts +10 -0
- package/src/voice/wsPcmPlayer.ts +166 -0
- package/src/voice/wsPcmRecorder.ts +152 -0
- package/src/voice/wsRealtimeConstants.ts +1 -0
- package/src/components/AgentRemoteAudio.tsx +0 -105
|
@@ -16,6 +16,8 @@ export type UseVoiceMicStateOptions = {
|
|
|
16
16
|
* Use with open-mic demos (`startWithMicMuted={false}`). Berify hold-to-talk keeps this true.
|
|
17
17
|
*/
|
|
18
18
|
holdMicDuringAssistant?: boolean;
|
|
19
|
+
/** Called when user opens mic while assistant is responding (barge-in / interrupt). */
|
|
20
|
+
onBargeIn?: () => void;
|
|
19
21
|
};
|
|
20
22
|
|
|
21
23
|
export function useVoiceMicState(
|
|
@@ -29,18 +31,29 @@ export function useVoiceMicState(
|
|
|
29
31
|
const assistantMicHoldRef = useRef(false);
|
|
30
32
|
const userMicOptInRef = useRef(!connectMuted);
|
|
31
33
|
const [isMicMuted, setIsMicMuted] = useState(connectMuted);
|
|
34
|
+
const [isCaptureActive, setIsCaptureActive] = useState(!connectMuted);
|
|
35
|
+
|
|
36
|
+
const syncCaptureActive = useCallback(() => {
|
|
37
|
+
const userOpen = !userMicMutedRef.current;
|
|
38
|
+
const holdBlocks =
|
|
39
|
+
assistantMicHoldRef.current
|
|
40
|
+
&& !(userMicOptInRef.current && userOpen);
|
|
41
|
+
setIsCaptureActive(userOpen && !holdBlocks);
|
|
42
|
+
}, []);
|
|
32
43
|
|
|
33
44
|
const applyMicState = useCallback(() => {
|
|
34
45
|
const shouldMute = userMicMutedRef.current || assistantMicHoldRef.current;
|
|
35
46
|
voice.muteMic(shouldMute);
|
|
36
47
|
setIsMicMuted(shouldMute);
|
|
37
|
-
|
|
48
|
+
syncCaptureActive();
|
|
49
|
+
}, [voice, syncCaptureActive]);
|
|
38
50
|
|
|
39
51
|
const resetMicState = useCallback(() => {
|
|
40
52
|
userMicMutedRef.current = false;
|
|
41
53
|
assistantMicHoldRef.current = false;
|
|
42
54
|
userMicOptInRef.current = false;
|
|
43
55
|
setIsMicMuted(false);
|
|
56
|
+
setIsCaptureActive(false);
|
|
44
57
|
}, []);
|
|
45
58
|
|
|
46
59
|
const prepareForVoiceConnect = useCallback(() => {
|
|
@@ -48,6 +61,7 @@ export function useVoiceMicState(
|
|
|
48
61
|
userMicOptInRef.current = !connectMuted;
|
|
49
62
|
assistantMicHoldRef.current = false;
|
|
50
63
|
setIsMicMuted(connectMuted);
|
|
64
|
+
setIsCaptureActive(!connectMuted);
|
|
51
65
|
voice.muteMic(connectMuted);
|
|
52
66
|
}, [voice, connectMuted]);
|
|
53
67
|
|
|
@@ -56,6 +70,7 @@ export function useVoiceMicState(
|
|
|
56
70
|
userMicMutedRef.current = connectMuted;
|
|
57
71
|
voice.muteMic(connectMuted);
|
|
58
72
|
setIsMicMuted(connectMuted);
|
|
73
|
+
setIsCaptureActive(!connectMuted);
|
|
59
74
|
}, [isVoiceConnecting, voice, connectMuted]);
|
|
60
75
|
|
|
61
76
|
const prevVoiceStatusRef = useRef(voice.voiceStatus);
|
|
@@ -66,6 +81,11 @@ export function useVoiceMicState(
|
|
|
66
81
|
prevVoiceStatusRef.current = status;
|
|
67
82
|
|
|
68
83
|
if (holdDuringAssistant && ASSISTANT_MIC_HOLD_STATUSES.has(status)) {
|
|
84
|
+
if (!userMicMutedRef.current && userMicOptInRef.current) {
|
|
85
|
+
assistantMicHoldRef.current = false;
|
|
86
|
+
applyMicState();
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
69
89
|
assistantMicHoldRef.current = true;
|
|
70
90
|
applyMicState();
|
|
71
91
|
return;
|
|
@@ -84,9 +104,22 @@ export function useVoiceMicState(
|
|
|
84
104
|
}
|
|
85
105
|
}, [voice.voiceStatus, applyMicState, holdDuringAssistant]);
|
|
86
106
|
|
|
107
|
+
const muteAfterSilenceCommit = useCallback(() => {
|
|
108
|
+
userMicMutedRef.current = true;
|
|
109
|
+
userMicOptInRef.current = false;
|
|
110
|
+
applyMicState();
|
|
111
|
+
}, [applyMicState]);
|
|
112
|
+
|
|
87
113
|
const toggleVoiceMicMute = useCallback(() => {
|
|
88
114
|
if (!voice.isVoiceActive && !isVoiceConnecting) return;
|
|
89
|
-
if (assistantMicHoldRef.current && userMicMutedRef.current)
|
|
115
|
+
if (assistantMicHoldRef.current && userMicMutedRef.current) {
|
|
116
|
+
assistantMicHoldRef.current = false;
|
|
117
|
+
userMicMutedRef.current = false;
|
|
118
|
+
userMicOptInRef.current = true;
|
|
119
|
+
options?.onBargeIn?.();
|
|
120
|
+
applyMicState();
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
90
123
|
if (assistantMicHoldRef.current) {
|
|
91
124
|
userMicMutedRef.current = true;
|
|
92
125
|
applyMicState();
|
|
@@ -96,7 +129,7 @@ export function useVoiceMicState(
|
|
|
96
129
|
userMicMutedRef.current = nextMuted;
|
|
97
130
|
userMicOptInRef.current = !nextMuted;
|
|
98
131
|
applyMicState();
|
|
99
|
-
}, [voice.isVoiceActive, isVoiceConnecting, applyMicState]);
|
|
132
|
+
}, [voice.isVoiceActive, isVoiceConnecting, applyMicState, options]);
|
|
100
133
|
|
|
101
134
|
const clearAssistantMicHold = useCallback(() => {
|
|
102
135
|
assistantMicHoldRef.current = false;
|
|
@@ -107,8 +140,10 @@ export function useVoiceMicState(
|
|
|
107
140
|
|
|
108
141
|
return {
|
|
109
142
|
isMicMuted,
|
|
143
|
+
isCaptureActive,
|
|
110
144
|
isMicHeldForAssistant,
|
|
111
145
|
toggleVoiceMicMute,
|
|
146
|
+
muteAfterSilenceCommit,
|
|
112
147
|
prepareForVoiceConnect,
|
|
113
148
|
resetMicState,
|
|
114
149
|
clearAssistantMicHold,
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import type { VoiceSession, VoiceStatus } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
2
|
+
import { useCallback, useRef } from 'react';
|
|
3
|
+
import { ASSISTANT_MIC_HOLD_STATUSES } from './voiceMicConstants';
|
|
4
|
+
import {
|
|
5
|
+
MIC_BARGE_IN_RMS_THRESHOLD,
|
|
6
|
+
MIC_SILENCE_COMMIT_MS,
|
|
7
|
+
MIC_SILENCE_POLL_MS,
|
|
8
|
+
MIC_SPEECH_RMS_THRESHOLD,
|
|
9
|
+
} from './voiceSilenceConstants';
|
|
10
|
+
|
|
11
|
+
/**
 * Tracks microphone speech energy and commits the buffered input audio once the
 * user has been silent for `MIC_SILENCE_COMMIT_MS`.
 *
 * @param getSession Returns the currently bound voice session, or `null` when none is bound.
 * @param onMutedAfterCommit Invoked right after a silence-triggered commit so the caller can mute the mic.
 * @param voiceStatus Current voice status; selects which RMS threshold counts as speech.
 * @returns Poll controls (`startPoll`, `clearPoll`), turn reset, the RMS sink, and commit helpers.
 */
export function useVoiceSilenceCommit(
  getSession: () => VoiceSession | null,
  onMutedAfterCommit: () => void,
  voiceStatus: VoiceStatus,
) {
  const lastSpeechAtRef = useRef(0);
  const hadSpeechRef = useRef(false);
  const commitInFlightRef = useRef(false);
  const skipCommitOnMuteRef = useRef(false);
  const pollRef = useRef<ReturnType<typeof setInterval> | null>(null);

  // Mirror the latest props into refs so the memoized callbacks below never read stale values.
  const voiceStatusRef = useRef(voiceStatus);
  voiceStatusRef.current = voiceStatus;
  const onMutedAfterCommitRef = useRef(onMutedAfterCommit);
  onMutedAfterCommitRef.current = onMutedAfterCommit;

  /** Stops the silence poll if one is running. */
  const clearPoll = useCallback(() => {
    // Compare against null explicitly so a falsy timer handle can never be leaked.
    if (pollRef.current !== null) {
      clearInterval(pollRef.current);
      pollRef.current = null;
    }
  }, []);

  /**
   * Commits the buffered input audio when speech was detected during this turn;
   * otherwise discards the buffer. A `'silence'` commit also asks the caller to
   * mute the mic and flags the resulting mute so it does not commit a second time.
   */
  const tryCommit = useCallback(
    (reason: 'silence' | 'manual') => {
      const session = getSession();
      if (commitInFlightRef.current) return;
      if (!hadSpeechRef.current) {
        // Nothing above the speech threshold was heard: drop the buffered audio.
        session?.clearInputAudio?.();
        return;
      }
      commitInFlightRef.current = true;
      hadSpeechRef.current = false;
      session?.commitInputAudio?.();
      if (reason === 'silence') {
        // Set before notifying: the mute triggered by the callback must not commit again.
        skipCommitOnMuteRef.current = true;
        onMutedAfterCommitRef.current();
      }
      // Release the guard after the current synchronous work has finished.
      queueMicrotask(() => {
        commitInFlightRef.current = false;
      });
    },
    [getSession],
  );

  /** (Re)starts the interval that commits the turn once the silence window has elapsed. */
  const startPoll = useCallback(() => {
    clearPoll();
    pollRef.current = setInterval(() => {
      if (commitInFlightRef.current || !hadSpeechRef.current) return;
      if (Date.now() - lastSpeechAtRef.current < MIC_SILENCE_COMMIT_MS) return;
      tryCommit('silence');
    }, MIC_SILENCE_POLL_MS);
  }, [clearPoll, tryCommit]);

  /** Begins a fresh turn: forgets prior speech, restarts the silence clock, clears buffered audio. */
  const resetTurn = useCallback(() => {
    hadSpeechRef.current = false;
    lastSpeechAtRef.current = Date.now();
    // A skip flag left over from a previous turn (e.g. the session was torn down
    // before the mute consumed it) must not swallow this turn's manual commit.
    skipCommitOnMuteRef.current = false;
    getSession()?.clearInputAudio?.();
  }, [getSession]);

  /** RMS sink: records speech when the level reaches the threshold for the current status. */
  const onSpeechRms = useCallback((rms: number) => {
    // The barge-in threshold applies while the status is in ASSISTANT_MIC_HOLD_STATUSES.
    const threshold = ASSISTANT_MIC_HOLD_STATUSES.has(voiceStatusRef.current)
      ? MIC_BARGE_IN_RMS_THRESHOLD
      : MIC_SPEECH_RMS_THRESHOLD;
    if (rms >= threshold) {
      lastSpeechAtRef.current = Date.now();
      hadSpeechRef.current = true;
    }
  }, []);

  /** Returns whether the next mute should skip its commit, and clears the flag. */
  const consumeSkipCommitOnMute = useCallback(() => {
    const skip = skipCommitOnMuteRef.current;
    skipCommitOnMuteRef.current = false;
    return skip;
  }, []);

  return {
    startPoll,
    clearPoll,
    resetTurn,
    onSpeechRms,
    tryCommit,
    consumeSkipCommitOnMute,
  };
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Message,
|
|
3
|
+
VoiceDoneEvent,
|
|
4
|
+
VoiceTranscriptEvent,
|
|
5
|
+
} from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
6
|
+
import { mergeStreamingTranscript } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
7
|
+
import { useCallback, useRef } from 'react';
|
|
8
|
+
import type { InteractionMode } from '../context/AgentAssistantContext';
|
|
9
|
+
|
|
10
|
+
/** The slice of the agent API that the voice transcript hook reads and writes. */
type VoiceTranscriptAgentApi = {
  /** Returns the current message list. */
  getMessages: () => Message[];
  /** Replaces the message list with the given array. */
  setMessages: (messages: Message[]) => void;
  /** Re-syncs voice turn history; `last` is passed through by the caller (20 after each voice turn). */
  syncVoiceTurnHistory: (options?: { last?: number }) => Promise<void>;
};
|
|
15
|
+
|
|
16
|
+
/** Id prefix given to the in-progress assistant bubble of a voice turn. */
const STREAM_PREFIX = 'voice-stream-';

/**
 * Builds the metadata attached to messages produced by a realtime voice session.
 *
 * @param sessionId Voice session id, or `null` when none is known.
 * @returns `voiceRealtime: true` plus the session id (`undefined` when `sessionId` is `null`).
 */
function voiceMeta(sessionId: string | null) {
  const voiceSessionId = sessionId === null ? undefined : sessionId;
  return { voiceRealtime: true as const, voiceSessionId };
}
|
|
24
|
+
|
|
25
|
+
/**
 * Turns realtime voice transcript / done events into chat messages.
 *
 * Assistant transcript text is accumulated into a single streaming bubble per
 * turn and finalized when the turn's done event arrives; final user transcripts
 * are appended as user messages.
 *
 * @param api Message accessors and the history sync call.
 * @param interactionMode Events are ignored unless this is `'voice'`.
 * @param voiceSessionId Current voice session id; events are ignored while it is `null`.
 * @returns `handleTranscript`, `handleDone`, and `clearVoiceStream`.
 */
export function useVoiceTranscriptMessages(
  api: VoiceTranscriptAgentApi,
  interactionMode: InteractionMode,
  voiceSessionId: string | null,
) {
  // Id of the streaming assistant bubble for the turn in progress, if any.
  const streamIdRef = useRef<string | null>(null);
  // Latest session id, readable from memoized callbacks without re-creating them.
  const sessionIdRef = useRef(voiceSessionId);
  sessionIdRef.current = voiceSessionId;

  const isVoiceUiActive = useCallback(
    () => interactionMode === 'voice' && sessionIdRef.current != null,
    [interactionMode],
  );

  /** Creates the turn's streaming assistant bubble, or merges `text` into the existing one. */
  const upsertStreaming = useCallback(
    (text: string) => {
      const messages = api.getMessages();
      let streamId = streamIdRef.current;
      if (!streamId) {
        streamId = `${STREAM_PREFIX}${Date.now()}`;
        streamIdRef.current = streamId;
      }
      const idx = messages.findIndex((m) => m.id === streamId);
      const partialContent =
        idx >= 0
          ? mergeStreamingTranscript(messages[idx]?.partialContent ?? '', text)
          : text;
      const meta = voiceMeta(sessionIdRef.current);
      if (idx >= 0) {
        api.setMessages(
          messages.map((m, i) =>
            i === idx
              ? { ...m, partialContent, isStreaming: true, metadata: { ...m.metadata, ...meta } }
              : m,
          ),
        );
        return;
      }
      api.setMessages([
        ...messages,
        {
          id: streamId,
          role: 'assistant',
          content: '',
          partialContent,
          isStreaming: true,
          timestamp: new Date().toISOString(),
          metadata: meta,
        },
      ]);
    },
    [api],
  );

  /**
   * Ends the current assistant turn with its final text. Converts the turn's
   * streaming bubble into a finished message, or appends a new assistant message
   * when no streaming bubble exists (skipping an exact duplicate of the last one).
   */
  const finalizeAssistant = useCallback(
    (text: string, messageId?: string | null) => {
      const trimmed = text.trim();
      // Remember which bubble belongs to this turn before the ref is cleared.
      const activeStreamId = streamIdRef.current;
      streamIdRef.current = null;
      if (!trimmed) return;

      const messages = api.getMessages();
      // Prefer this turn's own bubble. A bubble left streaming by an earlier turn
      // (error / blocked / empty reply) would otherwise be matched first and be
      // overwritten while the real bubble stays in the streaming state.
      const ownIdx =
        activeStreamId === null ? -1 : messages.findIndex((m) => m.id === activeStreamId);
      const streamIdx =
        ownIdx >= 0
          ? ownIdx
          : messages.findIndex((m) => m.isStreaming && m.role === 'assistant');
      if (streamIdx >= 0) {
        api.setMessages(
          messages.map((m, i) =>
            i === streamIdx
              ? {
                  ...m,
                  id: messageId ?? m.id,
                  content: trimmed,
                  partialContent: undefined,
                  isStreaming: false,
                  metadata: { ...m.metadata, ...voiceMeta(sessionIdRef.current) },
                }
              : m,
          ),
        );
        return;
      }
      const last = messages[messages.length - 1];
      if (last?.role === 'assistant' && last.content === trimmed) return;
      api.setMessages([
        ...messages,
        {
          id: messageId ?? `voice-asst-${Date.now()}`,
          role: 'assistant',
          content: trimmed,
          timestamp: new Date().toISOString(),
          metadata: voiceMeta(sessionIdRef.current),
        },
      ]);
    },
    [api],
  );

  /** Handles one transcript event: streams assistant text, appends final user text. */
  const handleTranscript = useCallback(
    (event: VoiceTranscriptEvent) => {
      if (!isVoiceUiActive()) return;
      const text = event.text?.trim() ?? '';
      if (event.role === 'assistant') {
        // Keep one streaming bubble for the whole turn; finalize only in handleDone.
        if (text) upsertStreaming(text);
        return;
      }
      if (event.role === 'user' && !event.interim && text) {
        const messages = api.getMessages();
        const last = messages[messages.length - 1];
        // Skip an exact repeat of the most recent user message.
        if (last?.role === 'user' && last.content === text) return;
        api.setMessages([
          ...messages,
          {
            id: `voice-user-${Date.now()}`,
            role: 'user',
            content: text,
            timestamp: new Date().toISOString(),
            metadata: voiceMeta(sessionIdRef.current),
          },
        ]);
      }
    },
    [api, isVoiceUiActive, upsertStreaming],
  );

  /** Handles the end of a voice turn: finalizes the reply and re-syncs recent history. */
  const handleDone = useCallback(
    (event: VoiceDoneEvent) => {
      if (!isVoiceUiActive()) return;
      if (event.guardrailsBlocked || event.billingBlocked || event.error) {
        // NOTE(review): any bubble already streaming for this turn is left as-is here.
        streamIdRef.current = null;
        return;
      }
      const reply = event.replyText?.trim() ?? '';
      if (reply) {
        finalizeAssistant(reply, event.assistantMessageId ?? undefined);
      } else {
        streamIdRef.current = null;
      }
      void api.syncVoiceTurnHistory({ last: 20 }).catch((err) => {
        console.warn('[nxtlinq] syncVoiceTurnHistory after voice turn failed', err);
      });
    },
    [api, finalizeAssistant, isVoiceUiActive],
  );

  /** Forgets the current streaming bubble id so the next assistant text starts a new bubble. */
  const clearVoiceStream = useCallback(() => {
    streamIdRef.current = null;
  }, []);

  return { handleTranscript, handleDone, clearVoiceStream };
}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import type { VoiceSession, VoiceStatus } from '@bytexbyte/nxtlinq-ai-agent-core-development';
|
|
2
|
+
import type { UseNxtlinqVoiceOptions } from '@bytexbyte/nxtlinq-ai-agent-react-native-development';
|
|
3
|
+
import { waitForIOSAudioSessionReady } from '@bytexbyte/nxtlinq-ai-agent-react-native-development';
|
|
4
|
+
import { useCallback, useEffect, useRef } from 'react';
|
|
5
|
+
import { useVoiceSilenceCommit } from './useVoiceSilenceCommit';
|
|
6
|
+
import { WsPcmPlayer } from './wsPcmPlayer';
|
|
7
|
+
import { WsPcmRecorder } from './wsPcmRecorder';
|
|
8
|
+
|
|
9
|
+
/** The voice-hook callbacks that the WebSocket audio layer wraps with its own handling. */
type WsVoiceCallbacks = Pick<
  UseNxtlinqVoiceOptions,
  'onOpen' | 'onAudioDelta' | 'onClose' | 'onError'
>;

/** Options accepted by `useWsRealtimeAudio`. */
export type UseWsRealtimeAudioOptions = {
  /** Current voice status; forwarded to the silence-commit hook. */
  voiceStatus: VoiceStatus;
  /** Invoked after a silence-triggered commit so the mic state can be muted. */
  muteAfterSilenceCommit: () => void;
};
|
|
18
|
+
|
|
19
|
+
/**
 * Reports whether the app is running on iOS.
 *
 * `react-native` is resolved lazily; if it cannot be loaded or read, the
 * platform is treated as not iOS.
 */
function isIOS(): boolean {
  let os: string | undefined;
  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const reactNative = require('react-native') as { Platform: { OS: string } };
    os = reactNative.Platform.OS;
  } catch {
    os = undefined;
  }
  return os === 'ios';
}
|
|
28
|
+
|
|
29
|
+
/**
 * Drives PCM playback and microphone capture for a WebSocket realtime voice session.
 *
 * Capture starts and stops as `isCaptureActive` flips while voice is active, and
 * everything is torn down when voice becomes inactive or the component unmounts.
 *
 * @param isCaptureActive Whether the microphone should currently be capturing.
 * @param isVoiceActive Whether a voice session is active.
 * @param options Voice status and the mute callback used after a silence commit.
 * @returns Callback builder, session binding, cleanup, manual capture controls, and the output level getter.
 */
export function useWsRealtimeAudio(
  isCaptureActive: boolean,
  isVoiceActive: boolean,
  options: UseWsRealtimeAudioOptions,
) {
  const playerRef = useRef<WsPcmPlayer | null>(null);
  const recorderRef = useRef<WsPcmRecorder | null>(null);
  const sessionRef = useRef<VoiceSession | null>(null);
  const isCaptureActiveRef = useRef(isCaptureActive);
  isCaptureActiveRef.current = isCaptureActive;
  const prevCaptureActiveRef = useRef(isCaptureActive);
  const iosSessionPrimedRef = useRef(false);

  const getSession = useCallback(() => sessionRef.current, []);
  const muteAfterSilenceCommitRef = useRef(options.muteAfterSilenceCommit);
  muteAfterSilenceCommitRef.current = options.muteAfterSilenceCommit;

  const silence = useVoiceSilenceCommit(
    getSession,
    () => muteAfterSilenceCommitRef.current(),
    options.voiceStatus,
  );
  // Latest silence helpers, readable from callbacks that are memoized with no deps.
  const silenceRef = useRef(silence);
  silenceRef.current = silence;

  /** Lazily creates the PCM player and makes sure it is running. */
  const ensurePlayer = useCallback(async () => {
    if (!playerRef.current) {
      playerRef.current = new WsPcmPlayer();
      playerRef.current.prewarm();
    }
    await playerRef.current.ensureRunning();
  }, []);

  /**
   * Stops the recorder and the silence poll. When `commit` is true the buffered
   * turn is committed, unless a silence commit already handled it.
   */
  const stopCapture = useCallback((commit: boolean) => {
    const s = silenceRef.current;
    s.clearPoll();
    recorderRef.current?.stop();
    if (!commit) return;
    if (s.consumeSkipCommitOnMute()) return;
    s.tryCommit('manual');
  }, []);

  /** Starts microphone capture for the bound session and begins silence polling. */
  const startCapture = useCallback(async () => {
    const session = sessionRef.current;
    if (!session) {
      console.warn('[nxtlinq] startCapture skipped: voice session not bound');
      return;
    }
    if (isIOS() && !iosSessionPrimedRef.current) {
      try {
        await waitForIOSAudioSessionReady();
        iosSessionPrimedRef.current = true;
      } catch (err) {
        console.warn('[nxtlinq] waitForIOSAudioSessionReady failed', err);
      }
      // The session was unbound while waiting: nothing left to capture for.
      if (sessionRef.current === null) return;
    }
    const s = silenceRef.current;
    // Keep a local handle: recorderRef may be nulled by cleanup during the awaits below.
    const recorder = recorderRef.current ?? new WsPcmRecorder();
    recorderRef.current = recorder;
    playerRef.current?.clearQueue();
    recorder.bindSession(session);
    recorder.setOnRms(s.onSpeechRms);
    s.resetTurn();
    try {
      await recorder.initialize();
      // Torn down while initializing: do not start a recorder nobody owns.
      if (recorderRef.current !== recorder) return;
      await recorder.start();
      if (recorderRef.current !== recorder) {
        // Torn down while starting: stop the orphan and do not leave a poll running.
        try {
          recorder.stop();
        } catch {
          // Best effort; cleanup has already released this recorder.
        }
        return;
      }
      s.startPoll();
    } catch (err) {
      s.clearPoll();
      console.error('[nxtlinq] mic capture start failed', err);
    }
  }, []);

  /** Releases the recorder, player, and session binding and stops the silence poll. */
  const cleanup = useCallback(() => {
    const s = silenceRef.current;
    s.clearPoll();
    iosSessionPrimedRef.current = false;
    recorderRef.current?.stop();
    recorderRef.current?.cleanup();
    recorderRef.current = null;
    playerRef.current?.cleanup();
    playerRef.current = null;
    sessionRef.current = null;
  }, []);

  const stopCaptureRef = useRef(stopCapture);
  const startCaptureRef = useRef(startCapture);
  const cleanupRef = useRef(cleanup);
  stopCaptureRef.current = stopCapture;
  startCaptureRef.current = startCapture;
  cleanupRef.current = cleanup;

  /**
   * Binds (or unbinds, with `null`) the voice session. With `captureWhenUnmuted`,
   * capture starts immediately if capture is currently active.
   */
  const bindSession = useCallback(
    (session: VoiceSession | null, captureWhenUnmuted = false) => {
      sessionRef.current = session;
      recorderRef.current?.bindSession(session);
      if (session && captureWhenUnmuted && isCaptureActiveRef.current) {
        void startCaptureRef.current();
      }
    },
    [],
  );

  /** Wraps the caller's voice callbacks with player start-up, playback, and teardown. */
  const buildCallbacks = useCallback(
    (overrides?: Partial<WsVoiceCallbacks>): WsVoiceCallbacks => ({
      onOpen: () => {
        void ensurePlayer().catch((err) => {
          console.warn('[nxtlinq] audio player start failed', err);
        });
        overrides?.onOpen?.();
      },
      onAudioDelta: (pcm16) => {
        void ensurePlayer()
          .then(() => playerRef.current?.addAudio(pcm16))
          .catch((err) => {
            console.warn('[nxtlinq] audio playback failed', err);
          });
        overrides?.onAudioDelta?.(pcm16);
      },
      onClose: (reason) => {
        cleanupRef.current();
        overrides?.onClose?.(reason);
      },
      onError: (err) => {
        cleanupRef.current();
        overrides?.onError?.(err);
      },
    }),
    [ensurePlayer],
  );

  // Start / stop capture on isCaptureActive transitions; tear down when voice ends.
  useEffect(() => {
    if (!isVoiceActive) {
      prevCaptureActiveRef.current = false;
      cleanupRef.current();
      return;
    }
    const prev = prevCaptureActiveRef.current;
    prevCaptureActiveRef.current = isCaptureActive;
    if (isCaptureActive && !prev) {
      void startCaptureRef.current().catch((err) => {
        console.error('[nxtlinq] mic capture start failed', err);
      });
      return;
    }
    if (!isCaptureActive && prev) {
      stopCaptureRef.current(true);
    }
  }, [isCaptureActive, isVoiceActive]);

  // Pre-initialize the recorder as soon as voice becomes active.
  useEffect(() => {
    if (!isVoiceActive) return;
    if (!recorderRef.current) {
      recorderRef.current = new WsPcmRecorder();
    }
    void recorderRef.current.initialize().catch((err) => {
      console.warn('[nxtlinq] WsPcmRecorder prewarm failed', err);
    });
  }, [isVoiceActive]);

  // Release everything on unmount.
  useEffect(() => () => cleanupRef.current(), []);

  /** Returns the player's output level, falling back to the session's when the player reports none. */
  const getOutputAudioLevel = useCallback(() => {
    const fromPlayer = playerRef.current?.getAudioLevel() ?? 0;
    if (fromPlayer > 0) return fromPlayer;
    return sessionRef.current?.getOutputAudioLevel() ?? 0;
  }, []);

  return {
    buildCallbacks,
    bindSession,
    cleanup,
    beginCapture: startCapture,
    endCapture: stopCapture,
    getOutputAudioLevel,
  };
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** Longest allowed edge, in pixels, for an image attached over the voice channel. */
const VOICE_IMAGE_MAX_EDGE = 384;

/**
 * Image picker options for voice attachments, tuned to keep the payload small
 * for WS / SCTP voice channels (Berify-aligned).
 */
export const VOICE_IMAGE_PICKER_OPTIONS = {
  mediaType: 'photo' as const,
  multiple: false,
  compressImageMaxWidth: VOICE_IMAGE_MAX_EDGE,
  compressImageMaxHeight: VOICE_IMAGE_MAX_EDGE,
  compressImageQuality: 0.55,
  // iOS albums are often HEIC; force JPEG for an OpenAI-compatible MIME type.
  forceJpg: true,
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** How often, in milliseconds, the silence poll checks whether the turn should be committed. */
export const MIC_SILENCE_POLL_MS = 100;

/** Silence duration, in milliseconds, after which an open-mic turn is committed. */
export const MIC_SILENCE_COMMIT_MS = 400;

/** Minimum RMS level treated as user speech (about -42 dBFS). Berify-aligned. */
export const MIC_SPEECH_RMS_THRESHOLD = 0.008;

/** Higher RMS threshold used while the assistant is speaking, to reduce false triggers from echo. */
export const MIC_BARGE_IN_RMS_THRESHOLD = 0.045;
|