@bytexbyte/nxtlinq-ai-agent-web-development 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/dist/context/NxtlinqAgentContext.d.ts +12 -0
  2. package/dist/context/NxtlinqAgentContext.d.ts.map +1 -0
  3. package/dist/context/NxtlinqAgentContext.js +33 -0
  4. package/dist/createNxtlinqAgent.d.ts +9 -0
  5. package/dist/createNxtlinqAgent.d.ts.map +1 -0
  6. package/dist/createNxtlinqAgent.js +19 -0
  7. package/dist/hooks/useNxtlinqAgent.d.ts +18 -0
  8. package/dist/hooks/useNxtlinqAgent.d.ts.map +1 -0
  9. package/dist/hooks/useNxtlinqAgent.js +23 -0
  10. package/dist/hooks/useNxtlinqVoice.d.ts +21 -0
  11. package/dist/hooks/useNxtlinqVoice.d.ts.map +1 -0
  12. package/dist/hooks/useNxtlinqVoice.js +75 -0
  13. package/dist/index.d.ts +12 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +9 -0
  16. package/dist/legacy/api/nxtlinq-api.d.ts +8 -0
  17. package/dist/legacy/api/nxtlinq-api.d.ts.map +1 -0
  18. package/dist/legacy/api/nxtlinq-api.js +13 -0
  19. package/dist/legacy/api/voice.d.ts +11 -0
  20. package/dist/legacy/api/voice.d.ts.map +1 -0
  21. package/dist/legacy/api/voice.js +26 -0
  22. package/dist/legacy/core/lib/messageHistory.d.ts +2 -0
  23. package/dist/legacy/core/lib/messageHistory.d.ts.map +1 -0
  24. package/dist/legacy/core/lib/messageHistory.js +1 -0
  25. package/dist/legacy/core/lib/textToSpeech.d.ts +14 -0
  26. package/dist/legacy/core/lib/textToSpeech.d.ts.map +1 -0
  27. package/dist/legacy/core/lib/textToSpeech.js +82 -0
  28. package/dist/legacy/core/lib/useDraggable.d.ts +15 -0
  29. package/dist/legacy/core/lib/useDraggable.d.ts.map +1 -0
  30. package/dist/legacy/core/lib/useDraggable.js +158 -0
  31. package/dist/legacy/core/lib/useLocalStorage.d.ts +11 -0
  32. package/dist/legacy/core/lib/useLocalStorage.d.ts.map +1 -0
  33. package/dist/legacy/core/lib/useLocalStorage.js +83 -0
  34. package/dist/legacy/core/lib/useResizable.d.ts +17 -0
  35. package/dist/legacy/core/lib/useResizable.d.ts.map +1 -0
  36. package/dist/legacy/core/lib/useResizable.js +203 -0
  37. package/dist/legacy/core/lib/useSessionStorage.d.ts +11 -0
  38. package/dist/legacy/core/lib/useSessionStorage.d.ts.map +1 -0
  39. package/dist/legacy/core/lib/useSessionStorage.js +37 -0
  40. package/dist/legacy/core/lib/useSpeechToTextFromMic/helper.d.ts +26 -0
  41. package/dist/legacy/core/lib/useSpeechToTextFromMic/helper.d.ts.map +1 -0
  42. package/dist/legacy/core/lib/useSpeechToTextFromMic/helper.js +102 -0
  43. package/dist/legacy/core/lib/useSpeechToTextFromMic/index.d.ts +16 -0
  44. package/dist/legacy/core/lib/useSpeechToTextFromMic/index.d.ts.map +1 -0
  45. package/dist/legacy/core/lib/useSpeechToTextFromMic/index.js +92 -0
  46. package/dist/legacy/core/lib/useVoiceMode.d.ts +32 -0
  47. package/dist/legacy/core/lib/useVoiceMode.d.ts.map +1 -0
  48. package/dist/legacy/core/lib/useVoiceMode.js +373 -0
  49. package/dist/legacy/core/metakeepClient.d.ts +4 -0
  50. package/dist/legacy/core/metakeepClient.d.ts.map +1 -0
  51. package/dist/legacy/core/metakeepClient.js +10 -0
  52. package/dist/legacy/core/utils/aitUtils.d.ts +31 -0
  53. package/dist/legacy/core/utils/aitUtils.d.ts.map +1 -0
  54. package/dist/legacy/core/utils/aitUtils.js +35 -0
  55. package/dist/legacy/core/utils/ethersUtils.d.ts +8 -0
  56. package/dist/legacy/core/utils/ethersUtils.d.ts.map +1 -0
  57. package/dist/legacy/core/utils/ethersUtils.js +19 -0
  58. package/dist/legacy/core/utils/index.d.ts +3 -0
  59. package/dist/legacy/core/utils/index.d.ts.map +1 -0
  60. package/dist/legacy/core/utils/index.js +4 -0
  61. package/dist/legacy/core/utils/notificationUtils.d.ts +29 -0
  62. package/dist/legacy/core/utils/notificationUtils.d.ts.map +1 -0
  63. package/dist/legacy/core/utils/notificationUtils.js +47 -0
  64. package/dist/legacy/core/utils/urlUtils.d.ts +25 -0
  65. package/dist/legacy/core/utils/urlUtils.d.ts.map +1 -0
  66. package/dist/legacy/core/utils/urlUtils.js +135 -0
  67. package/dist/legacy/core/utils/walletTextUtils.d.ts +14 -0
  68. package/dist/legacy/core/utils/walletTextUtils.d.ts.map +1 -0
  69. package/dist/legacy/core/utils/walletTextUtils.js +23 -0
  70. package/dist/legacy/core/utils/walletUtils.d.ts +10 -0
  71. package/dist/legacy/core/utils/walletUtils.d.ts.map +1 -0
  72. package/dist/legacy/core/utils/walletUtils.js +38 -0
  73. package/dist/legacy/index.d.ts +19 -0
  74. package/dist/legacy/index.d.ts.map +1 -0
  75. package/dist/legacy/index.js +16 -0
  76. package/dist/ports/createWebPlatformPorts.d.ts +13 -0
  77. package/dist/ports/createWebPlatformPorts.d.ts.map +1 -0
  78. package/dist/ports/createWebPlatformPorts.js +25 -0
  79. package/dist/utils/fileToAttachment.d.ts +4 -0
  80. package/dist/utils/fileToAttachment.d.ts.map +1 -0
  81. package/dist/utils/fileToAttachment.js +28 -0
  82. package/dist/voice/useVoiceSilenceCommit.d.ts +11 -0
  83. package/dist/voice/useVoiceSilenceCommit.d.ts.map +1 -0
  84. package/dist/voice/useVoiceSilenceCommit.js +68 -0
  85. package/dist/voice/useVoiceTranscriptMessages.d.ts +16 -0
  86. package/dist/voice/useVoiceTranscriptMessages.d.ts.map +1 -0
  87. package/dist/voice/useVoiceTranscriptMessages.js +134 -0
  88. package/dist/voice/useWsRealtimeAudio.d.ts +18 -0
  89. package/dist/voice/useWsRealtimeAudio.d.ts.map +1 -0
  90. package/dist/voice/useWsRealtimeAudio.js +115 -0
  91. package/dist/voice/voiceMicConstants.d.ts +4 -0
  92. package/dist/voice/voiceMicConstants.d.ts.map +1 -0
  93. package/dist/voice/voiceMicConstants.js +10 -0
  94. package/dist/voice/ws/BrowserWsPcmPlayer.d.ts +23 -0
  95. package/dist/voice/ws/BrowserWsPcmPlayer.d.ts.map +1 -0
  96. package/dist/voice/ws/BrowserWsPcmPlayer.js +138 -0
  97. package/dist/voice/ws/BrowserWsPcmRecorder.d.ts +19 -0
  98. package/dist/voice/ws/BrowserWsPcmRecorder.d.ts.map +1 -0
  99. package/dist/voice/ws/BrowserWsPcmRecorder.js +76 -0
  100. package/dist/voice/ws/float32ToPcm16.d.ts +2 -0
  101. package/dist/voice/ws/float32ToPcm16.d.ts.map +1 -0
  102. package/dist/voice/ws/float32ToPcm16.js +8 -0
  103. package/dist/voice/ws/voiceSilenceConstants.d.ts +5 -0
  104. package/dist/voice/ws/voiceSilenceConstants.d.ts.map +1 -0
  105. package/dist/voice/ws/voiceSilenceConstants.js +4 -0
  106. package/dist/voice/ws/wsRealtimeConstants.d.ts +2 -0
  107. package/dist/voice/ws/wsRealtimeConstants.d.ts.map +1 -0
  108. package/dist/voice/ws/wsRealtimeConstants.js +1 -0
  109. package/dist/webAgentDefaults.d.ts +9 -0
  110. package/dist/webAgentDefaults.d.ts.map +1 -0
  111. package/dist/webAgentDefaults.js +9 -0
  112. package/package.json +55 -0
  113. package/src/context/NxtlinqAgentContext.tsx +79 -0
  114. package/src/createNxtlinqAgent.ts +36 -0
  115. package/src/hooks/useNxtlinqAgent.ts +73 -0
  116. package/src/hooks/useNxtlinqVoice.ts +143 -0
  117. package/src/index.ts +84 -0
  118. package/src/legacy/api/nxtlinq-api.ts +32 -0
  119. package/src/legacy/api/voice.ts +72 -0
  120. package/src/legacy/core/lib/messageHistory.ts +6 -0
  121. package/src/legacy/core/lib/textToSpeech.ts +127 -0
  122. package/src/legacy/core/lib/useDraggable.ts +193 -0
  123. package/src/legacy/core/lib/useLocalStorage.ts +89 -0
  124. package/src/legacy/core/lib/useResizable.ts +256 -0
  125. package/src/legacy/core/lib/useSessionStorage.ts +43 -0
  126. package/src/legacy/core/lib/useSpeechToTextFromMic/helper.ts +132 -0
  127. package/src/legacy/core/lib/useSpeechToTextFromMic/index.ts +126 -0
  128. package/src/legacy/core/lib/useVoiceMode.ts +407 -0
  129. package/src/legacy/core/metakeepClient.ts +12 -0
  130. package/src/legacy/core/utils/aitUtils.ts +55 -0
  131. package/src/legacy/core/utils/ethersUtils.ts +24 -0
  132. package/src/legacy/core/utils/index.ts +5 -0
  133. package/src/legacy/core/utils/notificationUtils.ts +64 -0
  134. package/src/legacy/core/utils/urlUtils.ts +160 -0
  135. package/src/legacy/core/utils/walletTextUtils.ts +26 -0
  136. package/src/legacy/core/utils/walletUtils.ts +53 -0
  137. package/src/legacy/index.ts +35 -0
  138. package/src/ports/createWebPlatformPorts.ts +44 -0
  139. package/src/utils/fileToAttachment.ts +32 -0
  140. package/src/voice/useVoiceSilenceCommit.ts +84 -0
  141. package/src/voice/useVoiceTranscriptMessages.ts +184 -0
  142. package/src/voice/useWsRealtimeAudio.ts +141 -0
  143. package/src/voice/voiceMicConstants.ts +13 -0
  144. package/src/voice/ws/BrowserWsPcmPlayer.ts +139 -0
  145. package/src/voice/ws/BrowserWsPcmRecorder.ts +83 -0
  146. package/src/voice/ws/float32ToPcm16.ts +8 -0
  147. package/src/voice/ws/voiceSilenceConstants.ts +4 -0
  148. package/src/voice/ws/wsRealtimeConstants.ts +1 -0
  149. package/src/webAgentDefaults.ts +12 -0
@@ -0,0 +1,132 @@
1
+ import {
2
+ AudioConfig,
3
+ CancellationReason,
4
+ PropertyId,
5
+ ResultReason,
6
+ SpeechConfig,
7
+ SpeechRecognizer,
8
+ } from 'microsoft-cognitiveservices-speech-sdk';
9
+ import { Dispatch, SetStateAction } from 'react';
10
+ import Cookie from 'universal-cookie';
11
+ import { createNxtlinqApi } from '../../../api/nxtlinq-api';
12
+
/**
 * Start continuous speech recognition on the default microphone.
 *
 * Obtains a speech token via `getTokenOrRefresh`, configures an `en-US`
 * recognizer and wires its `recognizing` / `recognized` / `canceled` events.
 *
 * @param setSpeechToTextArray - receives a one-element array holding the latest finalized sentence
 * @param config - API credentials forwarded to `getTokenOrRefresh`
 * @param historyRef - every accepted finalized sentence is written to `historyRef.current`
 * @param indexRef - next write position inside `historyRef.current`; incremented per accepted sentence
 * @param setPartialTranscript - optional updater for the in-progress (recognizing) text;
 *   reset to `''` whenever a sentence is finalized
 * @returns the running recognizer, or `undefined` when no token/region could be obtained
 * @throws rejects with the SDK's error value when continuous recognition cannot be started;
 *   the recognizer is closed before the rejection propagates
 */
export const startSpeechToTextFromMic = async (
  setSpeechToTextArray: Dispatch<SetStateAction<string[]>>, // finalized sentences
  config: { apiKey: string; apiSecret: string },
  historyRef: React.MutableRefObject<string[]>,
  indexRef: React.MutableRefObject<number>,
  setPartialTranscript?: Dispatch<SetStateAction<string>> // temporary partial transcription state updater
): Promise<SpeechRecognizer | undefined> => {
  const tokenRes = await getTokenOrRefresh(config.apiKey, config.apiSecret);
  if (!tokenRes.authToken || !tokenRes.region) {
    console.error('Speech token retrieval failed:', tokenRes.error);
    return;
  }

  const speechConfig = SpeechConfig.fromAuthorizationToken(tokenRes.authToken, tokenRes.region);
  speechConfig.speechRecognitionLanguage = 'en-US';

  // silence detection and segmentation configuration
  speechConfig.setProperty(PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, '10000');
  speechConfig.setProperty(PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, '86400000');
  speechConfig.setProperty(PropertyId.Speech_SegmentationSilenceTimeoutMs, '3000');

  const audioConfig = AudioConfig.fromDefaultMicrophoneInput();
  const recognizer = new SpeechRecognizer(speechConfig, audioConfig);

  // temporary partial transcription
  recognizer.recognizing = (_s, e) => {
    if (setPartialTranscript) {
      setPartialTranscript(e.result.text);
    }
  };

  // finalized sentences
  recognizer.recognized = (_s, e) => {
    if (e.result.reason !== ResultReason.RecognizedSpeech) {
      return;
    }

    const text = e.result.text.trim();

    // Filter out very short text (likely background noise or false positives).
    // Minimum 3 characters to reduce sensitivity to background sounds.
    if (text.length < 3) {
      return;
    }

    // Check confidence if available (some SDK versions provide this).
    // Results below 0.4 are treated as background noise and dropped.
    const confidence = (e.result as any).confidence;
    if (confidence !== undefined && confidence < 0.4) {
      return;
    }

    historyRef.current[indexRef.current] = text;
    indexRef.current += 1;

    // return only the latest finalized sentence to the UI
    setSpeechToTextArray([text]);

    if (setPartialTranscript) setPartialTranscript('');
  };

  recognizer.canceled = (_s, e) => {
    console.warn('Speech recognition canceled:', e.reason);
    if (e.reason === CancellationReason.Error) {
      console.error(`Error code: ${e.errorCode}`);
      console.error(`Error details: ${e.errorDetails}`);
    }
  };

  try {
    await new Promise<void>((resolve, reject) => {
      recognizer.startContinuousRecognitionAsync(
        () => resolve(),
        error => {
          console.error('Failed to start continuous speech recognition:', error);
          reject(error);
        }
      );
    });
  } catch (error) {
    // The caller never receives this recognizer, so nobody else can release it:
    // close it here (best-effort) before surfacing the original failure.
    try {
      recognizer.close();
    } catch (closeError) {
      console.warn('Failed to close speech recognizer after start failure:', closeError);
    }
    throw error;
  }

  return recognizer;
};
98
+
/**
 * Stop speech recognition and release the recognizer.
 *
 * Requests the end of continuous recognition and, once the SDK reports
 * completion (or failure), closes the recognizer so it is not left alive.
 * This is a best-effort operation: failures are logged and never thrown.
 *
 * @param recognizer - recognizer to stop; `undefined` is a no-op
 */
export const stopRecognition = (recognizer: SpeechRecognizer | undefined) => {
  if (!recognizer) {
    return;
  }

  // Guard so the recognizer is closed exactly once, whichever path finishes first.
  let closed = false;
  const closeRecognizer = () => {
    if (closed) return;
    closed = true;
    try {
      recognizer.close();
    } catch (err) {
      console.warn('Failed to close speech recognizer:', err);
    }
  };

  try {
    recognizer.stopContinuousRecognitionAsync(
      closeRecognizer,
      error => {
        console.error('Failed to stop continuous speech recognition:', error);
        closeRecognizer();
      }
    );
  } catch (err) {
    // e.g. the recognizer was already disposed; still make sure it is closed.
    console.error('Failed to stop continuous speech recognition:', err);
    closeRecognizer();
  }
};
107
+
/**
 * Return a speech auth token and region, reusing the cached `speech-token`
 * cookie when present and otherwise requesting a new token from the API.
 *
 * The cookie value is stored as `<region>:<token>` with a 540-second max age.
 * A cached value that does not match that shape is ignored and a fresh token
 * is requested instead.
 *
 * @param apiKey - API key passed to `createNxtlinqApi`
 * @param apiSecret - API secret passed to `createNxtlinqApi`
 * @returns `{ authToken, region }` on success, or `{ authToken: null, error }`
 *   when the token request fails; this function does not throw for request failures
 */
export async function getTokenOrRefresh(apiKey: string, apiSecret: string) {
  const nxtlinqApi = createNxtlinqApi(apiKey, apiSecret);
  const cookie = new Cookie();
  const speechToken = cookie.get('speech-token');

  if (typeof speechToken === 'string') {
    const idx = speechToken.indexOf(':');
    // Only trust the cache when both the region and the token part are non-empty.
    if (idx > 0 && idx < speechToken.length - 1) {
      return { authToken: speechToken.slice(idx + 1), region: speechToken.slice(0, idx) };
    }
  }

  try {
    const res = await nxtlinqApi.cognitive.getCognitiveToken();
    if ('error' in res) {
      throw new Error(res.error);
    }
    const token = res.token;
    const region = res.region;
    cookie.set('speech-token', region + ':' + token, { maxAge: 540, path: '/' });

    return { authToken: token, region: region };
  } catch (e) {
    // Not every failure carries an HTTP-style `response.data` payload (the
    // Error thrown above does not), so fall back to the error's own message.
    const err = e as { response?: { data?: string } } | null | undefined;
    const message = err?.response?.data ?? (e instanceof Error ? e.message : String(e));
    return { authToken: null, error: message };
  }
}
@@ -0,0 +1,126 @@
1
+ import { SpeechRecognizer } from 'microsoft-cognitiveservices-speech-sdk';
2
+ import { useCallback, useEffect, useRef, useState } from 'react';
3
+ import { startSpeechToTextFromMic, stopRecognition } from './helper';
4
+
/** Options accepted by `useSpeechToTextFromMic`. */
interface Props {
  apiKey: string;
  apiSecret: string;
  /** When true (the default), a published transcript is cleared again after 100 ms. */
  autoClearTranscript?: boolean;
}

/** Value returned by `useSpeechToTextFromMic`. */
type UseSpeechToTextFromMicResult = {
  start: () => Promise<void>;
  stop: () => void;
  clear: () => void;
  isMicEnabled: boolean;
  transcript: string;
  partialTranscript: string;
};

/**
 * React hook that drives microphone speech-to-text through the helpers in `./helper`.
 *
 * - `start` clears previous state, requests a screen wake lock (when supported)
 *   and starts continuous recognition. It rethrows start failures. When the
 *   helper returns no recognizer, the mic is reported as disabled and the wake
 *   lock is released.
 * - `stop` stops recognition and releases the wake lock.
 * - `clear` resets the sentence history and both transcripts.
 * - On unmount, any running recognizer is stopped and the wake lock released.
 */
export function useSpeechToTextFromMic({
  apiKey,
  apiSecret,
  autoClearTranscript = true,
}: Props): UseSpeechToTextFromMicResult {
  const [isMicEnabled, setIsMicEnabled] = useState(false);
  const [transcriptArray, setTranscriptArray] = useState<string[]>([]);
  const [partialTranscript, setPartialTranscript] = useState('');
  const [recognizer, setRecognizer] = useState<SpeechRecognizer | undefined>();

  const wakelock = useRef<WakeLockSentinel | undefined>(undefined);
  // Mirrors the `recognizer` state so the unmount cleanup can reach the live instance.
  const recognizerRef = useRef<SpeechRecognizer | undefined>(undefined);
  const isMountedRef = useRef(true);
  const historyRef = useRef<string[]>([]);
  const indexRef = useRef<number>(0);

  const lockWakeState = useCallback(async () => {
    if (typeof navigator === 'undefined' || !('wakeLock' in navigator)) return;
    try {
      wakelock.current = await navigator.wakeLock.request('screen');
      wakelock.current.addEventListener('release', () => {
        // Wake lock was released by the system
      });
    } catch (err) {
      console.error('Wake lock error:', err);
    }
  }, []);

  // Release the wake lock (if any) without leaving the returned promise unhandled.
  const releaseWakeLock = useCallback(() => {
    const sentinel = wakelock.current;
    wakelock.current = undefined;
    if (!sentinel) return;
    void sentinel.release().catch(err => {
      console.error('Wake lock release error:', err);
    });
  }, []);

  const clear = useCallback(() => {
    historyRef.current = [];
    indexRef.current = 0;
    setTranscriptArray([]);
    setPartialTranscript('');
  }, [setPartialTranscript, setTranscriptArray]);

  const start = useCallback(async () => {
    clear();
    await lockWakeState();
    try {
      const recognizerInstance = await startSpeechToTextFromMic(
        setTranscriptArray,
        { apiKey, apiSecret },
        historyRef,
        indexRef,
        setPartialTranscript
      );

      if (!recognizerInstance) {
        // The helper returns undefined when no speech token could be obtained
        // (it logs the reason itself); nothing is listening, so do not report
        // the mic as enabled or keep the screen awake.
        releaseWakeLock();
        setIsMicEnabled(false);
        return;
      }

      if (!isMountedRef.current) {
        // The component went away while recognition was starting.
        stopRecognition(recognizerInstance);
        releaseWakeLock();
        return;
      }

      recognizerRef.current = recognizerInstance;
      setRecognizer(recognizerInstance);
      setIsMicEnabled(true);
    } catch (error) {
      releaseWakeLock();
      setIsMicEnabled(false);
      console.error('Failed to start speech recognition:', error);
      throw error;
    }
  }, [apiKey, apiSecret, clear, lockWakeState, releaseWakeLock, setPartialTranscript, setTranscriptArray]);

  const stop = useCallback(() => {
    releaseWakeLock();
    stopRecognition(recognizer);
    recognizerRef.current = undefined;
    setIsMicEnabled(false);
    setRecognizer(undefined);
  }, [recognizer, releaseWakeLock, setRecognizer]);

  // Keep UI state in sync if recognizer stops or gets canceled by the SDK/browser
  useEffect(() => {
    if (!recognizer) return;

    const prevCanceled = recognizer.canceled;
    const prevSessionStopped = recognizer.sessionStopped;

    recognizer.canceled = (s, e) => {
      setIsMicEnabled(false);
      if (prevCanceled) prevCanceled(s, e);
    };

    recognizer.sessionStopped = (s, e) => {
      setIsMicEnabled(false);
      if (prevSessionStopped) prevSessionStopped(s, e);
    };

    return () => {
      // No explicit detach API; restoring previous handlers to avoid leaks
      if (recognizer) {
        recognizer.canceled = prevCanceled;
        recognizer.sessionStopped = prevSessionStopped;
      }
    };
  }, [recognizer]);

  useEffect(() => {
    if (autoClearTranscript && transcriptArray.length > 0) {
      const timer = setTimeout(() => {
        setTranscriptArray([]);
      }, 100);
      return () => clearTimeout(timer);
    }
  }, [transcriptArray, autoClearTranscript]);

  // Stop listening and release the wake lock when the component unmounts.
  useEffect(() => {
    isMountedRef.current = true;
    return () => {
      isMountedRef.current = false;
      stopRecognition(recognizerRef.current);
      recognizerRef.current = undefined;
      releaseWakeLock();
    };
  }, [releaseWakeLock]);

  const transcript = transcriptArray.join(' ');

  return {
    start,
    stop,
    clear,
    isMicEnabled,
    transcript,
    partialTranscript,
  };
}
@@ -0,0 +1,407 @@
1
+ import * as React from 'react';
2
+ import { flushSync } from 'react-dom';
3
+ import type { AITApi, Message, VoiceTransport } from '@bytexbyte/nxtlinq-ai-agent-core-development';
4
+ import {
5
+ mergeStreamingTranscript,
6
+ normalizeTranscriptKey,
7
+ type VoiceDoneEvent,
8
+ type VoiceSession,
9
+ type VoiceStatus,
10
+ type VoiceTranscriptEvent,
11
+ } from '../../api/voice';
12
+ import { appendServerHistoryIntoMessages } from './messageHistory';
13
+ import { useVoiceTranscriptMessages } from '../../../voice/useVoiceTranscriptMessages';
14
+ import { useWsRealtimeAudio } from '../../../voice/useWsRealtimeAudio';
15
+
/**
 * Window (ms) opened by `resetMicAfterTurn`. While it is open, user
 * transcripts are ignored and a `listening` status is dropped when the mic is
 * muted and no turn is active.
 */
const TURN_SETTLE_MS = 1500;
/** Default `last` value used when syncing message history after a voice turn. */
const VOICE_HISTORY_SYNC_LAST = 20;
/** Delay (ms) between attempts to attach the remote audio stream. */
const REMOTE_AUDIO_ATTACH_RETRY_MS = 100;
/** Upper bound on attach attempts so the retry loop cannot run forever. */
const REMOTE_AUDIO_ATTACH_MAX_ATTEMPTS = 100;

/** Statuses during which incoming user transcripts are ignored. */
const USER_TRANSCRIPT_BLOCKED: ReadonlySet<VoiceStatus> = new Set([
  'thinking', 'generating', 'speaking',
]);

/** Statuses during which the assistant holds the microphone muted. */
const ASSISTANT_MIC_HOLD_STATUSES: ReadonlySet<VoiceStatus> = new Set([
  'transcribing', 'thinking', 'generating', 'speaking',
]);

/** Options accepted by `useVoiceMode`. */
export type UseVoiceModeOptions = {
  apiKey: string;
  apiSecret: string;
  pseudoId: string;
  externalId?: string;
  aitId?: string;
  walletAddress?: string;
  aitToken?: string;
  metadata?: Record<string, unknown>;
  nxtlinqApi: AITApi;
  getMessages: () => Message[];
  setMessages: React.Dispatch<React.SetStateAction<Message[]>>;
  onError?: (error: Error) => void;
  /** Called when entering voice mode, before the session is started. */
  stopRecording: () => void;
  /** Called when entering voice mode, before the session is started. */
  stopTextToSpeech: () => void;
  /** Defaults to `'ws-realtime'`. */
  voiceTransport?: VoiceTransport;
};

/**
 * React hook that manages a realtime voice session: connecting and tearing
 * down the session, tracking the voice status, muting/unmuting the microphone
 * and forwarding transcript/done events to the message list.
 *
 * Microphone muting is the OR of two flags: the user's own mute choice
 * (`userMicMutedRef`) and the assistant hold (`assistantMicHoldRef`), which is
 * raised for the statuses in `ASSISTANT_MIC_HOLD_STATUSES`.
 *
 * @returns voice-mode state (`isVoiceMode`, `voiceStatus`, `isVoiceConnecting`,
 *   `isMicMuted`), the `remoteAudioRef` to attach to an `<audio>` element, and
 *   the `enterVoiceMode` / `exitVoiceMode` / `toggleMicMute` / `interruptVoice` actions
 */
export function useVoiceMode({
  apiKey,
  apiSecret,
  pseudoId,
  externalId,
  aitId,
  walletAddress,
  aitToken,
  metadata,
  nxtlinqApi,
  getMessages,
  setMessages,
  onError,
  stopRecording,
  stopTextToSpeech,
  voiceTransport = 'ws-realtime',
}: UseVoiceModeOptions) {
  const [isVoiceMode, setIsVoiceMode] = React.useState(false);
  const [voiceStatus, setVoiceStatus] = React.useState<VoiceStatus>('idle');
  const [isVoiceConnecting, setIsVoiceConnecting] = React.useState(false);
  const [isMicMuted, setIsMicMuted] = React.useState(true);
  const [isMicCaptureActive, setIsMicCaptureActive] = React.useState(false);
  const [voiceSessionId, setVoiceSessionId] = React.useState<string | null>(null);

  const sessionRef = React.useRef<VoiceSession | null>(null);
  const remoteAudioRef = React.useRef<HTMLAudioElement | null>(null);
  // Mirror of `voiceStatus` readable from event handlers without re-rendering.
  const voiceStatusRef = React.useRef<VoiceStatus>('idle');
  const activeTurnRef = React.useRef(false);
  const historyRefreshInFlightRef = React.useRef<Promise<void> | null>(null);
  const turnUserTextRef = React.useRef('');
  const lastCommittedUserRef = React.useRef('');
  const userMicMutedRef = React.useRef(true);
  const userMicOptInRef = React.useRef(false);
  const assistantMicHoldRef = React.useRef(false);
  // Incremented on every connect attempt and on exit; a connect attempt whose
  // captured generation no longer matches is treated as cancelled.
  const voiceConnectGenerationRef = React.useRef(0);
  const connectInFlightRef = React.useRef(false);
  const turnSettleUntilRef = React.useRef(0);

  // Fetch recent history and merge it into the message list. Concurrent calls
  // share the single in-flight request.
  const syncVoiceTurnHistory = React.useCallback(async (last = VOICE_HISTORY_SYNC_LAST) => {
    if (historyRefreshInFlightRef.current) return historyRefreshInFlightRef.current;
    const run = (async () => {
      const result = await nxtlinqApi.agent.getMessageHistory({
        apiKey, apiSecret, pseudoId, externalId, last,
      });
      if ('error' in result) {
        onError?.(new Error(result.error));
        return;
      }
      flushSync(() => {
        setMessages((prev) => appendServerHistoryIntoMessages(prev, result.messages));
      });
    })().finally(() => { historyRefreshInFlightRef.current = null; });
    historyRefreshInFlightRef.current = run;
    return run;
  }, [apiKey, apiSecret, pseudoId, externalId, nxtlinqApi, setMessages, onError]);

  const voiceSessionIdRef = React.useRef<string | null>(null);

  const transcriptApi = React.useMemo(() => ({
    getMessages,
    setMessages: (messages: Message[]) => setMessages(messages),
    syncVoiceTurnHistory: (opts?: { last?: number }) => syncVoiceTurnHistory(opts?.last),
  }), [getMessages, setMessages, syncVoiceTurnHistory]);

  const { handleTranscript: handleTranscriptUi, handleDone: handleDoneUi, clearVoiceStream } =
    useVoiceTranscriptMessages(transcriptApi, 'voice', voiceSessionId, () => voiceSessionIdRef.current);

  // End the current turn: remember the spoken text for duplicate suppression,
  // open the settle window and mute the mic until the user opts in again.
  // `applyMicStateRef` is declared further down; it is only read when this
  // callback runs, by which time it has been initialised.
  const resetMicAfterTurn = React.useCallback(() => {
    lastCommittedUserRef.current = turnUserTextRef.current.trim();
    turnUserTextRef.current = '';
    activeTurnRef.current = false;
    turnSettleUntilRef.current = Date.now() + TURN_SETTLE_MS;
    userMicMutedRef.current = true;
    userMicOptInRef.current = false;
    setIsMicCaptureActive(false);
    applyMicStateRef.current();
  }, []);

  // Push the effective mute state (user mute OR assistant hold) to the session
  // and to React state.
  const applyMicState = React.useCallback(() => {
    const shouldMute = userMicMutedRef.current || assistantMicHoldRef.current;
    sessionRef.current?.muteMic(shouldMute);
    setIsMicMuted(shouldMute);
    const captureActive = isVoiceMode && !shouldMute && userMicOptInRef.current;
    setIsMicCaptureActive(captureActive);
  }, [isVoiceMode]);

  // Always points at the latest `applyMicState` so stable callbacks can call it.
  const applyMicStateRef = React.useRef(applyMicState);
  applyMicStateRef.current = applyMicState;

  const wsAudio = useWsRealtimeAudio(isMicCaptureActive, isVoiceMode, {
    voiceStatus,
    muteAfterSilenceCommit: () => {
      userMicMutedRef.current = true;
      userMicOptInRef.current = false;
      setIsMicCaptureActive(false);
      applyMicStateRef.current();
    },
  });
  const wsAudioRef = React.useRef(wsAudio);
  wsAudioRef.current = wsAudio;

  const setAssistantMicHold = React.useCallback((held: boolean) => {
    if (assistantMicHoldRef.current === held) return;
    assistantMicHoldRef.current = held;
    applyMicState();
  }, [applyMicState]);

  // Attach the session's remote audio stream to the <audio> element.
  // Returns true once attached; always false for the 'ws-realtime' transport.
  const attachRemoteAudio = React.useCallback(() => {
    if (voiceTransport === 'ws-realtime') return false;
    const session = sessionRef.current;
    const audio = remoteAudioRef.current;
    if (!session || !audio) return false;
    const stream = session.getRemoteAudioStream();
    if (stream) {
      audio.srcObject = stream;
      void audio.play().catch(() => {});
      return true;
    }
    return false;
  }, [voiceTransport]);

  // Retry `attachRemoteAudio` until it succeeds. The loop stops as soon as the
  // owning connect attempt is cancelled and gives up after a bounded number of
  // attempts, so a session that never yields a stream (or that has been torn
  // down) cannot keep a timer re-arming indefinitely.
  const scheduleRemoteAudioAttach = React.useCallback((isCancelled: () => boolean) => {
    let attemptsLeft = REMOTE_AUDIO_ATTACH_MAX_ATTEMPTS;
    const tryAttach = () => {
      if (isCancelled()) return;
      if (attachRemoteAudio()) return;
      attemptsLeft -= 1;
      if (attemptsLeft <= 0) return;
      window.setTimeout(tryAttach, REMOTE_AUDIO_ATTACH_RETRY_MS);
    };
    tryAttach();
  }, [attachRemoteAudio]);

  const beginTurn = React.useCallback(() => { activeTurnRef.current = true; }, []);

  // Track the user's text for the current turn. Transcripts are ignored while
  // settling, while the mic is muted or held, or during blocked statuses.
  const handleTranscriptTurn = React.useCallback((event: VoiceTranscriptEvent) => {
    if (event.role !== 'user') return;
    if (Date.now() < turnSettleUntilRef.current) return;
    if (userMicMutedRef.current) return;
    if (!userMicOptInRef.current && !activeTurnRef.current) return;
    if (assistantMicHoldRef.current || USER_TRANSCRIPT_BLOCKED.has(voiceStatusRef.current)) return;
    if (!activeTurnRef.current) beginTurn();
    // A non-empty final transcript replaces the accumulated text; anything
    // else is merged into it.
    const merged = (event.interim === false && event.text.trim())
      ? event.text.trim()
      : mergeStreamingTranscript(turnUserTextRef.current, event.text);
    turnUserTextRef.current = merged;
    // Drop a transcript that merely repeats the previous turn's committed text.
    if (lastCommittedUserRef.current &&
        normalizeTranscriptKey(merged) === normalizeTranscriptKey(lastCommittedUserRef.current)) {
      turnUserTextRef.current = '';
      return;
    }
    if (!event.interim && event.text.trim()) lastCommittedUserRef.current = '';
  }, [beginTurn]);

  const handleStatus = React.useCallback((status: VoiceStatus) => {
    const prev = voiceStatusRef.current;
    // Ignore a 'listening' status that arrives inside the settle window while
    // the mic is muted and no turn is active.
    if (status === 'listening' && userMicMutedRef.current && !activeTurnRef.current &&
        Date.now() < turnSettleUntilRef.current) return;
    voiceStatusRef.current = status;
    setVoiceStatus(status);
    if (ASSISTANT_MIC_HOLD_STATUSES.has(status)) setAssistantMicHold(true);
    else if (status === 'listening' || status === 'idle') setAssistantMicHold(false);
    if (status === 'transcribing' && prev === 'listening') beginTurn();
    if (status === 'generating' && prev !== 'generating') beginTurn();
    if (status === 'listening' && prev === 'speaking') resetMicAfterTurn();
  }, [beginTurn, resetMicAfterTurn, setAssistantMicHold]);

  const handleDone = React.useCallback((event: VoiceDoneEvent) => {
    if (event.guardrailsBlocked || event.billingBlocked) {
      activeTurnRef.current = false;
      return;
    }
    if (event.error) {
      // NOTE(review): the turn state is left untouched on this path (no
      // resetMicAfterTurn) — confirm that is intended.
      onError?.(new Error(event.error));
      return;
    }
    handleDoneUi(event);
    resetMicAfterTurn();
  }, [handleDoneUi, onError, resetMicAfterTurn]);

  // Session callbacks are registered once per session; routing them through
  // this ref lets them reach the latest handlers (see the effect below).
  const voiceHandlersRef = React.useRef({
    onStatus: (_status: VoiceStatus) => {},
    onTranscript: (_event: VoiceTranscriptEvent) => {},
    onDone: (_event: VoiceDoneEvent) => {},
  });

  // Tear down the current session (if any) and reset all voice state to idle.
  // `reason` is accepted for call-site readability and is not used here.
  const stopVoiceSession = React.useCallback(async (reason = 'unknown') => {
    const session = sessionRef.current;
    if (!session && !voiceSessionIdRef.current) return;
    sessionRef.current = null;
    voiceSessionIdRef.current = null;
    setVoiceSessionId(null);
    clearVoiceStream();
    if (voiceTransport === 'ws-realtime') wsAudioRef.current.bindSession(null);
    if (session) {
      try { await session.stop(); } catch { /* best-effort */ }
    }
    const audio = remoteAudioRef.current;
    if (audio) audio.srcObject = null;
    voiceStatusRef.current = 'idle';
    setVoiceStatus('idle');
    userMicMutedRef.current = true;
    userMicOptInRef.current = false;
    assistantMicHoldRef.current = false;
    turnSettleUntilRef.current = 0;
    setIsMicMuted(true);
    setIsMicCaptureActive(false);
    lastCommittedUserRef.current = '';
    turnUserTextRef.current = '';
    activeTurnRef.current = false;
  }, [clearVoiceStream, voiceTransport]);

  const stopVoiceSessionRef = React.useRef(stopVoiceSession);
  stopVoiceSessionRef.current = stopVoiceSession;

  const exitVoiceMode = React.useCallback(async () => {
    // Bumping the generation cancels any connect attempt still in flight.
    voiceConnectGenerationRef.current += 1;
    setIsVoiceMode(false);
    setIsVoiceConnecting(false);
    await stopVoiceSession('exitVoiceMode');
  }, [stopVoiceSession]);

  const enterVoiceMode = React.useCallback(async () => {
    if (connectInFlightRef.current || sessionRef.current) return;
    connectInFlightRef.current = true;
    const connectGeneration = voiceConnectGenerationRef.current + 1;
    voiceConnectGenerationRef.current = connectGeneration;
    const isConnectCancelled = () => connectGeneration !== voiceConnectGenerationRef.current;

    stopRecording();
    stopTextToSpeech();
    setIsVoiceMode(true);
    setIsVoiceConnecting(true);
    clearVoiceStream();
    lastCommittedUserRef.current = '';
    turnUserTextRef.current = '';
    voiceStatusRef.current = 'idle';
    setVoiceStatus('idle');
    userMicMutedRef.current = true;
    userMicOptInRef.current = false;
    assistantMicHoldRef.current = false;
    turnSettleUntilRef.current = 0;
    setIsMicMuted(true);
    setIsMicCaptureActive(false);
    activeTurnRef.current = false;

    const wsCallbacks = voiceTransport === 'ws-realtime'
      ? wsAudioRef.current.buildCallbacks({
          onOpen: () => {
            if (isConnectCancelled()) return;
            voiceStatusRef.current = 'listening';
            setVoiceStatus('listening');
            assistantMicHoldRef.current = false;
            applyMicState();
          },
          onClose: () => {
            if (isConnectCancelled()) return;
            voiceStatusRef.current = 'idle';
            setVoiceStatus('idle');
            connectInFlightRef.current = false;
          },
          onError: (err: Error) => {
            if (isConnectCancelled()) return;
            onError?.(err);
          },
        })
      : undefined;

    try {
      let session: VoiceSession | undefined;
      session = await nxtlinqApi.voice.startSession({
        apiKey, apiSecret, pseudoId, externalId, aitId, walletAddress, aitToken,
        voiceMode: 'realtime',
        transport: voiceTransport,
        startWithMicMuted: true,
        metadata,
        onAudioDelta: wsCallbacks?.onAudioDelta,
        onOpen: wsCallbacks?.onOpen ?? (() => {
          if (isConnectCancelled()) { void session?.stop(); return; }
          voiceStatusRef.current = 'listening';
          setVoiceStatus('listening');
          assistantMicHoldRef.current = false;
          applyMicState();
          scheduleRemoteAudioAttach(isConnectCancelled);
        }),
        onClose: wsCallbacks?.onClose ?? (() => {
          if (isConnectCancelled()) return;
          voiceStatusRef.current = 'idle';
          setVoiceStatus('idle');
          connectInFlightRef.current = false;
        }),
        onStatus: (status) => voiceHandlersRef.current.onStatus(status),
        onTranscript: (event) => voiceHandlersRef.current.onTranscript(event),
        onDone: (event) => voiceHandlersRef.current.onDone(event),
        onError: (err) => {
          if (isConnectCancelled()) return;
          onError?.(err);
        },
      });

      if (isConnectCancelled()) {
        // Voice mode was exited while connecting: discard the new session.
        await session.stop();
        return;
      }
      voiceSessionIdRef.current = session.id;
      sessionRef.current = session;
      setVoiceSessionId(session.id);
      if (voiceTransport === 'ws-realtime') {
        wsAudioRef.current.bindSession(session, false);
      } else {
        session.muteMic(true);
        scheduleRemoteAudioAttach(isConnectCancelled);
      }
    } catch (err) {
      if (isConnectCancelled()) return;
      onError?.(err instanceof Error ? err : new Error(String(err)));
      setIsVoiceMode(false);
      await stopVoiceSession('enterVoiceMode_error');
    } finally {
      connectInFlightRef.current = false;
      if (!isConnectCancelled()) setIsVoiceConnecting(false);
    }
  }, [
    apiKey, apiSecret, pseudoId, externalId, aitId, walletAddress, aitToken, metadata,
    nxtlinqApi, stopRecording, stopTextToSpeech, scheduleRemoteAudioAttach,
    onError, stopVoiceSession, applyMicState, voiceTransport, clearVoiceStream,
  ]);

  const toggleMicMute = React.useCallback(() => {
    if (!sessionRef.current) return;
    // While the assistant holds the mic, the user may mute but not unmute.
    if (assistantMicHoldRef.current && userMicMutedRef.current) return;
    if (assistantMicHoldRef.current) {
      userMicMutedRef.current = true;
      applyMicState();
      return;
    }
    const nextMuted = !userMicMutedRef.current;
    userMicMutedRef.current = nextMuted;
    userMicOptInRef.current = !nextMuted;
    // An explicit unmute ends the settle window immediately.
    if (!nextMuted) turnSettleUntilRef.current = 0;
    applyMicState();
  }, [applyMicState]);

  const interruptVoice = React.useCallback(() => {
    sessionRef.current?.interrupt();
    setAssistantMicHold(false);
  }, [setAssistantMicHold]);

  // Keep the session callbacks pointed at the latest handler instances.
  React.useEffect(() => {
    voiceHandlersRef.current = {
      onStatus: handleStatus,
      onTranscript: (event) => {
        handleTranscriptTurn(event);
        handleTranscriptUi(event);
      },
      onDone: handleDone,
    };
  }, [handleStatus, handleTranscriptTurn, handleTranscriptUi, handleDone]);

  // Stop any live session when the component unmounts.
  React.useEffect(() => () => { void stopVoiceSessionRef.current('unmount'); }, []);

  return {
    isVoiceMode,
    voiceStatus,
    isVoiceConnecting,
    isMicMuted,
    remoteAudioRef,
    enterVoiceMode,
    exitVoiceMode,
    toggleMicMute,
    interruptVoice,
  };
}
@@ -0,0 +1,12 @@
1
+ import { MetaKeep } from 'metakeep';
2
+
/** MetaKeep application id this client is configured with. */
const METAKEEP_APP_ID = 'e7d521f7-3eea-42d7-af42-4d8b962d9a6d';
/** Chain id used both as the client's chain and as the key of the RPC url map. */
const METAKEEP_CHAIN_ID = 80002;
/** RPC node url registered for `METAKEEP_CHAIN_ID`. */
const METAKEEP_RPC_URL = 'https://rpc-amoy.polygon.technology';

/** Shared MetaKeep SDK client instance, created once at module load. */
const metakeepClient = new MetaKeep({
  appId: METAKEEP_APP_ID,
  chainId: METAKEEP_CHAIN_ID,
  /* RPC node urls map */
  rpcNodeUrls: {
    [METAKEEP_CHAIN_ID]: METAKEEP_RPC_URL,
  },
});
11
+
12
+ export default metakeepClient;