@vrplatform/voice-chat 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +35 -0
- package/src/SupportAssistantDock.tsx +4060 -0
- package/src/VRPilotHighlightProvider.tsx +942 -0
- package/src/_styles.ts +911 -0
- package/src/index.ts +33 -0
- package/src/supportAssistantActivity.ts +115 -0
|
@@ -0,0 +1,4060 @@
|
|
|
1
|
+
'use client';
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
type CSSProperties,
|
|
5
|
+
type FormEvent,
|
|
6
|
+
type KeyboardEvent,
|
|
7
|
+
useCallback,
|
|
8
|
+
useEffect,
|
|
9
|
+
useMemo,
|
|
10
|
+
useRef,
|
|
11
|
+
useState,
|
|
12
|
+
} from 'react';
|
|
13
|
+
import { BracketsIcon, CopyIcon } from '@vrplatform/icons';
|
|
14
|
+
import {
|
|
15
|
+
createRealtimeActivityState,
|
|
16
|
+
reduceRealtimeActivity,
|
|
17
|
+
type RealtimeActivityState,
|
|
18
|
+
type RealtimeActivityStatus,
|
|
19
|
+
type SupportAssistantActivity,
|
|
20
|
+
} from './supportAssistantActivity';
|
|
21
|
+
import {
|
|
22
|
+
type VRPilotFillFieldRequest,
|
|
23
|
+
type VRPilotHighlightTargetRequest,
|
|
24
|
+
useVRPilotHighlights,
|
|
25
|
+
} from './VRPilotHighlightProvider';
|
|
26
|
+
import { useStyles } from './_styles';
|
|
27
|
+
|
|
28
|
+
export type { SupportAssistantActivity } from './supportAssistantActivity';
|
|
29
|
+
|
|
30
|
+
export type SupportAssistantLanguage = {
|
|
31
|
+
code: string;
|
|
32
|
+
label: string;
|
|
33
|
+
nativeLabel?: string;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
export type SupportAssistantSessionContext = {
|
|
37
|
+
goal?: string;
|
|
38
|
+
currentPath?: string;
|
|
39
|
+
currentTitle?: string;
|
|
40
|
+
dashboard?: string;
|
|
41
|
+
isGeneralLedgerEnabled?: boolean;
|
|
42
|
+
teamCurrency?: string;
|
|
43
|
+
teamId?: string;
|
|
44
|
+
teamName?: string;
|
|
45
|
+
teamPartnerId?: string;
|
|
46
|
+
teamStatus?: string;
|
|
47
|
+
teamType?: string;
|
|
48
|
+
workspaceName?: string;
|
|
49
|
+
activeIssueSummary?: string;
|
|
50
|
+
primaryIssueAction?: string;
|
|
51
|
+
primaryIssueItemName?: string;
|
|
52
|
+
primaryIssueMessage?: string;
|
|
53
|
+
primaryIssueTitle?: string;
|
|
54
|
+
primaryIssueType?: string;
|
|
55
|
+
userEmail?: string;
|
|
56
|
+
userFirstName?: string;
|
|
57
|
+
userId?: string;
|
|
58
|
+
userLastName?: string;
|
|
59
|
+
userName?: string;
|
|
60
|
+
userRole?: string;
|
|
61
|
+
language?: string;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
export type SupportAssistantEvent =
|
|
65
|
+
| {
|
|
66
|
+
type: 'status';
|
|
67
|
+
activity: SupportAssistantActivity;
|
|
68
|
+
message: string;
|
|
69
|
+
}
|
|
70
|
+
| {
|
|
71
|
+
type: 'realtime';
|
|
72
|
+
payload: unknown;
|
|
73
|
+
}
|
|
74
|
+
| {
|
|
75
|
+
type: 'screen';
|
|
76
|
+
shared: boolean;
|
|
77
|
+
}
|
|
78
|
+
| {
|
|
79
|
+
type: 'snapshot';
|
|
80
|
+
framesSent: number;
|
|
81
|
+
}
|
|
82
|
+
| {
|
|
83
|
+
type: 'session_completed';
|
|
84
|
+
session: SupportAssistantSessionEvent;
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
export type SupportAssistantDockProps = {
|
|
88
|
+
apiBaseUrl?: string;
|
|
89
|
+
callEndpoint?: string;
|
|
90
|
+
dev?: boolean;
|
|
91
|
+
localApiBaseUrl?: string;
|
|
92
|
+
productionApiBaseUrl?: string;
|
|
93
|
+
sessionContext?:
|
|
94
|
+
| SupportAssistantSessionContext
|
|
95
|
+
| (() => SupportAssistantSessionContext);
|
|
96
|
+
languages?: SupportAssistantLanguage[];
|
|
97
|
+
defaultLanguageCode?: string;
|
|
98
|
+
defaultGoal?: string;
|
|
99
|
+
userRole?: string;
|
|
100
|
+
placement?: 'right' | 'left';
|
|
101
|
+
floating?: boolean;
|
|
102
|
+
className?: string;
|
|
103
|
+
style?: CSSProperties;
|
|
104
|
+
getCallRequestHeaders?: () => HeadersInit | Promise<HeadersInit>;
|
|
105
|
+
onEvent?: (event: SupportAssistantEvent) => void;
|
|
106
|
+
showRealtimeDebug?: boolean;
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
type RealtimeDebugDirection = 'client' | 'server' | 'local';
|
|
110
|
+
type DevApiTarget = 'production' | 'local';
|
|
111
|
+
|
|
112
|
+
type RealtimeDebugEntry = {
|
|
113
|
+
id: number;
|
|
114
|
+
time: string;
|
|
115
|
+
label: string;
|
|
116
|
+
detail: string;
|
|
117
|
+
payload?: Record<string, unknown>;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
type RealtimeRawDebugEntry = RealtimeDebugEntry & {
|
|
121
|
+
direction: RealtimeDebugDirection;
|
|
122
|
+
timestamp: string;
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
type RealtimeTraceDebugContext = {
|
|
126
|
+
workflowName: string | null;
|
|
127
|
+
groupId: string | null;
|
|
128
|
+
sessionId: string | null;
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
type SupportSessionScreenConfig = {
|
|
132
|
+
captureIntervalMs: number;
|
|
133
|
+
activeCaptureIntervalMs?: number;
|
|
134
|
+
maxImageLongEdgePx: number;
|
|
135
|
+
imageMimeType: 'image/jpeg';
|
|
136
|
+
imageQuality: number;
|
|
137
|
+
imageDetail: 'low';
|
|
138
|
+
eventType: 'conversation.item.create';
|
|
139
|
+
itemType: 'message';
|
|
140
|
+
role: 'user';
|
|
141
|
+
contentType: 'input_image';
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
type SupportSessionRealtimeCall = {
|
|
145
|
+
provider: 'openai';
|
|
146
|
+
sdpAnswer: string;
|
|
147
|
+
model: string;
|
|
148
|
+
voice: string;
|
|
149
|
+
screen: SupportSessionScreenConfig;
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
type SupportSourceLink = {
|
|
153
|
+
articleId: string;
|
|
154
|
+
title: string;
|
|
155
|
+
url: string;
|
|
156
|
+
relevance: number;
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
export type SupportAssistantTranscriptMessage = {
|
|
160
|
+
role: 'user' | 'assistant';
|
|
161
|
+
text: string;
|
|
162
|
+
timestamp: string;
|
|
163
|
+
itemId?: string;
|
|
164
|
+
responseId?: string;
|
|
165
|
+
};
|
|
166
|
+
|
|
167
|
+
export type SupportAssistantSessionCost = {
|
|
168
|
+
amountUsd: number | null;
|
|
169
|
+
currency: 'USD';
|
|
170
|
+
model: string;
|
|
171
|
+
pricingSource: string;
|
|
172
|
+
usage: Record<string, unknown> | null;
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
export type SupportAssistantSessionEvent = {
|
|
176
|
+
openAiSessionId: string | null;
|
|
177
|
+
model: string | null;
|
|
178
|
+
transcript: SupportAssistantTranscriptMessage[];
|
|
179
|
+
summary: string;
|
|
180
|
+
cost: SupportAssistantSessionCost;
|
|
181
|
+
durationMs: number;
|
|
182
|
+
framesSent: number;
|
|
183
|
+
currentPath?: string;
|
|
184
|
+
currentTitle?: string;
|
|
185
|
+
language?: string;
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
type AssistantSessionAnalytics = {
|
|
189
|
+
startedAt: number;
|
|
190
|
+
payload: SupportAssistantSessionContext;
|
|
191
|
+
transcript: SupportAssistantTranscriptMessage[];
|
|
192
|
+
responseTranscriptDeltas: Map<string, string>;
|
|
193
|
+
usage: Record<string, unknown> | null;
|
|
194
|
+
finalized: boolean;
|
|
195
|
+
};
|
|
196
|
+
|
|
197
|
+
type RuntimeSession = {
|
|
198
|
+
peerConnection: RTCPeerConnection | null;
|
|
199
|
+
dataChannel: RTCDataChannel | null;
|
|
200
|
+
microphoneStream: MediaStream | null;
|
|
201
|
+
screenStream: MediaStream | null;
|
|
202
|
+
screenTimer: number | null;
|
|
203
|
+
visualWatchTimer: number | null;
|
|
204
|
+
sessionConfig: SupportSessionRealtimeCall | null;
|
|
205
|
+
muted: boolean;
|
|
206
|
+
snapshotInFlight: boolean;
|
|
207
|
+
lastSnapshotStartedAt: number;
|
|
208
|
+
visualAnalysisCanvas: HTMLCanvasElement | null;
|
|
209
|
+
lastVisualSignature: Uint8ClampedArray | null;
|
|
210
|
+
visualChangeActive: boolean;
|
|
211
|
+
lastVisualChangeAt: number;
|
|
212
|
+
lastProactivePromptAt: number;
|
|
213
|
+
proactiveResponseInFlight: boolean;
|
|
214
|
+
pendingProactiveScreenPrompt: boolean;
|
|
215
|
+
pendingScreenChangeCorrection: boolean;
|
|
216
|
+
clientResponseInFlight: boolean;
|
|
217
|
+
currentResponseId: string | null;
|
|
218
|
+
interruptedResponseIds: Set<string>;
|
|
219
|
+
waitingForFirstScreenAcknowledgement: boolean;
|
|
220
|
+
suppressScreenChangePromptsUntil: number;
|
|
221
|
+
pendingAcknowledgement: {
|
|
222
|
+
text: string;
|
|
223
|
+
instructions?: string;
|
|
224
|
+
} | null;
|
|
225
|
+
waitingForInitialResponse: boolean;
|
|
226
|
+
waitingToEnableAutoResponses: boolean;
|
|
227
|
+
autoResponseEnabled: boolean;
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
type ScreenChangePromptOptions = {
|
|
231
|
+
interruptActiveResponse?: boolean;
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
type SupportAssistantResponseMode = 'voice' | 'text';
|
|
235
|
+
type RealtimeOutputModality = 'audio' | 'text';
|
|
236
|
+
|
|
237
|
+
type ChatTranscriptMessage = {
|
|
238
|
+
id: string;
|
|
239
|
+
role: 'assistant' | 'user';
|
|
240
|
+
responseId: string;
|
|
241
|
+
text: string;
|
|
242
|
+
time: string;
|
|
243
|
+
mode: SupportAssistantResponseMode;
|
|
244
|
+
status: 'streaming' | 'done' | 'cancelled' | 'failed';
|
|
245
|
+
};
|
|
246
|
+
|
|
247
|
+
type StartCallOptions = {
|
|
248
|
+
initialTextMessage?: string;
|
|
249
|
+
};
|
|
250
|
+
|
|
251
|
+
/**
 * Built-in language options.
 *
 * `nativeLabel` is the language's own name for itself, so it is spelled with
 * the proper diacritics ("Français", "Español") rather than an ASCII
 * approximation.
 */
const defaultLanguages: SupportAssistantLanguage[] = [
  { code: 'en', label: 'English' },
  { code: 'de', label: 'German', nativeLabel: 'Deutsch' },
  { code: 'fr', label: 'French', nativeLabel: 'Français' },
  { code: 'es', label: 'Spanish', nativeLabel: 'Español' },
];
|
|
257
|
+
|
|
258
|
+
const minSnapshotSpacingMs = 250;
|
|
259
|
+
const visualWatchIntervalMs = 300;
|
|
260
|
+
const visualSettleDelayMs = 850;
|
|
261
|
+
const visualSampleWidthPx = 48;
|
|
262
|
+
const visualAverageDifferenceThreshold = 4.5;
|
|
263
|
+
const visualChangedPixelRatioThreshold = 0.006;
|
|
264
|
+
const visualPixelDifferenceThreshold = 45;
|
|
265
|
+
const minProactivePromptSpacingMs = 6_000;
|
|
266
|
+
const screenSharePromptWarmupMs = 4_000;
|
|
267
|
+
const maxRealtimeDebugEntries = 500;
|
|
268
|
+
const maxChatTranscriptMessages = 40;
|
|
269
|
+
const realtimeMaxOutputTokens = 'inf';
|
|
270
|
+
const defaultLocalApiBaseUrl = 'http://localhost:8787';
|
|
271
|
+
const defaultProductionApiBaseUrl = 'https://voice-chat.vrplatform.app';
|
|
272
|
+
|
|
273
|
+
/**
 * Reports whether the realtime activity state shows a response in progress
 * or assistant audio that is still active.
 */
function hasActiveAssistantTurn(state: RealtimeActivityState) {
  const { responseInProgress, assistantAudioActive } = state;

  return responseInProgress || assistantAudioActive;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Builds a fresh runtime session record: every handle is `null`, every flag
 * is `false`, every timestamp is `0`, and the interrupted-response set is a
 * new empty `Set` (so two sessions never share it).
 */
function createRuntimeSession(): RuntimeSession {
  const session: RuntimeSession = {
    // Connection, media and timer handles.
    peerConnection: null,
    dataChannel: null,
    microphoneStream: null,
    screenStream: null,
    screenTimer: null,
    visualWatchTimer: null,
    sessionConfig: null,
    muted: false,

    // Snapshot and visual-change bookkeeping.
    snapshotInFlight: false,
    lastSnapshotStartedAt: 0,
    visualAnalysisCanvas: null,
    lastVisualSignature: null,
    visualChangeActive: false,
    lastVisualChangeAt: 0,

    // Proactive prompt and response bookkeeping.
    lastProactivePromptAt: 0,
    proactiveResponseInFlight: false,
    pendingProactiveScreenPrompt: false,
    pendingScreenChangeCorrection: false,
    clientResponseInFlight: false,
    currentResponseId: null,
    interruptedResponseIds: new Set(),
    waitingForFirstScreenAcknowledgement: false,
    suppressScreenChangePromptsUntil: 0,
    pendingAcknowledgement: null,

    // Start-up sequencing flags.
    waitingForInitialResponse: false,
    waitingToEnableAutoResponses: false,
    autoResponseEnabled: false,
  };

  return session;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Maps the assistant response mode to a single-element list of output
 * modalities: `'text'` mode yields `['text']`, anything else `['audio']`.
 */
function getRealtimeOutputModalities(
  mode: SupportAssistantResponseMode
): RealtimeOutputModality[] {
  if (mode === 'text') {
    return ['text'];
  }

  return ['audio'];
}
|
|
314
|
+
|
|
315
|
+
/**
 * Joins a base URL and an endpoint with exactly one `/` between them.
 *
 * @param baseUrl - Base URL; when empty the endpoint is returned unchanged.
 * @param endpoint - Path to append. An absolute `http://` or `https://`
 *   endpoint (case-insensitive) is returned unchanged and `baseUrl` is ignored.
 * @returns The joined URL.
 */
function joinUrl(baseUrl: string, endpoint: string) {
  if (/^https?:\/\//i.test(endpoint)) {
    return endpoint;
  }

  if (!baseUrl) {
    return endpoint;
  }

  // Strip every trailing/leading slash, not just one, so inputs such as
  // "https://host//" + "//call" cannot produce a "///" in the result.
  const trimmedBase = baseUrl.replace(/\/+$/, '');
  const trimmedEndpoint = endpoint.replace(/^\/+/, '');

  return `${trimmedBase}/${trimmedEndpoint}`;
}
|
|
326
|
+
|
|
327
|
+
/**
 * Returns `error.message` when `error` is an `Error` instance, otherwise the
 * supplied fallback text.
 */
function getMessage(error: unknown, fallback: string) {
  if (error instanceof Error) {
    return error.message;
  }

  return fallback;
}
|
|
330
|
+
|
|
331
|
+
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @returns The same object when `value` is a non-null, non-array object;
 *   otherwise `null`.
 */
function getRecord(value: unknown): Record<string, unknown> | null {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return null;
  }

  return value as Record<string, unknown>;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Converts a string or number to its string form; any other type yields
 * `undefined`.
 */
function getStringValue(value: unknown) {
  switch (typeof value) {
    case 'string':
    case 'number':
      return String(value);
    default:
      return undefined;
  }
}
|
|
344
|
+
|
|
345
|
+
/**
 * Produces the string form of a string or number for debug output; returns
 * `undefined` for every other type.
 */
function getDebugValue(value: unknown) {
  const isPrintable = typeof value === 'string' || typeof value === 'number';

  return isPrintable ? String(value) : undefined;
}
|
|
352
|
+
|
|
353
|
+
/**
 * Reads a finite number from an unknown value.
 *
 * Numbers are accepted as-is; non-blank strings are converted with
 * `Number(...)`. Anything else, or any result that is not finite, yields
 * `undefined`.
 */
function getNumberValue(value: unknown) {
  let candidate: number;

  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string' && value.trim()) {
    candidate = Number(value);
  } else {
    return undefined;
  }

  return Number.isFinite(candidate) ? candidate : undefined;
}
|
|
366
|
+
|
|
367
|
+
/**
 * Returns the trimmed text when `value` is a string with non-whitespace
 * content; otherwise `undefined`.
 */
function getTranscriptText(value: unknown) {
  if (typeof value !== 'string') {
    return undefined;
  }

  const trimmed = value.trim();

  return trimmed ? trimmed : undefined;
}
|
|
370
|
+
|
|
371
|
+
/**
 * Returns `value` untouched (no trimming) when it is a non-empty string;
 * otherwise `undefined`.
 */
function getTranscriptDelta(value: unknown) {
  if (typeof value === 'string' && value !== '') {
    return value;
  }

  return undefined;
}
|
|
374
|
+
|
|
375
|
+
/**
 * Builds a colon-separated key from the payload's `response_id`, `item_id`,
 * `output_index` and `content_index` (in that order), skipping any part that
 * is missing or converts to an empty string.
 */
function getTranscriptKey(payload: Record<string, unknown>) {
  const keyParts: string[] = [];

  for (const field of [
    'response_id',
    'item_id',
    'output_index',
    'content_index',
  ]) {
    const part = getStringValue(payload[field]);

    if (part) {
      keyParts.push(part);
    }
  }

  return keyParts.join(':');
}
|
|
385
|
+
|
|
386
|
+
/**
 * Reads `record[key]` as a finite number, falling back to `0` when the
 * record is `null` or the value is not numeric.
 */
function getTokenCount(record: Record<string, unknown> | null, key: string) {
  const count = getNumberValue(record?.[key]);

  return count === undefined ? 0 : count;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Adds the usage counters in `next` onto a copy of `current`.
 *
 * - Numeric values in `next` are added to the numeric reading of the
 *   existing value (treated as 0 when absent or non-numeric).
 * - Plain-object values are merged recursively.
 * - Any other value in `next` is ignored; keys only present in `current`
 *   are carried over from the shallow copy.
 *
 * Neither argument is mutated.
 */
function mergeTokenUsage(
  current: Record<string, unknown> | null,
  next: Record<string, unknown>
): Record<string, unknown> {
  const totals: Record<string, unknown> = current ? { ...current } : {};

  Object.keys(next).forEach((field) => {
    const incoming = next[field];
    const existing = totals[field];

    if (typeof incoming === 'number') {
      totals[field] = (getNumberValue(existing) ?? 0) + incoming;
      return;
    }

    const nested = getRecord(incoming);
    if (nested) {
      totals[field] = mergeTokenUsage(getRecord(existing), nested);
    }
  });

  return totals;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Reads the cached `audio_tokens` count from the input-token details,
 * looking at `cached_tokens_details` first and at
 * `cache_read_input_tokens_details` when the former is not an object.
 * Returns 0 when neither is available.
 */
function getCachedAudioTokens(details: Record<string, unknown> | null) {
  const primary = getRecord(details?.cached_tokens_details);
  const breakdown =
    primary !== null
      ? primary
      : getRecord(details?.cache_read_input_tokens_details);

  return getTokenCount(breakdown, 'audio_tokens');
}
|
|
420
|
+
|
|
421
|
+
/**
 * Reads the cached `image_tokens` count from the input-token details,
 * looking at `cached_tokens_details` first and at
 * `cache_read_input_tokens_details` when the former is not an object.
 * Returns 0 when neither is available.
 */
function getCachedImageTokens(details: Record<string, unknown> | null) {
  const primary = getRecord(details?.cached_tokens_details);
  const breakdown =
    primary !== null
      ? primary
      : getRecord(details?.cache_read_input_tokens_details);

  return getTokenCount(breakdown, 'image_tokens');
}
|
|
428
|
+
|
|
429
|
+
/**
 * Estimates the USD cost of a realtime session from its accumulated usage.
 *
 * A price is only computed when usage is present and the model is exactly
 * `'gpt-realtime-2'`; otherwise `amountUsd` is `null` and the model is
 * reported as given (or `'unknown'` when `null`).
 *
 * Text input tokens are derived as total input minus audio and image input;
 * cached text tokens as total cached minus cached audio and cached image.
 * Every derived count is clamped at 0. The rates below are applied per one
 * million tokens and the result is rounded to six decimal places.
 */
function calculateRealtimeSessionCost(
  model: string | null,
  usage: Record<string, unknown> | null
): SupportAssistantSessionCost {
  const pricingSource = 'openai_api_pricing_2026-05-20_gpt-realtime-2';

  if (!usage || model !== 'gpt-realtime-2') {
    return {
      amountUsd: null,
      currency: 'USD',
      model: model ?? 'unknown',
      pricingSource,
      usage,
    };
  }

  // USD per 1,000,000 tokens.
  const rates = {
    textInput: 4,
    cachedTextInput: 0.4,
    audioInput: 32,
    cachedAudioInput: 0.4,
    imageInput: 5,
    cachedImageInput: 0.5,
    textOutput: 24,
    audioOutput: 64,
  };

  const inputDetails = getRecord(usage.input_token_details);
  const outputDetails = getRecord(usage.output_token_details);

  // Input side.
  const totalInput = getTokenCount(usage, 'input_tokens');
  const audioInput = getTokenCount(inputDetails, 'audio_tokens');
  const imageInput = getTokenCount(inputDetails, 'image_tokens');
  const textInput = Math.max(0, totalInput - audioInput - imageInput);

  // Cached portion of the input.
  const totalCached = getTokenCount(inputDetails, 'cached_tokens');
  const cachedAudio = getCachedAudioTokens(inputDetails);
  const cachedImage = getCachedImageTokens(inputDetails);
  const cachedText = Math.max(0, totalCached - cachedAudio - cachedImage);

  // Output side.
  const totalOutput = getTokenCount(usage, 'output_tokens');
  const audioOutput = getTokenCount(outputDetails, 'audio_tokens');
  const textOutput = Math.max(0, totalOutput - audioOutput);

  // Terms are kept in this order so floating-point rounding is unchanged.
  const usd =
    (Math.max(0, textInput - cachedText) * rates.textInput +
      cachedText * rates.cachedTextInput +
      Math.max(0, audioInput - cachedAudio) * rates.audioInput +
      cachedAudio * rates.cachedAudioInput +
      Math.max(0, imageInput - cachedImage) * rates.imageInput +
      cachedImage * rates.cachedImageInput +
      textOutput * rates.textOutput +
      audioOutput * rates.audioOutput) /
    1_000_000;

  return {
    amountUsd: Number(usd.toFixed(6)),
    currency: 'USD',
    model,
    pricingSource,
    usage,
  };
}
|
|
483
|
+
|
|
484
|
+
/**
 * Produces a one-line, human-readable summary of a session transcript:
 * the message count, the first user message and the last assistant message
 * (each only when it exists and has non-empty text). An empty transcript
 * yields a fixed "no transcript" sentence.
 */
function buildSessionSummary(transcript: SupportAssistantTranscriptMessage[]) {
  const total = transcript.length;

  if (total === 0) {
    return 'VRPilot session ended without a captured transcript.';
  }

  const firstUserText = transcript.find(
    (message) => message.role === 'user'
  )?.text;

  // Walk backwards to pick up the most recent assistant message.
  let lastAssistantText: string | undefined;
  for (let index = total - 1; index >= 0; index -= 1) {
    if (transcript[index].role === 'assistant') {
      lastAssistantText = transcript[index].text;
      break;
    }
  }

  const sentences = [
    `${total} transcript message${total === 1 ? '' : 's'} captured.`,
  ];

  if (firstUserText) {
    sentences.push(`User asked: ${firstUserText}`);
  }

  if (lastAssistantText) {
    sentences.push(`Latest assistant response: ${lastAssistantText}`);
  }

  return sentences.join(' ');
}
|
|
508
|
+
|
|
509
|
+
/**
 * Collects the assistant's text from a response object.
 *
 * Walks `response.output[*].content[*]`, taking each content part's
 * `transcript` (or, failing that, its `text`) after trimming, and joins the
 * pieces with single spaces. Returns `undefined` when nothing was found.
 */
function getAssistantTranscriptFromResponse(response: Record<string, unknown>) {
  const outputItems: unknown[] = Array.isArray(response.output)
    ? response.output
    : [];

  const pieces = outputItems.flatMap((outputItem): string[] => {
    const content = getRecord(outputItem)?.content;

    if (!Array.isArray(content)) {
      return [];
    }

    return content.flatMap((contentItem): string[] => {
      const part = getRecord(contentItem);
      const text =
        getTranscriptText(part?.transcript) ?? getTranscriptText(part?.text);

      return text ? [text] : [];
    });
  });

  return getTranscriptText(pieces.join(' '));
}
|
|
531
|
+
|
|
532
|
+
/**
 * Captures the current moment in two forms: `timestamp` as an ISO-8601
 * string and `time` as a two-digit hour/minute/second string formatted with
 * the `en-GB` locale.
 */
function getDebugTimestampParts() {
  const now = new Date();
  const timestamp = now.toISOString();
  const time = now.toLocaleTimeString('en-GB', {
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit',
  });

  return { timestamp, time };
}
|
|
544
|
+
|
|
545
|
+
/**
 * Returns a copy of a realtime event payload that is safe to keep in the
 * debug log, with bulky values replaced by short placeholders:
 *
 * - `data:image/...` URLs become `[<mime type> data URL omitted; chars=N]`.
 * - Any other string longer than 4,000 characters becomes
 *   `[long string omitted; chars=N]`.
 * - A string stored under a `delta` key and longer than 512 characters
 *   becomes `[delta omitted; chars=N]`.
 *
 * Arrays and objects are processed recursively; all other values are
 * returned unchanged. The input is never mutated.
 */
function sanitizeRealtimeDebugPayload(value: unknown): unknown {
  if (typeof value === 'string') {
    if (value.startsWith('data:image/')) {
      // The media type ends at the first ';' (parameters) or ',' (payload).
      // Looking only for ';' would yield -1 for URLs such as
      // "data:image/png,<payload>", and slice(5, -1) would then copy nearly
      // the whole image into the log. The match is capped at 100 characters
      // so a malformed URL cannot leak a long payload either.
      const mediaTypeMatch = /^data:([^;,]{0,100})/.exec(value);
      const mimeType = mediaTypeMatch ? mediaTypeMatch[1] : 'image';

      return `[${mimeType} data URL omitted; chars=${value.length}]`;
    }

    if (value.length > 4_000) {
      return `[long string omitted; chars=${value.length}]`;
    }

    return value;
  }

  if (Array.isArray(value)) {
    return value.map((item) => sanitizeRealtimeDebugPayload(item));
  }

  // Primitives, null and undefined pass through untouched.
  if (typeof value !== 'object' || value === null) {
    return value;
  }

  return Object.fromEntries(
    Object.entries(value).map(([key, item]: [string, unknown]) => [
      key,
      key === 'delta' && typeof item === 'string' && item.length > 512
        ? `[delta omitted; chars=${item.length}]`
        : sanitizeRealtimeDebugPayload(item),
    ])
  );
}
|
|
578
|
+
|
|
579
|
+
/**
 * Returns the event's `type` when it is a string, otherwise the literal
 * `'unknown'`.
 */
function getRealtimeEventType(event: Record<string, unknown>) {
  const { type } = event;

  if (typeof type === 'string') {
    return type;
  }

  return 'unknown';
}
|
|
582
|
+
|
|
583
|
+
/**
 * Produces a short detail string for a client-sent realtime event:
 *
 * - `session.update` reports the tracing `group_id` when present, else
 *   `'session config'`.
 * - `response.create` reports the first 120 characters of the
 *   instructions, else `'response request'`.
 * - `conversation.item.create` with array content reports the
 *   comma-joined content types, else `'conversation item'`.
 * - Everything else reports `'-'`.
 */
function getRealtimeClientEventDetail(event: Record<string, unknown>) {
  switch (getRealtimeEventType(event)) {
    case 'session.update': {
      const tracing = getRecord(getRecord(event.session)?.tracing);
      const groupId = getDebugValue(tracing?.group_id);

      return groupId ? `group_id=${groupId}` : 'session config';
    }

    case 'response.create': {
      const instructions = getDebugValue(
        getRecord(event.response)?.instructions
      );

      return instructions
        ? `instructions=${instructions.slice(0, 120)}`
        : 'response request';
    }

    case 'conversation.item.create': {
      const content = getRecord(event.item)?.content;

      if (!Array.isArray(content)) {
        return '-';
      }

      const contentTypes = content
        .map((contentItem) => getDebugValue(getRecord(contentItem)?.type))
        .filter(Boolean)
        .join(',');

      return contentTypes ? `content=${contentTypes}` : 'conversation item';
    }

    default:
      return '-';
  }
}
|
|
619
|
+
|
|
620
|
+
/** Returns `value` when it is a boolean, otherwise `undefined`. */
function getBooleanValue(value: unknown) {
  if (typeof value === 'boolean') {
    return value;
  }

  return undefined;
}
|
|
623
|
+
|
|
624
|
+
/**
 * Builds the headers for the call request.
 *
 * Starts from `Content-Type: application/json`, then applies every header
 * returned by the optional (possibly async) provider on top, so a
 * provider-supplied header replaces a default of the same name.
 */
async function createCallRequestHeaders(
  getCallRequestHeaders?: () => HeadersInit | Promise<HeadersInit>
) {
  const headers = new Headers();
  headers.set('Content-Type', 'application/json');

  const extraHeaders = await getCallRequestHeaders?.();
  if (!extraHeaders) {
    return headers;
  }

  // Normalise through Headers so every HeadersInit shape is handled alike.
  new Headers(extraHeaders).forEach((headerValue, headerName) => {
    headers.set(headerName, headerValue);
  });

  return headers;
}
|
|
640
|
+
|
|
641
|
+
/**
 * Resolves once the data channel is open.
 *
 * Rejects when the channel is already closing/closed, or when it emits
 * `error` or `close` before `open`. Without the `close` handling the promise
 * would never settle for a channel that is torn down during setup. All
 * listeners are removed as soon as the promise settles.
 *
 * @param channel - The data channel to wait on.
 * @returns A promise that resolves with no value once the channel is open.
 */
function waitForDataChannelOpen(channel: RTCDataChannel) {
  if (channel.readyState === 'open') {
    return Promise.resolve();
  }

  if (channel.readyState === 'closing' || channel.readyState === 'closed') {
    return Promise.reject(
      new Error('Realtime data channel closed before it opened')
    );
  }

  return new Promise<void>((resolve, reject) => {
    function stopListening() {
      channel.removeEventListener('open', handleOpen);
      channel.removeEventListener('error', handleError);
      channel.removeEventListener('close', handleClose);
    }

    function handleOpen() {
      stopListening();
      resolve();
    }

    function handleError() {
      stopListening();
      reject(new Error('Realtime data channel failed to open'));
    }

    function handleClose() {
      stopListening();
      reject(new Error('Realtime data channel closed before it opened'));
    }

    channel.addEventListener('open', handleOpen);
    channel.addEventListener('error', handleError);
    channel.addEventListener('close', handleClose);
  });
}
|
|
655
|
+
|
|
656
|
+
/**
 * Resolves once the peer connection's ICE gathering state is `complete`.
 *
 * Resolves immediately when gathering has already finished. Otherwise it
 * listens for `icegatheringstatechange` and rejects after 10 seconds if the
 * state has still not reached `complete`. The listener is removed on both
 * outcomes.
 */
function waitForIceGatheringComplete(peerConnection: RTCPeerConnection) {
  if (peerConnection.iceGatheringState === 'complete') {
    return Promise.resolve();
  }

  return new Promise<void>((resolve, reject) => {
    const stopListening = () => {
      peerConnection.removeEventListener(
        'icegatheringstatechange',
        onGatheringStateChange
      );
    };

    const giveUpTimer = window.setTimeout(() => {
      stopListening();
      reject(new Error('WebRTC setup timed out while gathering candidates'));
    }, 10_000);

    const onGatheringStateChange = () => {
      if (peerConnection.iceGatheringState !== 'complete') {
        return;
      }

      window.clearTimeout(giveUpTimer);
      stopListening();
      resolve();
    };

    peerConnection.addEventListener(
      'icegatheringstatechange',
      onGatheringStateChange
    );
  });
}
|
|
678
|
+
|
|
679
|
+
/**
 * Builds an `http://localhost` URL that carries the current page's port
 * when `window.location.port` is non-empty.
 */
function getLocalhostUrl() {
  const { port } = window.location;

  return port ? `http://localhost:${port}` : 'http://localhost';
}
|
|
684
|
+
|
|
685
|
+
/**
 * Explains why a media feature cannot be used.
 *
 * In a secure context the message blames the browser; otherwise it asks the
 * user to open the localhost URL or use HTTPS.
 *
 * @param feature - Feature name placed at the start of the message.
 */
function getMediaUnavailableMessage(feature: string) {
  if (window.isSecureContext) {
    return `${feature} access is unavailable in this browser. Open this app in Chrome, Edge, or Safari and allow media permissions.`;
  }

  return `${feature} access needs a secure browser origin. Open ${getLocalhostUrl()} or use HTTPS.`;
}
|
|
698
|
+
|
|
699
|
+
/**
 * Calls `navigator.mediaDevices.getUserMedia` with the given constraints,
 * or returns a rejected promise carrying the "Microphone" unavailable
 * message when that API is missing.
 */
function requestUserMedia(constraints: MediaStreamConstraints) {
  const mediaDevices = navigator.mediaDevices;

  if (!mediaDevices?.getUserMedia) {
    return Promise.reject(new Error(getMediaUnavailableMessage('Microphone')));
  }

  return mediaDevices.getUserMedia(constraints);
}
|
|
706
|
+
|
|
707
|
+
/**
 * Calls `navigator.mediaDevices.getDisplayMedia` with the given options,
 * or returns a rejected promise carrying the "Screen sharing" unavailable
 * message when that API is missing.
 */
function requestDisplayMedia(constraints: DisplayMediaStreamOptions) {
  const mediaDevices = navigator.mediaDevices;

  if (!mediaDevices?.getDisplayMedia) {
    return Promise.reject(
      new Error(getMediaUnavailableMessage('Screen sharing'))
    );
  }

  return mediaDevices.getDisplayMedia(constraints);
}
|
|
716
|
+
|
|
717
|
+
/**
 * Throws when the `RTCPeerConnection` global is not defined.
 *
 * @throws Error with a user-facing message recommending a supported browser.
 */
function assertRealtimeBrowserSupport() {
  const webRtcAvailable = typeof RTCPeerConnection !== 'undefined';

  if (webRtcAvailable) {
    return;
  }

  throw new Error(
    'WebRTC is unavailable in this browser. Open this app in Chrome, Edge, or Safari.'
  );
}
|
|
724
|
+
|
|
725
|
+
/**
 * Waits until the video reports non-zero dimensions.
 *
 * Resolves immediately when `videoWidth` and `videoHeight` are already
 * positive. Otherwise resolves on the first `loadedmetadata` or `resize`
 * event, or after one second, whichever comes first. It never rejects, so
 * callers cannot assume the dimensions are valid afterwards.
 */
function waitForVideoMetadata(video: HTMLVideoElement) {
  if (video.videoWidth > 0 && video.videoHeight > 0) {
    return Promise.resolve();
  }

  return new Promise<void>((resolve) => {
    const readyEvents = ['loadedmetadata', 'resize'] as const;

    const finish = () => {
      window.clearTimeout(fallbackTimer);
      for (const eventName of readyEvents) {
        video.removeEventListener(eventName, finish);
      }
      resolve();
    };

    const fallbackTimer = window.setTimeout(finish, 1_000);

    for (const eventName of readyEvents) {
      video.addEventListener(eventName, finish, { once: true });
    }
  });
}
|
|
744
|
+
|
|
745
|
+
/**
 * Computes canvas dimensions that fit within `maxLongEdge` on the longer
 * side while keeping the aspect ratio. Sizes already within the limit are
 * returned unscaled (rounded to whole pixels).
 *
 * A positive source dimension never rounds down to 0: on extreme aspect
 * ratios (for example 4000x1 limited to 100) plain rounding would give a
 * zero-height canvas, which cannot be drawn or encoded. Non-positive source
 * dimensions are passed through the same rounding as before.
 *
 * @param width - Source width in pixels.
 * @param height - Source height in pixels.
 * @param maxLongEdge - Maximum allowed length of the longer edge.
 * @returns The target `{ width, height }` in whole pixels.
 */
function getScaledCanvasSize(
  width: number,
  height: number,
  maxLongEdge: number
) {
  const longEdge = Math.max(width, height);
  const scale = longEdge > maxLongEdge ? maxLongEdge / longEdge : 1;

  const scaleDimension = (size: number) => {
    const scaled = Math.round(size * scale);

    return size > 0 ? Math.max(1, scaled) : scaled;
  };

  return {
    width: scaleDimension(width),
    height: scaleDimension(height),
  };
}
|
|
758
|
+
|
|
759
|
+
/**
 * Reads a blob with `FileReader.readAsDataURL` and resolves with the
 * reader's result converted via `String(...)`. Rejects with the reader's
 * error when reading fails.
 */
function blobToDataUrl(blob: Blob) {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    const handleLoad = () => {
      resolve(String(reader.result));
    };
    const handleError = () => {
      reject(reader.error);
    };

    reader.addEventListener('load', handleLoad);
    reader.addEventListener('error', handleError);
    reader.readAsDataURL(blob);
  });
}
|
|
767
|
+
|
|
768
|
+
/**
 * Promise wrapper around `canvas.toBlob`.
 *
 * @returns A promise resolving with whatever `toBlob` hands to its callback,
 *   which may be `null`.
 */
function canvasToBlob(
  canvas: HTMLCanvasElement,
  mimeType: string,
  quality: number
) {
  return new Promise<Blob | null>((resolve) => {
    canvas.toBlob(
      (encoded) => {
        resolve(encoded);
      },
      mimeType,
      quality
    );
  });
}
|
|
777
|
+
|
|
778
|
+
/**
 * Compares two frame signatures and measures how much they differ.
 *
 * Each signature is read as consecutive groups of three channel values per
 * pixel (presumably RGB — confirm against the code that builds them).
 *
 * @param previousSignature - Signature of the earlier frame.
 * @param currentSignature - Signature of the later frame.
 * @returns `averageDelta`, the mean per-channel absolute difference over all
 *   pixels, and `changedRatio`, the fraction of pixels whose summed
 *   three-channel difference reaches `visualPixelDifferenceThreshold`.
 *   Signatures of different lengths count as fully changed
 *   (`Infinity` / `1`); two empty signatures count as unchanged (`0` / `0`)
 *   rather than producing `NaN` from a division by zero.
 */
function getVisualDifference(
  previousSignature: Uint8ClampedArray,
  currentSignature: Uint8ClampedArray
) {
  if (previousSignature.length !== currentSignature.length) {
    return {
      averageDelta: Number.POSITIVE_INFINITY,
      changedRatio: 1,
    };
  }

  const pixelCount = currentSignature.length / 3;

  // Nothing to compare: report "no change" instead of 0 / 0 = NaN.
  if (pixelCount === 0) {
    return {
      averageDelta: 0,
      changedRatio: 0,
    };
  }

  let totalDelta = 0;
  let changedPixels = 0;

  for (let index = 0; index < currentSignature.length; index += 3) {
    const pixelDelta =
      Math.abs(currentSignature[index] - previousSignature[index]) +
      Math.abs(currentSignature[index + 1] - previousSignature[index + 1]) +
      Math.abs(currentSignature[index + 2] - previousSignature[index + 2]);

    // Average over the three channels; the threshold uses the raw sum.
    totalDelta += pixelDelta / 3;
    if (pixelDelta >= visualPixelDifferenceThreshold) {
      changedPixels += 1;
    }
  }

  return {
    averageDelta: totalDelta / pixelCount,
    changedRatio: changedPixels / pixelCount,
  };
}
|
|
810
|
+
|
|
811
|
+
function IconMic() {
|
|
812
|
+
return (
|
|
813
|
+
<svg viewBox="0 0 24 24" aria-hidden="true">
|
|
814
|
+
<path d="M12 3a3 3 0 0 0-3 3v6a3 3 0 0 0 6 0V6a3 3 0 0 0-3-3Z" />
|
|
815
|
+
<path d="M5 11a7 7 0 0 0 14 0" />
|
|
816
|
+
<path d="M12 18v3" />
|
|
817
|
+
</svg>
|
|
818
|
+
);
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
function IconMicOff() {
|
|
822
|
+
return (
|
|
823
|
+
<svg viewBox="0 0 24 24" aria-hidden="true">
|
|
824
|
+
<path d="m4 4 16 16" />
|
|
825
|
+
<path d="M9 9v3a3 3 0 0 0 5 2.2" />
|
|
826
|
+
<path d="M15 9.4V6a3 3 0 0 0-5.1-2.1" />
|
|
827
|
+
<path d="M5 11a7 7 0 0 0 10.7 5.9" />
|
|
828
|
+
<path d="M19 11a7 7 0 0 1-1 3.6" />
|
|
829
|
+
<path d="M12 18v3" />
|
|
830
|
+
</svg>
|
|
831
|
+
);
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
function IconScreenShare() {
|
|
835
|
+
return (
|
|
836
|
+
<svg viewBox="0 0 24 24" aria-hidden="true">
|
|
837
|
+
<rect x="4" y="5" width="16" height="11" rx="2" />
|
|
838
|
+
<path d="M12 16v3" />
|
|
839
|
+
<path d="M8.5 19h7" />
|
|
840
|
+
<path d="m12 8 3 3" />
|
|
841
|
+
<path d="m15 8v3h-3" />
|
|
842
|
+
</svg>
|
|
843
|
+
);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
function IconVoiceMode() {
|
|
847
|
+
return (
|
|
848
|
+
<svg viewBox="0 0 24 24" aria-hidden="true">
|
|
849
|
+
<path d="M5 10v4" />
|
|
850
|
+
<path d="M9 7v10" />
|
|
851
|
+
<path d="M13 4v16" />
|
|
852
|
+
<path d="M17 8v8" />
|
|
853
|
+
<path d="M21 11v2" />
|
|
854
|
+
</svg>
|
|
855
|
+
);
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
/**
 * End-call glyph on a 24x24 viewBox: an arched curve across the top with a
 * mirrored end piece on the left and on the right.
 *
 * The SVG is marked `aria-hidden`, so any accessible name has to come from
 * the element that renders this icon.
 */
function IconEndCall() {
  const glyph = (
    <svg viewBox="0 0 24 24" aria-hidden="true">
      <path d="M5.2 10.6c4.3-2.5 9.3-2.5 13.6 0" />
      <path d="M8 13.6 5.4 16a1.4 1.4 0 0 1-2-.1l-1.3-1.6a1.5 1.5 0 0 1 .2-2.1l2.2-1.7" />
      <path d="m16 13.6 2.6 2.4a1.4 1.4 0 0 0 2-.1l1.3-1.6a1.5 1.5 0 0 0-.2-2.1l-2.2-1.7" />
    </svg>
  );

  return glyph;
}
|
|
867
|
+
|
|
868
|
+
/**
 * Send glyph on a 24x24 viewBox: a horizontal line ending in a
 * right-pointing chevron, i.e. a right arrow.
 *
 * The SVG is marked `aria-hidden`, so any accessible name has to come from
 * the element that renders this icon.
 */
function IconSend() {
  const glyph = (
    <svg viewBox="0 0 24 24" aria-hidden="true">
      <path d="M5 12h13" />
      <path d="m13 6 6 6-6 6" />
    </svg>
  );

  return glyph;
}
|
|
876
|
+
|
|
877
|
+
/**
 * Function-tool definition for `highlight_ui_target`.
 *
 * The `parameters` object is a JSON-Schema-style description of the
 * arguments: every property is optional (there is no `required` list) and
 * unknown properties are rejected via `additionalProperties: false`.
 * Property order is kept stable because the object is serialised as-is.
 */
const highlightUiTargetTool = {
  type: 'function',
  name: 'highlight_ui_target',
  // Sentences are joined with a single space to form one description string.
  description: [
    'Visually highlight one visible UI element in the current VRPlatform app.',
    'Use this when pointing the user to a specific button, tab, menu item, field, or section.',
    'Prefer targetKey for data-vrpilot-target/data-testid values, then accessible label or visible text.',
    'This tool only highlights; it never clicks, changes, saves, or submits anything.',
  ].join(' '),
  parameters: {
    type: 'object',
    properties: {
      // --- how to locate the element ---
      targetKey: {
        type: 'string',
        description: 'Stable target key such as data-vrpilot-target, data-testid, data-test, or element id.',
      },
      selector: {
        type: 'string',
        description: 'CSS selector for the element when a stable key is not available.',
      },
      text: {
        type: 'string',
        description: 'Visible text to match, for example the label on a button or tab.',
      },
      label: {
        type: 'string',
        description: 'Accessible label or title to match, for example aria-label.',
      },
      role: {
        type: 'string',
        enum: ['any', 'button', 'checkbox', 'heading', 'input', 'link', 'menuitem', 'tab'],
        description: 'Optional UI role used to narrow text or label matching.',
      },
      // --- presentation / diagnostics ---
      reason: {
        type: 'string',
        description: 'Short internal reason for the highlight, useful for debugging.',
      },
      durationMs: {
        type: 'number',
        minimum: 1_200,
        maximum: 12_000,
        description: 'How long the highlight should stay visible.',
      },
      scrollIntoView: {
        type: 'boolean',
        description: 'Whether the app should scroll the matching element into view first.',
      },
    },
    additionalProperties: false,
  },
} as const;
|
|
942
|
+
|
|
943
|
+
/**
 * Function-tool definition for `fill_ui_field`.
 *
 * The `parameters` object is a JSON-Schema-style description of the
 * arguments: every property is optional (there is no `required` list) and
 * unknown properties are rejected via `additionalProperties: false`.
 * Property order is kept stable because the object is serialised as-is.
 */
const fillUiFieldTool = {
  type: 'function',
  name: 'fill_ui_field',
  // Sentences are joined with a single space to form one description string.
  description: [
    'Fill or update one visible editable field in the current VRPlatform app.',
    'Use this only for inputs, textareas, selects, checkboxes, and radios when the user clearly asks to enter or choose a value.',
    'This tool never submits, saves, clicks buttons, or triggers irreversible actions.',
    'Do not use it for passwords, tokens, payment fields, bank details, tax IDs, or other sensitive secrets.',
  ].join(' '),
  parameters: {
    type: 'object',
    properties: {
      // --- how to locate the field ---
      targetKey: {
        type: 'string',
        description: 'Stable target key such as data-vrpilot-target, data-testid, data-test, or element id.',
      },
      selector: {
        type: 'string',
        description: 'CSS selector for the field or a wrapper containing the field when a stable key is not available.',
      },
      text: {
        type: 'string',
        description: 'Visible text near the field, for example the field label.',
      },
      label: {
        type: 'string',
        description: 'Accessible label, title, placeholder, or visible label for the field.',
      },
      role: {
        type: 'string',
        enum: ['any', 'input', 'checkbox', 'radio', 'select'],
        description: 'Optional field role used to narrow matching.',
      },
      // --- what to put into the field ---
      value: {
        type: 'string',
        description: 'The value to enter. For selects, this may be an option value or visible option label.',
      },
      checked: {
        type: 'boolean',
        description: 'For checkboxes and radio buttons, whether the field should be checked.',
      },
      mode: {
        type: 'string',
        enum: ['replace', 'append'],
        description: 'Whether to replace the field value or append to it.',
      },
      // --- presentation / diagnostics ---
      reason: {
        type: 'string',
        description: 'Short internal reason for the fill action, useful for debugging.',
      },
      scrollIntoView: {
        type: 'boolean',
        description: 'Whether the app should scroll the matching field into view first.',
      },
    },
    additionalProperties: false,
  },
} as const;
|
|
1008
|
+
|
|
1009
|
+
/**
 * Function-tool definition for `search_support_memory`.
 *
 * `query` is the only required argument; `limit` is optional and bounded
 * to 1..8 by the schema. Unknown properties are rejected via
 * `additionalProperties: false`.
 */
const searchSupportMemoryTool = {
  type: 'function',
  name: 'search_support_memory',
  // Sentences are joined with a single space to form one description string.
  description: [
    'Search the VRTrust support knowledge memory for help article context.',
    'Use this before answering questions about VRTrust workflows, troubleshooting, reports, settings, reconciliation, statements, reservations, expenses, listings, fees, deposits, or integrations.',
    'The tool returns matching chunks and structured sourceLinks that the UI can display below the assistant.',
  ].join(' '),
  parameters: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: "The user's support question rewritten as a concise retrieval query.",
      },
      limit: {
        type: 'number',
        minimum: 1,
        maximum: 8,
        description: 'Maximum number of chunks to retrieve.',
      },
    },
    required: ['query'],
    additionalProperties: false,
  },
} as const;
|
|
1033
|
+
|
|
1034
|
+
/**
 * Returns the tool definitions offered to the realtime session, in a fixed
 * order: highlight, fill, then support-memory search. A new array is
 * created on every call.
 */
function buildRealtimeTools() {
  const tools = [
    highlightUiTargetTool,
    fillUiFieldTool,
    searchSupportMemoryTool,
  ];

  return tools;
}
|
|
1037
|
+
|
|
1038
|
+
/**
 * Builds the instruction text that describes the currently active issue.
 *
 * The text has the shape
 * `<title>[ affecting <item>].[ The likely next step is: <action>.] Summarize …`.
 *
 * Free-text context values are normalised so the sentences stay well formed:
 * - a missing or blank title falls back to "There is an active issue";
 * - trailing `.`, `!`, `?` and whitespace are removed from the title and item
 *   name before the single sentence-ending period is appended;
 * - the action gets a closing period when it does not already end in
 *   sentence punctuation, so it cannot run into the "Summarize …" sentence.
 *
 * @param input Session context; only the issue-related fields are read.
 * @returns The instruction string, or `undefined` when the context has no
 *   `activeIssueSummary`.
 */
function buildPostScreenIssueInstruction(
  input: SupportAssistantSessionContext
) {
  if (!input.activeIssueSummary) {
    return undefined;
  }

  // Trim and drop trailing sentence punctuation so exactly one terminator
  // is added by the template below.
  const withoutTerminator = (text: string | null | undefined) =>
    (text ?? '').trim().replace(/[\s.!?]+$/, '');

  const title =
    withoutTerminator(input.primaryIssueTitle) || 'There is an active issue';

  const itemName = withoutTerminator(input.primaryIssueItemName);
  const item = itemName ? ` affecting ${itemName}` : '';

  const actionText = (input.primaryIssueAction ?? '').trim();
  const actionTerminator = /[.!?]$/.test(actionText) ? '' : '.';
  const action = actionText
    ? ` The likely next step is: ${actionText}${actionTerminator}`
    : '';

  return (
    `${title}${item}.${action} Summarize the issue in user-friendly language; ` +
    'do not read raw error text, timestamps, ids, or stack-like details aloud. ' +
    'Ask whether the user wants help with this issue or needs help with something else.'
  );
}
|
|
1059
|
+
|
|
1060
|
+
/**
 * Builds the instruction that pins the assistant's response language.
 *
 * The language is named by its `label`; when a `nativeLabel` is present and
 * differs from the label it is appended in parentheses.
 *
 * @param language Language to enforce.
 * @returns Three sentences joined by single spaces.
 */
function buildLanguageInstruction(language: SupportAssistantLanguage) {
  const { label, nativeLabel } = language;

  let languageName = label;
  if (nativeLabel && nativeLabel !== label) {
    languageName = `${label} (${nativeLabel})`;
  }

  const sentences = [
    `Mandatory response language: ${languageName}.`,
    `Speak and answer only in ${languageName}, even if prompts or system text are in another language.`,
    'Do not switch languages unless the user explicitly asks.',
  ];

  return sentences.join(' ');
}
|
|
1072
|
+
|
|
1073
|
+
/**
 * Assembles the full instruction text for a ChatVRT session.
 *
 * The result is a newline-joined list made of two parts, in this order:
 * 1. fixed behavioural rules that do not depend on the input;
 * 2. context lines derived from `input`, each emitted only when the
 *    corresponding value is present (truthy, or for the general-ledger flag
 *    not `undefined`).
 *
 * @param input Session context describing the requester, team, active issue,
 *   language, and current page.
 * @returns The instruction string.
 */
function buildSessionInstructions(input: SupportAssistantSessionContext) {
  const greetingName = getPreferredGreetingName(input);

  // Renders "<label>: <value>." for a present value, otherwise undefined so
  // the line is dropped by the filter at the end.
  const labeled = (
    label: string,
    value: string | number | null | undefined
  ): string | undefined => (value ? `${label}: ${value}.` : undefined);

  // Part 1: rules that are always included, in a fixed order.
  const fixedRules: string[] = [
    "You are ChatVRT, VRPlatform's live in-product voice agent.",
    'Your job is to help the user complete their current task with calm, low-latency, screen-aware guidance.',
    'Speak briefly. Most spoken responses should be one or two sentences.',
    'Your spoken turn must be one compact message. Never split a turn into a commentary or preamble followed by a separate answer.',
    'Start with the next concrete step or the one clarifying question needed to identify it. Do not start with an acknowledgement-only setup sentence like "Okay Frank", "Let us get this done", or "I will help with that".',
    'The first assistant turn must use the ChatVRT screen-share triage greeting. If an initial greeting name is provided, say: "Hi [name], I\'m ChatVRT. Please share your screen if you want UI guidance; if not, I can still guide you by voice. Are you here for first-time onboarding, ongoing support, or monthly close?" If no name is provided, say the same greeting without the name.',
    'After the initial greeting, do not start replies with the user name or repeat it as an attention marker unless the user explicitly asks.',
    'If the user does not share their screen after being asked, declines screen sharing, or cannot share, do not keep prompting them to share. Say briefly that you can still guide them through the UI by voice, then ask for the page they are on or the visible label they see.',
    'When the user starts screen sharing, acknowledge that you can see the screen in one short clause, then continue with the current task. Do not restart the triage greeting or ask what they need next unless there is no active task.',
    'Do not narrate your reasoning or delay with filler phrases like "let me think", "let me think through", "I am thinking", "I am checking", "let me check", "let me reason through this", "I need to look at this", "I will figure this out", or "I am working through this".',
    'Do not repeatedly correct yourself with phrases like "actually I see" or "actually you are now". If the visible screen changes, switch to the latest screen quietly and give the next useful step.',
    'For broad goals like closing a month, do not announce that you are planning or thinking. Move forward with the first concrete step, or ask for the missing detail needed to choose it.',
    'Infer whether the user is navigating, troubleshooting, explaining a bug, entering data, reviewing something, or asking for a summary. If unclear, ask one short question.',
    'Guide one step at a time, then pause.',
    'Do not talk like a tutorial narrator. Behave like a patient live operator: listen while the user is explaining, wait while they click, type, read, search, or think, respond to corrections immediately, keep compact working memory of the task, and avoid long explanations during active UI work.',
    'If the user says "hold on", "one second", "let me check", "I am looking for", or anything similar, stay quiet unless they ask for help or the screen clearly shows a blocker.',
    "If the user starts speaking while you are speaking, stop. On your next turn, respond to the user's new information instead of finishing the old sentence.",
    'If interrupted mid-instruction, abandon the partial instruction and recover cleanly with a short acknowledgement before continuing from the new information.',
    'Avoid half-instructions. Do not leave the user with an incomplete phrase like "Click the..." unless you can complete the instruction clearly.',
    'Use the latest shared screen snapshot as the source of truth for visible UI. Do not assume hidden fields, menu contents, permissions, backend state, sync status, or saved settings unless shown on screen or provided in context.',
    'Before giving UI steps, briefly confirm what you can see on the current screen. Use visible labels when possible.',
    'ChatVRT is only the embedded assistant UI. Do not treat the ChatVRT dock as the product screen the user needs help with.',
    'If the screen is missing, stale, or unclear, say what is unclear and ask for the relevant screen or a fresh share.',
    'If the user opens another app, email, spreadsheet, document, or browser tab to gather information, treat that as part of the task. Do not redirect them back to VRPlatform unless they ask what to do next, the screen is irrelevant for a long time, or there is a visible blocker.',
    'If the user shows an external source such as an email, spreadsheet, or document, summarize only the task-relevant details and ask whether they want to use those details in VRPlatform. Do not store or generalize details from the source as product knowledge.',
    'When the user gives an update, immediately incorporate it into the working state: briefly acknowledge the new fact, update the task scope, then give the next concrete action only if needed.',
    'When corrected by the user, explicitly discard the wrong interpretation and restate the corrected one. Never argue from your previous assumption; corrections are high-priority input.',
    'If the user challenges guidance, reconcile the difference instead of simply agreeing or doubling down. Explain the distinction briefly and ask which option matches their workflow if needed.',
    'Before giving workflow, template, mapping, form, or troubleshooting instructions, identify the exact object and scope: one line item, one fee or account, one reservation, one listing, one owner, one statement period, all statements, or all items in a category.',
    'Also confirm the intended action when it matters: rename, move, hide, remap, explain, fix, summarize, or report as a bug.',
    'When the user says "just this", "only", "not all", or corrects the scope, restate the corrected scope before continuing. Do not propose broad changes until scope is confirmed.',
    'Users often describe issues in fragments and revise themselves while speaking. If the user is mid-thought, trailing off, saying "I want to", "or", "but", or correcting themselves, wait instead of jumping to a solution.',
    'Before navigation, mirror the concrete object visible on screen, state the user goal narrowly, ask one confirmation question if ambiguous, then give the next click.',
    'When troubleshooting, identify the visible symptom first, then suggest the smallest next check.',
    'If the user has already tried a path, treat that as evidence. Do not send them through the same path again unless you can explain why this attempt is different.',
    'If visible UI conflicts with expected configuration, acknowledge the mismatch directly.',
    'Switch into diagnostic capture mode when the user says or implies they already tried something, a change will not stick, the same issue happened again, something seems like a bug, they need a summary, they need to remember it, or they need to send it to support or another teammate.',
    'In diagnostic capture mode, stop trying to solve by default. Listen, collect facts, and summarize: team or account if visible, listing or property, owner if relevant, month or period, expected behavior, actual visible behavior, visible error, what the user already tried, whether it repeats elsewhere, and workflow impact.',
    'When the user says "remember this", immediately confirm the captured facts in one short sentence. For repeated bugs, keep separate numbered notes.',
    'Switch into support handoff mode when the user asks to summarize, report, send to support, or send to someone else.',
    'In support handoff mode, create a short copy-paste-ready message with known identifiers, expected behavior, actual behavior, visible error, and what was already tried. Tell the user what identifiers are included, then ask whether they want help submitting it.',
    'If the support UI is visible in the app, guide the user through that visible path instead of sending a generic link. Do not ask "anything else?" until the message is sent or the user says they will handle it later.',
    'Do not claim you clicked, saved, submitted, deleted, connected, reconnected, sent, synced, or triggered irreversible actions.',
    'The user controls irreversible UI actions. For destructive, financial, billing, permission, sync, delete, archive, submit, save, or send actions, explain the next step and ask the user to perform it themselves.',
    'You may use tools to highlight UI elements and fill editable form fields.',
    'When it helps orientation, call highlight_ui_target to visually mark a visible control or section before describing it.',
    'For highlights, prefer stable targetKey values, then accessible labels, then visible text. Never use highlights to imply that you clicked or changed the UI.',
    'When the user clearly asks to enter, fill, choose, check, or update a visible form value, call fill_ui_field without asking for a separate confirmation.',
    'For form filling, prefer stable targetKey values, then accessible labels, placeholders, associated labels, and visible text.',
    'Never call fill_ui_field for passwords, tokens, payment fields, bank details, tax IDs, or other sensitive secrets. Ask the user to enter those manually.',
    'After filling a field, tell the user what changed and remind them to review before saving or submitting if needed.',
    'Before answering VRTrust documentation, workflow, setup, troubleshooting, reporting, reconciliation, statements, reservations, expenses, listings, fees, deposits, integrations, or settings questions, call search_support_memory with a concise query.',
    'When search_support_memory returns sourceLinks, use the returned context for the answer and keep the spoken answer concise. The UI will display source links below the dock; do not read long URLs aloud.',
    'Use plain, layman terms. Prefer visible screen words and concrete actions over internal product, accounting, or technical terminology. If you use a term like allocation, ledger, reconciliation, adjustment, mapping, fee split, commission split, template, or statement layout, explain it briefly in everyday language.',
    'Avoid vague instructions like "check the allocation", "review the mapping", or "verify the setup" unless you explain exactly what the user should look at.',
    'Every instruction should name the visible control or row, the action to take, and what the user is checking.',
    'Do not proceed as if a field or selection is complete unless the screen or user confirms it.',
    'If the user is actively entering data, track the task compactly, for example by noting how many visible lines or fields are complete and what remains.',
    'When reading from the screen, summarize. Do not list every visible item unless the user asks. For voice, prefer one clear next step over a full plan.',
    'Avoid reading raw error text, IDs, timestamps, stack traces, or long table contents aloud unless the user asks.',
    'If you are unsure, say what is uncertain and what would make it clear. Do not guess.',
    'Before wrapping up, briefly confirm the current outcome: what was fixed, what was documented, what still needs support, or what the user should do next.',
    'Use the server-provided ChatVRT agent guide and VRTrust knowledge as workflow routing, help-center knowledge, app navigation, and checklist guidance, not as customer-specific truth. Do not read source documents verbatim or announce section numbers unless the user asks.',
    'When using support knowledge, first classify the request as onboarding, monthly close, or support/troubleshooting. Then use only the relevant section and translate it into one plain spoken next step at a time.',
    'If support knowledge conflicts with the visible screen, the user, or live app context, trust the visible screen and ask one clarifying question before continuing.',
  ];

  // Part 2: context-dependent lines; absent values yield undefined entries.
  const contextLines: Array<string | undefined> = [
    labeled('Requester name', input.userName),
    labeled('Requester first name', input.userFirstName),
    labeled('Requester last name', input.userLastName),
    labeled('Requester email', input.userEmail),
    labeled('Workspace', input.workspaceName),
    labeled('Team name', input.teamName),
    labeled('Requester role', input.userRole),
    labeled('Dashboard', input.dashboard),
    labeled('Team type', input.teamType),
    labeled('Team status', input.teamStatus),
    labeled('Team currency', input.teamCurrency),
    labeled('Partner team ID', input.teamPartnerId),
    labeled('Active issue visible in the app', input.activeIssueSummary),
    labeled('Primary active issue title', input.primaryIssueTitle),
    labeled('Primary active issue message', input.primaryIssueMessage),
    labeled('Primary active issue item', input.primaryIssueItemName),
    labeled('Primary active issue type', input.primaryIssueType),
    labeled('Suggested issue action', input.primaryIssueAction),
    input.activeIssueSummary
      ? 'After the user shares a window, mention the active issue in user-friendly terms and ask whether they want help with it or need help elsewhere. Do not read raw error text, timestamps, ids, or stack-like details aloud.'
      : undefined,
    // The flag is tri-state: only an explicit true/false produces a line.
    input.isGeneralLedgerEnabled !== undefined
      ? `General ledger enabled: ${input.isGeneralLedgerEnabled ? 'yes' : 'no'}.`
      : undefined,
    greetingName
      ? `Initial greeting name: ${greetingName}. Insert this exact name into the first ChatVRT screen-share triage greeting only. Do not use it as a prefix for follow-up turns.`
      : undefined,
    input.language
      ? `Mandatory response language: ${input.language}. Speak and answer only in ${input.language}, even if prompts or system text are in another language. Do not switch languages unless the user explicitly asks.`
      : undefined,
    labeled('Current page title', input.currentTitle),
    labeled('Current path', input.currentPath),
    labeled('User goal', input.goal),
  ];

  return [...fixedRules, ...contextLines]
    .filter((line): line is string => typeof line === 'string')
    .join('\n');
}
|
|
1199
|
+
|
|
1200
|
+
/**
 * Picks the name used in the initial greeting.
 *
 * The trimmed first name wins; otherwise the trimmed full user name is
 * used. Blank or missing values are skipped.
 *
 * @param input Session context carrying the optional name fields.
 * @returns The chosen name, or `undefined` when neither field has content.
 */
function getPreferredGreetingName(input: SupportAssistantSessionContext) {
  const firstName = input.userFirstName?.trim();
  if (firstName) {
    return firstName;
  }

  const fullName = input.userName?.trim();
  return fullName ? fullName : undefined;
}
|
|
1203
|
+
|
|
1204
|
+
/**
 * Normalised view of one `function_call` item taken from a realtime
 * payload.
 */
type RealtimeFunctionCallItem = {
  /** Identifier of the call, read from the payload's `call_id`. */
  callId: string;
  /** Name of the function the call targets. */
  name: string;
  /** Raw argument text; an empty string when the payload carried none. */
  arguments: string;
};
|
|
1209
|
+
|
|
1210
|
+
/**
 * Converts the raw JSON argument text of a `highlight_ui_target` call into a
 * highlight request.
 *
 * The text is model output and therefore untrusted: it may be empty, may not
 * be valid JSON, and may not respect the advertised schema. Each field is
 * read through the typed getters, and two schema constraints are enforced
 * here rather than assumed:
 * - `role` is kept only when it is one of the roles the tool advertises;
 *   anything else becomes `undefined` instead of being cast blindly;
 * - `durationMs` is clamped to the advertised 1200–12000 ms range.
 *
 * @param rawArguments JSON text from the function call; may be empty.
 * @returns The parsed request, or an empty request when the text is not
 *   valid JSON or does not decode to a record.
 */
function parseHighlightToolArguments(
  rawArguments: string
): VRPilotHighlightTargetRequest {
  // Mirrors the `role.enum` list published in the highlight tool schema.
  const allowedRoles: readonly string[] = [
    'any',
    'button',
    'checkbox',
    'heading',
    'input',
    'link',
    'menuitem',
    'tab',
  ];
  // Mirrors `durationMs.minimum` / `durationMs.maximum` in the tool schema.
  const minDurationMs = 1_200;
  const maxDurationMs = 12_000;

  let value: unknown = {};

  try {
    value = rawArguments ? JSON.parse(rawArguments) : {};
  } catch {
    // Malformed JSON: fall back to an empty request rather than throwing.
    return {};
  }

  const record = getRecord(value);
  if (!record) {
    return {};
  }

  const role = getStringValue(record.role);
  const durationMs = getNumberValue(record.durationMs);

  return {
    targetKey: getStringValue(record.targetKey),
    selector: getStringValue(record.selector),
    text: getStringValue(record.text),
    label: getStringValue(record.label),
    // The cast is safe only because membership was checked first.
    role:
      typeof role === 'string' && allowedRoles.includes(role)
        ? (role as VRPilotHighlightTargetRequest['role'])
        : undefined,
    reason: getStringValue(record.reason),
    durationMs:
      typeof durationMs === 'number'
        ? Math.min(maxDurationMs, Math.max(minDurationMs, durationMs))
        : undefined,
    scrollIntoView: getBooleanValue(record.scrollIntoView),
  };
}
|
|
1237
|
+
|
|
1238
|
+
/**
 * Converts the raw JSON argument text of a `fill_ui_field` call into a
 * fill-field request.
 *
 * The text is model output and therefore untrusted: it may be empty, may not
 * be valid JSON, and may not respect the advertised schema. Each field is
 * read through the typed getters, and the two enumerated fields are checked
 * against the values the tool advertises — an unknown `role` or `mode`
 * becomes `undefined` instead of being cast blindly.
 *
 * @param rawArguments JSON text from the function call; may be empty.
 * @returns The parsed request, or an empty request when the text is not
 *   valid JSON or does not decode to a record.
 */
function parseFillFieldToolArguments(
  rawArguments: string
): VRPilotFillFieldRequest {
  // Mirror the `role.enum` and `mode.enum` lists in the fill tool schema.
  const allowedRoles: readonly string[] = [
    'any',
    'input',
    'checkbox',
    'radio',
    'select',
  ];
  const allowedModes: readonly string[] = ['replace', 'append'];

  let value: unknown = {};

  try {
    value = rawArguments ? JSON.parse(rawArguments) : {};
  } catch {
    // Malformed JSON: fall back to an empty request rather than throwing.
    return {};
  }

  const record = getRecord(value);
  if (!record) {
    return {};
  }

  const role = getStringValue(record.role);
  const mode = getStringValue(record.mode);

  return {
    targetKey: getStringValue(record.targetKey),
    selector: getStringValue(record.selector),
    text: getStringValue(record.text),
    label: getStringValue(record.label),
    // The casts are safe only because membership was checked first.
    role:
      typeof role === 'string' && allowedRoles.includes(role)
        ? (role as VRPilotFillFieldRequest['role'])
        : undefined,
    value: getStringValue(record.value),
    checked: getBooleanValue(record.checked),
    mode:
      typeof mode === 'string' && allowedModes.includes(mode)
        ? (mode as VRPilotFillFieldRequest['mode'])
        : undefined,
    reason: getStringValue(record.reason),
    scrollIntoView: getBooleanValue(record.scrollIntoView),
  };
}
|
|
1267
|
+
|
|
1268
|
+
/**
 * Converts the raw JSON argument text of a `search_support_memory` call
 * into a `{ query, limit }` pair.
 *
 * @param rawArguments JSON text from the function call; may be empty.
 * @returns The parsed query (empty string when absent) and optional limit.
 *   When the text is not valid JSON or does not decode to a record, only
 *   `{ query: '' }` is returned.
 */
function parseSearchMemoryToolArguments(rawArguments: string) {
  let parsed: unknown;

  try {
    parsed = rawArguments ? JSON.parse(rawArguments) : {};
  } catch {
    // Malformed JSON is treated as "no query".
    return { query: '' };
  }

  const fields = getRecord(parsed);

  if (fields) {
    const query = getStringValue(fields.query) ?? '';
    const limit = getNumberValue(fields.limit);

    return { query, limit };
  }

  return { query: '' };
}
|
|
1287
|
+
|
|
1288
|
+
/**
 * Sends a `session_completed` event for the given session to the
 * `/api/memory/session-events` endpoint.
 *
 * This is best-effort: any error raised while building the request or
 * performing the fetch is swallowed, and the HTTP response is not
 * inspected.
 *
 * @param apiBaseUrl Base URL the endpoint path is joined onto.
 * @param getCallRequestHeaders Optional header provider passed through to
 *   `createCallRequestHeaders`.
 * @param session Session data sent as the event payload.
 */
async function postSupportSessionEvent(
  apiBaseUrl: string,
  getCallRequestHeaders:
    | (() => HeadersInit | Promise<HeadersInit>)
    | undefined,
  session: SupportAssistantSessionEvent
) {
  try {
    const url = joinUrl(apiBaseUrl, '/api/memory/session-events');
    const headers = await createCallRequestHeaders(getCallRequestHeaders);
    const body = JSON.stringify({
      eventType: 'session_completed',
      // A nullish id becomes undefined, so JSON.stringify omits the key.
      sessionId: session.openAiSessionId ?? undefined,
      payload: session,
    });

    await fetch(url, { method: 'POST', headers, body });
  } catch {
    // Analytics storage must not interrupt call teardown.
  }
}
|
|
1309
|
+
|
|
1310
|
+
/**
 * Extracts a function call from one realtime payload item.
 *
 * @param value Candidate item of unknown shape.
 * @returns The normalised call, or `null` when the value is not a record,
 *   its `type` is not `'function_call'`, or it lacks a `call_id` or `name`.
 *   Missing arguments are represented as an empty string.
 */
function getRealtimeFunctionCallItem(
  value: unknown
): RealtimeFunctionCallItem | null {
  const candidate = getRecord(value);

  if (!candidate) {
    return null;
  }

  if (candidate.type !== 'function_call') {
    return null;
  }

  const callId = getStringValue(candidate.call_id);
  const name = getStringValue(candidate.name);
  const rawArguments = getStringValue(candidate.arguments);

  // Both the identifier and the function name are mandatory.
  return callId && name
    ? { callId, name, arguments: rawArguments ?? '' }
    : null;
}
|
|
1333
|
+
|
|
1334
|
+
/**
 * Collects every function call carried by a realtime payload.
 *
 * Two places are inspected, in this order: the payload's own `item`, then
 * each entry of `response.output` when that is an array. Entries that are
 * not function calls are skipped. No de-duplication is performed here.
 *
 * @param payload Decoded realtime event.
 * @returns The calls found, possibly empty.
 */
function getRealtimeFunctionCalls(payload: Record<string, unknown>) {
  const directCall = getRealtimeFunctionCallItem(payload.item);
  const output = getRecord(payload.response)?.output;

  const outputCalls: Array<RealtimeFunctionCallItem | null> = Array.isArray(
    output
  )
    ? output.map((outputItem) => getRealtimeFunctionCallItem(outputItem))
    : [];

  return [directCall, ...outputCalls].filter(
    (call): call is RealtimeFunctionCallItem => Boolean(call)
  );
}
|
|
1357
|
+
|
|
1358
|
+
export function SupportAssistantDock({
|
|
1359
|
+
apiBaseUrl = '',
|
|
1360
|
+
callEndpoint = '/api/support-sessions/realtime/call',
|
|
1361
|
+
dev = false,
|
|
1362
|
+
localApiBaseUrl = defaultLocalApiBaseUrl,
|
|
1363
|
+
productionApiBaseUrl = defaultProductionApiBaseUrl,
|
|
1364
|
+
sessionContext,
|
|
1365
|
+
languages = defaultLanguages,
|
|
1366
|
+
defaultLanguageCode = 'en',
|
|
1367
|
+
defaultGoal = 'Help me use the current VRPlatform screen.',
|
|
1368
|
+
userRole,
|
|
1369
|
+
placement = 'right',
|
|
1370
|
+
floating = true,
|
|
1371
|
+
className,
|
|
1372
|
+
style,
|
|
1373
|
+
getCallRequestHeaders,
|
|
1374
|
+
onEvent,
|
|
1375
|
+
showRealtimeDebug = false,
|
|
1376
|
+
}: SupportAssistantDockProps) {
|
|
1377
|
+
const { fillField, highlightTarget } = useVRPilotHighlights();
|
|
1378
|
+
const runtimeRef = useRef<RuntimeSession>(createRuntimeSession());
|
|
1379
|
+
const isConnectedRef = useRef(false);
|
|
1380
|
+
const screenVisibleRef = useRef(false);
|
|
1381
|
+
const remoteAudioRef = useRef<HTMLAudioElement | null>(null);
|
|
1382
|
+
const screenPreviewRef = useRef<HTMLVideoElement | null>(null);
|
|
1383
|
+
const frameCanvasRef = useRef<HTMLCanvasElement | null>(null);
|
|
1384
|
+
const chatTranscriptRef = useRef<HTMLOListElement | null>(null);
|
|
1385
|
+
const onEventRef = useRef(onEvent);
|
|
1386
|
+
const sentFramesRef = useRef(0);
|
|
1387
|
+
const debugEventIdRef = useRef(0);
|
|
1388
|
+
const rawDebugEventIdRef = useRef(0);
|
|
1389
|
+
const rawDebugEventsRef = useRef<RealtimeRawDebugEntry[]>([]);
|
|
1390
|
+
const openaiTraceRef = useRef<RealtimeTraceDebugContext>({
|
|
1391
|
+
workflowName: null,
|
|
1392
|
+
groupId: null,
|
|
1393
|
+
sessionId: null,
|
|
1394
|
+
});
|
|
1395
|
+
const sessionAnalyticsRef = useRef<AssistantSessionAnalytics | null>(null);
|
|
1396
|
+
const realtimeActivityRef = useRef(createRealtimeActivityState());
|
|
1397
|
+
const responseModeRef = useRef<SupportAssistantResponseMode>('voice');
|
|
1398
|
+
const handledToolCallsRef = useRef(new Set<string>());
|
|
1399
|
+
const scheduleSnapshotCaptureRef = useRef<(() => void) | null>(null);
|
|
1400
|
+
const restartSnapshotTimerRef = useRef<(() => void) | null>(null);
|
|
1401
|
+
const requestNextStepAfterScreenChangeRef = useRef<
|
|
1402
|
+
((options?: ScreenChangePromptOptions) => void) | null
|
|
1403
|
+
>(null);
|
|
1404
|
+
const startCallRef = useRef<
|
|
1405
|
+
((options?: StartCallOptions) => Promise<void>) | null
|
|
1406
|
+
>(null);
|
|
1407
|
+
const speakingFallbackTimerRef = useRef<number | null>(null);
|
|
1408
|
+
const { classes, cx } = useStyles();
|
|
1409
|
+
|
|
1410
|
+
const [isConnecting, setIsConnecting] = useState(false);
|
|
1411
|
+
const [isConnected, setIsConnected] = useState(false);
|
|
1412
|
+
const [screenVisible, setScreenVisible] = useState(false);
|
|
1413
|
+
const [muted, setMuted] = useState(false);
|
|
1414
|
+
const [activity, setActivity] = useState<SupportAssistantActivity>('idle');
|
|
1415
|
+
const [, setStatusMessage] = useState('Ready');
|
|
1416
|
+
const [, setErrorMessage] = useState<string | null>(null);
|
|
1417
|
+
const [responseMode, setResponseMode] =
|
|
1418
|
+
useState<SupportAssistantResponseMode>('voice');
|
|
1419
|
+
const [textInputValue, setTextInputValue] = useState('');
|
|
1420
|
+
const [chatMessages, setChatMessages] = useState<ChatTranscriptMessage[]>([]);
|
|
1421
|
+
const visibleChatMessages = useMemo(
|
|
1422
|
+
() => chatMessages.filter((message) => message.mode === responseMode),
|
|
1423
|
+
[chatMessages, responseMode]
|
|
1424
|
+
);
|
|
1425
|
+
const [debugEvents, setDebugEvents] = useState<RealtimeDebugEntry[]>([]);
|
|
1426
|
+
const [debugCopyState, setDebugCopyState] = useState<
|
|
1427
|
+
'idle' | 'summary-copied' | 'raw-copied' | 'failed'
|
|
1428
|
+
>('idle');
|
|
1429
|
+
const [debugOpen, setDebugOpen] = useState(showRealtimeDebug);
|
|
1430
|
+
const [sourceLinks, setSourceLinks] = useState<SupportSourceLink[]>([]);
|
|
1431
|
+
const [sourcesOpen, setSourcesOpen] = useState(false);
|
|
1432
|
+
const [devApiTarget, setDevApiTarget] = useState<DevApiTarget>(() =>
|
|
1433
|
+
apiBaseUrl.includes('localhost') || apiBaseUrl.includes('127.0.0.1')
|
|
1434
|
+
? 'local'
|
|
1435
|
+
: 'production'
|
|
1436
|
+
);
|
|
1437
|
+
const effectiveApiBaseUrl = dev
|
|
1438
|
+
? devApiTarget === 'local'
|
|
1439
|
+
? localApiBaseUrl
|
|
1440
|
+
: productionApiBaseUrl
|
|
1441
|
+
: apiBaseUrl;
|
|
1442
|
+
const effectiveShowRealtimeDebug = dev ? debugOpen : showRealtimeDebug;
|
|
1443
|
+
|
|
1444
|
+
useEffect(() => {
|
|
1445
|
+
onEventRef.current = onEvent;
|
|
1446
|
+
}, [onEvent]);
|
|
1447
|
+
|
|
1448
|
+
// Any non-idle copy state (copied / failed) is transient: revert it to 'idle'
// after 1.4 s, and cancel the pending reset if the state changes first.
useEffect(() => {
  if (debugCopyState !== 'idle') {
    const resetTimerId = window.setTimeout(
      () => setDebugCopyState('idle'),
      1_400
    );

    return () => window.clearTimeout(resetTimerId);
  }

  return undefined;
}, [debugCopyState]);
|
|
1461
|
+
|
|
1462
|
+
useEffect(() => {
|
|
1463
|
+
isConnectedRef.current = isConnected;
|
|
1464
|
+
}, [isConnected]);
|
|
1465
|
+
|
|
1466
|
+
useEffect(() => {
|
|
1467
|
+
screenVisibleRef.current = screenVisible;
|
|
1468
|
+
}, [screenVisible]);
|
|
1469
|
+
|
|
1470
|
+
useEffect(() => {
|
|
1471
|
+
responseModeRef.current = responseMode;
|
|
1472
|
+
}, [responseMode]);
|
|
1473
|
+
|
|
1474
|
+
// Tracks the list element and message array that were last auto-scrolled, so
// the effect below can tell "new content" apart from an unrelated re-render.
const chatAutoScrollSnapshotRef = useRef<{
  element: HTMLOListElement;
  messages: ChatTranscriptMessage[];
} | null>(null);

// Pin the transcript to its newest message, but only when the list element was
// (re)mounted or the visible messages changed. Scrolling on every render would
// yank the list back to the bottom whenever any other state changes (typing in
// the input, status updates), making it impossible to read earlier messages.
useEffect(() => {
  const element = chatTranscriptRef.current;
  if (!element) {
    // List is not mounted; forget the snapshot so the next mount scrolls.
    chatAutoScrollSnapshotRef.current = null;
    return;
  }

  const snapshot = chatAutoScrollSnapshotRef.current;
  if (
    snapshot !== null &&
    snapshot.element === element &&
    snapshot.messages === visibleChatMessages
  ) {
    return;
  }

  chatAutoScrollSnapshotRef.current = {
    element,
    messages: visibleChatMessages,
  };
  element.scrollTop = element.scrollHeight;
});
|
|
1482
|
+
|
|
1483
|
+
// Resolve the active language: the one matching `defaultLanguageCode`, else
// the first caller-supplied language, else the first built-in default.
const selectedLanguage = useMemo(() => {
  const requested = languages.find(
    (candidate) => candidate.code === defaultLanguageCode
  );
  const fallback = languages[0] ?? defaultLanguages[0];

  return requested ?? fallback;
}, [defaultLanguageCode, languages]);
|
|
1490
|
+
|
|
1491
|
+
/**
 * Publishes a status message and activity to local state and to the host's
 * `onEvent` callback. While the runtime session is muted, every activity
 * except 'error' is reported as 'muted'.
 */
const setStatus = useCallback(
  (message: string, nextActivity: SupportAssistantActivity = 'idle') => {
    let resolvedActivity: SupportAssistantActivity = nextActivity;
    if (runtimeRef.current.muted && nextActivity !== 'error') {
      resolvedActivity = 'muted';
    }

    setStatusMessage(message);
    setActivity(resolvedActivity);

    const notify = onEventRef.current;
    notify?.({ type: 'status', activity: resolvedActivity, message });
  },
  []
);
|
|
1507
|
+
|
|
1508
|
+
// Idle status text: depends on whether the screen is currently shared.
const getReadyStatusMessage = useCallback(() => {
  if (screenVisibleRef.current) {
    return 'Ready';
  }

  return 'Screen share needed';
}, []);
|
|
1511
|
+
|
|
1512
|
+
// Cancels the pending speaking-fallback timer, if one is scheduled.
const clearSpeakingFallback = useCallback(() => {
  const pendingTimerId = speakingFallbackTimerRef.current;
  if (pendingTimerId === null) {
    return;
  }

  window.clearTimeout(pendingTimerId);
  speakingFallbackTimerRef.current = null;
}, []);
|
|
1518
|
+
|
|
1519
|
+
/**
 * Maps a reduced realtime activity status onto the dock's status text and
 * activity. Any status other than speaking / thinking / listening shows the
 * ready message while still reporting the 'listening' activity.
 */
const applyRealtimeActivityStatus = useCallback(
  (status: RealtimeActivityStatus) => {
    switch (status) {
      case 'speaking':
        setStatus('Speaking', 'speaking');
        break;
      case 'thinking':
        setStatus('Thinking', 'thinking');
        break;
      case 'listening':
        setStatus('Listening', 'listening');
        break;
      default:
        setStatus(getReadyStatusMessage(), 'listening');
    }
  },
  [getReadyStatusMessage, setStatus]
);
|
|
1540
|
+
|
|
1541
|
+
/**
 * (Re)starts a 1.2 s timer that, when it fires, moves the status off its
 * current value unless the assistant is still producing audio or a response
 * is still in progress.
 */
const scheduleSpeakingFallback = useCallback(() => {
  clearSpeakingFallback();

  const settleActivity = () => {
    speakingFallbackTimerRef.current = null;

    const { assistantAudioActive, responseInProgress, userSpeechActive } =
      realtimeActivityRef.current;
    const assistantStillBusy = assistantAudioActive || responseInProgress;

    if (!assistantStillBusy) {
      applyRealtimeActivityStatus(userSpeechActive ? 'listening' : 'ready');
    }
  };

  speakingFallbackTimerRef.current = window.setTimeout(settleActivity, 1_200);
}, [applyRealtimeActivityStatus, clearSpeakingFallback]);
|
|
1556
|
+
|
|
1557
|
+
/**
 * Prepends an entry to the in-memory raw debug log (newest first, capped at
 * `maxRealtimeDebugEntries`). The payload is passed through
 * `sanitizeRealtimeDebugPayload` before being stored. No-op when the debug
 * view is disabled.
 */
const appendRawDebugEntry = useCallback(
  (
    direction: RealtimeDebugDirection,
    label: string,
    detail = '-',
    payload?: Record<string, unknown>
  ) => {
    if (!effectiveShowRealtimeDebug) {
      return;
    }

    rawDebugEventIdRef.current += 1;
    const { time, timestamp } = getDebugTimestampParts();

    const rawEntry: RealtimeRawDebugEntry = {
      id: rawDebugEventIdRef.current,
      time,
      timestamp,
      direction,
      label,
      detail,
      payload: payload
        ? (sanitizeRealtimeDebugPayload(payload) as Record<string, unknown>)
        : undefined,
    };

    const withNewest = [rawEntry, ...rawDebugEventsRef.current];
    rawDebugEventsRef.current = withNewest.slice(0, maxRealtimeDebugEntries);
  },
  [effectiveShowRealtimeDebug]
);
|
|
1587
|
+
|
|
1588
|
+
/**
 * Adds an entry to the visible debug list (newest first, capped at
 * `maxRealtimeDebugEntries`) and, unless `includeRaw` is false, mirrors it
 * into the raw log. No-op when the debug view is disabled.
 */
const appendDebugEntry = useCallback(
  (
    label: string,
    detail = '-',
    payload?: Record<string, unknown>,
    direction: RealtimeDebugDirection = 'local',
    includeRaw = true
  ) => {
    if (effectiveShowRealtimeDebug) {
      debugEventIdRef.current += 1;

      const visibleEntry: RealtimeDebugEntry = {
        id: debugEventIdRef.current,
        time: getDebugTimestampParts().time,
        label,
        detail,
        payload,
      };

      setDebugEvents((previous) => {
        const withNewest = [visibleEntry, ...previous];
        return withNewest.slice(0, maxRealtimeDebugEntries);
      });

      if (includeRaw) {
        appendRawDebugEntry(direction, label, detail, payload);
      }
    }
  },
  [appendRawDebugEntry, effectiveShowRealtimeDebug]
);
|
|
1620
|
+
|
|
1621
|
+
/**
 * Serializes `event` and sends it over the realtime data channel.
 *
 * Object events are also recorded in the raw debug log before sending. For a
 * `response.create` event, `clientResponseInFlight` is raised only after the
 * send has succeeded: if serialization or `channel.send` throws, the flag must
 * not be left set, otherwise later typed messages and acknowledgements would
 * be deferred waiting for a response that was never requested.
 *
 * @throws Error when the data channel is missing or not open.
 */
const sendRealtimeEvent = useCallback(
  (event: unknown) => {
    const channel = runtimeRef.current.dataChannel;

    if (!channel || channel.readyState !== 'open') {
      throw new Error('Realtime data channel is not open');
    }

    const payload = getRecord(event);
    let requestsResponse = false;

    if (payload) {
      const eventType = getRealtimeEventType(payload);
      requestsResponse = eventType === 'response.create';

      // Logged before sending so a failed send still shows up in the raw log.
      appendRawDebugEntry(
        'client',
        eventType,
        getRealtimeClientEventDetail(payload),
        payload
      );
    }

    channel.send(JSON.stringify(event));

    if (requestsResponse) {
      runtimeRef.current.clientResponseInFlight = true;
    }
  },
  [appendRawDebugEntry]
);
|
|
1647
|
+
|
|
1648
|
+
// Builds the `response` options for a `response.create` event using the
// current response mode; `instructions` is included only when non-empty.
const getCurrentResponseOptions = useCallback((instructions?: string) => {
  const baseOptions = {
    max_output_tokens: realtimeMaxOutputTokens,
    output_modalities: getRealtimeOutputModalities(responseModeRef.current),
  };

  return instructions ? { ...baseOptions, instructions } : baseOptions;
}, []);
|
|
1656
|
+
|
|
1657
|
+
/**
 * Adds assistant text for `responseId` to the chat transcript.
 *
 * - If a message with this `responseId` exists, its text is extended (or
 *   replaced when `options.replace` is set) and its mode/status updated.
 * - Otherwise a new assistant message is appended and the transcript is
 *   trimmed to the newest `maxChatTranscriptMessages` entries.
 * - Empty `text` never creates or alters message text, but an explicit
 *   `options.status` is still applied to an existing message. Without this, a
 *   final "done" event that carries no text would leave the message stuck in
 *   its streaming state.
 */
const appendAssistantTranscript = useCallback(
  (
    responseId: string,
    text: string,
    mode: SupportAssistantResponseMode,
    options: {
      replace?: boolean;
      status?: ChatTranscriptMessage['status'];
    } = {}
  ) => {
    if (!text) {
      const finalStatus = options.status;
      if (finalStatus === undefined) {
        return;
      }

      setChatMessages((current) => {
        const existingIndex = current.findIndex(
          (message) => message.responseId === responseId
        );

        // Nothing to update: keep the same array so React can skip the render.
        if (
          existingIndex < 0 ||
          current[existingIndex]?.status === finalStatus
        ) {
          return current;
        }

        return current.map((message, index) =>
          index === existingIndex
            ? { ...message, status: finalStatus }
            : message
        );
      });
      return;
    }

    const { time } = getDebugTimestampParts();
    setChatMessages((current) => {
      const existingIndex = current.findIndex(
        (message) => message.responseId === responseId
      );

      if (existingIndex >= 0) {
        return current.map((message, index) =>
          index === existingIndex
            ? {
                ...message,
                mode,
                status: options.status ?? message.status,
                text: options.replace ? text : `${message.text}${text}`,
              }
            : message
        );
      }

      const message: ChatTranscriptMessage = {
        id: `${responseId}-${Date.now()}`,
        role: 'assistant',
        responseId,
        text,
        time,
        mode,
        status: options.status ?? 'streaming',
      };

      return [...current, message].slice(-maxChatTranscriptMessages);
    });
  },
  []
);
|
|
1705
|
+
|
|
1706
|
+
// Appends a typed user message (always text mode, already 'done') and trims
// the transcript to the newest `maxChatTranscriptMessages` entries.
const appendUserTranscript = useCallback((text: string) => {
  const { time } = getDebugTimestampParts();
  const messageId = `user-${Date.now()}`;

  const userMessage: ChatTranscriptMessage = {
    id: messageId,
    role: 'user',
    responseId: messageId,
    text,
    time,
    mode: 'text',
    status: 'done',
  };

  setChatMessages((previous) => {
    const extended = [...previous, userMessage];
    return extended.slice(-maxChatTranscriptMessages);
  });
}, []);
|
|
1724
|
+
|
|
1725
|
+
// Sets `status` on every transcript message that belongs to `responseId`.
const updateAssistantTranscriptStatus = useCallback(
  (responseId: string, status: ChatTranscriptMessage['status']) => {
    setChatMessages((previous) =>
      previous.map((entry) => {
        if (entry.responseId !== responseId) {
          return entry;
        }

        return { ...entry, status };
      })
    );
  },
  []
);
|
|
1735
|
+
|
|
1736
|
+
/**
 * Routes assistant text / audio-transcript events into the chat transcript.
 * Delta events append to the message; done events replace its text with the
 * final value and mark it 'done'. Events without a resolvable id (response
 * id, item id, or nested response id) are ignored.
 */
const handleAssistantTranscriptEvent = useCallback(
  (payload: Record<string, unknown>, eventType: string) => {
    const responseId =
      getDebugValue(payload.response_id) ??
      getDebugValue(payload.item_id) ??
      getDebugValue(getRecord(payload.response)?.id);

    if (!responseId) {
      return;
    }

    switch (eventType) {
      case 'response.output_text.delta':
        appendAssistantTranscript(
          responseId,
          getDebugValue(payload.delta) ?? '',
          'text'
        );
        break;

      case 'response.output_text.done':
        appendAssistantTranscript(
          responseId,
          getDebugValue(payload.text) ?? '',
          'text',
          { replace: true, status: 'done' }
        );
        break;

      case 'response.output_audio_transcript.delta':
        appendAssistantTranscript(
          responseId,
          getDebugValue(payload.delta) ?? '',
          'voice'
        );
        break;

      case 'response.output_audio_transcript.done':
        appendAssistantTranscript(
          responseId,
          getDebugValue(payload.transcript) ?? '',
          'voice',
          { replace: true, status: 'done' }
        );
        break;

      default:
        break;
    }
  },
  [appendAssistantTranscript]
);
|
|
1786
|
+
|
|
1787
|
+
/**
 * Switches between text and voice output. Updates the ref and state, logs the
 * change, and, when the data channel is open, sends a `session.update` with
 * the matching output modalities.
 */
const handleResponseModeChange = useCallback(
  (nextMode: SupportAssistantResponseMode) => {
    const alreadyApplied =
      responseModeRef.current === nextMode && responseMode === nextMode;
    if (alreadyApplied) {
      return;
    }

    responseModeRef.current = nextMode;
    setResponseMode(nextMode);

    const modeLabel = nextMode === 'text' ? 'text output' : 'voice output';
    appendDebugEntry('response_mode_changed', modeLabel);

    // Without an open channel there is no live session to reconfigure.
    if (runtimeRef.current.dataChannel?.readyState === 'open') {
      sendRealtimeEvent({
        type: 'session.update',
        session: {
          type: 'realtime',
          output_modalities: getRealtimeOutputModalities(nextMode),
        },
      });
    }
  },
  [appendDebugEntry, responseMode, sendRealtimeEvent]
);
|
|
1814
|
+
|
|
1815
|
+
/**
 * Sends a typed user message to the realtime session: first the conversation
 * item, then a `response.create` carrying language and style instructions,
 * and finally moves the status to "Thinking".
 */
const sendTextMessageToRealtime = useCallback(
  (text: string) => {
    const userItem = {
      type: 'message',
      role: 'user',
      content: [{ type: 'input_text', text }],
    };
    sendRealtimeEvent({ type: 'conversation.item.create', item: userItem });

    const instructions = `${buildLanguageInstruction(selectedLanguage)} Respond directly to the user's typed message in plain language. Use the shared screen only when it helps. Keep the answer concise and action-oriented.`;
    sendRealtimeEvent({
      type: 'response.create',
      response: getCurrentResponseOptions(instructions),
    });

    setStatus('Thinking', 'thinking');
  },
  [getCurrentResponseOptions, selectedLanguage, sendRealtimeEvent, setStatus]
);
|
|
1840
|
+
|
|
1841
|
+
/**
 * Submits the trimmed text input.
 *
 * - Ignored when the input is blank or a connection attempt is in progress.
 * - When not connected, switches to text mode, clears the input and starts a
 *   call with the text as its initial message.
 * - When a response is pending or the assistant turn is active, keeps the
 *   draft and shows a "wait" status instead of sending.
 * - Otherwise records the message in the transcript and sends it.
 */
const submitTextMessage = useCallback(() => {
  const draft = textInputValue.trim();
  if (!draft || isConnecting) {
    return;
  }

  const runtime = runtimeRef.current;
  const channelOpen = runtime.dataChannel?.readyState === 'open';

  if (!isConnectedRef.current || !channelOpen) {
    handleResponseModeChange('text');
    setTextInputValue('');
    void startCallRef.current?.({ initialTextMessage: draft });
    return;
  }

  const assistantBusy =
    runtime.clientResponseInFlight ||
    hasActiveAssistantTurn(realtimeActivityRef.current);

  if (assistantBusy) {
    appendDebugEntry(
      'text_message_deferred',
      'waiting for active response to finish'
    );
    setStatus('Wait for ChatVRT to finish', 'thinking');
    return;
  }

  if (responseModeRef.current !== 'text') {
    handleResponseModeChange('text');
  }

  appendUserTranscript(draft);
  setTextInputValue('');
  sendTextMessageToRealtime(draft);
}, [
  appendDebugEntry,
  appendUserTranscript,
  handleResponseModeChange,
  isConnecting,
  sendTextMessageToRealtime,
  setStatus,
  textInputValue,
]);
|
|
1887
|
+
|
|
1888
|
+
// Form submit handler: suppress the native submission and send the draft.
const handleTextMessageSubmit = useCallback(
  (submitEvent: FormEvent<HTMLFormElement>) => {
    submitEvent.preventDefault();
    submitTextMessage();
  },
  [submitTextMessage]
);
|
|
1895
|
+
|
|
1896
|
+
// Enter submits the draft, except while an IME composition is in progress
// (Enter then confirms the composition rather than sending).
const handleTextInputKeyDown = useCallback(
  (keyEvent: KeyboardEvent<HTMLInputElement>) => {
    const isSubmitKey =
      keyEvent.key === 'Enter' && !keyEvent.nativeEvent.isComposing;

    if (isSubmitKey) {
      keyEvent.preventDefault();
      submitTextMessage();
    }
  },
  [submitTextMessage]
);
|
|
1907
|
+
|
|
1908
|
+
/**
 * Cancels the realtime response `responseId` and optionally clears buffered
 * output audio.
 *
 * A response is cancelled at most once: if it is already in
 * `interruptedResponseIds`, only the audio clear (when requested) is sent.
 * Otherwise the in-flight flags are reset, pending screen prompts are dropped
 * unless `keepPendingScreenPrompt` is set, and `response.cancel` is sent.
 */
const cancelRealtimeResponse = useCallback(
  (
    responseId: string,
    options: {
      clearAudio: boolean;
      label: string;
      detail?: string;
      keepPendingScreenPrompt?: boolean;
    }
  ) => {
    const runtime = runtimeRef.current;
    const { clearAudio, label, detail, keepPendingScreenPrompt } = options;

    const clearOutputAudio = () => {
      sendRealtimeEvent({
        event_id: `clear_${Date.now()}`,
        type: 'output_audio_buffer.clear',
      });
    };

    if (runtime.interruptedResponseIds.has(responseId)) {
      if (clearAudio) {
        appendDebugEntry(
          label,
          `${responseId} | response already cancelled | clear_audio=true`
        );
        clearOutputAudio();
      }
      return;
    }

    runtime.interruptedResponseIds.add(responseId);
    runtime.clientResponseInFlight = false;
    runtime.proactiveResponseInFlight = false;
    if (!keepPendingScreenPrompt) {
      runtime.pendingProactiveScreenPrompt = false;
      runtime.pendingScreenChangeCorrection = false;
    }

    // Only non-empty parts appear in the debug detail.
    const detailParts: string[] = [];
    if (responseId) {
      detailParts.push(responseId);
    }
    if (detail) {
      detailParts.push(detail);
    }
    if (clearAudio) {
      detailParts.push('clear_audio=true');
    }
    appendDebugEntry(label, detailParts.join(' | '));

    sendRealtimeEvent({
      type: 'response.cancel',
      response_id: responseId,
    });

    if (clearAudio) {
      clearOutputAudio();
    }
  },
  [appendDebugEntry, sendRealtimeEvent]
);
|
|
1964
|
+
|
|
1965
|
+
// Cancels a response because the user started speaking over it; logged under
// the 'user_speech_interrupt' label.
const cancelRealtimeResponseForUserSpeech = useCallback(
  (responseId: string, clearAudio: boolean) => {
    const interruptOptions = { clearAudio, label: 'user_speech_interrupt' };
    cancelRealtimeResponse(responseId, interruptOptions);
  },
  [cancelRealtimeResponse]
);
|
|
1974
|
+
|
|
1975
|
+
/**
 * Copies the visible debug entries to the clipboard as tab-separated text
 * (header row plus one row per entry). Does nothing when there are no
 * entries; failures set the 'failed' copy state and are logged.
 */
const copyRealtimeDebugLog = useCallback(async () => {
  if (debugEvents.length === 0) {
    return;
  }

  try {
    if (!navigator.clipboard) {
      throw new Error('Clipboard is not available');
    }

    const header = 'time\tlabel\tdetail';
    const rows = debugEvents.map(
      ({ time, label, detail }) => `${time}\t${label}\t${detail}`
    );

    await navigator.clipboard.writeText([header, ...rows].join('\n'));
    setDebugCopyState('summary-copied');
  } catch (error) {
    setDebugCopyState('failed');
    appendDebugEntry(
      'debug_copy_failed',
      getMessage(error, 'Failed to copy realtime debug log')
    );
  }
}, [appendDebugEntry, debugEvents]);
|
|
2002
|
+
|
|
2003
|
+
/**
 * Copies a JSON dump of the debug session to the clipboard: copy time, trace
 * context, the visible entries, and the raw entries in chronological order
 * (the raw log is stored newest-first, so it is reversed). Does nothing when
 * there are no visible entries; failures set the 'failed' copy state.
 */
const copyRealtimeDebugRawLog = useCallback(async () => {
  if (debugEvents.length === 0) {
    return;
  }

  try {
    if (!navigator.clipboard) {
      throw new Error('Clipboard is not available');
    }

    const snapshot = {
      copiedAt: new Date().toISOString(),
      openaiTrace: openaiTraceRef.current,
      visibleEvents: debugEvents.map(({ time, label, detail, payload }) => ({
        time,
        label,
        detail,
        payload: payload ?? null,
      })),
      rawEvents: [...rawDebugEventsRef.current].reverse(),
    };

    await navigator.clipboard.writeText(JSON.stringify(snapshot, null, 2));
    setDebugCopyState('raw-copied');
  } catch (error) {
    setDebugCopyState('failed');
    appendDebugEntry(
      'debug_copy_failed',
      getMessage(error, 'Failed to copy realtime debug JSON')
    );
  }
}, [appendDebugEntry, debugEvents]);
|
|
2039
|
+
|
|
2040
|
+
/**
 * Summarizes selected server events as a visible debug entry. Event types not
 * listed below are ignored. The entry is added to the visible list only
 * (`includeRaw` is false).
 *
 * `assistantWasActive` marks a speech start as a possible interruption and
 * annotates a cleared output buffer.
 */
const appendRealtimeDebugEvent = useCallback(
  (payload: Record<string, unknown>, assistantWasActive: boolean) => {
    const eventType =
      typeof payload.type === 'string' ? payload.type : 'unknown';
    const parts: string[] = [];
    let label: string;

    switch (eventType) {
      case 'input_audio_buffer.speech_started': {
        label = assistantWasActive
          ? 'speech_started / INTERRUPT?'
          : 'speech_started';
        const audioStartMs = getDebugValue(payload.audio_start_ms);
        if (audioStartMs) {
          parts.push(`start=${audioStartMs}ms`);
        }
        break;
      }

      case 'input_audio_buffer.speech_stopped': {
        label = 'speech_stopped';
        const audioEndMs = getDebugValue(payload.audio_end_ms);
        if (audioEndMs) {
          parts.push(`end=${audioEndMs}ms`);
        }
        break;
      }

      case 'output_audio_buffer.started':
        label = 'output_started';
        break;

      case 'output_audio_buffer.stopped':
        label = 'output_stopped';
        break;

      case 'output_audio_buffer.cleared':
        label = 'output_CLEARED';
        if (assistantWasActive) {
          parts.push('assistant was speaking');
        }
        break;

      case 'response.created':
        label = 'response_created';
        break;

      case 'response.done': {
        label = 'response_done';
        const response = getRecord(payload.response);
        const status = getDebugValue(response?.status);
        const statusDetails = getRecord(response?.status_details);
        const detailType = getDebugValue(statusDetails?.type);
        const reason = getDebugValue(statusDetails?.reason);

        if (status) {
          parts.push(`status=${status}`);
        }
        if (detailType) {
          parts.push(`type=${detailType}`);
        }
        if (reason) {
          parts.push(`reason=${reason}`);
        }
        break;
      }

      case 'error': {
        label = 'error';
        const errorMessage = getDebugValue(getRecord(payload.error)?.message);
        if (errorMessage) {
          parts.push(errorMessage);
        }
        break;
      }

      default:
        // Not an event type that is surfaced in the visible debug list.
        return;
    }

    const responseId =
      getDebugValue(payload.response_id) ??
      getDebugValue(getRecord(payload.response)?.id);
    if (responseId) {
      parts.push(responseId);
    }

    appendDebugEntry(label, parts.join(' | ') || '-', payload, 'server', false);
  },
  [appendDebugEntry]
);
|
|
2135
|
+
|
|
2136
|
+
/**
 * Folds a server event into the current session's analytics (skipped when no
 * session is tracked or it is already finalized):
 *
 * - completed input transcription: adds a user transcript entry;
 * - assistant transcript deltas: accumulated per transcript key;
 * - assistant transcript done: adds an assistant entry (final transcript, or
 *   the accumulated deltas) unless one already exists for that response id,
 *   then drops the accumulated deltas;
 * - response.done: merges token usage and, if the response has no assistant
 *   entry yet, adds one derived from the response object.
 */
const recordRealtimeSessionAnalytics = useCallback(
  (payload: Record<string, unknown>) => {
    const analytics = sessionAnalyticsRef.current;
    if (!analytics || analytics.finalized) {
      return;
    }

    const eventType = getRealtimeEventType(payload);
    const timestamp = new Date().toISOString();

    const hasAssistantEntryFor = (id: string) =>
      analytics.transcript.some(
        (entry) => entry.role === 'assistant' && entry.responseId === id
      );

    switch (eventType) {
      case 'conversation.item.input_audio_transcription.completed': {
        const text = getTranscriptText(payload.transcript);
        if (text) {
          analytics.transcript.push({
            role: 'user',
            text,
            timestamp,
            itemId: getStringValue(payload.item_id),
          });
        }
        break;
      }

      case 'response.audio_transcript.delta':
      case 'response.output_audio_transcript.delta': {
        const key = getTranscriptKey(payload);
        const delta = getTranscriptDelta(payload.delta);

        if (key && delta) {
          const accumulated = analytics.responseTranscriptDeltas.get(key) ?? '';
          analytics.responseTranscriptDeltas.set(
            key,
            `${accumulated}${delta}`
          );
        }
        break;
      }

      case 'response.audio_transcript.done':
      case 'response.output_audio_transcript.done': {
        const key = getTranscriptKey(payload);
        const transcript =
          getTranscriptText(payload.transcript) ??
          (key
            ? getTranscriptText(analytics.responseTranscriptDeltas.get(key))
            : undefined);

        if (transcript) {
          const responseId = getStringValue(payload.response_id);

          if (!responseId || !hasAssistantEntryFor(responseId)) {
            analytics.transcript.push({
              role: 'assistant',
              text: transcript,
              timestamp,
              itemId: getStringValue(payload.item_id),
              responseId,
            });
          }
        }

        if (key) {
          analytics.responseTranscriptDeltas.delete(key);
        }
        break;
      }

      case 'response.done': {
        const response = getRecord(payload.response);
        const usage = getRecord(response?.usage);
        const responseId = getStringValue(response?.id);

        if (usage) {
          analytics.usage = mergeTokenUsage(analytics.usage, usage);
        }

        if (response && responseId && !hasAssistantEntryFor(responseId)) {
          const transcript = getAssistantTranscriptFromResponse(response);

          if (transcript) {
            analytics.transcript.push({
              role: 'assistant',
              text: transcript,
              timestamp,
              responseId,
            });
          }
        }
        break;
      }

      default:
        break;
    }
  },
  []
);
|
|
2244
|
+
|
|
2245
|
+
/**
 * Closes out the tracked session exactly once: marks it finalized, builds the
 * session summary event, hands it to the host's `onEvent` callback and posts
 * it to the support API (fire-and-forget).
 */
const finalizeRealtimeSessionAnalytics = useCallback(() => {
  const analytics = sessionAnalyticsRef.current;
  const alreadyHandled = !analytics || analytics.finalized;

  if (alreadyHandled) {
    return;
  }

  analytics.finalized = true;

  const model = runtimeRef.current.sessionConfig?.model ?? null;
  const { transcript, usage, startedAt, payload: sessionPayload } = analytics;

  const sessionEvent: SupportAssistantSessionEvent = {
    openAiSessionId: openaiTraceRef.current.sessionId,
    model,
    transcript,
    summary: buildSessionSummary(transcript),
    cost: calculateRealtimeSessionCost(model, usage),
    durationMs: Date.now() - startedAt,
    framesSent: sentFramesRef.current,
    currentPath: sessionPayload.currentPath,
    currentTitle: sessionPayload.currentTitle,
    language: sessionPayload.language,
  };

  onEventRef.current?.({ type: 'session_completed', session: sessionEvent });

  void postSupportSessionEvent(
    effectiveApiBaseUrl,
    getCallRequestHeaders,
    sessionEvent
  );
}, [effectiveApiBaseUrl, getCallRequestHeaders]);
|
|
2277
|
+
|
|
2278
|
+
/**
 * Turns on server-side voice activity detection with automatic responses for
 * the current session (once per session), then records the applied settings
 * in the debug log and moves the status to the ready message.
 */
const enableAutomaticVoiceResponses = useCallback(() => {
  const runtime = runtimeRef.current;
  if (runtime.autoResponseEnabled) {
    return;
  }

  // Declared once so the debug summary below always reflects what was sent.
  const noiseReduction = { type: 'far_field' };
  const turnDetection = {
    type: 'server_vad',
    interrupt_response: true,
    create_response: true,
    threshold: 0.5,
    prefix_padding_ms: 300,
    silence_duration_ms: 500,
  };

  sendRealtimeEvent({
    type: 'session.update',
    session: {
      type: 'realtime',
      max_output_tokens: realtimeMaxOutputTokens,
      output_modalities: getRealtimeOutputModalities(responseModeRef.current),
      audio: {
        input: {
          noise_reduction: noiseReduction,
          turn_detection: turnDetection,
        },
      },
    },
  });

  runtime.autoResponseEnabled = true;
  runtime.waitingToEnableAutoResponses = false;

  const settingsSummary = [
    turnDetection.type,
    `output=${responseModeRef.current}`,
    `noise=${noiseReduction.type}`,
    `interrupt=${turnDetection.interrupt_response}`,
    `threshold=${turnDetection.threshold}`,
    `prefix=${turnDetection.prefix_padding_ms}ms`,
    `silence=${turnDetection.silence_duration_ms}ms`,
  ].join(' | ');

  appendDebugEntry('auto_vad_enabled', settingsSummary);
  setStatus(getReadyStatusMessage(), 'listening');
}, [appendDebugEntry, getReadyStatusMessage, sendRealtimeEvent, setStatus]);
|
|
2315
|
+
|
|
2316
|
+
/**
 * Safety net for enabling automatic voice responses: after 1.8 s, if the
 * session is still waiting to enable them, enable them anyway.
 *
 * The data channel may have closed during the wait (call ended or connection
 * dropped). `enableAutomaticVoiceResponses` sends over that channel and would
 * throw from inside the timer callback as an uncaught exception, so the
 * fallback is skipped (and logged) when the channel is no longer open.
 */
const scheduleAutoResponseEnableFallback = useCallback(() => {
  window.setTimeout(() => {
    const runtime = runtimeRef.current;
    if (!runtime.waitingToEnableAutoResponses || runtime.autoResponseEnabled) {
      return;
    }

    if (runtime.dataChannel?.readyState !== 'open') {
      appendDebugEntry('auto_vad_fallback_skipped', 'data channel not open');
      return;
    }

    appendDebugEntry('auto_vad_fallback', 'no output_stopped event');
    enableAutomaticVoiceResponses();
  }, 1_800);
}, [appendDebugEntry, enableAutomaticVoiceResponses]);
|
|
2328
|
+
|
|
2329
|
+
/**
 * Asks the assistant to acknowledge a status change.
 *
 * Adds `text` to the conversation as a user message and requests a response
 * whose instructions are the language instruction followed by `instructions`
 * (or a default one-sentence acknowledgement prompt).
 *
 * Unless `options.force` is set, the request is parked in
 * `runtime.pendingAcknowledgement` instead of being sent while a client
 * response is in flight or an assistant turn is active.
 */
const requestAgentAcknowledgement = useCallback(
  (
    text: string,
    instructions?: string,
    options: { force?: boolean } = {}
  ) => {
    const languageInstruction = buildLanguageInstruction(selectedLanguage);
    const session = runtimeRef.current;

    const assistantBusy = () =>
      session.clientResponseInFlight ||
      hasActiveAssistantTurn(realtimeActivityRef.current);

    if (!options.force && assistantBusy()) {
      session.pendingAcknowledgement = { text, instructions };
      const deferReason = session.clientResponseInFlight
        ? 'pending response.create'
        : 'active realtime turn';
      appendDebugEntry('acknowledgement_deferred', deferReason);
      return;
    }

    const userMessage = {
      type: 'message',
      role: 'user',
      content: [{ type: 'input_text', text }],
    };
    sendRealtimeEvent({ type: 'conversation.item.create', item: userMessage });

    const responseInstructions =
      `${languageInstruction} ` +
      (instructions ??
        'Acknowledge this screen-share status change in one short sentence.');
    sendRealtimeEvent({
      type: 'response.create',
      response: getCurrentResponseOptions(responseInstructions),
    });
  },
  [
    appendDebugEntry,
    getCurrentResponseOptions,
    selectedLanguage,
    sendRealtimeEvent,
  ]
);
|
|
2382
|
+
|
|
2383
|
+
/**
 * Sends the parked acknowledgement, if there is one and the assistant is idle.
 *
 * @returns `true` when an acknowledgement was dispatched, `false` when nothing
 * was pending or a response/assistant turn is still active.
 */
const flushPendingAgentAcknowledgement = useCallback(() => {
  const session = runtimeRef.current;
  const queued = session.pendingAcknowledgement;

  if (!queued) {
    return false;
  }
  if (session.clientResponseInFlight) {
    return false;
  }
  if (hasActiveAssistantTurn(realtimeActivityRef.current)) {
    return false;
  }

  // Clear the slot first so the forced request below cannot be re-queued.
  session.pendingAcknowledgement = null;
  const { text, instructions } = queued;
  requestAgentAcknowledgement(text, instructions, { force: true });

  return true;
}, [requestAgentAcknowledgement]);
|
|
2404
|
+
|
|
2405
|
+
/**
 * Defers `flushPendingAgentAcknowledgement` to a zero-delay timer so it runs
 * after the current realtime event has been fully processed.
 */
const schedulePendingAgentAcknowledgementFlush = useCallback(() => {
  const runFlush = () => {
    flushPendingAgentAcknowledgement();
  };

  window.setTimeout(runFlush, 0);
}, [flushPendingAgentAcknowledgement]);
|
|
2410
|
+
|
|
2411
|
+
/**
 * On a zero-delay timer, first tries to flush a parked acknowledgement; only
 * when none was sent does it invoke the "next step after screen change"
 * callback held in `requestNextStepAfterScreenChangeRef`.
 */
const schedulePendingScreenPrompt = useCallback(() => {
  window.setTimeout(() => {
    const acknowledgementSent = flushPendingAgentAcknowledgement();
    if (acknowledgementSent) {
      return;
    }

    requestNextStepAfterScreenChangeRef.current?.();
  }, 0);
}, [flushPendingAgentAcknowledgement]);
|
|
2418
|
+
|
|
2419
|
+
/**
 * Kicks off the first assistant turn of a session.
 *
 * Builds the fixed ChatVRT triage greeting (personalised when a preferred
 * greeting name is available), posts it as a user instruction message, asks
 * for a response with the same instruction, flags the runtime as waiting for
 * the initial response, and sets the status to Thinking/Speaking depending on
 * the current response mode.
 */
const requestInitialAgentResponse = useCallback(
  (payload: SupportAssistantSessionContext) => {
    const languageInstruction = buildLanguageInstruction(selectedLanguage);
    const greetingName = getPreferredGreetingName(payload);

    // Both greeting variants share everything after the salutation.
    const greetingBody =
      "I'm ChatVRT. Please share your screen if you want UI guidance; if not, I can still guide you by voice. Are you here for first-time onboarding, ongoing support, or monthly close?";
    const initialGreeting = greetingName
      ? `Hi ${greetingName}, ${greetingBody}`
      : `Hi, ${greetingBody}`;
    const initialGreetingInstruction = `Start with this exact ChatVRT screen-share triage greeting and no extra preamble: "${initialGreeting}" `;

    runtimeRef.current.waitingForInitialResponse = true;

    const messageText =
      `${languageInstruction} ` +
      initialGreetingInstruction +
      ' Do not say that the share dialog is already open.';
    sendRealtimeEvent({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [{ type: 'input_text', text: messageText }],
      },
    });

    const responseInstructions =
      `${languageInstruction} ${initialGreetingInstruction}` +
      'Do not say that the share dialog is already open.';
    sendRealtimeEvent({
      type: 'response.create',
      response: getCurrentResponseOptions(responseInstructions),
    });

    if (responseModeRef.current === 'text') {
      setStatus('Thinking', 'thinking');
    } else {
      setStatus('Speaking', 'speaking');
    }
  },
  [getCurrentResponseOptions, selectedLanguage, sendRealtimeEvent, setStatus]
);
|
|
2459
|
+
|
|
2460
|
+
/**
 * Runs the `search support memory` tool: POSTs the query to
 * `/api/memory/retrieve`, publishes any well-formed source links to the UI
 * (and collapses the sources panel), and returns the raw response body so it
 * can be forwarded to the model as the tool output.
 *
 * @param rawArguments JSON argument string supplied by the tool call.
 * @returns A local `{ ok: false, ... }` result when the query is blank,
 * otherwise the parsed response body.
 * @throws Error with the server's `error`/`message` (or a generic fallback)
 * when the request fails, or when a successful response is not valid JSON.
 */
const searchSupportMemory = useCallback(
  async (rawArguments: string) => {
    const { query, limit } = parseSearchMemoryToolArguments(rawArguments);
    if (!query.trim()) {
      return {
        ok: false,
        error: 'query is required',
        matches: [],
        sourceLinks: [],
      };
    }

    const response = await fetch(
      joinUrl(effectiveApiBaseUrl, '/api/memory/retrieve'),
      {
        method: 'POST',
        headers: await createCallRequestHeaders(getCallRequestHeaders),
        body: JSON.stringify({
          query,
          limit: limit ?? 6,
          sessionId: openaiTraceRef.current.sessionId ?? undefined,
        }),
      }
    );

    // Error responses are not guaranteed to be JSON (gateway HTML, empty
    // bodies). Parse defensively so the status-based message below wins over
    // an opaque JSON syntax error.
    let body: unknown = null;
    let bodyParsed = true;
    try {
      body = await response.json();
    } catch {
      bodyParsed = false;
    }

    if (!response.ok) {
      const record = getRecord(body);
      throw new Error(
        getStringValue(record?.error) ??
          getStringValue(record?.message) ??
          'Memory retrieval failed'
      );
    }

    if (!bodyParsed) {
      throw new Error('Memory retrieval returned an invalid response');
    }

    const record = getRecord(body);
    const links = Array.isArray(record?.sourceLinks)
      ? record.sourceLinks
          .map((item): SupportSourceLink | null => {
            const link = getRecord(item);
            const articleId = getStringValue(link?.articleId);
            const title = getStringValue(link?.title);
            const url = getStringValue(link?.url);
            const relevance = getNumberValue(link?.relevance);

            // Drop entries that cannot be rendered as a link.
            if (!articleId || !title || !url) {
              return null;
            }

            return {
              articleId,
              title,
              url,
              relevance: relevance ?? 0,
            };
          })
          .filter((link): link is SupportSourceLink => Boolean(link))
      : [];

    setSourceLinks(links);
    setSourcesOpen(false);

    return body;
  },
  [effectiveApiBaseUrl, getCallRequestHeaders]
);
|
|
2526
|
+
|
|
2527
|
+
/**
 * Executes the tool calls carried by a realtime payload.
 *
 * Only the highlight, fill-field and memory-search tools are handled, and each
 * `callId` is processed at most once (tracked in `handledToolCallsRef`).
 *
 * - Memory search runs asynchronously; its own continuation sends the
 *   `function_call_output` and the follow-up `response.create`.
 * - Highlight / fill run synchronously; their outputs are sent immediately and
 *   a single follow-up `response.create` is issued after the loop.
 *
 * When a memory search was started, the synchronous follow-up response is
 * skipped because the memory continuation will request one.
 */
const handleRealtimeToolCalls = useCallback(
  (payload: Record<string, unknown>) => {
    let handledCall = false;
    let handledFill = false;
    let handledMemory = false;

    for (const call of getRealtimeFunctionCalls(payload)) {
      if (handledToolCallsRef.current.has(call.callId)) {
        continue;
      }

      if (
        call.name !== highlightUiTargetTool.name &&
        call.name !== fillUiFieldTool.name &&
        call.name !== searchSupportMemoryTool.name
      ) {
        continue;
      }

      handledToolCallsRef.current.add(call.callId);

      if (call.name === searchSupportMemoryTool.name) {
        void searchSupportMemory(call.arguments)
          .then((result) => {
            sendRealtimeEvent({
              type: 'conversation.item.create',
              item: {
                type: 'function_call_output',
                call_id: call.callId,
                output: JSON.stringify(result),
              },
            });
            // Route through getCurrentResponseOptions like every other
            // response.create here, so this answer uses the same response
            // options as the rest of the session.
            sendRealtimeEvent({
              type: 'response.create',
              response: getCurrentResponseOptions(
                `${buildLanguageInstruction(selectedLanguage)} ` +
                  'Answer using the memory search result. Keep the spoken answer concise and do not read source URLs aloud; the UI will show the help article link separately.'
              ),
            });
          })
          .catch((error) => {
            const message = getMessage(error, 'Memory retrieval failed');
            // Still answer the tool call so the model can fall back gracefully.
            sendRealtimeEvent({
              type: 'conversation.item.create',
              item: {
                type: 'function_call_output',
                call_id: call.callId,
                output: JSON.stringify({
                  ok: false,
                  error: message,
                  matches: [],
                  sourceLinks: [],
                }),
              },
            });
            sendRealtimeEvent({
              type: 'response.create',
              response: getCurrentResponseOptions(
                `${buildLanguageInstruction(selectedLanguage)} ` +
                  'Memory search failed. Answer from visible screen context only, say when documentation could not be checked, and keep it concise.'
              ),
            });
          });
        handledCall = true;
        handledMemory = true;
        continue;
      }

      try {
        const result =
          call.name === fillUiFieldTool.name
            ? fillField(parseFillFieldToolArguments(call.arguments))
            : highlightTarget(parseHighlightToolArguments(call.arguments));

        sendRealtimeEvent({
          type: 'conversation.item.create',
          item: {
            type: 'function_call_output',
            call_id: call.callId,
            output: JSON.stringify(result),
          },
        });
        handledCall = true;
        handledFill ||= call.name === fillUiFieldTool.name;
      } catch (error) {
        // NOTE(review): a throwing tool only surfaces a UI error; no
        // function_call_output is sent for this call — confirm that is intended.
        const message = getMessage(error, 'ChatVRT tool call failed');
        setErrorMessage(message);
        setStatus(message, 'error');
      }
    }

    if (handledCall) {
      if (handledMemory) {
        return;
      }

      sendRealtimeEvent({
        type: 'response.create',
        response: getCurrentResponseOptions(
          `${buildLanguageInstruction(selectedLanguage)} ` +
            (handledFill
              ? 'If the field update succeeded, briefly tell the user what you filled and remind them to review before saving or submitting. If it failed, ask the user to describe the field or make it visible.'
              : 'If the highlight succeeded, briefly tell the user what you highlighted. If it failed, ask the user to describe the target or make it visible.')
        ),
      });
    }
  },
  [
    fillField,
    getCurrentResponseOptions,
    highlightTarget,
    searchSupportMemory,
    selectedLanguage,
    sendRealtimeEvent,
    setStatus,
  ]
);
|
|
2645
|
+
|
|
2646
|
+
/**
 * Stops screen sharing and resets all screen-watch bookkeeping on the runtime.
 *
 * Timers and visual-change flags are always cleared. When a screen stream is
 * active, its tracks are stopped, the preview is detached, a
 * `{ type: 'screen', shared: false }` event is emitted and the status is
 * updated. Unless `options.acknowledge` is `false`, the assistant is also
 * asked to acknowledge the change while the data channel is open.
 */
const stopScreenShare = useCallback(
  (options: { acknowledge?: boolean } = {}) => {
    const session = runtimeRef.current;

    const markScreenHidden = () => {
      setScreenVisible(false);
      screenVisibleRef.current = false;
    };

    if (session.screenTimer) {
      window.clearTimeout(session.screenTimer);
      session.screenTimer = null;
    }

    if (session.visualWatchTimer) {
      window.clearInterval(session.visualWatchTimer);
      session.visualWatchTimer = null;
    }

    Object.assign(session, {
      lastVisualSignature: null,
      visualChangeActive: false,
      lastVisualChangeAt: 0,
      pendingProactiveScreenPrompt: false,
      pendingScreenChangeCorrection: false,
      proactiveResponseInFlight: false,
      waitingForFirstScreenAcknowledgement: false,
      suppressScreenChangePromptsUntil: 0,
    });

    const activeStream = session.screenStream;
    if (!activeStream) {
      // Nothing was being shared; only the visibility state needs syncing.
      markScreenHidden();
      return;
    }

    session.screenStream = null;
    activeStream.getTracks().forEach((track) => {
      track.stop();
    });

    const preview = screenPreviewRef.current;
    if (preview) {
      preview.srcObject = null;
    }

    markScreenHidden();
    onEventRef.current?.({ type: 'screen', shared: false });
    setStatus(
      isConnectedRef.current ? 'Screen share needed' : 'Ready',
      'listening'
    );

    const shouldAcknowledge =
      options.acknowledge !== false &&
      session.dataChannel?.readyState === 'open';
    if (shouldAcknowledge) {
      requestAgentAcknowledgement(
        'The user has stopped sharing their screen. Acknowledge that screen ' +
          'sharing is off and explain that you can continue by voice.'
      );
    }
  },
  [requestAgentAcknowledgement, setStatus]
);
|
|
2705
|
+
|
|
2706
|
+
/**
 * Tears down the current realtime session: finalizes analytics, cancels the
 * screen timers, stops screen and microphone tracks, closes the data channel
 * and peer connection, and replaces the runtime/activity/tool-call state with
 * fresh instances. Connection-related React state is reset as well.
 *
 * @param resetStatus When true (default), also clears the error message and
 * sets the status back to "Ready".
 * @param reason Label written to the debug log; defaults to `'manual'` or
 * `'cleanup'` depending on `resetStatus`.
 */
const cleanupSession = useCallback(
  (resetStatus = true, reason = resetStatus ? 'manual' : 'cleanup') => {
    const closing = runtimeRef.current;
    clearSpeakingFallback();
    finalizeRealtimeSessionAnalytics();
    appendDebugEntry('session_cleanup', reason);

    if (closing.screenTimer) {
      window.clearTimeout(closing.screenTimer);
    }
    if (closing.visualWatchTimer) {
      window.clearInterval(closing.visualWatchTimer);
    }

    const stopAllTracks = (stream: MediaStream | null | undefined) => {
      for (const track of stream?.getTracks() ?? []) {
        track.stop();
      }
    };
    stopAllTracks(closing.screenStream);
    stopAllTracks(closing.microphoneStream);

    closing.dataChannel?.close();
    closing.peerConnection?.close();

    // Swap in pristine per-session state.
    runtimeRef.current = createRuntimeSession();
    realtimeActivityRef.current = createRealtimeActivityState();
    handledToolCallsRef.current = new Set();

    const remoteAudio = remoteAudioRef.current;
    if (remoteAudio) {
      remoteAudio.srcObject = null;
    }

    const preview = screenPreviewRef.current;
    if (preview) {
      preview.srcObject = null;
    }

    setIsConnecting(false);
    setIsConnected(false);
    setScreenVisible(false);
    isConnectedRef.current = false;
    screenVisibleRef.current = false;
    setMuted(false);
    sentFramesRef.current = 0;

    if (!resetStatus) {
      return;
    }

    setErrorMessage(null);
    setStatus('Ready', 'idle');
  },
  [
    appendDebugEntry,
    clearSpeakingFallback,
    finalizeRealtimeSessionAnalytics,
    setStatus,
  ]
);
|
|
2763
|
+
|
|
2764
|
+
// Keep the newest cleanupSession in a ref so the unmount effect below can be
// mount-scoped. Depending on cleanupSession directly would run the effect's
// cleanup (and tear down a live session) every time that callback's identity
// changes, e.g. when getCallRequestHeaders or effectiveApiBaseUrl changes.
const cleanupSessionRef = useRef(cleanupSession);
useEffect(() => {
  cleanupSessionRef.current = cleanupSession;
}, [cleanupSession]);

// Tear the session down exactly once, when the component unmounts.
useEffect(() => {
  return () => cleanupSessionRef.current(false, 'component_unmount');
}, []);
|
|
2767
|
+
|
|
2768
|
+
/**
 * Assembles the session context sent when a support session is created.
 *
 * Starts from built-in defaults (goal, current path/title, workspace name),
 * overlays the caller-provided `sessionContext` (object or factory) minus its
 * `intent` and `userRole` keys, then sets `userRole` (the prop wins over the
 * context value) and `language`.
 *
 * @param language Language label to report; defaults to the selected language.
 */
const buildSupportSessionPayload = useCallback(
  (language = selectedLanguage.label) => {
    let providedContext;
    if (typeof sessionContext === 'function') {
      providedContext = sessionContext();
    } else {
      providedContext = sessionContext ?? {};
    }

    // `intent` is stripped from whatever the host passed in; `userRole` is
    // pulled out so the prop can take precedence below.
    const {
      intent: _intent,
      userRole: contextUserRole,
      ...contextWithoutIntent
    } = providedContext as SupportAssistantSessionContext & {
      intent?: unknown;
    };

    const hasWindow = typeof window !== 'undefined';
    const hasDocument = typeof document !== 'undefined';
    const defaults = {
      goal: defaultGoal,
      currentPath: hasWindow ? window.location.pathname : undefined,
      currentTitle: hasDocument ? document.title : undefined,
      workspaceName: 'VRPlatform',
    };

    return {
      ...defaults,
      ...contextWithoutIntent,
      userRole: userRole ?? contextUserRole,
      language,
    };
  },
  [defaultGoal, selectedLanguage.label, sessionContext, userRole]
);
|
|
2798
|
+
|
|
2799
|
+
/**
 * Creates the realtime call on the backend by POSTing the session context and
 * the local SDP offer to `callEndpoint`.
 *
 * @param payload Session context to send alongside the offer.
 * @param sdpOffer Local WebRTC offer SDP.
 * @returns The parsed call description (including the SDP answer).
 * @throws Error with the server's `error`/`message` (or a generic fallback)
 * when the request fails, and when a successful response is not JSON or lacks
 * a string `sdpAnswer`.
 */
const createRealtimeCall = useCallback(
  async (payload: SupportAssistantSessionContext, sdpOffer: string) => {
    const response = await fetch(joinUrl(effectiveApiBaseUrl, callEndpoint), {
      method: 'POST',
      headers: await createCallRequestHeaders(getCallRequestHeaders),
      body: JSON.stringify({
        ...payload,
        sdpOffer,
      }),
    });

    // Error responses may not be JSON; parse defensively so the status-based
    // error below is reported instead of a JSON syntax error.
    let body: unknown = null;
    try {
      body = await response.json();
    } catch {
      body = null;
    }

    if (!response.ok) {
      const responseBody = getRecord(body);
      const errorMessage =
        getStringValue(responseBody?.error) ??
        getStringValue(responseBody?.message);

      throw new Error(errorMessage ?? 'Failed to create support session');
    }

    // The caller feeds sdpAnswer straight into setRemoteDescription, so fail
    // here with a clear message rather than deep inside WebRTC.
    if (typeof getRecord(body)?.sdpAnswer !== 'string') {
      throw new Error('Support session response did not include an SDP answer');
    }

    return body as SupportSessionRealtimeCall;
  },
  [effectiveApiBaseUrl, callEndpoint, getCallRequestHeaders]
);
|
|
2824
|
+
|
|
2825
|
+
/**
 * Central handler for every event received on the realtime data channel.
 *
 * In order, it: forwards the event to the host and to analytics/debug logs,
 * captures trace identifiers from session events, feeds transcript handling,
 * surfaces `error` events, advances the activity state machine, and then
 * reacts to specific event types (user speech start/stop, response created,
 * output audio started/stopped, response done).
 *
 * @param payload Parsed JSON event from the server.
 */
const handleRealtimeMessage = useCallback(
  (payload: Record<string, unknown>) => {
    onEventRef.current?.({ type: 'realtime', payload });
    recordRealtimeSessionAnalytics(payload);
    const eventType = typeof payload.type === 'string' ? payload.type : '';
    appendRawDebugEntry(
      'server',
      eventType || 'unknown',
      getDebugValue(payload.response_id) ??
        getDebugValue(getRecord(payload.response)?.id) ??
        '-',
      payload
    );
    if (eventType === 'session.created' || eventType === 'session.updated') {
      // Remember trace identifiers for debugging and memory requests.
      const session = getRecord(payload.session);
      const tracing = getRecord(session?.tracing);
      const traceContext: RealtimeTraceDebugContext = {
        workflowName: getDebugValue(tracing?.workflow_name) ?? null,
        groupId: getDebugValue(tracing?.group_id) ?? null,
        sessionId: getDebugValue(session?.id) ?? null,
      };
      openaiTraceRef.current = traceContext;

      if (eventType === 'session.created') {
        appendDebugEntry(
          'trace_session',
          [
            traceContext.sessionId
              ? `session_id=${traceContext.sessionId}`
              : null,
            traceContext.workflowName
              ? `workflow=${traceContext.workflowName}`
              : null,
            traceContext.groupId ? `group_id=${traceContext.groupId}` : null,
          ]
            .filter(Boolean)
            .join(' | ') || '-',
          payload,
          'server',
          false
        );
      }
    }
    // Sampled before the reducer runs, i.e. the state prior to this event.
    const assistantWasActive =
      realtimeActivityRef.current.assistantAudioActive ||
      realtimeActivityRef.current.responseInProgress;
    appendRealtimeDebugEvent(payload, assistantWasActive);
    handleAssistantTranscriptEvent(payload, eventType);

    if (eventType === 'error') {
      // The payload comes from the server and is untyped: narrow it instead of
      // asserting a shape, so only a real string can reach UI state.
      const message =
        getStringValue(getRecord(payload.error)?.message) ?? 'Realtime error';
      setErrorMessage(message);
      setStatus(message, 'error');
      return;
    }

    const transition = reduceRealtimeActivity(
      realtimeActivityRef.current,
      eventType
    );
    realtimeActivityRef.current = transition.state;

    if (transition.status) {
      clearSpeakingFallback();
      applyRealtimeActivityStatus(transition.status);
    }

    const responseId =
      getDebugValue(payload.response_id) ??
      getDebugValue(getRecord(payload.response)?.id);

    if (eventType === 'input_audio_buffer.speech_started') {
      // The user started talking: drop queued proactive prompts and cancel
      // whatever the assistant is currently producing.
      const runtime = runtimeRef.current;
      runtime.pendingProactiveScreenPrompt = false;
      runtime.pendingScreenChangeCorrection = false;
      runtime.proactiveResponseInFlight = false;

      if (
        runtime.currentResponseId &&
        (transition.state.responseInProgress ||
          transition.state.assistantAudioActive)
      ) {
        cancelRealtimeResponseForUserSpeech(
          runtime.currentResponseId,
          transition.state.assistantAudioActive
        );
      }

      if (runtime.clientResponseInFlight) {
        appendDebugEntry(
          'user_speech_interrupt_pending',
          'waiting for response.created before cancel'
        );
      }
    }

    if (eventType === 'response.created' && responseId) {
      const runtime = runtimeRef.current;
      runtime.currentResponseId = responseId;

      // A response that starts while the user is still speaking is cancelled.
      if (transition.state.userSpeechActive) {
        cancelRealtimeResponseForUserSpeech(responseId, false);
      }
    }

    if (eventType === 'output_audio_buffer.started' && responseId) {
      if (transition.state.userSpeechActive) {
        cancelRealtimeResponseForUserSpeech(responseId, true);
      }
    }

    if (
      eventType === 'input_audio_buffer.speech_started' ||
      eventType === 'input_audio_buffer.speech_stopped'
    ) {
      scheduleSnapshotCaptureRef.current?.();
      restartSnapshotTimerRef.current?.();
    }

    if (eventType === 'response.done') {
      const response = getRecord(payload.response);
      const status = getDebugValue(response?.status);
      if (responseId && status === 'cancelled') {
        updateAssistantTranscriptStatus(responseId, 'cancelled');
      } else if (responseId && status === 'failed') {
        updateAssistantTranscriptStatus(responseId, 'failed');
      } else if (responseId) {
        updateAssistantTranscriptStatus(responseId, 'done');
      }
      handleRealtimeToolCalls(payload);
      if (responseId === runtimeRef.current.currentResponseId) {
        runtimeRef.current.currentResponseId = null;
      }
      // NOTE(review): these flags are cleared after handleRealtimeToolCalls may
      // have issued a new response.create — confirm that ordering is intended.
      runtimeRef.current.clientResponseInFlight = false;
      runtimeRef.current.proactiveResponseInFlight = false;

      if (runtimeRef.current.waitingForFirstScreenAcknowledgement) {
        runtimeRef.current.waitingForFirstScreenAcknowledgement = false;
        runtimeRef.current.pendingProactiveScreenPrompt = false;
        runtimeRef.current.pendingScreenChangeCorrection = false;
        runtimeRef.current.lastProactivePromptAt = Date.now();
      }

      if (runtimeRef.current.waitingForInitialResponse) {
        runtimeRef.current.waitingForInitialResponse = false;
        if (responseModeRef.current === 'text') {
          enableAutomaticVoiceResponses();
        } else {
          // In audio mode, wait for playback to stop (or the fallback timer)
          // before enabling automatic responses.
          runtimeRef.current.waitingToEnableAutoResponses = true;
          appendDebugEntry(
            'auto_vad_pending',
            'waiting for output_stopped before listening'
          );
          scheduleAutoResponseEnableFallback();
        }
      }

      if (transition.shouldFallbackToReady) {
        scheduleSpeakingFallback();
      }

      if (runtimeRef.current.pendingAcknowledgement) {
        schedulePendingAgentAcknowledgementFlush();
      } else if (runtimeRef.current.pendingProactiveScreenPrompt) {
        schedulePendingScreenPrompt();
      }
    }

    if (
      eventType === 'output_audio_buffer.stopped' ||
      eventType === 'output_audio_buffer.cleared'
    ) {
      if (runtimeRef.current.waitingToEnableAutoResponses) {
        enableAutomaticVoiceResponses();
      }

      if (runtimeRef.current.pendingAcknowledgement) {
        schedulePendingAgentAcknowledgementFlush();
      } else if (runtimeRef.current.pendingProactiveScreenPrompt) {
        schedulePendingScreenPrompt();
      }
    }
  },
  [
    applyRealtimeActivityStatus,
    appendDebugEntry,
    appendRealtimeDebugEvent,
    appendRawDebugEntry,
    cancelRealtimeResponseForUserSpeech,
    clearSpeakingFallback,
    enableAutomaticVoiceResponses,
    handleAssistantTranscriptEvent,
    handleRealtimeToolCalls,
    recordRealtimeSessionAnalytics,
    scheduleAutoResponseEnableFallback,
    schedulePendingAgentAcknowledgementFlush,
    schedulePendingScreenPrompt,
    scheduleSpeakingFallback,
    setStatus,
    updateAssistantTranscriptStatus,
  ]
);
|
|
3028
|
+
|
|
3029
|
+
/**
 * Establishes the WebRTC connection for a realtime session.
 *
 * Creates the peer connection and the `oai-events` data channel, wires debug
 * logging for their state changes, attaches the microphone, routes remote
 * audio to the audio element, exchanges SDP with the backend via
 * `createRealtimeCall`, waits for the data channel to open, and finally sends
 * the initial `session.update` (instructions, output modalities, tools).
 *
 * @param payload Session context used for the call request and instructions.
 */
const connectRealtime = useCallback(
  async (payload: SupportAssistantSessionContext) => {
    assertRealtimeBrowserSupport();
    const pc = new RTCPeerConnection();
    const events = pc.createDataChannel('oai-events');
    const session = runtimeRef.current;
    session.peerConnection = pc;
    session.dataChannel = events;

    // Diagnostics only: mirror channel and connection state into the debug log.
    events.addEventListener('open', () => {
      appendDebugEntry('data_channel_state', 'open');
    });
    events.addEventListener('close', () => {
      appendDebugEntry('data_channel_state', 'closed');
    });
    events.addEventListener('error', () => {
      appendDebugEntry('data_channel_error', events.readyState);
    });
    pc.addEventListener('connectionstatechange', () => {
      appendDebugEntry('peer_connection_state', pc.connectionState);
    });
    pc.addEventListener('iceconnectionstatechange', () => {
      appendDebugEntry('peer_ice_connection_state', pc.iceConnectionState);
    });
    pc.addEventListener('signalingstatechange', () => {
      appendDebugEntry('peer_signaling_state', pc.signalingState);
    });

    const microphoneStream = await requestUserMedia({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    });

    microphoneStream.getAudioTracks().forEach((track) => {
      pc.addTrack(track, microphoneStream);
    });
    session.microphoneStream = microphoneStream;

    pc.addEventListener('track', (event) => {
      const audioElement = remoteAudioRef.current;
      if (audioElement) {
        audioElement.srcObject = event.streams[0];
      }

      for (const trackState of ['ended', 'mute', 'unmute'] as const) {
        event.track.addEventListener(trackState, () => {
          appendDebugEntry('remote_audio_track_state', trackState);
        });
      }
    });

    events.addEventListener('message', (event) => {
      try {
        const parsed = JSON.parse(event.data);
        if (parsed !== null && typeof parsed === 'object') {
          handleRealtimeMessage(parsed);
        }
      } catch (error) {
        setErrorMessage(getMessage(error, 'Invalid realtime event'));
      }
    });

    const offer = await pc.createOffer();
    if (!offer.sdp) {
      throw new Error('WebRTC offer SDP was empty');
    }

    await pc.setLocalDescription(offer);
    // Deliberately not awaited; a rejection is ignored.
    waitForIceGatheringComplete(pc).catch(() => undefined);

    const sessionConfig = await createRealtimeCall(payload, offer.sdp);
    session.sessionConfig = sessionConfig;

    await pc.setRemoteDescription({
      type: 'answer',
      sdp: sessionConfig.sdpAnswer,
    });
    await waitForDataChannelOpen(events);

    const initialSession = {
      type: 'realtime',
      instructions: buildSessionInstructions(payload),
      max_output_tokens: realtimeMaxOutputTokens,
      output_modalities: getRealtimeOutputModalities(responseModeRef.current),
      audio: {
        input: {
          noise_reduction: { type: 'far_field' },
        },
      },
      tools: buildRealtimeTools(),
      tool_choice: 'auto',
    };
    sendRealtimeEvent({ type: 'session.update', session: initialSession });
  },
  [
    appendDebugEntry,
    createRealtimeCall,
    handleRealtimeMessage,
    sendRealtimeEvent,
  ]
);
|
|
3148
|
+
|
|
3149
|
+
/**
 * Returns the screen snapshot interval in milliseconds.
 *
 * Falls back to 1000 ms when the session has no screen configuration. While
 * the user is speaking, the configured active interval is preferred when it is
 * set; otherwise the regular capture interval is used.
 */
const getSnapshotIntervalMs = useCallback(() => {
  const screenConfig = runtimeRef.current.sessionConfig?.screen;

  if (!screenConfig) {
    return 1_000;
  }

  const { captureIntervalMs, activeCaptureIntervalMs } = screenConfig;
  return realtimeActivityRef.current.userSpeechActive
    ? (activeCaptureIntervalMs ?? captureIntervalMs)
    : captureIntervalMs;
}, []);
|
|
3162
|
+
|
|
3163
|
+
/**
 * Captures the current frame of the shared screen and sends it to the realtime
 * session.
 *
 * The preview <video> is drawn onto the frame canvas (scaled via
 * `getScaledCanvasSize`), encoded with the session's screen settings, converted
 * to a data URL and sent through `sendRealtimeEvent`. On success the sent-frame
 * counter is incremented and a `snapshot` event is reported through `onEventRef`.
 *
 * @param options.force - When `true`, bypasses the minimum-spacing throttle.
 * @returns `true` when a frame was sent, `false` when the capture was skipped.
 * @throws When no 2D canvas context is available or the frame cannot be encoded.
 */
const captureAndSendSnapshot = useCallback(
  async (options: { force?: boolean } = {}) => {
    const runtime = runtimeRef.current;
    const config = runtime.sessionConfig;
    const stream = runtime.screenStream;
    const video = screenPreviewRef.current;
    const canvas = frameCanvasRef.current;
    const force = options.force === true;

    if (!config || !stream || !video || !canvas) {
      return false;
    }

    // Skip while another capture is running or before the video reports a usable
    // frame. Both dimensions are checked (matching sampleVisualSignature) so a
    // zero-height frame is skipped instead of failing later during encoding.
    if (
      runtime.snapshotInFlight ||
      video.videoWidth === 0 ||
      video.videoHeight === 0
    ) {
      return false;
    }

    // Throttle unforced captures to the larger of the hard minimum spacing and
    // the currently configured capture interval.
    const now = Date.now();
    if (
      !force &&
      now - runtime.lastSnapshotStartedAt <
        Math.max(minSnapshotSpacingMs, getSnapshotIntervalMs())
    ) {
      return false;
    }

    runtime.snapshotInFlight = true;
    runtime.lastSnapshotStartedAt = now;

    try {
      const { width, height } = getScaledCanvasSize(
        video.videoWidth,
        video.videoHeight,
        config.screen.maxImageLongEdgePx
      );
      canvas.width = width;
      canvas.height = height;

      const context = canvas.getContext('2d');
      if (!context) {
        throw new Error('Could not create canvas context');
      }

      context.drawImage(video, 0, 0, width, height);
      const blob = await canvasToBlob(
        canvas,
        config.screen.imageMimeType,
        config.screen.imageQuality
      );

      if (!blob) {
        throw new Error('Screen snapshot could not be encoded');
      }

      const imageUrl = await blobToDataUrl(blob);

      // Encoding is asynchronous: if the screen share was stopped or replaced in
      // the meantime, drop this frame rather than sending a stale screen.
      if (runtimeRef.current.screenStream !== stream) {
        return false;
      }

      sendRealtimeEvent({
        type: config.screen.eventType,
        event_id: `screen_${Date.now()}`,
        item: {
          type: config.screen.itemType,
          role: config.screen.role,
          content: [
            {
              type: config.screen.contentType,
              image_url: imageUrl,
              detail: config.screen.imageDetail,
            },
          ],
        },
      });

      sentFramesRef.current += 1;
      onEventRef.current?.({
        type: 'snapshot',
        framesSent: sentFramesRef.current,
      });

      return true;
    } finally {
      // Always release the in-flight flag so later captures are not blocked.
      runtime.snapshotInFlight = false;
    }
  },
  [getSnapshotIntervalMs, sendRealtimeEvent]
);
|
|
3247
|
+
|
|
3248
|
+
/**
 * Non-throwing wrapper around `captureAndSendSnapshot`.
 *
 * A failed capture is reported through `setStatus` as an error and resolves to
 * `false`, so callers may ignore the returned promise.
 *
 * @param options - Forwarded unchanged to `captureAndSendSnapshot`.
 * @returns The capture result, or `false` when the capture failed.
 */
const scheduleSnapshotCapture = useCallback(
  async (options?: { force?: boolean }) => {
    try {
      return await captureAndSendSnapshot(options);
    } catch (error) {
      setStatus(getMessage(error, 'Screen capture failed'), 'error');

      return false;
    }
  },
  [captureAndSendSnapshot, setStatus]
);
|
|
3258
|
+
|
|
3259
|
+
/**
 * (Re)arms the one-shot snapshot timer.
 *
 * Any pending timer is cancelled first. A new one is scheduled only while both a
 * session configuration and a screen stream are present; each tick triggers a
 * capture and re-arms the timer, so the delay is re-read from
 * `getSnapshotIntervalMs` on every cycle.
 */
const restartSnapshotTimer = useCallback(() => {
  const runtime = runtimeRef.current;

  if (runtime.screenTimer) {
    window.clearTimeout(runtime.screenTimer);
    runtime.screenTimer = null;
  }

  const canCapture =
    Boolean(runtime.sessionConfig) && Boolean(runtime.screenStream);
  if (!canCapture) {
    return;
  }

  const delayMs = getSnapshotIntervalMs();
  const onTick = () => {
    scheduleSnapshotCapture();
    restartSnapshotTimer();
  };

  runtime.screenTimer = window.setTimeout(onTick, delayMs);
}, [getSnapshotIntervalMs, scheduleSnapshotCapture]);
|
|
3277
|
+
|
|
3278
|
+
// Publish the current snapshot helpers through refs whenever either callback
// identity changes. The capture ref is wrapped so its promise is not exposed.
useEffect(() => {
  restartSnapshotTimerRef.current = restartSnapshotTimer;
  scheduleSnapshotCaptureRef.current = () => {
    scheduleSnapshotCapture();
  };
}, [restartSnapshotTimer, scheduleSnapshotCapture]);
|
|
3284
|
+
|
|
3285
|
+
/**
 * Sends an immediate (forced) snapshot and then starts the periodic timer.
 *
 * @returns Whether the first snapshot was sent.
 * @throws When no session configuration is present.
 */
const startSnapshotLoop = useCallback(async () => {
  const hasSessionConfig = Boolean(runtimeRef.current.sessionConfig);
  if (!hasSessionConfig) {
    throw new Error('Missing session configuration');
  }

  const firstFrameSent = await scheduleSnapshotCapture({ force: true });
  restartSnapshotTimer();

  return firstFrameSent;
}, [restartSnapshotTimer, scheduleSnapshotCapture]);
|
|
3295
|
+
|
|
3296
|
+
/**
 * Builds a small RGB signature of the current screen preview frame.
 *
 * The frame is drawn onto a cached analysis canvas that is `visualSampleWidthPx`
 * wide, with a height (at least 1) that follows the video's aspect ratio. The
 * alpha channel is dropped, leaving three bytes per pixel.
 *
 * @returns The signature, or `null` when the video has no frame yet, no 2D
 *   context is available, or drawing/reading the pixels throws.
 */
const sampleVisualSignature = useCallback(() => {
  const video = screenPreviewRef.current;
  if (!video) {
    return null;
  }

  if (video.videoWidth === 0 || video.videoHeight === 0) {
    return null;
  }

  const sampleWidth = visualSampleWidthPx;
  const aspectRatio = video.videoHeight / video.videoWidth;
  const sampleHeight = Math.max(1, Math.round(aspectRatio * sampleWidth));

  // Reuse one canvas across samples; create and cache it on first use.
  const runtime = runtimeRef.current;
  let canvas = runtime.visualAnalysisCanvas;
  if (!canvas) {
    canvas = document.createElement('canvas');
  }
  runtime.visualAnalysisCanvas = canvas;
  canvas.width = sampleWidth;
  canvas.height = sampleHeight;

  const context = canvas.getContext('2d', { willReadFrequently: true });
  if (!context) {
    return null;
  }

  let rgba: Uint8ClampedArray;
  try {
    context.drawImage(video, 0, 0, sampleWidth, sampleHeight);
    rgba = context.getImageData(0, 0, sampleWidth, sampleHeight).data;
  } catch {
    return null;
  }

  // Copy R, G and B of every pixel; the source carries four bytes per pixel.
  const signature = new Uint8ClampedArray(sampleWidth * sampleHeight * 3);
  const pixelCount = rgba.length / 4;

  for (let pixel = 0; pixel < pixelCount; pixel += 1) {
    const sourceOffset = pixel * 4;
    const targetOffset = pixel * 3;

    signature[targetOffset] = rgba[sourceOffset];
    signature[targetOffset + 1] = rgba[sourceOffset + 1];
    signature[targetOffset + 2] = rgba[sourceOffset + 2];
  }

  return signature;
}, []);
|
|
3342
|
+
|
|
3343
|
+
/**
 * Asks the assistant for a next step after the shared screen changed.
 *
 * Flow, in order:
 * 1. Do nothing without a session configuration, screen stream and data channel.
 * 2. While the first screen acknowledgement is pending or prompts are suppressed,
 *    clear all pending/in-flight prompt flags and stop.
 * 3. If the user is speaking, an assistant turn is active, or a client response
 *    is in flight, defer the prompt (recorded via `appendDebugEntry`).
 * 4. Otherwise send a forced snapshot, re-check step 3, then send a user message
 *    plus a `response.create` and set the status to "Thinking".
 *
 * @param options.interruptActiveResponse - When `true`, the prompt is treated as
 *   a correction of a stale screen, which also bypasses the minimum spacing
 *   between proactive prompts.
 */
const requestNextStepAfterScreenChange = useCallback(
  async (options: ScreenChangePromptOptions = {}) => {
    const runtime = runtimeRef.current;
    const isReady =
      Boolean(runtime.sessionConfig) &&
      Boolean(runtime.screenStream) &&
      Boolean(runtime.dataChannel);
    if (!isReady) {
      return;
    }

    const promptsSuppressed =
      runtime.waitingForFirstScreenAcknowledgement ||
      Date.now() < runtime.suppressScreenChangePromptsUntil;
    if (promptsSuppressed) {
      runtime.pendingProactiveScreenPrompt = false;
      runtime.pendingScreenChangeCorrection = false;
      runtime.proactiveResponseInFlight = false;
      return;
    }

    const shouldCorrectStaleScreen =
      options.interruptActiveResponse === true ||
      runtime.pendingScreenChangeCorrection;

    // Records the deferral and leaves the flags set so the prompt stays pending.
    const deferPrompt = (detail: string) => {
      appendDebugEntry('proactive_screen_prompt_deferred', detail);
      runtime.proactiveResponseInFlight = false;
      runtime.pendingProactiveScreenPrompt = true;
      runtime.pendingScreenChangeCorrection ||= shouldCorrectStaleScreen;
    };

    // Returns why the prompt must wait right now, or null when it may proceed.
    const findDeferReason = (phase: 'before' | 'after'): string | null => {
      const activity = realtimeActivityRef.current;

      if (activity.userSpeechActive) {
        return `user speech active ${phase} snapshot`;
      }

      if (hasActiveAssistantTurn(activity) || runtime.clientResponseInFlight) {
        return runtime.clientResponseInFlight
          ? `pending response.create ${phase} snapshot`
          : `active realtime turn ${phase} snapshot`;
      }

      return null;
    };

    const reasonBeforeSnapshot = findDeferReason('before');
    if (reasonBeforeSnapshot !== null) {
      deferPrompt(reasonBeforeSnapshot);
      return;
    }

    const now = Date.now();
    if (runtime.proactiveResponseInFlight) {
      runtime.pendingProactiveScreenPrompt ||= shouldCorrectStaleScreen;
      runtime.pendingScreenChangeCorrection ||= shouldCorrectStaleScreen;
      return;
    }

    const promptedTooRecently =
      now - runtime.lastProactivePromptAt < minProactivePromptSpacingMs;
    if (!shouldCorrectStaleScreen && promptedTooRecently) {
      return;
    }

    runtime.proactiveResponseInFlight = true;
    const snapshotSent = await scheduleSnapshotCapture({ force: true });
    if (!snapshotSent) {
      runtime.proactiveResponseInFlight = false;
      return;
    }

    // Activity may have changed while the snapshot was being captured.
    const reasonAfterSnapshot = findDeferReason('after');
    if (reasonAfterSnapshot !== null) {
      deferPrompt(reasonAfterSnapshot);
      return;
    }

    runtime.pendingProactiveScreenPrompt = false;
    runtime.pendingScreenChangeCorrection = false;
    runtime.lastProactivePromptAt = now;

    const languageInstruction = buildLanguageInstruction(selectedLanguage);
    let promptText: string;
    let responseInstructions: string;

    if (shouldCorrectStaleScreen) {
      promptText =
        'The shared screen changed while you were speaking. Stop following the previous screen. Use only the latest shared screen. If the previous spoken step is no longer right, switch direction without saying "actually", then give one short next step. Do not mention snapshots, do not use a preamble, and do not say that you are thinking, checking, or thinking through it.';
      responseInstructions = `${languageInstruction} The shared screen changed while you were speaking. Stop following the previous screen. Use only the latest shared screen. If the previous spoken step is no longer right, switch direction without saying "actually" or repeating a correction phrase, then give one short next step based on the latest screen. Do not continue the old instruction. Do not apologize repeatedly. Do not mention snapshots. Do not use a preamble. Do not say that you are thinking, checking, reasoning, or thinking through it.`;
    } else {
      promptText =
        "The shared screen changed and now appears stable. Only respond if the change is directly relevant to the user's active task and a short next step would help. If the user appears to be waiting, looking something up, reading, or working outside VRPlatform, stay quiet unless there is a visible error or blocker. If you do respond, give one short, plain-language next step based on the latest screen. Do not use a preamble or say that you are thinking, checking, or thinking through it.";
      responseInstructions = `${languageInstruction} Only respond if the screen change is directly relevant to the user's active task and a short next step would help. If the user appears to be waiting, looking something up, reading, or working outside VRPlatform, stay quiet unless there is a visible error or blocker. If you do respond, give one short, plain-language next step based on the latest screen. Do not use a preamble. Do not say "actually". Do not say that you are thinking, checking, reasoning, or thinking through it.`;
    }

    sendRealtimeEvent({
      type: 'conversation.item.create',
      item: {
        type: 'message',
        role: 'user',
        content: [
          {
            type: 'input_text',
            text: promptText,
          },
        ],
      },
    });
    sendRealtimeEvent({
      type: 'response.create',
      response: getCurrentResponseOptions(responseInstructions),
    });
    setStatus('Thinking', 'thinking');
  },
  [
    appendDebugEntry,
    getCurrentResponseOptions,
    scheduleSnapshotCapture,
    selectedLanguage,
    sendRealtimeEvent,
    setStatus,
  ]
);
|
|
3471
|
+
|
|
3472
|
+
// Expose the screen-change prompt through a ref as a fire-and-forget function.
// A rejection clears the in-flight flag and is shown as an error status.
useEffect(() => {
  const handleFailure = (error: unknown) => {
    runtimeRef.current.proactiveResponseInFlight = false;
    setStatus(getMessage(error, 'Screen update prompt failed'), 'error');
  };

  requestNextStepAfterScreenChangeRef.current = (options) => {
    requestNextStepAfterScreenChange(options).catch(handleFailure);
  };
}, [requestNextStepAfterScreenChange, setStatus]);
|
|
3480
|
+
|
|
3481
|
+
/**
 * Compares the current screen signature with the previous one and, once a
 * detected change has settled, requests a next-step prompt.
 *
 * The newest signature always becomes the baseline for the next call. While
 * prompts are suppressed the change tracking is reset. A frame that differs
 * beyond either threshold marks the change as active; a later unchanged frame at
 * least `visualSettleDelayMs` after the last change triggers the prompt.
 */
const analyzeVisualFrame = useCallback(() => {
  const runtime = runtimeRef.current;
  const isSharing =
    Boolean(runtime.sessionConfig) && Boolean(runtime.screenStream);
  if (!isSharing) {
    return;
  }

  const currentSignature = sampleVisualSignature();
  if (!currentSignature) {
    return;
  }

  const baselineSignature = runtime.lastVisualSignature;
  runtime.lastVisualSignature = currentSignature;

  // The first sample only establishes the baseline.
  if (!baselineSignature) {
    return;
  }

  const now = Date.now();
  if (now < runtime.suppressScreenChangePromptsUntil) {
    runtime.visualChangeActive = false;
    runtime.lastVisualChangeAt = 0;
    return;
  }

  const { averageDelta, changedRatio } = getVisualDifference(
    baselineSignature,
    currentSignature
  );
  const screenChanged =
    averageDelta >= visualAverageDifferenceThreshold ||
    changedRatio >= visualChangedPixelRatioThreshold;

  if (screenChanged) {
    runtime.visualChangeActive = true;
    runtime.lastVisualChangeAt = now;
    return;
  }

  const quietForMs = now - runtime.lastVisualChangeAt;
  if (runtime.visualChangeActive && quietForMs >= visualSettleDelayMs) {
    runtime.visualChangeActive = false;
    requestNextStepAfterScreenChangeRef.current?.();
  }
}, [sampleVisualSignature]);
|
|
3525
|
+
|
|
3526
|
+
/**
 * Starts (or restarts) the interval that runs `analyzeVisualFrame` every
 * `visualWatchIntervalMs`, clearing any previous interval and resetting the
 * stored signature and change-tracking state first.
 */
const startVisualChangeWatcher = useCallback(() => {
  const runtime = runtimeRef.current;
  const existingTimer = runtime.visualWatchTimer;

  if (existingTimer) {
    window.clearInterval(existingTimer);
  }

  // Start from a clean slate so the first sample becomes the new baseline.
  runtime.lastVisualSignature = null;
  runtime.visualChangeActive = false;
  runtime.lastVisualChangeAt = 0;

  const timerId = window.setInterval(analyzeVisualFrame, visualWatchIntervalMs);
  runtime.visualWatchTimer = timerId;
}, [analyzeVisualFrame]);
|
|
3541
|
+
|
|
3542
|
+
/**
 * Requests a video-only display capture through `requestDisplayMedia`.
 *
 * The frame rate is capped at 5 and the ideal width follows the session's
 * `maxImageLongEdgePx`, defaulting to 960 when no session configuration exists.
 *
 * @returns Whatever `requestDisplayMedia` returns for these constraints.
 */
const requestScreenStream = useCallback(() => {
  const configuredLongEdgePx =
    runtimeRef.current.sessionConfig?.screen.maxImageLongEdgePx;
  const idealWidthPx = configuredLongEdgePx ?? 960;

  return requestDisplayMedia({
    video: {
      frameRate: { max: 5 },
      width: { ideal: idealWidthPx },
    },
    audio: false,
  });
}, []);
|
|
3559
|
+
|
|
3560
|
+
/**
 * Attaches a captured screen stream to the session.
 *
 * Resets the snapshot and proactive-prompt bookkeeping, suppresses screen-change
 * prompts for `screenSharePromptWarmupMs`, plays the stream in the preview
 * element, stops the share when the first video track ends, sends the first
 * snapshot and starts the visual change watcher. Unless disabled, the assistant
 * is then asked to acknowledge the shared screen.
 *
 * @param screenStream - The display stream to attach.
 * @param options.acknowledge - Pass `false` to skip the acknowledgement request.
 * @throws When no session configuration exists or the preview element is missing.
 */
const attachScreenStream = useCallback(
  async (
    screenStream: MediaStream,
    options: { acknowledge?: boolean } = {}
  ) => {
    const runtime = runtimeRef.current;
    const previewVideo = screenPreviewRef.current;

    if (!runtime.sessionConfig) {
      throw new Error('Join the call before sharing the screen');
    }

    if (!previewVideo) {
      throw new Error('Screen preview element is missing');
    }

    runtime.screenStream = screenStream;
    runtime.lastSnapshotStartedAt = 0;
    runtime.lastProactivePromptAt = 0;
    runtime.proactiveResponseInFlight = false;
    runtime.pendingProactiveScreenPrompt = false;
    runtime.pendingScreenChangeCorrection = false;
    runtime.suppressScreenChangePromptsUntil =
      Date.now() + screenSharePromptWarmupMs;

    previewVideo.srcObject = screenStream;
    await previewVideo.play();
    await waitForVideoMetadata(previewVideo);

    const firstVideoTrack = screenStream.getVideoTracks()[0];
    firstVideoTrack?.addEventListener('ended', () => stopScreenShare());

    setScreenVisible(true);
    screenVisibleRef.current = true;
    onEventRef.current?.({ type: 'screen', shared: true });
    setStatus('Ready', 'listening');

    // Set before the first snapshot; cleared below when no acknowledgement is
    // requested.
    runtime.waitingForFirstScreenAcknowledgement = true;
    const firstSnapshotSent = await startSnapshotLoop();
    startVisualChangeWatcher();

    const shouldAcknowledge =
      options.acknowledge !== false && firstSnapshotSent;
    if (!shouldAcknowledge) {
      runtime.waitingForFirstScreenAcknowledgement = false;
      return;
    }

    const payload = buildSupportSessionPayload();
    const postScreenIssueInstruction = buildPostScreenIssueInstruction(payload);
    const languageInstruction = buildLanguageInstruction(selectedLanguage);

    if (postScreenIssueInstruction) {
      requestAgentAcknowledgement(
        'The user has started sharing their VRPlatform screen. Acknowledge that you can use the shared screen as context. Then mention the active issue summary provided in your instructions.',
        `${languageInstruction} Acknowledge that screen sharing is active in one short clause. Then ${postScreenIssueInstruction}`
      );
    } else {
      requestAgentAcknowledgement(
        'The user has started sharing their VRPlatform screen. Acknowledge that you can see the screen now. Then continue with the current task using the latest screen as context. If there is no active task yet, ask whether they are here for first-time onboarding, ongoing support, or monthly close.',
        `${languageInstruction} Acknowledge that you can see the screen now in one short clause, then continue with the current task using the latest screen as context. Do not restart the greeting. Do not ask what they need next unless there is no active task. Do not say that you are thinking, checking, or thinking through it.`
      );
    }
  },
  [
    buildSupportSessionPayload,
    requestAgentAcknowledgement,
    selectedLanguage,
    startSnapshotLoop,
    startVisualChangeWatcher,
    stopScreenShare,
    setStatus,
  ]
);
|
|
3626
|
+
|
|
3627
|
+
/**
 * Starts a new realtime session.
 *
 * Resets per-session refs and UI state, connects through `connectRealtime`, and
 * then either sends the provided opening text message or requests the
 * assistant's initial response. A connection failure is shown as the error
 * message and status after `cleanupSession` runs; it is not rethrown.
 *
 * @param options.initialTextMessage - Optional first user message. When it is
 *   non-empty after trimming, the response mode is switched to text.
 */
const startCall = useCallback(
  async (options: StartCallOptions = {}) => {
    const openingMessage = options.initialTextMessage?.trim();

    if (openingMessage) {
      responseModeRef.current = 'text';
      setResponseMode('text');
    }

    clearSpeakingFallback();

    // Per-session refs.
    realtimeActivityRef.current = createRealtimeActivityState();
    handledToolCallsRef.current = new Set();
    debugEventIdRef.current = 0;
    rawDebugEventIdRef.current = 0;
    rawDebugEventsRef.current = [];
    openaiTraceRef.current = {
      workflowName: null,
      groupId: null,
      sessionId: null,
    };
    sentFramesRef.current = 0;

    // Per-session UI state.
    setTextInputValue('');
    setChatMessages([]);
    setDebugEvents([]);
    setDebugCopyState('idle');
    setIsConnecting(true);
    setErrorMessage(null);

    const payload = buildSupportSessionPayload();
    setStatus('Requesting microphone', 'connecting');

    try {
      sessionAnalyticsRef.current = {
        startedAt: Date.now(),
        payload,
        transcript: [],
        responseTranscriptDeltas: new Map(),
        usage: null,
        finalized: false,
      };
      await connectRealtime(payload);
      setIsConnected(true);
      isConnectedRef.current = true;

      if (!openingMessage) {
        requestInitialAgentResponse(payload);
        setStatus('Connected', 'speaking');
      } else {
        appendUserTranscript(openingMessage);
        sendTextMessageToRealtime(openingMessage);
      }
    } catch (error) {
      const failureMessage = getMessage(error, 'Connection failed');
      sessionAnalyticsRef.current = null;
      cleanupSession(false, 'connection_failed');
      setErrorMessage(failureMessage);
      setStatus(failureMessage, 'error');
    } finally {
      setIsConnecting(false);
    }
  },
  [
    buildSupportSessionPayload,
    clearSpeakingFallback,
    cleanupSession,
    connectRealtime,
    appendUserTranscript,
    requestInitialAgentResponse,
    sendTextMessageToRealtime,
    setStatus,
  ]
);
|
|
3698
|
+
|
|
3699
|
+
// Keep `startCallRef` pointing at the most recent `startCall` callback.
useEffect(
  () => {
    startCallRef.current = startCall;
  },
  [startCall]
);
|
|
3702
|
+
|
|
3703
|
+
/**
 * Requests a screen stream and attaches it to the session.
 *
 * If anything fails while this function still holds the stream, its tracks are
 * stopped. A `DOMException` named `NotAllowedError` or `AbortError` is not
 * treated as an error (presumably the picker was declined or dismissed): the
 * status becomes "Screen share needed". Any other failure is shown as the error
 * message and status.
 */
const requestScreenShare = useCallback(async () => {
  // Non-null only while a requested stream has not been attached successfully.
  let pendingStream: MediaStream | null = null;

  setErrorMessage(null);
  setStatus('Requesting screen share', 'connecting');

  try {
    pendingStream = await requestScreenStream();
    await attachScreenStream(pendingStream);
    pendingStream = null;
  } catch (error) {
    const orphanedTracks = pendingStream?.getTracks() ?? [];
    orphanedTracks.forEach((track) => {
      track.stop();
    });

    const isBenignDomError =
      error instanceof DOMException &&
      ['NotAllowedError', 'AbortError'].includes(error.name);

    if (isBenignDomError) {
      setStatus('Screen share needed', 'listening');
      return;
    }

    const failureMessage = getMessage(error, 'Screen sharing failed');
    setErrorMessage(failureMessage);
    setStatus(failureMessage, 'error');
  }
}, [attachScreenStream, requestScreenStream, setStatus]);
|
|
3731
|
+
|
|
3732
|
+
/** Stops the screen share when one is visible; otherwise requests a new one. */
const toggleScreenShare = useCallback(() => {
  const isSharing = screenVisibleRef.current;

  if (!isSharing) {
    requestScreenShare();
    return;
  }

  stopScreenShare();
}, [requestScreenShare, stopScreenShare]);
|
|
3740
|
+
|
|
3741
|
+
/**
 * Handles a click on the voice button.
 *
 * Ignored while connecting. When not connected, switches to voice mode and
 * starts a call; when connected, toggles between voice and text response modes.
 */
const handleVoiceButtonClick = useCallback(() => {
  if (isConnecting) {
    return;
  }

  if (isConnectedRef.current) {
    const nextMode = responseMode === 'voice' ? 'text' : 'voice';
    handleResponseModeChange(nextMode);
    return;
  }

  handleResponseModeChange('voice');
  startCall();
}, [handleResponseModeChange, isConnecting, responseMode, startCall]);
|
|
3754
|
+
|
|
3755
|
+
/** Ends the session via `cleanupSession` when connected or still connecting. */
const endConversation = useCallback(() => {
  const hasActiveSession = isConnectedRef.current || isConnecting;

  if (hasActiveSession) {
    cleanupSession(true, 'manual_end');
  }
}, [cleanupSession, isConnecting]);
|
|
3762
|
+
|
|
3763
|
+
/**
 * Flips the muted flag, enables or disables every microphone audio track to
 * match, and mirrors the result in component state and the status line.
 */
const toggleMute = useCallback(() => {
  const runtime = runtimeRef.current;
  const nextMuted = !runtime.muted;
  runtime.muted = nextMuted;

  const microphoneTracks = runtime.microphoneStream?.getAudioTracks() ?? [];
  microphoneTracks.forEach((track) => {
    track.enabled = !nextMuted;
  });

  setMuted(nextMuted);

  if (nextMuted) {
    setStatus('Muted', 'muted');
  } else {
    setStatus('Listening', 'listening');
  }
}, [setStatus]);
|
|
3778
|
+
|
|
3779
|
+
// Derived flags for the render below.
const canEndConversation = isConnected || isConnecting;
const canToggleScreenShare = isConnected && !isConnecting;
const isVoiceActive = isConnected && responseMode === 'voice';
const canSubmitTextMessage =
  !isConnecting && textInputValue.trim().length > 0;

// Class names: placement and floating each select one of two root variants.
const rootClassName = cx(
  classes.root,
  placement === 'right' ? classes.rootRight : classes.rootLeft,
  floating ? classes.rootFloating : classes.rootInline,
  className
);
const composerClassName = cx(
  classes.composer,
  canEndConversation && classes.composerConnected,
  isVoiceActive && classes.composerVoiceActive
);

// Labels for the two debug copy buttons; both share the same failure text.
let debugCopyLabel = 'Copy realtime debug log';
if (debugCopyState === 'summary-copied') {
  debugCopyLabel = 'Realtime debug log copied';
} else if (debugCopyState === 'failed') {
  debugCopyLabel = 'Realtime debug log copy failed';
}

let debugRawCopyLabel = 'Copy realtime raw debug JSON';
if (debugCopyState === 'raw-copied') {
  debugRawCopyLabel = 'Realtime raw debug JSON copied';
} else if (debugCopyState === 'failed') {
  debugRawCopyLabel = 'Realtime debug log copy failed';
}
|
|
3807
|
+
|
|
3808
|
+
return (
|
|
3809
|
+
<div className={rootClassName} style={style} data-activity={activity}>
|
|
3810
|
+
<section
|
|
3811
|
+
className={classes.responsePanel}
|
|
3812
|
+
aria-label="ChatVRT message composer"
|
|
3813
|
+
>
|
|
3814
|
+
{dev ? (
|
|
3815
|
+
<button
|
|
3816
|
+
aria-expanded={debugOpen}
|
|
3817
|
+
aria-label={debugOpen ? 'Hide realtime debug log' : 'Show realtime debug log'}
|
|
3818
|
+
className={cx(
|
|
3819
|
+
classes.debugToggleButton,
|
|
3820
|
+
debugOpen && classes.debugToggleButtonActive
|
|
3821
|
+
)}
|
|
3822
|
+
onClick={() => setDebugOpen((current) => !current)}
|
|
3823
|
+
title={debugOpen ? 'Hide debug log' : 'Show debug log'}
|
|
3824
|
+
type="button"
|
|
3825
|
+
>
|
|
3826
|
+
Log
|
|
3827
|
+
</button>
|
|
3828
|
+
) : null}
|
|
3829
|
+
{responseMode === 'text' ? (
|
|
3830
|
+
<div className={classes.chatPanel}>
|
|
3831
|
+
<ol
|
|
3832
|
+
aria-live="polite"
|
|
3833
|
+
className={classes.chatMessages}
|
|
3834
|
+
ref={chatTranscriptRef}
|
|
3835
|
+
>
|
|
3836
|
+
{visibleChatMessages.length > 0 ? (
|
|
3837
|
+
visibleChatMessages.map((message) => (
|
|
3838
|
+
<li
|
|
3839
|
+
aria-busy={message.status === 'streaming'}
|
|
3840
|
+
className={cx(
|
|
3841
|
+
classes.chatMessage,
|
|
3842
|
+
message.role === 'user' && classes.chatMessageUser,
|
|
3843
|
+
message.status === 'streaming' &&
|
|
3844
|
+
classes.chatMessageStreaming
|
|
3845
|
+
)}
|
|
3846
|
+
key={message.id}
|
|
3847
|
+
>
|
|
3848
|
+
<div className={classes.chatMeta}>
|
|
3849
|
+
<strong>
|
|
3850
|
+
{message.role === 'user' ? 'You' : 'ChatVRT'}
|
|
3851
|
+
</strong>
|
|
3852
|
+
<time>{message.time}</time>
|
|
3853
|
+
</div>
|
|
3854
|
+
<p>{message.text}</p>
|
|
3855
|
+
</li>
|
|
3856
|
+
))
|
|
3857
|
+
) : (
|
|
3858
|
+
<li className={classes.chatEmpty}>No response yet.</li>
|
|
3859
|
+
)}
|
|
3860
|
+
</ol>
|
|
3861
|
+
</div>
|
|
3862
|
+
) : null}
|
|
3863
|
+
|
|
3864
|
+
<form className={composerClassName} onSubmit={handleTextMessageSubmit}>
|
|
3865
|
+
<button
|
|
3866
|
+
aria-pressed={screenVisible}
|
|
3867
|
+
className={cx(
|
|
3868
|
+
classes.composerAction,
|
|
3869
|
+
screenVisible && classes.composerActionActive
|
|
3870
|
+
)}
|
|
3871
|
+
disabled={!canToggleScreenShare}
|
|
3872
|
+
onClick={toggleScreenShare}
|
|
3873
|
+
title={screenVisible ? 'Stop screen share' : 'Share screen'}
|
|
3874
|
+
aria-label={screenVisible ? 'Stop screen share' : 'Share screen'}
|
|
3875
|
+
type="button"
|
|
3876
|
+
>
|
|
3877
|
+
<IconScreenShare />
|
|
3878
|
+
</button>
|
|
3879
|
+
<input
|
|
3880
|
+
aria-label="Ask ChatVRT"
|
|
3881
|
+
className={classes.composerInput}
|
|
3882
|
+
onChange={(event) => setTextInputValue(event.currentTarget.value)}
|
|
3883
|
+
onKeyDown={handleTextInputKeyDown}
|
|
3884
|
+
placeholder="Ask anything"
|
|
3885
|
+
type="text"
|
|
3886
|
+
value={textInputValue}
|
|
3887
|
+
/>
|
|
3888
|
+
{isVoiceActive ? (
|
|
3889
|
+
<button
|
|
3890
|
+
className={classes.composerAction}
|
|
3891
|
+
type="button"
|
|
3892
|
+
onClick={toggleMute}
|
|
3893
|
+
title={muted ? 'Unmute microphone' : 'Mute microphone'}
|
|
3894
|
+
aria-label={muted ? 'Unmute microphone' : 'Mute microphone'}
|
|
3895
|
+
>
|
|
3896
|
+
{muted ? <IconMicOff /> : <IconMic />}
|
|
3897
|
+
</button>
|
|
3898
|
+
) : null}
|
|
3899
|
+
{canEndConversation ? (
|
|
3900
|
+
<button
|
|
3901
|
+
className={cx(classes.composerAction, classes.composerEnd)}
|
|
3902
|
+
type="button"
|
|
3903
|
+
onClick={endConversation}
|
|
3904
|
+
title="End conversation"
|
|
3905
|
+
aria-label="End conversation"
|
|
3906
|
+
>
|
|
3907
|
+
<IconEndCall />
|
|
3908
|
+
</button>
|
|
3909
|
+
) : null}
|
|
3910
|
+
<button
|
|
3911
|
+
aria-pressed={responseMode === 'voice'}
|
|
3912
|
+
className={cx(
|
|
3913
|
+
classes.composerAction,
|
|
3914
|
+
responseMode === 'voice' && classes.composerActionActive
|
|
3915
|
+
)}
|
|
3916
|
+
disabled={isConnecting}
|
|
3917
|
+
onClick={handleVoiceButtonClick}
|
|
3918
|
+
title={
|
|
3919
|
+
!isConnected
|
|
3920
|
+
? 'Start voice mode'
|
|
3921
|
+
: responseMode === 'voice'
|
|
3922
|
+
? 'Turn off voice responses'
|
|
3923
|
+
: 'Activate voice mode'
|
|
3924
|
+
}
|
|
3925
|
+
aria-label={
|
|
3926
|
+
!isConnected
|
|
3927
|
+
? 'Start voice mode'
|
|
3928
|
+
: responseMode === 'voice'
|
|
3929
|
+
? 'Turn off voice responses'
|
|
3930
|
+
: 'Activate voice mode'
|
|
3931
|
+
}
|
|
3932
|
+
type="button"
|
|
3933
|
+
>
|
|
3934
|
+
<IconVoiceMode />
|
|
3935
|
+
</button>
|
|
3936
|
+
<button
|
|
3937
|
+
className={classes.composerSubmit}
|
|
3938
|
+
disabled={!canSubmitTextMessage}
|
|
3939
|
+
onClick={submitTextMessage}
|
|
3940
|
+
title="Send message"
|
|
3941
|
+
aria-label="Send message"
|
|
3942
|
+
type="button"
|
|
3943
|
+
>
|
|
3944
|
+
<IconSend />
|
|
3945
|
+
</button>
|
|
3946
|
+
</form>
|
|
3947
|
+
</section>
|
|
3948
|
+
|
|
3949
|
+
{effectiveShowRealtimeDebug ? (
|
|
3950
|
+
<section className={classes.debugPanel} aria-label="Realtime debug log">
|
|
3951
|
+
<div className={classes.debugTitle}>
|
|
3952
|
+
<strong>REALTIME DEBUG</strong>
|
|
3953
|
+
<div className={classes.debugTitleActions}>
|
|
3954
|
+
<button
|
|
3955
|
+
aria-label={debugCopyLabel}
|
|
3956
|
+
className={classes.debugCopyButton}
|
|
3957
|
+
data-tooltip="Copy compact log"
|
|
3958
|
+
disabled={!debugEvents.length}
|
|
3959
|
+
onClick={copyRealtimeDebugLog}
|
|
3960
|
+
type="button"
|
|
3961
|
+
>
|
|
3962
|
+
<CopyIcon aria-hidden size={13} />
|
|
3963
|
+
</button>
|
|
3964
|
+
<button
|
|
3965
|
+
aria-label={debugRawCopyLabel}
|
|
3966
|
+
className={classes.debugCopyButton}
|
|
3967
|
+
data-tooltip="Copy raw event JSON"
|
|
3968
|
+
disabled={!debugEvents.length}
|
|
3969
|
+
onClick={copyRealtimeDebugRawLog}
|
|
3970
|
+
type="button"
|
|
3971
|
+
>
|
|
3972
|
+
<BracketsIcon aria-hidden size={13} />
|
|
3973
|
+
</button>
|
|
3974
|
+
</div>
|
|
3975
|
+
</div>
|
|
3976
|
+
{debugEvents.length > 0 ? (
|
|
3977
|
+
<ol className={classes.debugList}>
|
|
3978
|
+
{debugEvents.map((event) => (
|
|
3979
|
+
<li key={event.id}>
|
|
3980
|
+
<time>{event.time}</time>
|
|
3981
|
+
<strong>{event.label}</strong>
|
|
3982
|
+
<span>{event.detail}</span>
|
|
3983
|
+
</li>
|
|
3984
|
+
))}
|
|
3985
|
+
</ol>
|
|
3986
|
+
) : (
|
|
3987
|
+
<p className={classes.debugEmpty}>
|
|
3988
|
+
Waiting for VAD / output buffer events...
|
|
3989
|
+
</p>
|
|
3990
|
+
)}
|
|
3991
|
+
{dev ? (
|
|
3992
|
+
<div className={classes.debugApiSwitch} aria-label="Voice Chat API">
|
|
3993
|
+
<span>API:</span>
|
|
3994
|
+
<button
|
|
3995
|
+
aria-pressed={devApiTarget === 'local'}
|
|
3996
|
+
className={cx(
|
|
3997
|
+
classes.debugApiOption,
|
|
3998
|
+
devApiTarget === 'local' && classes.debugApiOptionActive
|
|
3999
|
+
)}
|
|
4000
|
+
onClick={() => setDevApiTarget('local')}
|
|
4001
|
+
type="button"
|
|
4002
|
+
>
|
|
4003
|
+
local
|
|
4004
|
+
</button>
|
|
4005
|
+
<span>|</span>
|
|
4006
|
+
<button
|
|
4007
|
+
aria-pressed={devApiTarget === 'production'}
|
|
4008
|
+
className={cx(
|
|
4009
|
+
classes.debugApiOption,
|
|
4010
|
+
devApiTarget === 'production' && classes.debugApiOptionActive
|
|
4011
|
+
)}
|
|
4012
|
+
onClick={() => setDevApiTarget('production')}
|
|
4013
|
+
type="button"
|
|
4014
|
+
>
|
|
4015
|
+
remote
|
|
4016
|
+
</button>
|
|
4017
|
+
</div>
|
|
4018
|
+
) : null}
|
|
4019
|
+
</section>
|
|
4020
|
+
) : null}
|
|
4021
|
+
|
|
4022
|
+
{sourceLinks.length ? (
|
|
4023
|
+
<section className={classes.sourcePanel} aria-label="Help articles">
|
|
4024
|
+
<button
|
|
4025
|
+
className={classes.sourceToggle}
|
|
4026
|
+
type="button"
|
|
4027
|
+
onClick={() => setSourcesOpen((current) => !current)}
|
|
4028
|
+
aria-expanded={sourcesOpen}
|
|
4029
|
+
>
|
|
4030
|
+
{sourceLinks.length === 1 ? 'Help article' : 'Help articles'}
|
|
4031
|
+
</button>
|
|
4032
|
+
{sourcesOpen ? (
|
|
4033
|
+
<div className={classes.sourceList}>
|
|
4034
|
+
{sourceLinks.map((link) => (
|
|
4035
|
+
<a
|
|
4036
|
+
key={`${link.articleId}:${link.url}`}
|
|
4037
|
+
href={link.url}
|
|
4038
|
+
target="_blank"
|
|
4039
|
+
rel="noreferrer"
|
|
4040
|
+
>
|
|
4041
|
+
{link.title}
|
|
4042
|
+
</a>
|
|
4043
|
+
))}
|
|
4044
|
+
</div>
|
|
4045
|
+
) : null}
|
|
4046
|
+
</section>
|
|
4047
|
+
) : null}
|
|
4048
|
+
|
|
4049
|
+
{/* biome-ignore lint/a11y/useMediaCaption: Remote assistant audio has no text track in this prototype. */}
|
|
4050
|
+
<audio className={classes.hiddenMedia} ref={remoteAudioRef} autoPlay />
|
|
4051
|
+
<video
|
|
4052
|
+
className={classes.captureVideo}
|
|
4053
|
+
ref={screenPreviewRef}
|
|
4054
|
+
muted
|
|
4055
|
+
playsInline
|
|
4056
|
+
/>
|
|
4057
|
+
<canvas ref={frameCanvasRef} hidden />
|
|
4058
|
+
</div>
|
|
4059
|
+
);
|
|
4060
|
+
}
|