ui-soxo-bootstrap-core 2.6.26 → 2.6.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/lib/components/global-header/global-header.js +3 -4
- package/core/lib/components/sidemenu/sidemenu.scss +1 -1
- package/core/lib/models/process/components/process-dashboard/process-dashboard.js +469 -3
- package/core/lib/models/process/components/process-dashboard/process-dashboard.scss +4 -0
- package/core/modules/steps/action-buttons.js +57 -47
- package/core/modules/steps/action-buttons.scss +45 -34
- package/core/modules/steps/chat-assistant.js +141 -0
- package/core/modules/steps/openai-realtime.js +275 -0
- package/core/modules/steps/readme.md +167 -0
- package/core/modules/steps/steps.js +1030 -89
- package/core/modules/steps/steps.scss +546 -285
- package/core/modules/steps/voice-navigation.js +709 -0
- package/package.json +1 -1
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
// NOTE: every `process.env.X` read below is written as a static member access on purpose;
// presumably a bundler substitutes these at build time, which dynamic lookups would defeat.

/** WebSocket endpoint used for Gemini Live sessions (environment override, then built-in default). */
const GEMINI_LIVE_WS_ENDPOINT =
  process.env.GEMINI_LIVE_WS_ENDPOINT ||
  process.env.REACT_APP_GEMINI_LIVE_WS_ENDPOINT ||
  'wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent';

/** Gemini Live model name used when the caller does not supply one. */
const GEMINI_LIVE_MODEL =
  process.env.GEMINI_LIVE_MODEL ||
  process.env.REACT_APP_GEMINI_LIVE_MODEL ||
  'gemini-live-2.5-flash-preview';

/** WebSocket endpoint used for OpenAI Realtime sessions; four environment names are consulted in order. */
const OPENAI_REALTIME_WS_ENDPOINT =
  process.env.OPENAI_REALTIME_WS_ENDPOINT ||
  process.env.REACT_APP_OPENAI_REALTIME_WS_ENDPOINT ||
  process.env.OPENAI_REALTIME_ENDPOINT ||
  process.env.REACT_APP_OPENAI_REALTIME_ENDPOINT ||
  'wss://api.openai.com/v1/realtime';

/** OpenAI Realtime model name used when the caller does not supply one. */
const OPENAI_REALTIME_MODEL =
  process.env.OPENAI_REALTIME_MODEL ||
  process.env.REACT_APP_OPENAI_REALTIME_MODEL ||
  'gpt-realtime';

/** Session-level instructions sent to OpenAI Realtime when the caller does not supply any. */
const OPENAI_REALTIME_INSTRUCTIONS =
  process.env.OPENAI_REALTIME_INSTRUCTIONS ||
  process.env.REACT_APP_OPENAI_REALTIME_INSTRUCTIONS ||
  'You are a calm and clear healthcare onboarding assistant in a guided step-by-step workflow.';

/** Complete set of navigation actions a command may carry; anything else is normalized to 'NONE'. */
const ALLOWED_ACTIONS = [
  'NEXT',
  'PREVIOUS',
  'SKIP',
  'FINISH',
  'GOTO',
  'REPEAT',
  'NONE',
];
|
|
25
|
+
|
|
26
|
+
/**
 * Returns a Gemini model identifier that always starts with `models/`.
 *
 * @param {string} [modelName] - Model name with or without the `models/` prefix.
 * @returns {string} The prefixed name, or the built-in default model when `modelName` is falsy.
 */
function normalizeModelName(modelName) {
  if (!modelName) {
    return 'models/gemini-live-2.5-flash-preview';
  }

  const isAlreadyPrefixed = modelName.indexOf('models/') === 0;

  return isAlreadyPrefixed ? modelName : `models/${modelName}`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Looks up the Gemini API key.
 *
 * Order: `GEMINI_API_KEY`, then `REACT_APP_GEMINI_API_KEY` from the environment, then the
 * `gemini_api_key` entry of `window.localStorage` when a `window` global exists.
 *
 * @returns {string|null} The key, or null when none is found or storage access throws.
 */
function getGeminiApiKey() {
  const keyFromEnv = process.env.GEMINI_API_KEY || process.env.REACT_APP_GEMINI_API_KEY;
  if (keyFromEnv) {
    return keyFromEnv;
  }

  if (typeof window === 'undefined') {
    return null;
  }

  try {
    // Touching localStorage can throw (the original guards it the same way), so it stays inside the try.
    return window.localStorage ? window.localStorage.getItem('gemini_api_key') : null;
  } catch (error) {
    return null;
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Reads one entry from `window.localStorage`.
 *
 * @param {string} storageKey - Name of the entry to read.
 * @returns {string|null} The stored value, or null when there is no `window`, no
 *   `localStorage`, no such entry, or `getItem` throws.
 */
function getFromStorage(storageKey) {
  const storageAvailable = typeof window !== 'undefined' && Boolean(window.localStorage);
  if (!storageAvailable) {
    return null;
  }

  let storedValue = null;
  try {
    storedValue = window.localStorage.getItem(storageKey);
  } catch (error) {
    storedValue = null;
  }

  return storedValue;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Looks up the OpenAI Realtime API key.
 *
 * Environment variables are consulted first (`OPEN_AI_KEY`, `OPENAI_API_KEY`, then their
 * `REACT_APP_`-prefixed variants), followed by several localStorage entry names.
 *
 * @returns {string|null} The first truthy value found; otherwise the (falsy) result of the
 *   last storage lookup.
 */
export function getOpenAIRealtimeApiKey() {
  // Static member accesses are kept deliberately; see how the module constants read the environment.
  const keyFromEnv =
    process.env.OPEN_AI_KEY ||
    process.env.OPENAI_API_KEY ||
    process.env.REACT_APP_OPEN_AI_KEY ||
    process.env.REACT_APP_OPENAI_API_KEY;

  if (keyFromEnv) {
    return keyFromEnv;
  }

  const storageNames = [
    'open_ai_key',
    'openai_api_key',
    'OPEN_AI_KEY',
    'OPENAI_API_KEY',
    'REACT_APP_OPEN_AI_KEY',
    'REACT_APP_OPENAI_API_KEY',
  ];

  let lastLookup = null;
  for (const storageName of storageNames) {
    lastLookup = getFromStorage(storageName);
    if (lastLookup) {
      return lastLookup;
    }
  }

  return lastLookup;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Strips Markdown code-fence markers from model output so the remainder can be parsed as JSON.
 *
 * @param {string} [text] - Raw model text.
 * @returns {string} The text without "```json" / "```" markers, trimmed; '' for falsy input.
 */
function sanitizeJsonText(text) {
  if (text) {
    // Two passes on purpose: language-tagged fences first, then whatever bare fences remain.
    const withoutTaggedFences = text.replace(/```json/gi, '');
    const withoutFences = withoutTaggedFences.replace(/```/g, '');

    return withoutFences.trim();
  }

  return '';
}
|
|
94
|
+
|
|
95
|
+
/**
 * Coerces a loosely-shaped command object into `{ action, stepNumber, message }`.
 *
 * @param {object} [payload] - Candidate command; may use `stepNumber` or `step_number`.
 * @returns {{ action: string, stepNumber: (number|null), message: string }}
 *   `action` is upper-cased and replaced by 'NONE' when not in ALLOWED_ACTIONS;
 *   `stepNumber` is the base-10 integer parse of the step field or null;
 *   `message` is the trimmed string message or ''.
 */
function normalizeCommandPayload(payload = {}) {
  const requestedAction = String(payload.action || '').toUpperCase();
  const isKnownAction = ALLOWED_ACTIONS.includes(requestedAction);

  // `stepNumber` wins whenever it is defined; `step_number` is only the fallback spelling.
  const stepSource = payload.stepNumber === undefined ? payload.step_number : payload.stepNumber;
  const stepCandidate = Number.parseInt(stepSource, 10);

  const rawMessage = payload.message;

  return {
    action: isKnownAction ? requestedAction : 'NONE',
    stepNumber: Number.isNaN(stepCandidate) ? null : stepCandidate,
    message: typeof rawMessage === 'string' ? rawMessage.trim() : '',
  };
}
|
|
111
|
+
|
|
112
|
+
/**
 * Cuts out the span from the first '{' to the last '}' of a string.
 *
 * @param {string} text - Text that may contain a JSON object surrounded by other content.
 * @returns {string|null} The brace-delimited substring, or null when no such span exists.
 */
function extractJsonObject(text) {
  const openIndex = text.indexOf('{');
  const closeIndex = text.lastIndexOf('}');

  const hasBracePair = openIndex !== -1 && closeIndex !== -1 && closeIndex > openIndex;

  return hasBracePair ? text.slice(openIndex, closeIndex + 1) : null;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Turns raw model text into a normalized command.
 *
 * @param {string} modelText - Text returned by the model, possibly wrapped in code fences.
 * @returns {{ action: string, stepNumber: (number|null), message: string }|null}
 *   The normalized command, or null when no brace-delimited span is found or it is not valid JSON.
 */
function parseCommandFromModelText(modelText) {
  const jsonCandidate = extractJsonObject(sanitizeJsonText(modelText));

  if (!jsonCandidate) {
    return null;
  }

  try {
    const parsedPayload = JSON.parse(jsonCandidate);
    return normalizeCommandPayload(parsedPayload);
  } catch (error) {
    return null;
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Keyword-based interpreter for voice transcripts; it needs no model or network access.
 *
 * Checks run in a fixed order: "go to step N", then repeat, previous, next, skip, finish.
 * The first match wins.
 *
 * @param {string} [transcript] - Recognized speech. Null, undefined and non-string values are
 *   treated as an empty transcript instead of throwing.
 * @returns {{ action: string, stepNumber: (number|null), message: string }}
 *   The matched command, or an action of 'NONE' when nothing matches.
 */
export function fallbackVoiceCommand(transcript = '') {
  const spoken = typeof transcript === 'string' ? transcript : '';

  // Transcripts can carry sentence punctuation ("Next.", "Go back!"). The keyword patterns
  // below require whitespace or string boundaries around a keyword, so punctuation is turned
  // into spaces first; otherwise such commands would not be recognized.
  const text = spoken
    .toLowerCase()
    .replace(/[.,!?;:]+/g, ' ')
    .trim();

  const gotoMatch = text.match(/(?:go to|goto|open|jump to)\s+(?:step\s+)?(\d+)/);
  if (gotoMatch && gotoMatch[1]) {
    return {
      action: 'GOTO',
      stepNumber: Number.parseInt(gotoMatch[1], 10),
      message: 'Navigating to requested step',
    };
  }

  // Order matters: earlier rules take precedence when a transcript contains several keywords.
  const keywordRules = [
    {
      pattern: /(^|\s)(repeat|again|read step|read this step)(\s|$)/,
      action: 'REPEAT',
      message: 'Repeating current step',
    },
    {
      pattern: /(^|\s)(previous|back|go back|last step)(\s|$)/,
      action: 'PREVIOUS',
      message: 'Going to previous step',
    },
    {
      pattern: /(^|\s)(next|continue|forward|go next)(\s|$)/,
      action: 'NEXT',
      message: 'Going to next step',
    },
    {
      pattern: /(^|\s)(skip|skip step)(\s|$)/,
      action: 'SKIP',
      message: 'Skipping current step',
    },
    {
      pattern: /(^|\s)(finish|complete process|submit process|done)(\s|$)/,
      action: 'FINISH',
      message: 'Finishing process',
    },
  ];

  const matchedRule = keywordRules.find((rule) => rule.pattern.test(text));
  if (matchedRule) {
    return { action: matchedRule.action, stepNumber: null, message: matchedRule.message };
  }

  return { action: 'NONE', stepNumber: null, message: 'Command not recognized' };
}
|
|
172
|
+
|
|
173
|
+
/**
 * Builds the single-line prompt that asks the model to translate a transcript into a
 * JSON navigation command.
 *
 * @param {object} [params]
 * @param {string} [params.transcript] - Recognized speech to interpret.
 * @param {object} [params.context] - Wizard state; missing fields fall back to
 *   step 1 of 1, not last, not skippable, not completed.
 * @returns {string} The prompt sentences joined by single spaces.
 */
function buildCommandPrompt({ transcript, context } = {}) {
  const {
    activeStepNumber = 1,
    totalSteps = 1,
    isLastStep = false,
    canSkip = false,
    isStepCompleted = false,
  } = context || {};

  // The transcript is free-form user speech. JSON-encoding it escapes embedded quotes and
  // line breaks, so the quoted span in the prompt stays well delimited whatever was said.
  // For ordinary text the output is the same `"..."` form the prompt always used.
  const quotedTranscript = JSON.stringify(String(transcript ?? ''));

  return [
    'You are a strict command interpreter for a step wizard.',
    'Return ONLY JSON with keys: action, stepNumber, message.',
    'action must be one of: NEXT, PREVIOUS, SKIP, FINISH, GOTO, REPEAT, NONE.',
    'stepNumber must be null unless action is GOTO.',
    `Current step: ${activeStepNumber} of ${totalSteps}.`,
    `Is last step: ${isLastStep ? 'yes' : 'no'}.`,
    `Can skip current step: ${canSkip ? 'yes' : 'no'}.`,
    `Current step completed: ${isStepCompleted ? 'yes' : 'no'}.`,
    `User voice transcript: ${quotedTranscript}.`,
  ].join(' ');
}
|
|
194
|
+
|
|
195
|
+
/**
 * Creates a lazily-connected Gemini Live session that converts voice transcripts into
 * normalized step-navigation commands.
 *
 * The WebSocket is opened on the first `interpretCommand` call and reused afterwards.
 * Only one command is in flight at a time; starting a new one rejects the previous one.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - API key; falls back to `getGeminiApiKey()`.
 * @param {string} [options.model] - Model name; falls back to `GEMINI_LIVE_MODEL`.
 * @param {string} [options.endpoint] - WebSocket URL; falls back to `GEMINI_LIVE_WS_ENDPOINT`.
 * @returns {{ interpretCommand: Function, close: Function }}
 *   `interpretCommand({ transcript, context })` resolves to `{ action, stepNumber, message }`
 *   and rejects on missing key, connection failure, timeout, interruption or unparsable output.
 *   `close()` rejects any pending command and closes the socket.
 */
export function createGeminiLiveNavigator({ apiKey, model, endpoint } = {}) {
  const SETUP_TIMEOUT_MS = 10000;
  const COMMAND_TIMEOUT_MS = 12000;

  const resolvedApiKey = apiKey || getGeminiApiKey();
  const resolvedModel = normalizeModelName(model || GEMINI_LIVE_MODEL);
  const resolvedEndpoint = endpoint || GEMINI_LIVE_WS_ENDPOINT;

  let socket = null;
  let isSetupComplete = false;
  let setupInFlight = null;
  let pendingCommand = null;

  /** True while the given socket is open or still connecting. */
  function isLive(ws) {
    return ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING;
  }

  /**
   * Returns the text of a WebSocket frame, or null when the payload type is not supported.
   * Binary frames are decoded as UTF-8 so JSON delivered in a binary frame is not lost.
   */
  function decodeFrame(data) {
    if (typeof data === 'string') {
      return data;
    }

    if (data instanceof ArrayBuffer || ArrayBuffer.isView(data)) {
      return new TextDecoder().decode(data);
    }

    return null;
  }

  /** Rejects the command currently in flight (if any) and clears its timeout. */
  function cleanupPending(errorMessage) {
    if (!pendingCommand) {
      return;
    }

    clearTimeout(pendingCommand.timeoutId);

    const reject = pendingCommand.reject;
    pendingCommand = null;

    reject(new Error(errorMessage || 'Voice command interrupted'));
  }

  /** Rejects any pending command, closes the current socket and resets the session state. */
  function close() {
    cleanupPending('Gemini Live session closed');

    if (socket && isLive(socket)) {
      socket.close();
    }

    socket = null;
    isSetupComplete = false;
    setupInFlight = null;
  }

  /**
   * Opens the socket and waits for the server's setup acknowledgement.
   * Resolves immediately when a ready session exists; concurrent callers share one attempt.
   */
  async function connect() {
    if (!resolvedApiKey) {
      throw new Error('Gemini API key is missing');
    }

    if (socket && socket.readyState === WebSocket.OPEN && isSetupComplete) {
      return;
    }

    if (setupInFlight) {
      return setupInFlight;
    }

    const attempt = new Promise((resolve, reject) => {
      // NOTE(review): the API key travels in the URL query string, as in the original code.
      const ws = new WebSocket(`${resolvedEndpoint}?key=${encodeURIComponent(resolvedApiKey)}`);
      // Deliver binary frames as ArrayBuffer so they can be decoded synchronously and in order.
      ws.binaryType = 'arraybuffer';

      let settled = false;
      let setupTimeoutId = null;

      function safeResolve() {
        if (settled) {
          return;
        }

        settled = true;
        clearTimeout(setupTimeoutId);
        resolve();
      }

      function safeReject(error) {
        if (settled) {
          return;
        }

        settled = true;
        clearTimeout(setupTimeoutId);
        reject(error);
      }

      // Ends this attempt. Shared session state is only reset when this socket is still the
      // current one, so a late failure of an old socket cannot tear down a newer connection.
      function abort(error) {
        if (socket === ws) {
          close();
        } else if (isLive(ws)) {
          ws.close();
        }

        safeReject(error);
      }

      socket = ws;

      ws.onopen = () => {
        ws.send(
          JSON.stringify({
            setup: {
              model: resolvedModel,
              generationConfig: {
                responseModalities: ['TEXT'],
                temperature: 0.1,
                maxOutputTokens: 256,
              },
              systemInstruction: {
                parts: [
                  {
                    text:
                      'You convert spoken step navigation requests into strict JSON command output only.',
                  },
                ],
              },
            },
          })
        );
      };

      ws.onmessage = (event) => {
        // Frames from a superseded socket must not touch the current session.
        if (socket !== ws) {
          return;
        }

        const raw = decodeFrame(event.data);
        if (raw === null) {
          return;
        }

        let message;
        try {
          message = JSON.parse(raw);
        } catch (error) {
          return;
        }

        if (!message || typeof message !== 'object') {
          return;
        }

        if (message.setupComplete) {
          isSetupComplete = true;
          safeResolve();
          return;
        }

        if (message.goAway) {
          abort(new Error('Gemini Live server closed the session'));
          return;
        }

        if (pendingCommand && message.serverContent) {
          const modelParts = message.serverContent.modelTurn?.parts;
          const parts = Array.isArray(modelParts) ? modelParts : [];

          parts.forEach((part) => {
            if (part && typeof part.text === 'string') {
              pendingCommand.buffer += part.text;
            }
          });

          if (message.serverContent.turnComplete) {
            const resolveCommand = pendingCommand.resolve;
            const rejectCommand = pendingCommand.reject;
            const buffer = pendingCommand.buffer;

            clearTimeout(pendingCommand.timeoutId);
            pendingCommand = null;

            const parsed = parseCommandFromModelText(buffer);
            if (parsed) {
              resolveCommand(parsed);
            } else {
              rejectCommand(new Error('Unable to parse Gemini voice command response'));
            }
          }
        }
      };

      ws.onerror = () => {
        abort(new Error('Gemini Live WebSocket error'));
      };

      ws.onclose = () => {
        // No-op when setup already settled.
        safeReject(new Error('Gemini Live WebSocket closed before setup'));

        if (socket !== ws) {
          return;
        }

        cleanupPending('Gemini Live WebSocket closed');

        socket = null;
        isSetupComplete = false;
      };

      setupTimeoutId = setTimeout(() => {
        abort(new Error('Gemini Live setup timed out'));
      }, SETUP_TIMEOUT_MS);
    });

    setupInFlight = attempt;

    try {
      await attempt;
    } finally {
      // Only clear the marker this call created; a newer attempt may already have replaced it.
      if (setupInFlight === attempt) {
        setupInFlight = null;
      }
    }
  }

  /**
   * Sends the transcript to the model and resolves with the normalized command it returns.
   */
  async function interpretCommand({ transcript, context } = {}) {
    await connect();

    if (!socket || socket.readyState !== WebSocket.OPEN) {
      throw new Error('Gemini Live socket is not connected');
    }

    if (pendingCommand) {
      cleanupPending('A new command interrupted the previous command');
    }

    const prompt = buildCommandPrompt({ transcript, context });
    const activeSocket = socket;

    const response = await new Promise((resolve, reject) => {
      const entry = { buffer: '', timeoutId: null, resolve, reject };

      entry.timeoutId = setTimeout(() => {
        // Only clear the slot if it still belongs to this command.
        if (pendingCommand === entry) {
          pendingCommand = null;
        }
        reject(new Error('Gemini Live command timed out'));
      }, COMMAND_TIMEOUT_MS);

      pendingCommand = entry;

      try {
        activeSocket.send(
          JSON.stringify({
            clientContent: {
              turns: [
                {
                  role: 'user',
                  parts: [{ text: prompt }],
                },
              ],
              turnComplete: true,
            },
          })
        );
      } catch (error) {
        // A failed send must not leave a pending entry and a live timer behind.
        clearTimeout(entry.timeoutId);
        if (pendingCommand === entry) {
          pendingCommand = null;
        }
        reject(error);
      }
    });

    return normalizeCommandPayload(response);
  }

  return {
    interpretCommand,
    close,
  };
}
|
|
423
|
+
|
|
424
|
+
/**
 * Returns the text carried by one content part: its `text`, else its `transcript`, else ''.
 */
function readRealtimePartText(part) {
  if (typeof part?.text === 'string') {
    return part.text;
  }

  if (typeof part?.transcript === 'string') {
    return part.transcript;
  }

  return '';
}

/**
 * Pulls the text out of one OpenAI Realtime server message.
 *
 * Lookup order: `message.delta`, `message.text`, the parts of `message.item.content`,
 * then the parts of every entry in `message.response.output`.
 *
 * @param {object} [message] - Parsed server message. Null and non-object values yield ''.
 * @returns {string} The extracted text; multiple parts are joined with a single space.
 */
function extractOpenAIRealtimeText(message = {}) {
  if (!message || typeof message !== 'object') {
    return '';
  }

  // Any non-empty delta is returned untouched: a chunk that is only whitespace (a space or
  // a line break between words) is still part of the streamed text.
  if (typeof message.delta === 'string' && message.delta !== '') {
    return message.delta;
  }

  if (typeof message.text === 'string' && message.text.trim()) {
    return message.text;
  }

  if (message.item && Array.isArray(message.item.content)) {
    return message.item.content
      .map((part) => readRealtimePartText(part))
      .filter(Boolean)
      .join(' ');
  }

  if (message.response && Array.isArray(message.response.output)) {
    return message.response.output
      .flatMap((outputItem) => (Array.isArray(outputItem?.content) ? outputItem.content : []))
      .map((part) => readRealtimePartText(part))
      .filter(Boolean)
      .join(' ');
  }

  return '';
}
|
|
470
|
+
|
|
471
|
+
/**
 * Creates a lazily-connected, text-only OpenAI Realtime conversation over WebSocket.
 *
 * Only one turn is in flight at a time; starting a new one rejects the previous one.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - API key; falls back to `getOpenAIRealtimeApiKey()`.
 * @param {string} [options.model] - Model name; falls back to `OPENAI_REALTIME_MODEL`.
 * @param {string} [options.endpoint] - WebSocket URL; falls back to `OPENAI_REALTIME_WS_ENDPOINT`.
 * @param {string} [options.instructions] - Session instructions; falls back to
 *   `OPENAI_REALTIME_INSTRUCTIONS`.
 * @returns {{ connect: Function, sendUserText: Function, close: Function }}
 *   `connect()` resolves once the server acknowledges the session update.
 *   `sendUserText({ text, context, responseInstructions })` resolves with the reply text and
 *   rejects on empty input, connection failure, server error, timeout or interruption.
 *   `close()` rejects any pending turn and closes the socket.
 */
export function createOpenAIRealtimeConversation({ apiKey, model, endpoint, instructions } = {}) {
  const SETUP_TIMEOUT_MS = 10000;
  const RESPONSE_TIMEOUT_MS = 25000;

  const resolvedApiKey = apiKey || getOpenAIRealtimeApiKey();
  const resolvedModel = model || OPENAI_REALTIME_MODEL;
  const resolvedEndpoint = endpoint || OPENAI_REALTIME_WS_ENDPOINT;
  const resolvedInstructions = instructions || OPENAI_REALTIME_INSTRUCTIONS;

  const openAIOrganizationId = process.env.OPENAI_ORG_ID || process.env.REACT_APP_OPENAI_ORG_ID || '';
  const openAIProjectId = process.env.OPENAI_PROJECT_ID || process.env.REACT_APP_OPENAI_PROJECT_ID || '';

  let socket = null;
  let sessionReady = false;
  let connectInFlight = null;
  let pendingResponse = null;

  /** True while the given socket is open or still connecting. */
  function isLive(ws) {
    return ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING;
  }

  /** Rejects the turn currently in flight (if any) and clears its timeout. */
  function clearPending(errorMessage) {
    if (!pendingResponse) {
      return;
    }

    clearTimeout(pendingResponse.timeoutId);

    const rejectTurn = pendingResponse.reject;
    pendingResponse = null;

    rejectTurn(new Error(errorMessage || 'OpenAI Realtime conversation interrupted'));
  }

  /** Rejects any pending turn, closes the current socket and resets the session state. */
  function close() {
    clearPending('OpenAI Realtime session closed');

    if (socket && isLive(socket)) {
      socket.close();
    }

    socket = null;
    sessionReady = false;
    connectInFlight = null;
  }

  /**
   * Opens the socket, sends the session configuration and waits for `session.updated`.
   * Resolves immediately when a ready session exists; concurrent callers share one attempt.
   */
  async function connect() {
    if (!resolvedApiKey) {
      throw new Error('OpenAI Realtime API key is missing');
    }

    if (socket && socket.readyState === WebSocket.OPEN && sessionReady) {
      return;
    }

    if (connectInFlight) {
      return connectInFlight;
    }

    const attempt = new Promise((resolve, reject) => {
      // Credentials and optional organization/project ids are passed as WebSocket subprotocols.
      const protocols = ['realtime', `openai-insecure-api-key.${resolvedApiKey}`];
      if (openAIOrganizationId) {
        protocols.push(`openai-organization.${openAIOrganizationId}`);
      }

      if (openAIProjectId) {
        protocols.push(`openai-project.${openAIProjectId}`);
      }

      const ws = new WebSocket(`${resolvedEndpoint}?model=${encodeURIComponent(resolvedModel)}`, protocols);
      let settled = false;
      let setupTimeoutId = null;

      function safeResolve() {
        if (settled) {
          return;
        }

        settled = true;
        clearTimeout(setupTimeoutId);
        resolve();
      }

      function safeReject(error) {
        if (settled) {
          return;
        }

        settled = true;
        clearTimeout(setupTimeoutId);
        reject(error);
      }

      // Ends this attempt. Shared session state is only reset when this socket is still the
      // current one, so a late failure of an old socket cannot tear down a newer connection.
      function abort(error) {
        if (socket === ws) {
          close();
        } else if (isLive(ws)) {
          ws.close();
        }

        safeReject(error);
      }

      socket = ws;

      ws.onopen = () => {
        ws.send(
          JSON.stringify({
            type: 'session.update',
            session: {
              instructions: resolvedInstructions,
              modalities: ['text'],
              temperature: 0.6,
            },
          })
        );
      };

      ws.onmessage = (event) => {
        // Frames from a superseded socket must not touch the current session.
        if (socket !== ws) {
          return;
        }

        let message;
        try {
          message = JSON.parse(event.data);
        } catch (error) {
          return;
        }

        if (!message || typeof message !== 'object') {
          return;
        }

        if (message.type === 'session.updated') {
          sessionReady = true;
          safeResolve();
          return;
        }

        if (message.type === 'error') {
          const errorMessage = message.error?.message || 'OpenAI Realtime WebSocket error';
          if (!sessionReady) {
            // Close the half-configured socket too, otherwise the next connect() would open
            // a second one while this one stays around.
            abort(new Error(errorMessage));
          } else {
            clearPending(errorMessage);
          }
          return;
        }

        if (!pendingResponse) {
          return;
        }

        // Only streamed text deltas are accumulated. Other events repeat text that is already
        // counted (completed-part/item snapshots) or echo the user's own message, so appending
        // them would duplicate the reply and mix the prompt into it.
        // NOTE(review): both delta event names are accepted; confirm against the API version in use.
        if (message.type === 'response.text.delta' || message.type === 'response.output_text.delta') {
          if (typeof message.delta === 'string') {
            pendingResponse.buffer += message.delta;
          }
          return;
        }

        if (message.type === 'response.done') {
          const turn = pendingResponse;

          clearTimeout(turn.timeoutId);
          pendingResponse = null;

          // Prefer the streamed text; fall back to the text embedded in the final event when
          // no deltas were received.
          const replyText = turn.buffer.trim() || extractOpenAIRealtimeText(message).trim();

          if (replyText) {
            turn.resolve(replyText);
          } else {
            turn.reject(new Error('OpenAI Realtime response did not include text'));
          }
        }
      };

      ws.onerror = () => {
        abort(new Error('OpenAI Realtime WebSocket connection error'));
      };

      ws.onclose = () => {
        // No-op when setup already settled.
        safeReject(new Error('OpenAI Realtime WebSocket closed before setup'));

        if (socket !== ws) {
          return;
        }

        clearPending('OpenAI Realtime WebSocket closed');
        socket = null;
        sessionReady = false;
      };

      setupTimeoutId = setTimeout(() => {
        abort(new Error('OpenAI Realtime setup timed out'));
      }, SETUP_TIMEOUT_MS);
    });

    connectInFlight = attempt;

    try {
      await attempt;
    } finally {
      // Only clear the marker this call created; a newer attempt may already have replaced it.
      if (connectInFlight === attempt) {
        connectInFlight = null;
      }
    }
  }

  /**
   * Sends one user turn and resolves with the assistant's reply text.
   * `context`, when given, is prepended to the user text; `responseInstructions`, when given,
   * is sent as per-response instructions.
   */
  async function sendUserText({ text, context = '', responseInstructions = '' } = {}) {
    const normalizedText = typeof text === 'string' ? text.trim() : '';

    if (!normalizedText) {
      throw new Error('User text is required');
    }

    await connect();

    if (!socket || socket.readyState !== WebSocket.OPEN) {
      throw new Error('OpenAI Realtime socket is not connected');
    }

    if (pendingResponse) {
      clearPending('A new turn interrupted the previous response');
    }

    const prompt = context ? `${context}\n\nUser: ${normalizedText}` : normalizedText;
    const activeSocket = socket;

    return new Promise((resolve, reject) => {
      const turn = { buffer: '', timeoutId: null, resolve, reject };

      turn.timeoutId = setTimeout(() => {
        // Only clear the slot if it still belongs to this turn.
        if (pendingResponse === turn) {
          pendingResponse = null;
        }
        reject(new Error('OpenAI Realtime response timed out'));
      }, RESPONSE_TIMEOUT_MS);

      pendingResponse = turn;

      try {
        activeSocket.send(
          JSON.stringify({
            type: 'conversation.item.create',
            item: {
              type: 'message',
              role: 'user',
              content: [{ type: 'input_text', text: prompt }],
            },
          })
        );

        activeSocket.send(
          JSON.stringify({
            type: 'response.create',
            response: {
              modalities: ['text'],
              ...(responseInstructions ? { instructions: responseInstructions } : {}),
            },
          })
        );
      } catch (error) {
        // A failed send must not leave a pending turn and a live timer behind.
        clearTimeout(turn.timeoutId);
        if (pendingResponse === turn) {
          pendingResponse = null;
        }
        reject(error);
      }
    });
  }

  return {
    connect,
    sendUserText,
    close,
  };
}
|