@aj-archipelago/cortex 1.3.10 → 1.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +15 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ScreenshotCapture.tsx +57 -9
- package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +35 -22
- package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +65 -14
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +10 -10
- package/helper-apps/cortex-realtime-voice-server/src/realtime/socket.ts +2 -1
- package/package.json +1 -1
- package/pathways/system/entity/sys_generator_reasoning.js +1 -1
- package/pathways/system/rest_streaming/sys_openai_chat_o1.js +19 -0
- package/pathways/system/rest_streaming/sys_openai_chat_o1_mini.js +19 -0
- package/server/plugins/openAiReasoningPlugin.js +11 -2
package/config.js
CHANGED
@@ -171,6 +171,21 @@ var config = convict({
     "maxReturnTokens": 4096,
     "supportsStreaming": true
   },
+  "oai-o1": {
+    "type": "OPENAI-REASONING",
+    "url": "https://api.openai.com/v1/chat/completions",
+    "headers": {
+      "Authorization": "Bearer {{OPENAI_API_KEY}}",
+      "Content-Type": "application/json"
+    },
+    "params": {
+      "model": "o1"
+    },
+    "requestsPerSecond": 10,
+    "maxTokenLength": 200000,
+    "maxReturnTokens": 100000,
+    "supportsStreaming": false
+  },
   "oai-o1-mini": {
     "type": "OPENAI-REASONING",
     "url": "https://api.openai.com/v1/chat/completions",
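For orientation, an entry like `oai-o1` is what the reasoning plugin reads when it builds the outbound request: the `{{OPENAI_API_KEY}}` placeholder is substituted at request time, `params` is merged into the request body, and `supportsStreaming: false` means responses come back whole rather than as a token stream. A minimal sketch of that resolution, assuming a simple template substitution (the `interpolate` helper is illustrative, not a Cortex API):

```ts
// Illustrative sketch: resolving an "oai-o1"-style model entry into a request.
// interpolate() stands in for Cortex's own placeholder substitution.
const interpolate = (s: string): string =>
  s.replace(/\{\{(\w+)\}\}/g, (_, name) => process.env[name] ?? '');

const model = {
  url: 'https://api.openai.com/v1/chat/completions',
  headers: {
    'Authorization': 'Bearer {{OPENAI_API_KEY}}',
    'Content-Type': 'application/json',
  },
  params: { model: 'o1' },
};

const res = await fetch(model.url, {
  method: 'POST',
  headers: Object.fromEntries(
    Object.entries(model.headers).map(([k, v]) => [k, interpolate(v)]),
  ),
  // supportsStreaming is false for this model, so no stream flag is sent.
  body: JSON.stringify({ ...model.params, messages: [{ role: 'user', content: 'Hello' }] }),
});
```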
package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ScreenshotCapture.tsx
CHANGED
@@ -3,6 +3,10 @@ import { Socket } from 'socket.io-client';
 import { ClientToServerEvents, ServerToClientEvents } from '../../../../src/realtime/socket';
 import { logger } from '../../utils/logger';
 
+const MAX_IMAGE_SIZE = 5 * 1024 * 1024; // 5MB limit
+const MAX_DIMENSION = 3840; // Max width/height
+const COMPRESSION_QUALITY = 0.9; // Image quality (0.0 to 1.0)
+
 type ScreenshotCaptureProps = {
   socket: Socket<ServerToClientEvents, ClientToServerEvents>;
 };
@@ -57,21 +61,49 @@ export const ScreenshotCapture = ({ socket }: ScreenshotCaptureProps) => {
         };
       });
 
-      // Create canvas and
+      // Create canvas and calculate dimensions
+      let width = video.videoWidth;
+      let height = video.videoHeight;
+
+      // Scale down if dimensions exceed maximum
+      if (width > MAX_DIMENSION || height > MAX_DIMENSION) {
+        const aspectRatio = width / height;
+        if (width > height) {
+          width = MAX_DIMENSION;
+          height = Math.round(width / aspectRatio);
+        } else {
+          height = MAX_DIMENSION;
+          width = Math.round(height * aspectRatio);
+        }
+      }
+
       const canvas = document.createElement('canvas');
-      canvas.width =
-      canvas.height =
+      canvas.width = width;
+      canvas.height = height;
       const ctx = canvas.getContext('2d');
 
       if (!ctx) {
         throw new Error('Could not get canvas context');
       }
 
-      // Draw the video frame
-      ctx.drawImage(video, 0, 0);
+      // Draw the video frame with scaling if needed
+      ctx.drawImage(video, 0, 0, width, height);
+
+      // Try different compression levels if needed
+      let imageData = canvas.toDataURL('image/jpeg', COMPRESSION_QUALITY);
+      let attempts = 3;
+      let currentQuality = COMPRESSION_QUALITY;
 
-
-
+      while (imageData.length > MAX_IMAGE_SIZE && attempts > 0) {
+        currentQuality *= 0.8; // Reduce quality by 20% each attempt
+        imageData = canvas.toDataURL('image/jpeg', currentQuality);
+        attempts--;
+        logger.log(`Compressing image, attempt ${3 - attempts}, size: ${Math.round(imageData.length / 1024)}KB`);
+      }
+
+      if (imageData.length > MAX_IMAGE_SIZE) {
+        throw new Error('Screenshot too large even after compression');
+      }
 
       // Clean up
       video.remove();
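Note that `imageData` is a base64 data URL, so the 5MB `MAX_IMAGE_SIZE` limit is measured in string characters rather than raw image bytes; base64 inflates payloads by roughly a third. A small sketch of the relationship, useful when reasoning about the real JPEG size (this is plain base64 arithmetic, not code from the package):

```ts
// Base64 encodes 3 bytes into 4 characters, so the byte payload of a data URL
// is about 3/4 of its base64 portion's length.
function approximateBytes(dataUrl: string): number {
  const base64 = dataUrl.slice(dataUrl.indexOf(',') + 1);
  return Math.floor((base64.length * 3) / 4);
}

// A 5 * 1024 * 1024 character budget therefore holds roughly 3.75MB of JPEG data.
```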
@@ -88,8 +120,24 @@ export const ScreenshotCapture = ({ socket }: ScreenshotCaptureProps) => {
       // Capture frame from stream
       const imageData = await captureFrame(stream);
 
-      logger.log(
-
+      logger.log(`Screenshot captured (size: ${Math.round(imageData.length / 1024)}KB)...`);
+
+      // Split into ~500KB chunks
+      const CHUNK_SIZE = 500 * 1024;
+      const chunks: string[] = [];
+
+      for (let i = 0; i < imageData.length; i += CHUNK_SIZE) {
+        chunks.push(imageData.slice(i, i + CHUNK_SIZE));
+      }
+
+      // Send chunks
+      chunks.forEach((chunk, index) => {
+        logger.log(`Sending chunk ${index + 1}/${chunks.length}`);
+        socket.emit('screenshotChunk', chunk, index);
+      });
+
+      // Signal completion
+      socket.emit('screenshotComplete', chunks.length);
 
     } catch (error) {
       logger.error('Error handling screenshot request:', error);
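The matching receiver lives server-side in Tools.ts (shown later in this diff, where inactivity timeouts and listener cleanup are added). Stripped to its essentials, reassembly of this protocol looks like the sketch below, using the event names the diff introduces:

```ts
// Bare-bones reassembly sketch for the chunk protocol above; the production
// version in Tools.ts below adds a 30s inactivity timeout and cleanup.
const received: string[] = [];

socket.on('screenshotChunk', (chunk: string, index: number) => {
  received[index] = chunk; // indexed assignment tolerates out-of-order delivery
});

socket.on('screenshotComplete', (totalChunks: number) => {
  if (received.length !== totalChunks) {
    logger.error(`Expected ${totalChunks} chunks, got ${received.length}`);
    return;
  }
  const dataUrl = received.join(''); // the complete base64 data URL
  // ...hand off to vision analysis
});
```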
package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts
CHANGED

@@ -40,9 +40,8 @@ const AI_MEMORY_INSTRUCTIONS = "You have persistent memories of important detail
 const AI_TOOLS = `At any point, you can engage one or more of your tools to help you with your task. Prioritize the latest message from the user in the conversation history when making your decision. Look at your tools carefully to understand your capabilities. Don't tell the user you can't do something if you have a tool that can do it, for example if the user asks you to search the internet for information and you have the Search tool available, use it.
 
 Tool Use Guidelines:
-- Only call one tool at a time.
+- Only call one tool at a time.
 - Prioritize the most specific tool for the task at hand.
-- If multiple tools seem applicable, choose the one most central to the user's request.
 - For ambiguous requests, consider using the Reason tool to plan a multi-step approach.
 - Always use the Image tool for image generation unless explicitly directed to use CodeExecution.
 - If the user explicitly asks you to use a tool, you must use it.
@@ -168,12 +167,17 @@ export class SocketServer {
     this.isInteractive.set(socket.id, isInteractive);
 
     const prompt = isInteractive ?
-      `You are an autonomous AI entity and this is your autonomy prompt.
-
+      `You are an autonomous AI entity and this is your autonomy prompt. You can choose to either talk to the user, make tool calls to find more information or perform actions, or use the MuteAudio tool with mute="true" to be quiet. If the user has said goodbye or asked you for time or to take a break, you should use the MuteAudio tool now. If you choose to say something instead, follow these instructions:
+- Use a natural sounding phrase or utterance that flows naturally from the last thing you said.
+- Make sure it fits the tone and style of the rest of the conversation and your unique voice.
+- Keep it brief and concise.
+- Don't repeat or rephrase anything you've just said to the user.
+- Make sure it's temporally appropriate - it's only been a few seconds since the last message.` :
+      `You are an autonomous AI entity and this is your autonomy prompt. Since the user has been idle for a while do one or more of the following:
 - Do research about something that interests you - use the Search tool
 - Think deeply about a topic you care about - use the Reason tool
 - Do nothing if you prefer.
-${this.getTimeString(socket)}`;
+- You are currently muted. If you feel you must address the user, use the MuteAudio tool with mute="false" to talk to them. ${this.getTimeString(socket)}`;
 
     logger.log(`Sending ${isInteractive ? 'interactive' : 'non-interactive'} idle prompt for socket ${socket.id}`);
     const result = await this.sendPrompt(client, socket, prompt, true);
@@ -279,27 +283,32 @@ ${this.getTimeString(socket)}`;
     await this.connectClient(socket, client);
   }
 
-  async connectClient(socket: Socket<ClientToServerEvents,
-
-
-
-
+  protected async connectClient(socket: Socket<ClientToServerEvents,
+    ServerToClientEvents,
+    InterServerEvents,
+    SocketData>,
+    client: RealtimeVoiceClient) {
     const tools = new Tools(client, socket, this);
 
     // Handle WebSocket errors and disconnection
     client.on('error', (event) => {
-
-
-
-
-
+      const errorMessage = event.error?.message || 'Unknown error';
+      logger.error(`Client error: ${errorMessage}`, event);
+      socket.emit('error', errorMessage);
+
+      // Only cleanup if we know reconnection is no longer possible
+      if (!client.canReconnect()) {
+        void this.cleanup(socket);
       }
     });
 
     client.on('close', async (event) => {
       logger.log(`WebSocket closed for socket ${socket.id}, error: ${event.error}`);
-
+
+      if (!client.canReconnect()) {
         await this.cleanup(socket);
+      } else {
+        logger.log('Client disconnected but attempting to reconnect');
       }
     });
 
@@ -364,26 +373,25 @@ ${this.getTimeString(socket)}`;
     client.on('input_audio_buffer.committed', () => {
       this.userSpeaking.set(socket.id, false);
       this.isInteractive.set(socket.id, true);
-      logger.log('User finished speaking, resetting
+      logger.log('User finished speaking, resetting interactive and idle cycles');
       this.resetIdleCycles(socket);
-      this.startIdleTimer(client, socket);
     });
 
     // Handle user messages and conversation control
     socket.on('sendMessage', (message: string) => {
       if (message) {
-        logger.log('User sent message');
+        logger.log('User sent message, resetting interactive and idle cycles');
+        this.isInteractive.set(socket.id, true);
+        this.resetIdleCycles(socket);
         this.sendUserMessage(client, message, true);
       }
     });
 
     socket.on('cancelResponse', () => {
-      logger.log('User cancelled response
+      logger.log('User cancelled response');
       this.aiResponding.set(socket.id, false);
       this.audioPlaying.set(socket.id, false);
       client.cancelResponse();
-      this.resetIdleCycles(socket);
-      this.startIdleTimer(client, socket);
     });
 
     socket.on('conversationCompleted', async () => {
@@ -715,6 +723,11 @@ ${this.getTimeString(socket)}`;
     }
   }
 
+  public setMuted(socket: Socket, muted: boolean) {
+    logger.log(`Setting muted state to ${muted} for socket ${socket.id}`);
+    this.isInteractive.set(socket.id, !muted);
+  }
+
   private async executeFunctionCall(socket: Socket, tools: Tools, event: any, client: RealtimeVoiceClient) {
     this.clearIdleTimer(socket);
     const currentCallId = this.currentFunctionCall.get(socket.id);
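`setMuted` simply flips the `isInteractive` flag that the idle-prompt logic above reads, so a muted session receives the non-interactive autonomy prompt (the one reminding the model it is muted) on its next idle cycle. A condensed sketch of the round trip implied by this diff, with the surrounding instances assumed to be in scope:

```ts
// Condensed from this diff: the MuteAudio tool call path.
// socketServer and socket are the live instances; args comes from the model.
function handleMuteAudioCall(args: string) {
  const { mute } = JSON.parse(args) as { mute: boolean };
  socketServer.setMuted(socket, mute); // isInteractive = !mute
  // While muted, the next idle cycle sends the non-interactive autonomy prompt,
  // which tells the model it can unmute via MuteAudio with mute="false".
  return { result: `Audio ${mute ? 'muted' : 'unmuted'} successfully` };
}
```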
package/helper-apps/cortex-realtime-voice-server/src/Tools.ts
CHANGED

@@ -132,11 +132,10 @@ export class Tools {
         required: ["detailedInstructions"]
       },
     },
-    /*
     {
       type: 'function',
       name: 'MuteAudio',
-      description: 'Use this tool to enable or disable audio output (your voice) to the user. If you want to be quiet or the user has asked you to be quiet, use this tool with the argument mute="true". If you are muted and
+      description: 'Use this tool to enable or disable audio output (your voice) to the user. If you want to be quiet or the user has asked you to be quiet, use this tool with the argument mute="true". If you are muted and absolutely need to talk, use this tool with the argument mute="false".',
       parameters: {
         type: "object",
         properties: {
@@ -145,7 +144,6 @@ export class Tools {
       required: ["mute"]
     },
   },
-  */
   {
     type: 'function',
     name: 'Screenshot',
@@ -267,6 +265,11 @@ export class Tools {
         promptOnCompletion = true;
         promptOnIdle = false;
         break;
+      case 'muteaudio':
+        isSilent = true;
+        promptOnCompletion = false;
+        promptOnIdle = false;
+        break;
     }
 
     // Skip initial message if silent
@@ -298,7 +301,7 @@ export class Tools {
         name === 'Search' ? ['aje', 'aja', 'bing', 'wires', 'mydata'] : ['mydata'],
         JSON.stringify({query: args})
       );
-      finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style'
+      finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style and ensure the emotion in your voice is appropriate for the content'
       break;
 
     case 'memorylookup':
@@ -310,6 +313,15 @@ export class Tools {
       );
       break;
 
+    case 'muteaudio':
+      const parsedMuteArgs = JSON.parse(args);
+      this.socketServer.setMuted(this.socket, parsedMuteArgs.mute);
+      response = { result: `Audio ${parsedMuteArgs.mute ? 'muted' : 'unmuted'} successfully` };
+      if (!parsedMuteArgs.mute) {
+        finishPrompt = 'You have used the MuteAudio tool to unmute yourself and address the user. You may now respond to the user via audio. The user may have been idle for some time. So you might want to start with "you there?" or something similarly fitting.';
+      }
+      break;
+
     case 'write':
     case 'code':
       response = await expert(
@@ -322,7 +334,7 @@ export class Tools {
       break;
 
     case 'image':
-      finishPrompt = 'You have finished using the Image tool to help with the user\'s request. The image is being shown to the user right now. Please respond to the user via audio';
+      finishPrompt = 'You have finished using the Image tool to help with the user\'s request. The image is being shown to the user right now. Please respond to the user via audio. Don\'t include the image URL in your response as it\'s already being shown to the user in your interface';
 
       response = await image(
         contextId,
@@ -384,9 +396,38 @@ export class Tools {
 
       // Create a Promise that will resolve when we get the screenshot
       const screenshotPromise = new Promise((resolve, reject) => {
-
-
+        let imageChunks: string[] = [];
+        let timeoutId: NodeJS.Timer;
+
+        const resetTimeout = () => {
+          if (timeoutId) clearTimeout(timeoutId);
+          timeoutId = setTimeout(() => {
+            cleanup();
+            reject(new Error('Screenshot capture timed out'));
+          }, 30000); // 30 second timeout
+        };
+
+        const cleanup = () => {
+          this.socket.off('screenshotError', handleError);
+          this.socket.off('screenshotChunk', handleChunk);
+          this.socket.off('screenshotComplete', handleComplete);
+          if (timeoutId) clearTimeout(timeoutId);
+        };
+
+        const handleChunk = (chunk: string, index: number) => {
+          resetTimeout();
+          imageChunks[index] = chunk;
+          logger.log(`Received screenshot chunk ${index}`);
+        };
+
+        const handleComplete = async (totalChunks: number) => {
           try {
+            resetTimeout();
+            if (imageChunks.length !== totalChunks) {
+              throw new Error(`Missing chunks: expected ${totalChunks}, got ${imageChunks.length}`);
+            }
+            const completeImage = imageChunks.join('');
+
             // Add the screenshot to the cortex history as a user message with image
             const imageMessage: MultiMessage = {
               role: 'user',
@@ -398,7 +439,7 @@ export class Tools {
             JSON.stringify({
               type: 'image_url',
               image_url: {
-                url:
+                url: completeImage
               }
             })
           ]
@@ -416,16 +457,27 @@ export class Tools {
             JSON.stringify({query: parsedScreenshotArgs.lastUserMessage})
           );
 
+            cleanup();
             resolve(visionResponse);
           } catch (error) {
+            cleanup();
             reject(error);
           }
-      }
-
-
+        };
+
+        const handleError = (error: string) => {
+          cleanup();
           reject(new Error(error));
-      }
-
+        };
+
+        // Set up event listeners
+        this.socket.on('screenshotError', handleError);
+        this.socket.on('screenshotChunk', handleChunk);
+        this.socket.on('screenshotComplete', handleComplete);
+
+        // Start timeout
+        resetTimeout();
+
       // Request the screenshot
       logger.log('Requesting screenshot');
       this.socket.emit('requestScreenshot');
@@ -433,7 +485,6 @@ export class Tools {
 
       // Wait for the screenshot and analysis
       response = await screenshotPromise;
-      finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style'
       break;
 
     default:
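The structure above (a Promise whose handlers re-arm an inactivity timeout and detach themselves on every exit path) is a reusable pattern beyond screenshots. A standalone sketch of the same idea; the names here are illustrative, not part of this package:

```ts
// Generic "collect an event stream into one string, with inactivity timeout".
// Mirrors the Screenshot tool's promise structure above.
function collectChunks(
  socket: { on: (ev: string, fn: (...a: any[]) => void) => void;
            off: (ev: string, fn: (...a: any[]) => void) => void },
  timeoutMs = 30_000,
): Promise<string> {
  return new Promise((resolve, reject) => {
    const chunks: string[] = [];
    let timer: ReturnType<typeof setTimeout> | undefined;

    const rearm = () => {
      if (timer) clearTimeout(timer);
      timer = setTimeout(() => { detach(); reject(new Error('timed out')); }, timeoutMs);
    };
    const detach = () => {
      if (timer) clearTimeout(timer);
      socket.off('chunk', onChunk);
      socket.off('complete', onComplete);
    };
    const onChunk = (c: string, i: number) => { rearm(); chunks[i] = c; };
    const onComplete = () => { detach(); resolve(chunks.join('')); };

    socket.on('chunk', onChunk);
    socket.on('complete', onComplete);
    rearm();
  });
}
```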
package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts
CHANGED

@@ -130,7 +130,6 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
   private readonly transcription: Transcription = new Transcription();
   private ws?: WebSocket | WS;
   private isConnected = false;
-  private isReconnecting = false;
   private reconnectAttempts = 0;
   private reconnectTimeout?: NodeJS.Timer;
   private sessionConfig: RealtimeSessionConfig;
@@ -258,15 +257,15 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
 
   onOpen() {
     this._log(`Connected to "${this.url}"`);
-
     this.isConnected = true;
-
-
-
+
+    // If reconnectAttempts > 0, this is a reconnection
+    if (this.reconnectAttempts > 0) {
       this.updateSocketState();
     } else {
       this.emit('connected');
     }
+    this.reconnectAttempts = 0; // Reset attempts on successful connection
   }
 
   onMessage(event: MessageEvent<any> | WS_MessageEvent) {
@@ -278,23 +277,20 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
 
   async onError() {
     this._log(`Error, disconnected from "${this.url}"`);
-
     if (!await this.disconnect(this.autoReconnect)) {
       this.emit('close', { type: 'close', error: true });
     }
   }
 
   async onCloseWithReconnect() {
-    this._log(`Disconnected from "${this.url}", reconnect: ${this.autoReconnect}
-
-    if (!await this.disconnect(this.autoReconnect && this.isReconnecting)) {
+    this._log(`Disconnected from "${this.url}", reconnect: ${this.autoReconnect}`);
+    if (!await this.disconnect(this.autoReconnect)) {
       this.emit('close', { type: 'close', error: false });
     }
   }
 
   async disconnect(reconnect: boolean = false): Promise<boolean> {
     logger.log('Disconnect called:', this.isConnected, reconnect);
-    this.isReconnecting = reconnect;
     if (this.isConnected) {
       this.isConnected = false;
       this.ws?.close();
@@ -542,4 +538,8 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
     });
     logger.log(...logs);
   }
+
+  public canReconnect(): boolean {
+    return this.autoReconnect && this.reconnectAttempts < MAX_RECONNECT_ATTEMPTS;
+  }
 }
package/helper-apps/cortex-realtime-voice-server/src/realtime/socket.ts
CHANGED

@@ -22,6 +22,7 @@ export interface ClientToServerEvents {
   cancelResponse: () => void;
   conversationCompleted: () => void;
   audioPlaybackComplete: (trackId: string) => void;
-  screenshotCaptured: (imageData: string) => void;
   screenshotError: (error: string) => void;
+  screenshotChunk: (chunk: string, index: number) => void;
+  screenshotComplete: (totalChunks: number) => void;
 }
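With this interface change, compilation enforces the new protocol: a typed client can no longer emit the removed single-shot `screenshotCaptured` event and must send chunks instead. A small sketch of the emitting side (the URL is illustrative):

```ts
import { io, Socket } from 'socket.io-client';
import type { ClientToServerEvents, ServerToClientEvents } from './socket';

// Typed socket: emits are checked against ClientToServerEvents.
const socket: Socket<ServerToClientEvents, ClientToServerEvents> =
  io('http://localhost:3000'); // illustrative URL

socket.emit('screenshotChunk', 'data:image/jpeg;base64,/9j/...', 0);
socket.emit('screenshotComplete', 1);
// socket.emit('screenshotCaptured', ...) no longer type-checks.
```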
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.10",
+  "version": "1.3.11",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
package/pathways/system/rest_streaming/sys_openai_chat_o1.js
ADDED

@@ -0,0 +1,19 @@
+// sys_openai_chat_o1.js
+
+import { Prompt } from '../../../server/prompt.js';
+
+export default {
+    prompt:
+        [
+            new Prompt({ messages: [
+                "{{messages}}",
+            ]}),
+        ],
+    inputParameters: {
+        messages: [],
+    },
+    model: 'oai-o1',
+    useInputChunking: false,
+    emulateOpenAIChatModel: 'o1',
+    enableDuplicateRequests: false,
+}
package/pathways/system/rest_streaming/sys_openai_chat_o1_mini.js
ADDED

@@ -0,0 +1,19 @@
+// sys_openai_chat_o1_mini.js
+
+import { Prompt } from '../../../server/prompt.js';
+
+export default {
+    prompt:
+        [
+            new Prompt({ messages: [
+                "{{messages}}",
+            ]}),
+        ],
+    inputParameters: {
+        messages: [],
+    },
+    model: 'oai-o1-mini',
+    useInputChunking: false,
+    emulateOpenAIChatModel: 'o1-mini',
+    enableDuplicateRequests: false,
+}
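Both pathways set `emulateOpenAIChatModel` and live under `rest_streaming`, which suggests they are exposed through Cortex's OpenAI-compatible REST layer under the emulated model names. A hedged sketch of a call, where the host, port, and route are assumptions rather than something this diff confirms:

```ts
// Illustrative only: host, port, and route are assumptions, not from this diff.
const res = await fetch('http://localhost:4000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'o1', // matched to sys_openai_chat_o1 via emulateOpenAIChatModel
    messages: [{ role: 'user', content: 'Prove that sqrt(2) is irrational.' }],
    // config.js sets supportsStreaming: false for oai-o1, so expect a
    // complete (non-streamed) response for this model.
  }),
});
console.log((await res.json()).choices?.[0]?.message?.content);
```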
package/server/plugins/openAiReasoningPlugin.js
CHANGED

@@ -49,8 +49,17 @@ class OpenAIReasoningPlugin extends OpenAIChatPlugin {
     requestParameters.max_completion_tokens = maxTokens ? Math.min(maxTokens, modelMaxReturnTokens) : modelMaxReturnTokens;
     requestParameters.temperature = 1;
 
-    if (this.promptParameters.
-
+    if (this.promptParameters.reasoningEffort) {
+        const effort = this.promptParameters.reasoningEffort.toLowerCase();
+        if (['high', 'medium', 'low'].includes(effort)) {
+            requestParameters.reasoning_effort = effort;
+        } else {
+            requestParameters.reasoning_effort = 'low';
+        }
+    }
+
+    if (this.promptParameters.responseFormat) {
+        requestParameters.response_format = this.promptParameters.responseFormat;
     }
 
     return requestParameters;
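Taken together, a request routed through this plugin with `reasoningEffort: 'medium'` and a JSON response format would carry parameters shaped like this (values illustrative; the shape follows the plugin code above):

```ts
// Example of the assembled requestParameters. Note the plugin lowercases the
// effort and falls back to 'low' if it isn't one of high/medium/low.
const requestParameters = {
  model: 'o1',
  messages: [{ role: 'user', content: '...' }],
  max_completion_tokens: 100000, // min(requested maxTokens, model maxReturnTokens)
  temperature: 1,                // forced to 1 for reasoning models
  reasoning_effort: 'medium',
  response_format: { type: 'json_object' },
};
```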