@aj-archipelago/cortex 1.3.10 → 1.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +15 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ScreenshotCapture.tsx +57 -9
- package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +35 -22
- package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +65 -14
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +10 -10
- package/helper-apps/cortex-realtime-voice-server/src/realtime/socket.ts +2 -1
- package/package.json +1 -1
- package/pathways/system/entity/sys_generator_reasoning.js +1 -1
- package/pathways/system/rest_streaming/sys_openai_chat_o1.js +19 -0
- package/pathways/system/rest_streaming/sys_openai_chat_o1_mini.js +19 -0
- package/server/plugins/openAiReasoningPlugin.js +11 -2
package/config.js
CHANGED
@@ -171,6 +171,21 @@ var config = convict({
     "maxReturnTokens": 4096,
     "supportsStreaming": true
   },
+  "oai-o1": {
+    "type": "OPENAI-REASONING",
+    "url": "https://api.openai.com/v1/chat/completions",
+    "headers": {
+      "Authorization": "Bearer {{OPENAI_API_KEY}}",
+      "Content-Type": "application/json"
+    },
+    "params": {
+      "model": "o1"
+    },
+    "requestsPerSecond": 10,
+    "maxTokenLength": 200000,
+    "maxReturnTokens": 100000,
+    "supportsStreaming": false
+  },
   "oai-o1-mini": {
     "type": "OPENAI-REASONING",
     "url": "https://api.openai.com/v1/chat/completions",
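For orientation, an entry like `oai-o1` is what the reasoning plugin reads when it builds the outbound request: the `{{OPENAI_API_KEY}}` placeholder is substituted at request time, `params` is merged into the request body, and `supportsStreaming: false` means responses come back whole rather than as a token stream. A minimal sketch of that resolution, assuming a simple template substitution (the `interpolate` helper is illustrative, not a Cortex API):

```ts
// Illustrative sketch: resolving an "oai-o1"-style model entry into a request.
// interpolate() stands in for Cortex's own placeholder substitution.
const interpolate = (s: string): string =>
  s.replace(/\{\{(\w+)\}\}/g, (_, name) => process.env[name] ?? '');

const model = {
  url: 'https://api.openai.com/v1/chat/completions',
  headers: {
    'Authorization': 'Bearer {{OPENAI_API_KEY}}',
    'Content-Type': 'application/json',
  },
  params: { model: 'o1' },
};

const res = await fetch(model.url, {
  method: 'POST',
  headers: Object.fromEntries(
    Object.entries(model.headers).map(([k, v]) => [k, interpolate(v)]),
  ),
  // supportsStreaming is false for this model, so no stream flag is sent.
  body: JSON.stringify({ ...model.params, messages: [{ role: 'user', content: 'Hello' }] }),
});
```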
package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ScreenshotCapture.tsx
CHANGED
@@ -3,6 +3,10 @@ import { Socket } from 'socket.io-client';
 import { ClientToServerEvents, ServerToClientEvents } from '../../../../src/realtime/socket';
 import { logger } from '../../utils/logger';
 
+const MAX_IMAGE_SIZE = 5 * 1024 * 1024; // 5MB limit
+const MAX_DIMENSION = 3840; // Max width/height
+const COMPRESSION_QUALITY = 0.9; // Image quality (0.0 to 1.0)
+
 type ScreenshotCaptureProps = {
   socket: Socket<ServerToClientEvents, ClientToServerEvents>;
 };
@@ -57,21 +61,49 @@ export const ScreenshotCapture = ({ socket }: ScreenshotCaptureProps) => {
         };
       });
 
-      // Create canvas and
+      // Create canvas and calculate dimensions
+      let width = video.videoWidth;
+      let height = video.videoHeight;
+
+      // Scale down if dimensions exceed maximum
+      if (width > MAX_DIMENSION || height > MAX_DIMENSION) {
+        const aspectRatio = width / height;
+        if (width > height) {
+          width = MAX_DIMENSION;
+          height = Math.round(width / aspectRatio);
+        } else {
+          height = MAX_DIMENSION;
+          width = Math.round(height * aspectRatio);
+        }
+      }
+
       const canvas = document.createElement('canvas');
-      canvas.width =
-      canvas.height =
+      canvas.width = width;
+      canvas.height = height;
       const ctx = canvas.getContext('2d');
 
       if (!ctx) {
         throw new Error('Could not get canvas context');
       }
 
-      // Draw the video frame
-      ctx.drawImage(video, 0, 0);
+      // Draw the video frame with scaling if needed
+      ctx.drawImage(video, 0, 0, width, height);
+
+      // Try different compression levels if needed
+      let imageData = canvas.toDataURL('image/jpeg', COMPRESSION_QUALITY);
+      let attempts = 3;
+      let currentQuality = COMPRESSION_QUALITY;
 
-
-
+      while (imageData.length > MAX_IMAGE_SIZE && attempts > 0) {
+        currentQuality *= 0.8; // Reduce quality by 20% each attempt
+        imageData = canvas.toDataURL('image/jpeg', currentQuality);
+        attempts--;
+        logger.log(`Compressing image, attempt ${3 - attempts}, size: ${Math.round(imageData.length / 1024)}KB`);
+      }
+
+      if (imageData.length > MAX_IMAGE_SIZE) {
+        throw new Error('Screenshot too large even after compression');
+      }
 
       // Clean up
       video.remove();
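Note that `imageData` is a base64 data URL, so the 5MB `MAX_IMAGE_SIZE` limit is measured in string characters rather than raw image bytes; base64 inflates payloads by roughly a third. A small sketch of the relationship, useful when reasoning about the real JPEG size (this is plain base64 arithmetic, not code from the package):

```ts
// Base64 encodes 3 bytes into 4 characters, so the byte payload of a data URL
// is about 3/4 of its base64 portion's length.
function approximateBytes(dataUrl: string): number {
  const base64 = dataUrl.slice(dataUrl.indexOf(',') + 1);
  return Math.floor((base64.length * 3) / 4);
}

// A 5 * 1024 * 1024 character budget therefore holds roughly 3.75MB of JPEG data.
```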
@@ -88,8 +120,24 @@ export const ScreenshotCapture = ({ socket }: ScreenshotCaptureProps) => {
       // Capture frame from stream
       const imageData = await captureFrame(stream);
 
-      logger.log(
-
+      logger.log(`Screenshot captured (size: ${Math.round(imageData.length / 1024)}KB)...`);
+
+      // Split into ~500KB chunks
+      const CHUNK_SIZE = 500 * 1024;
+      const chunks: string[] = [];
+
+      for (let i = 0; i < imageData.length; i += CHUNK_SIZE) {
+        chunks.push(imageData.slice(i, i + CHUNK_SIZE));
+      }
+
+      // Send chunks
+      chunks.forEach((chunk, index) => {
+        logger.log(`Sending chunk ${index + 1}/${chunks.length}`);
+        socket.emit('screenshotChunk', chunk, index);
+      });
+
+      // Signal completion
+      socket.emit('screenshotComplete', chunks.length);
 
     } catch (error) {
       logger.error('Error handling screenshot request:', error);
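The matching receiver lives server-side in Tools.ts (shown later in this diff, where inactivity timeouts and listener cleanup are added). Stripped to its essentials, reassembly of this protocol looks like the sketch below, using the event names the diff introduces:

```ts
// Bare-bones reassembly sketch for the chunk protocol above; the production
// version in Tools.ts below adds a 30s inactivity timeout and cleanup.
const received: string[] = [];

socket.on('screenshotChunk', (chunk: string, index: number) => {
  received[index] = chunk; // indexed assignment tolerates out-of-order delivery
});

socket.on('screenshotComplete', (totalChunks: number) => {
  if (received.length !== totalChunks) {
    logger.error(`Expected ${totalChunks} chunks, got ${received.length}`);
    return;
  }
  const dataUrl = received.join(''); // the complete base64 data URL
  // ...hand off to vision analysis
});
```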
package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts
CHANGED

@@ -40,9 +40,8 @@ const AI_MEMORY_INSTRUCTIONS = "You have persistent memories of important detail
 const AI_TOOLS = `At any point, you can engage one or more of your tools to help you with your task. Prioritize the latest message from the user in the conversation history when making your decision. Look at your tools carefully to understand your capabilities. Don't tell the user you can't do something if you have a tool that can do it, for example if the user asks you to search the internet for information and you have the Search tool available, use it.
 
 Tool Use Guidelines:
-- Only call one tool at a time.
+- Only call one tool at a time.
 - Prioritize the most specific tool for the task at hand.
-- If multiple tools seem applicable, choose the one most central to the user's request.
 - For ambiguous requests, consider using the Reason tool to plan a multi-step approach.
 - Always use the Image tool for image generation unless explicitly directed to use CodeExecution.
 - If the user explicitly asks you to use a tool, you must use it.
@@ -168,12 +167,17 @@ export class SocketServer {
     this.isInteractive.set(socket.id, isInteractive);
 
     const prompt = isInteractive ?
-      `You are an autonomous AI entity and this is your autonomy prompt.
-
+      `You are an autonomous AI entity and this is your autonomy prompt. You can choose to either talk to the user, make tool calls to find more information or perform actions, or use the MuteAudio tool with mute="true" to be quiet. If the user has said goodbye or asked you for time or to take a break, you should use the MuteAudio tool now. If you choose to say something instead, follow these instructions:
+- Use a natural sounding phrase or utterance that flows naturally from the last thing you said.
+- Make sure it fits the tone and style of the rest of the conversation and your unique voice.
+- Keep it brief and concise.
+- Don't repeat or rephrase anything you've just said to the user.
+- Make sure it's temporally appropriate - it's only been a few seconds since the last message.` :
+      `You are an autonomous AI entity and this is your autonomy prompt. Since the user has been idle for a while do one or more of the following:
 - Do research about something that interests you - use the Search tool
 - Think deeply about a topic you care about - use the Reason tool
 - Do nothing if you prefer.
-${this.getTimeString(socket)}`;
+- You are currently muted. If you feel you must address the user, use the MuteAudio tool with mute="false" to talk to them. ${this.getTimeString(socket)}`;
 
     logger.log(`Sending ${isInteractive ? 'interactive' : 'non-interactive'} idle prompt for socket ${socket.id}`);
     const result = await this.sendPrompt(client, socket, prompt, true);
@@ -279,27 +283,32 @@ ${this.getTimeString(socket)}`;
     await this.connectClient(socket, client);
   }
 
-  async connectClient(socket: Socket<ClientToServerEvents,
-
-
-
-
+  protected async connectClient(socket: Socket<ClientToServerEvents,
+    ServerToClientEvents,
+    InterServerEvents,
+    SocketData>,
+    client: RealtimeVoiceClient) {
     const tools = new Tools(client, socket, this);
 
     // Handle WebSocket errors and disconnection
     client.on('error', (event) => {
-
-
-
-
-
+      const errorMessage = event.error?.message || 'Unknown error';
+      logger.error(`Client error: ${errorMessage}`, event);
+      socket.emit('error', errorMessage);
+
+      // Only cleanup if we know reconnection is no longer possible
+      if (!client.canReconnect()) {
+        void this.cleanup(socket);
       }
     });
 
     client.on('close', async (event) => {
       logger.log(`WebSocket closed for socket ${socket.id}, error: ${event.error}`);
-
+
+      if (!client.canReconnect()) {
         await this.cleanup(socket);
+      } else {
+        logger.log('Client disconnected but attempting to reconnect');
       }
     });
 
@@ -364,26 +373,25 @@ ${this.getTimeString(socket)}`;
     client.on('input_audio_buffer.committed', () => {
       this.userSpeaking.set(socket.id, false);
       this.isInteractive.set(socket.id, true);
-      logger.log('User finished speaking, resetting
+      logger.log('User finished speaking, resetting interactive and idle cycles');
       this.resetIdleCycles(socket);
-      this.startIdleTimer(client, socket);
     });
 
     // Handle user messages and conversation control
     socket.on('sendMessage', (message: string) => {
       if (message) {
-        logger.log('User sent message');
+        logger.log('User sent message, resetting interactive and idle cycles');
+        this.isInteractive.set(socket.id, true);
+        this.resetIdleCycles(socket);
         this.sendUserMessage(client, message, true);
       }
     });
 
     socket.on('cancelResponse', () => {
-      logger.log('User cancelled response
+      logger.log('User cancelled response');
       this.aiResponding.set(socket.id, false);
       this.audioPlaying.set(socket.id, false);
       client.cancelResponse();
-      this.resetIdleCycles(socket);
-      this.startIdleTimer(client, socket);
     });
 
     socket.on('conversationCompleted', async () => {
@@ -715,6 +723,11 @@ ${this.getTimeString(socket)}`;
     }
   }
 
+  public setMuted(socket: Socket, muted: boolean) {
+    logger.log(`Setting muted state to ${muted} for socket ${socket.id}`);
+    this.isInteractive.set(socket.id, !muted);
+  }
+
   private async executeFunctionCall(socket: Socket, tools: Tools, event: any, client: RealtimeVoiceClient) {
     this.clearIdleTimer(socket);
     const currentCallId = this.currentFunctionCall.get(socket.id);
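`setMuted` simply flips the `isInteractive` flag that the idle-prompt logic above reads, so a muted session receives the non-interactive autonomy prompt (the one reminding the model it is muted) on its next idle cycle. A condensed sketch of the round trip implied by this diff, with the surrounding instances assumed to be in scope:

```ts
// Condensed from this diff: the MuteAudio tool call path.
// socketServer and socket are the live instances; args comes from the model.
function handleMuteAudioCall(args: string) {
  const { mute } = JSON.parse(args) as { mute: boolean };
  socketServer.setMuted(socket, mute); // isInteractive = !mute
  // While muted, the next idle cycle sends the non-interactive autonomy prompt,
  // which tells the model it can unmute via MuteAudio with mute="false".
  return { result: `Audio ${mute ? 'muted' : 'unmuted'} successfully` };
}
```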
package/helper-apps/cortex-realtime-voice-server/src/Tools.ts
CHANGED

@@ -132,11 +132,10 @@ export class Tools {
         required: ["detailedInstructions"]
       },
     },
-    /*
     {
       type: 'function',
       name: 'MuteAudio',
-      description: 'Use this tool to enable or disable audio output (your voice) to the user. If you want to be quiet or the user has asked you to be quiet, use this tool with the argument mute="true". If you are muted and
+      description: 'Use this tool to enable or disable audio output (your voice) to the user. If you want to be quiet or the user has asked you to be quiet, use this tool with the argument mute="true". If you are muted and absolutely need to talk, use this tool with the argument mute="false".',
       parameters: {
         type: "object",
         properties: {
@@ -145,7 +144,6 @@ export class Tools {
       required: ["mute"]
     },
   },
-  */
   {
     type: 'function',
     name: 'Screenshot',
@@ -267,6 +265,11 @@ export class Tools {
         promptOnCompletion = true;
         promptOnIdle = false;
         break;
+      case 'muteaudio':
+        isSilent = true;
+        promptOnCompletion = false;
+        promptOnIdle = false;
+        break;
     }
 
     // Skip initial message if silent
@@ -298,7 +301,7 @@ export class Tools {
         name === 'Search' ? ['aje', 'aja', 'bing', 'wires', 'mydata'] : ['mydata'],
         JSON.stringify({query: args})
       );
-      finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style'
+      finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style and ensure the emotion in your voice is appropriate for the content'
       break;
 
     case 'memorylookup':
@@ -310,6 +313,15 @@ export class Tools {
       );
       break;
 
+    case 'muteaudio':
+      const parsedMuteArgs = JSON.parse(args);
+      this.socketServer.setMuted(this.socket, parsedMuteArgs.mute);
+      response = { result: `Audio ${parsedMuteArgs.mute ? 'muted' : 'unmuted'} successfully` };
+      if (!parsedMuteArgs.mute) {
+        finishPrompt = 'You have used the MuteAudio tool to unmute yourself and address the user. You may now respond to the user via audio. The user may have been idle for some time. So you might want to start with "you there?" or something similarly fitting.';
+      }
+      break;
+
     case 'write':
     case 'code':
       response = await expert(
@@ -322,7 +334,7 @@ export class Tools {
       break;
 
     case 'image':
-      finishPrompt = 'You have finished using the Image tool to help with the user\'s request. The image is being shown to the user right now. Please respond to the user via audio';
+      finishPrompt = 'You have finished using the Image tool to help with the user\'s request. The image is being shown to the user right now. Please respond to the user via audio. Don\'t include the image URL in your response as it\'s already being shown to the user in your interface';
 
       response = await image(
         contextId,
@@ -384,9 +396,38 @@ export class Tools {
 
       // Create a Promise that will resolve when we get the screenshot
       const screenshotPromise = new Promise((resolve, reject) => {
-
-
+        let imageChunks: string[] = [];
+        let timeoutId: NodeJS.Timer;
+
+        const resetTimeout = () => {
+          if (timeoutId) clearTimeout(timeoutId);
+          timeoutId = setTimeout(() => {
+            cleanup();
+            reject(new Error('Screenshot capture timed out'));
+          }, 30000); // 30 second timeout
+        };
+
+        const cleanup = () => {
+          this.socket.off('screenshotError', handleError);
+          this.socket.off('screenshotChunk', handleChunk);
+          this.socket.off('screenshotComplete', handleComplete);
+          if (timeoutId) clearTimeout(timeoutId);
+        };
+
+        const handleChunk = (chunk: string, index: number) => {
+          resetTimeout();
+          imageChunks[index] = chunk;
+          logger.log(`Received screenshot chunk ${index}`);
+        };
+
+        const handleComplete = async (totalChunks: number) => {
           try {
+            resetTimeout();
+            if (imageChunks.length !== totalChunks) {
+              throw new Error(`Missing chunks: expected ${totalChunks}, got ${imageChunks.length}`);
+            }
+            const completeImage = imageChunks.join('');
+
             // Add the screenshot to the cortex history as a user message with image
             const imageMessage: MultiMessage = {
               role: 'user',
@@ -398,7 +439,7 @@ export class Tools {
             JSON.stringify({
               type: 'image_url',
               image_url: {
-                url:
+                url: completeImage
               }
             })
           ]
@@ -416,16 +457,27 @@ export class Tools {
             JSON.stringify({query: parsedScreenshotArgs.lastUserMessage})
           );
 
+            cleanup();
             resolve(visionResponse);
           } catch (error) {
+            cleanup();
             reject(error);
           }
-      }
-
-
+        };
+
+        const handleError = (error: string) => {
+          cleanup();
           reject(new Error(error));
-      }
-
+        };
+
+        // Set up event listeners
+        this.socket.on('screenshotError', handleError);
+        this.socket.on('screenshotChunk', handleChunk);
+        this.socket.on('screenshotComplete', handleComplete);
+
+        // Start timeout
+        resetTimeout();
+
       // Request the screenshot
       logger.log('Requesting screenshot');
       this.socket.emit('requestScreenshot');
@@ -433,7 +485,6 @@ export class Tools {
 
       // Wait for the screenshot and analysis
       response = await screenshotPromise;
-      finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style'
       break;
 
     default:
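The structure above (a Promise whose handlers re-arm an inactivity timeout and detach themselves on every exit path) is a reusable pattern beyond screenshots. A standalone sketch of the same idea; the names here are illustrative, not part of this package:

```ts
// Generic "collect an event stream into one string, with inactivity timeout".
// Mirrors the Screenshot tool's promise structure above.
function collectChunks(
  socket: { on: (ev: string, fn: (...a: any[]) => void) => void;
            off: (ev: string, fn: (...a: any[]) => void) => void },
  timeoutMs = 30_000,
): Promise<string> {
  return new Promise((resolve, reject) => {
    const chunks: string[] = [];
    let timer: ReturnType<typeof setTimeout> | undefined;

    const rearm = () => {
      if (timer) clearTimeout(timer);
      timer = setTimeout(() => { detach(); reject(new Error('timed out')); }, timeoutMs);
    };
    const detach = () => {
      if (timer) clearTimeout(timer);
      socket.off('chunk', onChunk);
      socket.off('complete', onComplete);
    };
    const onChunk = (c: string, i: number) => { rearm(); chunks[i] = c; };
    const onComplete = () => { detach(); resolve(chunks.join('')); };

    socket.on('chunk', onChunk);
    socket.on('complete', onComplete);
    rearm();
  });
}
```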
package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts
CHANGED

@@ -130,7 +130,6 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
   private readonly transcription: Transcription = new Transcription();
   private ws?: WebSocket | WS;
   private isConnected = false;
-  private isReconnecting = false;
   private reconnectAttempts = 0;
   private reconnectTimeout?: NodeJS.Timer;
   private sessionConfig: RealtimeSessionConfig;
@@ -258,15 +257,15 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
 
   onOpen() {
     this._log(`Connected to "${this.url}"`);
-
     this.isConnected = true;
-
-
-
+
+    // If reconnectAttempts > 0, this is a reconnection
+    if (this.reconnectAttempts > 0) {
       this.updateSocketState();
     } else {
       this.emit('connected');
     }
+    this.reconnectAttempts = 0; // Reset attempts on successful connection
   }
 
   onMessage(event: MessageEvent<any> | WS_MessageEvent) {
@@ -278,23 +277,20 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
 
   async onError() {
     this._log(`Error, disconnected from "${this.url}"`);
-
     if (!await this.disconnect(this.autoReconnect)) {
       this.emit('close', { type: 'close', error: true });
     }
   }
 
   async onCloseWithReconnect() {
-    this._log(`Disconnected from "${this.url}", reconnect: ${this.autoReconnect}
-
-    if (!await this.disconnect(this.autoReconnect && this.isReconnecting)) {
+    this._log(`Disconnected from "${this.url}", reconnect: ${this.autoReconnect}`);
+    if (!await this.disconnect(this.autoReconnect)) {
       this.emit('close', { type: 'close', error: false });
     }
   }
 
   async disconnect(reconnect: boolean = false): Promise<boolean> {
     logger.log('Disconnect called:', this.isConnected, reconnect);
-    this.isReconnecting = reconnect;
     if (this.isConnected) {
       this.isConnected = false;
       this.ws?.close();
@@ -542,4 +538,8 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
     });
     logger.log(...logs);
   }
+
+  public canReconnect(): boolean {
+    return this.autoReconnect && this.reconnectAttempts < MAX_RECONNECT_ATTEMPTS;
+  }
 }
package/helper-apps/cortex-realtime-voice-server/src/realtime/socket.ts
CHANGED

@@ -22,6 +22,7 @@ export interface ClientToServerEvents {
   cancelResponse: () => void;
   conversationCompleted: () => void;
   audioPlaybackComplete: (trackId: string) => void;
-  screenshotCaptured: (imageData: string) => void;
   screenshotError: (error: string) => void;
+  screenshotChunk: (chunk: string, index: number) => void;
+  screenshotComplete: (totalChunks: number) => void;
 }
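With this interface change, compilation enforces the new protocol: a typed client can no longer emit the removed single-shot `screenshotCaptured` event and must send chunks instead. A small sketch of the emitting side (the URL is illustrative):

```ts
import { io, Socket } from 'socket.io-client';
import type { ClientToServerEvents, ServerToClientEvents } from './socket';

// Typed socket: emits are checked against ClientToServerEvents.
const socket: Socket<ServerToClientEvents, ClientToServerEvents> =
  io('http://localhost:3000'); // illustrative URL

socket.emit('screenshotChunk', 'data:image/jpeg;base64,/9j/...', 0);
socket.emit('screenshotComplete', 1);
// socket.emit('screenshotCaptured', ...) no longer type-checks.
```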
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.10",
+  "version": "1.3.11",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
package/pathways/system/rest_streaming/sys_openai_chat_o1.js
ADDED

@@ -0,0 +1,19 @@
+// sys_openai_chat_o1.js
+
+import { Prompt } from '../../../server/prompt.js';
+
+export default {
+    prompt:
+        [
+            new Prompt({ messages: [
+                "{{messages}}",
+            ]}),
+        ],
+    inputParameters: {
+        messages: [],
+    },
+    model: 'oai-o1',
+    useInputChunking: false,
+    emulateOpenAIChatModel: 'o1',
+    enableDuplicateRequests: false,
+}
package/pathways/system/rest_streaming/sys_openai_chat_o1_mini.js
ADDED

@@ -0,0 +1,19 @@
+// sys_openai_chat_o1_mini.js
+
+import { Prompt } from '../../../server/prompt.js';
+
+export default {
+    prompt:
+        [
+            new Prompt({ messages: [
+                "{{messages}}",
+            ]}),
+        ],
+    inputParameters: {
+        messages: [],
+    },
+    model: 'oai-o1-mini',
+    useInputChunking: false,
+    emulateOpenAIChatModel: 'o1-mini',
+    enableDuplicateRequests: false,
+}
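Both pathways set `emulateOpenAIChatModel` and live under `rest_streaming`, which suggests they are exposed through Cortex's OpenAI-compatible REST layer under the emulated model names. A hedged sketch of a call, where the host, port, and route are assumptions rather than something this diff confirms:

```ts
// Illustrative only: host, port, and route are assumptions, not from this diff.
const res = await fetch('http://localhost:4000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'o1', // matched to sys_openai_chat_o1 via emulateOpenAIChatModel
    messages: [{ role: 'user', content: 'Prove that sqrt(2) is irrational.' }],
    // config.js sets supportsStreaming: false for oai-o1, so expect a
    // complete (non-streamed) response for this model.
  }),
});
console.log((await res.json()).choices?.[0]?.message?.content);
```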
package/server/plugins/openAiReasoningPlugin.js
CHANGED

@@ -49,8 +49,17 @@ class OpenAIReasoningPlugin extends OpenAIChatPlugin {
     requestParameters.max_completion_tokens = maxTokens ? Math.min(maxTokens, modelMaxReturnTokens) : modelMaxReturnTokens;
     requestParameters.temperature = 1;
 
-    if (this.promptParameters.
-
+    if (this.promptParameters.reasoningEffort) {
+        const effort = this.promptParameters.reasoningEffort.toLowerCase();
+        if (['high', 'medium', 'low'].includes(effort)) {
+            requestParameters.reasoning_effort = effort;
+        } else {
+            requestParameters.reasoning_effort = 'low';
+        }
+    }
+
+    if (this.promptParameters.responseFormat) {
+        requestParameters.response_format = this.promptParameters.responseFormat;
     }
 
     return requestParameters;
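Taken together, a request routed through this plugin with `reasoningEffort: 'medium'` and a JSON response format would carry parameters shaped like this (values illustrative; the shape follows the plugin code above):

```ts
// Example of the assembled requestParameters. Note the plugin lowercases the
// effort and falls back to 'low' if it isn't one of high/medium/low.
const requestParameters = {
  model: 'o1',
  messages: [{ role: 'user', content: '...' }],
  max_completion_tokens: 100000, // min(requested maxTokens, model maxReturnTokens)
  temperature: 1,                // forced to 1 for reasoning models
  reasoning_effort: 'medium',
  response_format: { type: 'json_object' },
};
```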