npm - @aj-archipelago/cortex - Versions diffs - 1.3.7 → 1.3.9 - Mend

@aj-archipelago/cortex 1.3.7 → 1.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/README.md +578 -80
package/helper-apps/cortex-file-handler/blobHandler.js +27 -8
package/helper-apps/cortex-file-handler/index.js +20 -2
package/helper-apps/cortex-realtime-voice-server/client/src/chat/Chat.tsx +51 -11
package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +220 -183
package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +18 -34
package/helper-apps/cortex-realtime-voice-server/src/cortex/utils.ts +29 -15
package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +47 -1
package/helper-apps/cortex-realtime-voice-server/src/utils/prompt.ts +2 -11
package/package.json +1 -1
package/pathways/system/entity/memory/sys_search_memory.js +2 -1
package/pathways/system/entity/sys_entity_start.js +6 -7
package/pathways/system/entity/sys_generator_voice_sample.js +2 -2
package/pathways/translate_gpt4_omni.js +20 -0
package/pathways/translate_subtitle.js +326 -135
package/pathways/translate_subtitle_helper.js +4 -16
package/server/plugins/azureVideoTranslatePlugin.js +27 -15
package/server/plugins/claude3VertexPlugin.js +10 -17
package/server/plugins/gemini15VisionPlugin.js +16 -3
package/server/plugins/modelPlugin.js +27 -0
package/server/plugins/openAiVisionPlugin.js +26 -8
package/tests/multimodal_conversion.test.js +88 -12
package/tests/translate_srt.test.js +66 -14

package/server/plugins/gemini15VisionPlugin.js CHANGED

@@ -40,21 +40,34 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
                         } else if (type === 'text') {
                             return { text: text };
                         } else if (type === 'image_url') {
+                            if (!fileUrl) {
+                                return null;
+                            }
                             if (fileUrl.startsWith('gs://')) {
+                                // Validate GCS URL has at least a bucket name after gs://
+                                const gcsPath = fileUrl.slice(5); // Remove 'gs://'
+                                if (!gcsPath || gcsPath.length < 1) {
+                                    return null;
+                                }
                                 return {
                                     fileData: {
                                         mimeType: mime.lookup(fileUrl) || 'image/jpeg',
                                         fileUri: fileUrl
                                     }
                                 };
-                            } else {
+                            } else if (fileUrl.includes('base64,')) {
+                                const base64Data = fileUrl.split('base64,')[1];
+                                if (!base64Data) {
+                                    return null;
+                                }
                                 return {
                                     inlineData: {
-                                        mimeType: 'image/jpeg', // fixed for now as there's no MIME type in the request
-                                        data: fileUrl.split('base64,')[1]
+                                        mimeType: 'image/jpeg',
+                                        data: base64Data
                                     }
                                 };
                             }
+                            return null;
                         }
                     } catch (e) {
                         // this space intentionally left blank

package/server/plugins/modelPlugin.js CHANGED

@@ -5,11 +5,13 @@ import { encode } from '../../lib/encodeCache.js';
 import { getFirstNToken } from '../chunker.js';
 import logger, { obscureUrlParams } from '../../lib/logger.js';
 import { config } from '../../config.js';
+import axios from 'axios';
 const DEFAULT_MAX_TOKENS = 4096;
 const DEFAULT_MAX_RETURN_TOKENS = 256;
 const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
 const DEFAULT_MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20MB default
+const DEFAULT_ALLOWED_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
 class ModelPlugin {
     constructor(pathway, model) {
@@ -22,6 +24,7 @@ class ModelPlugin {
         this.pathwayName = pathway.name;
         this.promptParameters = {};
         this.isMultiModal = false;
+        this.allowedMIMETypes = model.allowedMIMETypes || DEFAULT_ALLOWED_MIME_TYPES;
         // Make all of the parameters defined on the pathway itself available to the prompt
         for (const [k, v] of Object.entries(pathway)) {
@@ -36,6 +39,30 @@ class ModelPlugin {
         this.requestCount = 0;
     }
+    async validateImageUrl(url) {
+        if (url.startsWith('data:')) {
+            const [, mimeType = ""] = url.match(/data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+).*,.*/) || [];
+            return this.allowedMIMETypes.includes(mimeType);
+        }
+        try {
+            const headResponse = await axios.head(url, {
+                timeout: 30000,
+                maxRedirects: 5
+            });
+            const contentType = headResponse.headers['content-type'];
+            if (!contentType || !this.allowedMIMETypes.includes(contentType)) {
+                logger.warn(`Unsupported image type: ${contentType} - skipping image content.`);
+                return false;
+            }
+            return true;
+        } catch (e) {
+            logger.error(`Failed to validate image URL: ${url}. ${e}`);
+            return false;
+        }
+    }
     safeGetEncodedLength(data) {
         if (data && data.length > 100000) {
             return data.length * 3 / 16;

package/server/plugins/openAiVisionPlugin.js CHANGED

@@ -17,14 +17,14 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
         this.isMultiModal = true;
     }
-    tryParseMessages(messages) {
-        return messages.map(message => {
+    async tryParseMessages(messages) {
+        return await Promise.all(messages.map(async message => {
             try {
                 if (message.role === "tool") {
                     return message;
                 }
                 if (Array.isArray(message.content)) {
-                    message.content = message.content.map(item => {
+                    message.content = await Promise.all(message.content.map(async item => {
                         const parsedItem = safeJsonParse(item);
                         if (typeof parsedItem === 'string') {
@@ -32,17 +32,21 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
                         }
                         if (typeof parsedItem === 'object' && parsedItem !== null && parsedItem.type === 'image_url') {
-                            return {type: parsedItem.type, image_url: {url: parsedItem.url || parsedItem.image_url.url}};
+                            const url = parsedItem.url || parsedItem.image_url?.url;
+                            if (url && await this.validateImageUrl(url)) {
+                                return {type: parsedItem.type, image_url: {url}};
+                            }
+                            return { type: 'text', text: 'Image skipped: unsupported format' };
                         }
                         return parsedItem;
-                    });
+                    }));
                 }
             } catch (e) {
                 return message;
             }
             return message;
-        });
+        }));
     }
     // Override the logging function to display the messages and responses
@@ -100,10 +104,10 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
     }
-    getRequestParameters(text, parameters, prompt) {
+    async getRequestParameters(text, parameters, prompt) {
         const requestParameters = super.getRequestParameters(text, parameters, prompt);
-        this.tryParseMessages(requestParameters.messages);
+        requestParameters.messages = await this.tryParseMessages(requestParameters.messages);
         const modelMaxReturnTokens = this.getModelMaxReturnTokens();
         const maxTokensPrompt = this.promptParameters.max_tokens;
@@ -120,6 +124,20 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
         return requestParameters;
     }
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = await this.getRequestParameters(text, parameters, prompt);
+        const { stream } = parameters;
+        cortexRequest.data = {
+            ...(cortexRequest.data || {}),
+            ...requestParameters,
+        };
+        cortexRequest.params = {}; // query params
+        cortexRequest.stream = stream;
+        return this.executeRequest(cortexRequest);
+    }
 }
 export default OpenAIVisionPlugin;

package/tests/multimodal_conversion.test.js CHANGED

@@ -31,7 +31,7 @@ test('OpenAI to Claude conversion data url', async (t) => {
         ]}
     ];
-    const parsedOpenAI = openai.tryParseMessages(openaiMessages);
+    const parsedOpenAI = await openai.tryParseMessages(openaiMessages);
     const { system, modifiedMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
     t.is(modifiedMessages.length, 1);
@@ -55,7 +55,7 @@ test('OpenAI to Claude conversion image url', async (t) => {
         ]}
     ];
-    const parsedOpenAI = openai.tryParseMessages(openaiMessages);
+    const parsedOpenAI = await openai.tryParseMessages(openaiMessages);
     const { system, modifiedMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
     t.is(modifiedMessages.length, 1);
@@ -68,8 +68,8 @@ test('OpenAI to Claude conversion image url', async (t) => {
 });
 // Test OpenAI to Gemini conversion
-test('OpenAI to Gemini conversion', t => {
-    const { openai, gemini, gemini15 } = createPlugins();
+test('OpenAI to Gemini conversion', async (t) => {
+    const { gemini, gemini15 } = createPlugins();
     const openaiMessages = [
         { role: 'system', content: 'You are a helpful assistant.' },
@@ -79,9 +79,8 @@ test('OpenAI to Gemini conversion', t => {
         ]}
     ];
-    const parsedOpenAI = openai.tryParseMessages(openaiMessages);
-    const { modifiedMessages, system } = gemini.convertMessagesToGemini(parsedOpenAI);
-    const { modifiedMessages: modifiedMessages15, system: system15 } = gemini15.convertMessagesToGemini(parsedOpenAI);
+    const { modifiedMessages, system } = gemini.convertMessagesToGemini(openaiMessages);
+    const { modifiedMessages: modifiedMessages15, system: system15 } = gemini15.convertMessagesToGemini(openaiMessages);
     // Gemini
     t.is(modifiedMessages.length, 1);
@@ -188,11 +187,12 @@ test('Unsupported mime type conversion', async (t) => {
         ]}
     ];
-    const parsedOpenAI = openai.tryParseMessages(pdfMessage);
+    const parsedOpenAI = await openai.tryParseMessages(pdfMessage);
     const { system, modifiedMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
-    t.is(modifiedMessages[0].content.length, 1);
+    t.is(modifiedMessages[0].content.length, 2);
     t.is(modifiedMessages[0].content[0].text, 'Can you analyze this PDF?');
+    t.is(modifiedMessages[0].content[1].text, 'Image skipped: unsupported format');
 });
 // Test pathological cases
@@ -215,7 +215,7 @@ test('Pathological cases', async (t) => {
         { role: 'user', content: 'Another question' },
     ];
-    const parsedOpenAI = openai.tryParseMessages(pathologicalMessages);
+    const parsedOpenAI = await openai.tryParseMessages(pathologicalMessages);
     // Test Claude conversion
     const { system: claudeSystem, modifiedMessages: claudeMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
@@ -273,7 +273,7 @@ test('Empty message array', async (t) => {
     const emptyMessages = [];
-    const parsedOpenAI = openai.tryParseMessages(emptyMessages);
+    const parsedOpenAI = await openai.tryParseMessages(emptyMessages);
     // Test Claude conversion
     const { system: claudeSystem, modifiedMessages: claudeMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
@@ -302,7 +302,7 @@ test('Only system messages', async (t) => {
         { role: 'system', content: 'You are helpful and friendly.' },
     ];
-    const parsedOpenAI = openai.tryParseMessages(onlySystemMessages);
+    const parsedOpenAI = await openai.tryParseMessages(onlySystemMessages);
     // Test Claude conversion
     const { system: claudeSystem, modifiedMessages: claudeMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
@@ -324,3 +324,79 @@ test('Only system messages', async (t) => {
     t.is(geminiSystem15.parts[1].text, 'You are helpful and friendly.');
     t.is(geminiMessages15.length, 0);
 });
+// Test different image URL types for Gemini 1.5
+test('Gemini 1.5 image URL type handling', t => {
+    const { gemini15 } = createPlugins();
+    const messages = [
+        { role: 'user', content: [
+            { type: 'text', text: 'Process these images:' },
+            // GCS URL - should be converted to fileData
+            { type: 'image_url', image_url: { url: 'gs://my-bucket/image1.jpg' } },
+            // Base64 URL - should be converted to inlineData
+            { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,/9j/4AAQSkZJRg...' } },
+            // Regular HTTP URL - should be dropped (return null)
+            { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
+            // Azure blob URL - should be dropped (return null)
+            { type: 'image_url', image_url: { url: 'https://myaccount.blob.core.windows.net/container/image.jpg' } }
+        ]}
+    ];
+    const { modifiedMessages } = gemini15.convertMessagesToGemini(messages);
+    t.is(modifiedMessages.length, 1);
+    t.is(modifiedMessages[0].parts.length, 3); // text + gcs + base64 (2 urls dropped)
+    // Check text part
+    t.is(modifiedMessages[0].parts[0].text, 'Process these images:');
+    // Check GCS URL handling
+    t.true('fileData' in modifiedMessages[0].parts[1]);
+    t.is(modifiedMessages[0].parts[1].fileData.fileUri, 'gs://my-bucket/image1.jpg');
+    t.is(modifiedMessages[0].parts[1].fileData.mimeType, 'image/jpeg');
+    // Check base64 URL handling
+    t.true('inlineData' in modifiedMessages[0].parts[2]);
+    t.is(modifiedMessages[0].parts[2].inlineData.mimeType, 'image/jpeg');
+    t.is(modifiedMessages[0].parts[2].inlineData.data, '/9j/4AAQSkZJRg...');
+});
+// Test edge cases for image URLs in Gemini 1.5
+test('Gemini 1.5 image URL edge cases', t => {
+    const { gemini15 } = createPlugins();
+    const messages = [
+        { role: 'user', content: [
+            { type: 'text', text: 'Process these edge cases:' },
+            // Empty URL
+            { type: 'image_url', image_url: { url: '' } },
+            // Malformed base64
+            { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,' } },
+            // Malformed GCS URL
+            { type: 'image_url', image_url: { url: 'gs://' } },
+            // Missing URL property
+            { type: 'image_url', image_url: {} },
+            // Null URL
+            { type: 'image_url', image_url: { url: null } }
+        ]}
+    ];
+    const { modifiedMessages } = gemini15.convertMessagesToGemini(messages);
+    // Verify basic message structure
+    t.is(modifiedMessages.length, 1);
+    t.true(Array.isArray(modifiedMessages[0].parts));
+    // Check each part to ensure no invalid images were converted
+    modifiedMessages[0].parts.forEach(part => {
+        if (part.text) {
+            t.is(part.text, 'Process these edge cases:', 'Only expected text content should be present');
+        } else {
+            t.fail('Found non-text part that should have been filtered out: ' + JSON.stringify(part));
+        }
+    });
+    // Verify we only have one part (the text)
+    t.is(modifiedMessages[0].parts.length, 1, 'Should only have the text part');
+});

package/tests/translate_srt.test.js CHANGED

@@ -22,33 +22,63 @@ test.after.always('cleanup', async () => {
     }
 });
-async function testTranslateSrt(t, text, language='English') {
+async function testSubtitleTranslation(t, text, language = 'English', format = 'srt') {
     const response = await testServer.executeOperation({
-        query: 'query translate_subtitle($text: String!, $to:String) { translate_subtitle(text: $text, to:$to) { result } }',
+        query: 'query translate_subtitle($text: String!, $to: String, $format: String) { translate_subtitle(text: $text, to: $to, format: $format) { result } }',
         variables: {
             to: language,
-            text
-         },
+            text,
+            format
+        },
     });
     t.falsy(response.body?.singleResult?.errors);
     const result = response.body?.singleResult?.data?.translate_subtitle?.result;
-    t.true(result?.length > text.length*0.5);
+    t.true(result?.length > text.length * 0.5);
-    //check all timestamps are still there and not translated
-    const originalTimestamps = text.match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g);
-    const translatedTimestamps = result.match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g);
+    // Check format-specific header
+    if (format === 'vtt') {
+        t.true(result.startsWith('WEBVTT\n\n'), 'VTT output should start with WEBVTT header');
+    }
+    // Check timestamps based on format
+    const timestampPattern = format === 'srt'
+        ? /\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g
+        : /\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}/g;
+    const originalTimestamps = text.match(timestampPattern);
+    const translatedTimestamps = result.match(timestampPattern);
     t.deepEqual(originalTimestamps, translatedTimestamps, 'All timestamps should be present and unchanged');
+    // Check line count (accounting for WEBVTT header in VTT)
     const originalLineCount = text.split('\n').length;
     const translatedLineCount = result.split('\n').length;
     t.is(originalLineCount, translatedLineCount, 'Total number of lines should be the same');
+    // For VTT, verify any custom identifiers are preserved
+    if (format === 'vtt') {
+        const originalBlocks = text.split(/\n\s*\n/).filter(block => block.trim());
+        const translatedBlocks = result.split(/\n\s*\n/).filter(block => block.trim());
+        // Skip WEBVTT header block
+        const startIndex = originalBlocks[0].trim() === 'WEBVTT' ? 1 : 0;
+        for (let i = startIndex; i < originalBlocks.length; i++) {
+            const origLines = originalBlocks[i].split('\n');
+            const transLines = translatedBlocks[i].split('\n');
+            // If first line isn't a timestamp, it's an identifier and should be preserved
+            if (!/^\d{2}:\d{2}/.test(origLines[0])) {
+                t.is(transLines[0], origLines[0], 'VTT identifiers should be preserved');
+            }
+        }
+    }
 }
-test('test translate_srt endpoint with simple srt', async t => {
+test('test subtitle translation with SRT format', async t => {
     const text = `1
 00:00:03,069 --> 00:00:04,771
 Who's that?
@@ -66,17 +96,39 @@ Who is Aseel a mom to?
 Aseel is mommy
 `;
-    await testTranslateSrt(t, text, 'Spanish');
+    await testSubtitleTranslation(t, text, 'Spanish', 'srt');
+});
+test('test subtitle translation with VTT format', async t => {
+    const text = `WEBVTT
+1
+00:00:00.000 --> 00:00:07.000
+It's here to change the game.
+intro
+00:00:07.000 --> 00:00:11.360
+With the power of AI transforming the future.
+question
+00:00:11.360 --> 00:00:14.160
+The possibilities endless.
+00:00:14.160 --> 00:00:17.240
+It's not just about the generative AI itself.
+`;
+    await testSubtitleTranslation(t, text, 'Spanish', 'vtt');
 });
-test('test translate_srt endpoint with long srt file', async t => {
+test('test subtitle translation with long SRT file', async t => {
     t.timeout(400000);
     const text = fs.readFileSync(path.join(__dirname, 'sublong.srt'), 'utf8');
-    await testTranslateSrt(t, text, 'English');
+    await testSubtitleTranslation(t, text, 'English', 'srt');
 });
-test('test translate_srt endpoint with horizontal srt file', async t => {
+test('test subtitle translation with horizontal SRT file', async t => {
     t.timeout(400000);
     const text = fs.readFileSync(path.join(__dirname, 'subhorizontal.srt'), 'utf8');
-    await testTranslateSrt(t, text, 'Turkish');
+    await testSubtitleTranslation(t, text, 'Turkish', 'srt');
 });