@aj-archipelago/cortex 1.3.7 → 1.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,21 +40,34 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
40
40
  } else if (type === 'text') {
41
41
  return { text: text };
42
42
  } else if (type === 'image_url') {
43
+ if (!fileUrl) {
44
+ return null;
45
+ }
43
46
  if (fileUrl.startsWith('gs://')) {
47
+ // Validate GCS URL has at least a bucket name after gs://
48
+ const gcsPath = fileUrl.slice(5); // Remove 'gs://'
49
+ if (!gcsPath || gcsPath.length < 1) {
50
+ return null;
51
+ }
44
52
  return {
45
53
  fileData: {
46
54
  mimeType: mime.lookup(fileUrl) || 'image/jpeg',
47
55
  fileUri: fileUrl
48
56
  }
49
57
  };
50
- } else {
58
+ } else if (fileUrl.includes('base64,')) {
59
+ const base64Data = fileUrl.split('base64,')[1];
60
+ if (!base64Data) {
61
+ return null;
62
+ }
51
63
  return {
52
64
  inlineData: {
53
- mimeType: 'image/jpeg', // fixed for now as there's no MIME type in the request
54
- data: fileUrl.split('base64,')[1]
65
+ mimeType: 'image/jpeg',
66
+ data: base64Data
55
67
  }
56
68
  };
57
69
  }
70
+ return null;
58
71
  }
59
72
  } catch (e) {
60
73
  // this space intentionally left blank
@@ -5,11 +5,13 @@ import { encode } from '../../lib/encodeCache.js';
5
5
  import { getFirstNToken } from '../chunker.js';
6
6
  import logger, { obscureUrlParams } from '../../lib/logger.js';
7
7
  import { config } from '../../config.js';
8
+ import axios from 'axios';
8
9
 
9
10
  const DEFAULT_MAX_TOKENS = 4096;
10
11
  const DEFAULT_MAX_RETURN_TOKENS = 256;
11
12
  const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
12
13
  const DEFAULT_MAX_IMAGE_SIZE = 20 * 1024 * 1024; // 20MB default
14
+ const DEFAULT_ALLOWED_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
13
15
 
14
16
  class ModelPlugin {
15
17
  constructor(pathway, model) {
@@ -22,6 +24,7 @@ class ModelPlugin {
22
24
  this.pathwayName = pathway.name;
23
25
  this.promptParameters = {};
24
26
  this.isMultiModal = false;
27
+ this.allowedMIMETypes = model.allowedMIMETypes || DEFAULT_ALLOWED_MIME_TYPES;
25
28
 
26
29
  // Make all of the parameters defined on the pathway itself available to the prompt
27
30
  for (const [k, v] of Object.entries(pathway)) {
@@ -36,6 +39,30 @@ class ModelPlugin {
36
39
  this.requestCount = 0;
37
40
  }
38
41
 
42
+ async validateImageUrl(url) {
43
+ if (url.startsWith('data:')) {
44
+ const [, mimeType = ""] = url.match(/data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+).*,.*/) || [];
45
+ return this.allowedMIMETypes.includes(mimeType);
46
+ }
47
+
48
+ try {
49
+ const headResponse = await axios.head(url, {
50
+ timeout: 30000,
51
+ maxRedirects: 5
52
+ });
53
+
54
+ const contentType = headResponse.headers['content-type'];
55
+ if (!contentType || !this.allowedMIMETypes.includes(contentType)) {
56
+ logger.warn(`Unsupported image type: ${contentType} - skipping image content.`);
57
+ return false;
58
+ }
59
+ return true;
60
+ } catch (e) {
61
+ logger.error(`Failed to validate image URL: ${url}. ${e}`);
62
+ return false;
63
+ }
64
+ }
65
+
39
66
  safeGetEncodedLength(data) {
40
67
  if (data && data.length > 100000) {
41
68
  return data.length * 3 / 16;
@@ -17,14 +17,14 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
17
17
  this.isMultiModal = true;
18
18
  }
19
19
 
20
- tryParseMessages(messages) {
21
- return messages.map(message => {
20
+ async tryParseMessages(messages) {
21
+ return await Promise.all(messages.map(async message => {
22
22
  try {
23
23
  if (message.role === "tool") {
24
24
  return message;
25
25
  }
26
26
  if (Array.isArray(message.content)) {
27
- message.content = message.content.map(item => {
27
+ message.content = await Promise.all(message.content.map(async item => {
28
28
  const parsedItem = safeJsonParse(item);
29
29
 
30
30
  if (typeof parsedItem === 'string') {
@@ -32,17 +32,21 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
32
32
  }
33
33
 
34
34
  if (typeof parsedItem === 'object' && parsedItem !== null && parsedItem.type === 'image_url') {
35
- return {type: parsedItem.type, image_url: {url: parsedItem.url || parsedItem.image_url.url}};
35
+ const url = parsedItem.url || parsedItem.image_url?.url;
36
+ if (url && await this.validateImageUrl(url)) {
37
+ return {type: parsedItem.type, image_url: {url}};
38
+ }
39
+ return { type: 'text', text: 'Image skipped: unsupported format' };
36
40
  }
37
41
 
38
42
  return parsedItem;
39
- });
43
+ }));
40
44
  }
41
45
  } catch (e) {
42
46
  return message;
43
47
  }
44
48
  return message;
45
- });
49
+ }));
46
50
  }
47
51
 
48
52
  // Override the logging function to display the messages and responses
@@ -100,10 +104,10 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
100
104
  }
101
105
 
102
106
 
103
- getRequestParameters(text, parameters, prompt) {
107
+ async getRequestParameters(text, parameters, prompt) {
104
108
  const requestParameters = super.getRequestParameters(text, parameters, prompt);
105
109
 
106
- this.tryParseMessages(requestParameters.messages);
110
+ requestParameters.messages = await this.tryParseMessages(requestParameters.messages);
107
111
 
108
112
  const modelMaxReturnTokens = this.getModelMaxReturnTokens();
109
113
  const maxTokensPrompt = this.promptParameters.max_tokens;
@@ -120,6 +124,20 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
120
124
  return requestParameters;
121
125
  }
122
126
 
127
+ async execute(text, parameters, prompt, cortexRequest) {
128
+ const requestParameters = await this.getRequestParameters(text, parameters, prompt);
129
+ const { stream } = parameters;
130
+
131
+ cortexRequest.data = {
132
+ ...(cortexRequest.data || {}),
133
+ ...requestParameters,
134
+ };
135
+ cortexRequest.params = {}; // query params
136
+ cortexRequest.stream = stream;
137
+
138
+ return this.executeRequest(cortexRequest);
139
+ }
140
+
123
141
  }
124
142
 
125
143
  export default OpenAIVisionPlugin;
@@ -31,7 +31,7 @@ test('OpenAI to Claude conversion data url', async (t) => {
31
31
  ]}
32
32
  ];
33
33
 
34
- const parsedOpenAI = openai.tryParseMessages(openaiMessages);
34
+ const parsedOpenAI = await openai.tryParseMessages(openaiMessages);
35
35
  const { system, modifiedMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
36
36
 
37
37
  t.is(modifiedMessages.length, 1);
@@ -55,7 +55,7 @@ test('OpenAI to Claude conversion image url', async (t) => {
55
55
  ]}
56
56
  ];
57
57
 
58
- const parsedOpenAI = openai.tryParseMessages(openaiMessages);
58
+ const parsedOpenAI = await openai.tryParseMessages(openaiMessages);
59
59
  const { system, modifiedMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
60
60
 
61
61
  t.is(modifiedMessages.length, 1);
@@ -68,8 +68,8 @@ test('OpenAI to Claude conversion image url', async (t) => {
68
68
  });
69
69
 
70
70
  // Test OpenAI to Gemini conversion
71
- test('OpenAI to Gemini conversion', t => {
72
- const { openai, gemini, gemini15 } = createPlugins();
71
+ test('OpenAI to Gemini conversion', async (t) => {
72
+ const { gemini, gemini15 } = createPlugins();
73
73
 
74
74
  const openaiMessages = [
75
75
  { role: 'system', content: 'You are a helpful assistant.' },
@@ -79,9 +79,8 @@ test('OpenAI to Gemini conversion', t => {
79
79
  ]}
80
80
  ];
81
81
 
82
- const parsedOpenAI = openai.tryParseMessages(openaiMessages);
83
- const { modifiedMessages, system } = gemini.convertMessagesToGemini(parsedOpenAI);
84
- const { modifiedMessages: modifiedMessages15, system: system15 } = gemini15.convertMessagesToGemini(parsedOpenAI);
82
+ const { modifiedMessages, system } = gemini.convertMessagesToGemini(openaiMessages);
83
+ const { modifiedMessages: modifiedMessages15, system: system15 } = gemini15.convertMessagesToGemini(openaiMessages);
85
84
 
86
85
  // Gemini
87
86
  t.is(modifiedMessages.length, 1);
@@ -188,11 +187,12 @@ test('Unsupported mime type conversion', async (t) => {
188
187
  ]}
189
188
  ];
190
189
 
191
- const parsedOpenAI = openai.tryParseMessages(pdfMessage);
190
+ const parsedOpenAI = await openai.tryParseMessages(pdfMessage);
192
191
  const { system, modifiedMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
193
192
 
194
- t.is(modifiedMessages[0].content.length, 1);
193
+ t.is(modifiedMessages[0].content.length, 2);
195
194
  t.is(modifiedMessages[0].content[0].text, 'Can you analyze this PDF?');
195
+ t.is(modifiedMessages[0].content[1].text, 'Image skipped: unsupported format');
196
196
  });
197
197
 
198
198
  // Test pathological cases
@@ -215,7 +215,7 @@ test('Pathological cases', async (t) => {
215
215
  { role: 'user', content: 'Another question' },
216
216
  ];
217
217
 
218
- const parsedOpenAI = openai.tryParseMessages(pathologicalMessages);
218
+ const parsedOpenAI = await openai.tryParseMessages(pathologicalMessages);
219
219
 
220
220
  // Test Claude conversion
221
221
  const { system: claudeSystem, modifiedMessages: claudeMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
@@ -273,7 +273,7 @@ test('Empty message array', async (t) => {
273
273
 
274
274
  const emptyMessages = [];
275
275
 
276
- const parsedOpenAI = openai.tryParseMessages(emptyMessages);
276
+ const parsedOpenAI = await openai.tryParseMessages(emptyMessages);
277
277
 
278
278
  // Test Claude conversion
279
279
  const { system: claudeSystem, modifiedMessages: claudeMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
@@ -302,7 +302,7 @@ test('Only system messages', async (t) => {
302
302
  { role: 'system', content: 'You are helpful and friendly.' },
303
303
  ];
304
304
 
305
- const parsedOpenAI = openai.tryParseMessages(onlySystemMessages);
305
+ const parsedOpenAI = await openai.tryParseMessages(onlySystemMessages);
306
306
 
307
307
  // Test Claude conversion
308
308
  const { system: claudeSystem, modifiedMessages: claudeMessages } = await claude.convertMessagesToClaudeVertex(parsedOpenAI);
@@ -324,3 +324,79 @@ test('Only system messages', async (t) => {
324
324
  t.is(geminiSystem15.parts[1].text, 'You are helpful and friendly.');
325
325
  t.is(geminiMessages15.length, 0);
326
326
  });
327
+
328
+ // Test different image URL types for Gemini 1.5
329
+ test('Gemini 1.5 image URL type handling', t => {
330
+ const { gemini15 } = createPlugins();
331
+
332
+ const messages = [
333
+ { role: 'user', content: [
334
+ { type: 'text', text: 'Process these images:' },
335
+ // GCS URL - should be converted to fileData
336
+ { type: 'image_url', image_url: { url: 'gs://my-bucket/image1.jpg' } },
337
+ // Base64 URL - should be converted to inlineData
338
+ { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,/9j/4AAQSkZJRg...' } },
339
+ // Regular HTTP URL - should be dropped (return null)
340
+ { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
341
+ // Azure blob URL - should be dropped (return null)
342
+ { type: 'image_url', image_url: { url: 'https://myaccount.blob.core.windows.net/container/image.jpg' } }
343
+ ]}
344
+ ];
345
+
346
+ const { modifiedMessages } = gemini15.convertMessagesToGemini(messages);
347
+
348
+ t.is(modifiedMessages.length, 1);
349
+ t.is(modifiedMessages[0].parts.length, 3); // text + gcs + base64 (2 urls dropped)
350
+
351
+ // Check text part
352
+ t.is(modifiedMessages[0].parts[0].text, 'Process these images:');
353
+
354
+ // Check GCS URL handling
355
+ t.true('fileData' in modifiedMessages[0].parts[1]);
356
+ t.is(modifiedMessages[0].parts[1].fileData.fileUri, 'gs://my-bucket/image1.jpg');
357
+ t.is(modifiedMessages[0].parts[1].fileData.mimeType, 'image/jpeg');
358
+
359
+ // Check base64 URL handling
360
+ t.true('inlineData' in modifiedMessages[0].parts[2]);
361
+ t.is(modifiedMessages[0].parts[2].inlineData.mimeType, 'image/jpeg');
362
+ t.is(modifiedMessages[0].parts[2].inlineData.data, '/9j/4AAQSkZJRg...');
363
+ });
364
+
365
+ // Test edge cases for image URLs in Gemini 1.5
366
+ test('Gemini 1.5 image URL edge cases', t => {
367
+ const { gemini15 } = createPlugins();
368
+
369
+ const messages = [
370
+ { role: 'user', content: [
371
+ { type: 'text', text: 'Process these edge cases:' },
372
+ // Empty URL
373
+ { type: 'image_url', image_url: { url: '' } },
374
+ // Malformed base64
375
+ { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,' } },
376
+ // Malformed GCS URL
377
+ { type: 'image_url', image_url: { url: 'gs://' } },
378
+ // Missing URL property
379
+ { type: 'image_url', image_url: {} },
380
+ // Null URL
381
+ { type: 'image_url', image_url: { url: null } }
382
+ ]}
383
+ ];
384
+
385
+ const { modifiedMessages } = gemini15.convertMessagesToGemini(messages);
386
+
387
+ // Verify basic message structure
388
+ t.is(modifiedMessages.length, 1);
389
+ t.true(Array.isArray(modifiedMessages[0].parts));
390
+
391
+ // Check each part to ensure no invalid images were converted
392
+ modifiedMessages[0].parts.forEach(part => {
393
+ if (part.text) {
394
+ t.is(part.text, 'Process these edge cases:', 'Only expected text content should be present');
395
+ } else {
396
+ t.fail('Found non-text part that should have been filtered out: ' + JSON.stringify(part));
397
+ }
398
+ });
399
+
400
+ // Verify we only have one part (the text)
401
+ t.is(modifiedMessages[0].parts.length, 1, 'Should only have the text part');
402
+ });
@@ -22,33 +22,63 @@ test.after.always('cleanup', async () => {
22
22
  }
23
23
  });
24
24
 
25
- async function testTranslateSrt(t, text, language='English') {
25
+ async function testSubtitleTranslation(t, text, language = 'English', format = 'srt') {
26
26
  const response = await testServer.executeOperation({
27
- query: 'query translate_subtitle($text: String!, $to:String) { translate_subtitle(text: $text, to:$to) { result } }',
27
+ query: 'query translate_subtitle($text: String!, $to: String, $format: String) { translate_subtitle(text: $text, to: $to, format: $format) { result } }',
28
28
  variables: {
29
29
  to: language,
30
- text
31
- },
30
+ text,
31
+ format
32
+ },
32
33
  });
33
34
 
34
35
  t.falsy(response.body?.singleResult?.errors);
35
36
 
36
37
  const result = response.body?.singleResult?.data?.translate_subtitle?.result;
37
- t.true(result?.length > text.length*0.5);
38
+ t.true(result?.length > text.length * 0.5);
38
39
 
39
- //check all timestamps are still there and not translated
40
- const originalTimestamps = text.match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g);
41
- const translatedTimestamps = result.match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g);
40
+ // Check format-specific header
41
+ if (format === 'vtt') {
42
+ t.true(result.startsWith('WEBVTT\n\n'), 'VTT output should start with WEBVTT header');
43
+ }
44
+
45
+ // Check timestamps based on format
46
+ const timestampPattern = format === 'srt'
47
+ ? /\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}/g
48
+ : /\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}/g;
49
+
50
+ const originalTimestamps = text.match(timestampPattern);
51
+ const translatedTimestamps = result.match(timestampPattern);
42
52
 
43
53
  t.deepEqual(originalTimestamps, translatedTimestamps, 'All timestamps should be present and unchanged');
44
54
 
55
+ // Check line count (accounting for WEBVTT header in VTT)
45
56
  const originalLineCount = text.split('\n').length;
46
57
  const translatedLineCount = result.split('\n').length;
47
58
 
48
59
  t.is(originalLineCount, translatedLineCount, 'Total number of lines should be the same');
60
+
61
+ // For VTT, verify any custom identifiers are preserved
62
+ if (format === 'vtt') {
63
+ const originalBlocks = text.split(/\n\s*\n/).filter(block => block.trim());
64
+ const translatedBlocks = result.split(/\n\s*\n/).filter(block => block.trim());
65
+
66
+ // Skip WEBVTT header block
67
+ const startIndex = originalBlocks[0].trim() === 'WEBVTT' ? 1 : 0;
68
+
69
+ for (let i = startIndex; i < originalBlocks.length; i++) {
70
+ const origLines = originalBlocks[i].split('\n');
71
+ const transLines = translatedBlocks[i].split('\n');
72
+
73
+ // If first line isn't a timestamp, it's an identifier and should be preserved
74
+ if (!/^\d{2}:\d{2}/.test(origLines[0])) {
75
+ t.is(transLines[0], origLines[0], 'VTT identifiers should be preserved');
76
+ }
77
+ }
78
+ }
49
79
  }
50
80
 
51
- test('test translate_srt endpoint with simple srt', async t => {
81
+ test('test subtitle translation with SRT format', async t => {
52
82
  const text = `1
53
83
  00:00:03,069 --> 00:00:04,771
54
84
  Who's that?
@@ -66,17 +96,39 @@ Who is Aseel a mom to?
66
96
  Aseel is mommy
67
97
  `;
68
98
 
69
- await testTranslateSrt(t, text, 'Spanish');
99
+ await testSubtitleTranslation(t, text, 'Spanish', 'srt');
100
+ });
101
+
102
+ test('test subtitle translation with VTT format', async t => {
103
+ const text = `WEBVTT
104
+
105
+ 1
106
+ 00:00:00.000 --> 00:00:07.000
107
+ It's here to change the game.
108
+
109
+ intro
110
+ 00:00:07.000 --> 00:00:11.360
111
+ With the power of AI transforming the future.
112
+
113
+ question
114
+ 00:00:11.360 --> 00:00:14.160
115
+ The possibilities endless.
116
+
117
+ 00:00:14.160 --> 00:00:17.240
118
+ It's not just about the generative AI itself.
119
+ `;
120
+
121
+ await testSubtitleTranslation(t, text, 'Spanish', 'vtt');
70
122
  });
71
123
 
72
- test('test translate_srt endpoint with long srt file', async t => {
124
+ test('test subtitle translation with long SRT file', async t => {
73
125
  t.timeout(400000);
74
126
  const text = fs.readFileSync(path.join(__dirname, 'sublong.srt'), 'utf8');
75
- await testTranslateSrt(t, text, 'English');
127
+ await testSubtitleTranslation(t, text, 'English', 'srt');
76
128
  });
77
129
 
78
- test('test translate_srt endpoint with horizontal srt file', async t => {
130
+ test('test subtitle translation with horizontal SRT file', async t => {
79
131
  t.timeout(400000);
80
132
  const text = fs.readFileSync(path.join(__dirname, 'subhorizontal.srt'), 'utf8');
81
- await testTranslateSrt(t, text, 'Turkish');
133
+ await testSubtitleTranslation(t, text, 'Turkish', 'srt');
82
134
  });