@aj-archipelago/cortex 1.3.7 → 1.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,164 +1,291 @@
1
- import subsrt from "subsrt";
2
1
  import logger from "../lib/logger.js";
3
2
  import { callPathway } from "../lib/pathwayTools.js";
4
- import { publishRequestProgress } from "../lib/redisSubscription.js";
5
3
 
6
- function preprocessStr(str) {
4
+ function preprocessStr(str, format) {
7
5
  try {
8
6
  if (!str) return "";
9
- return (
10
- str
11
- .replace(/\r\n?/g, "\n")
12
- .replace(/\n+/g, "\n")
13
- .replace(/(\d+)\n(\d{2}:\d{2}:\d{2},\d{3})/g, "\n\n$1\n$2")
14
- .trim() + "\n\n"
15
- );
7
+ let content = str
8
+ // Normalize line endings
9
+ .replace(/\r\n?/g, "\n")
10
+ // Remove WEBVTT header for processing
11
+ .replace(/^WEBVTT\n\n/, '');
12
+
13
+ // For SRT, convert commas to dots in timestamps
14
+ if (format === 'srt') {
15
+ content = content.replace(/(\d{2}:\d{2}:\d{2}),(\d{3})/g, "$1.$2");
16
+ }
17
+
18
+ return content
19
+ // Ensure each subtitle block is properly separated
20
+ .split(/\n\s*\n/)
21
+ .map(block => block.trim())
22
+ .filter(block => {
23
+ // Match both numeric indices (SRT) and optional caption identifiers (VTT)
24
+ const firstLine = block.split('\n')[0];
25
+ return block && (
26
+ /^\d+$/.test(firstLine) || // SRT style
27
+ /^\d{2}:\d{2}/.test(firstLine) || // VTT style without identifier
28
+ /^[^\n]+\n\d{2}:\d{2}/.test(block) // VTT style with identifier
29
+ );
30
+ })
31
+ .join("\n\n")
32
+ + "\n\n";
16
33
  } catch (e) {
17
34
  logger.error(`An error occurred in content text preprocessing: ${e}`);
18
35
  return "";
19
36
  }
20
37
  }
21
38
 
22
- async function processBatch(batch, args) {
23
- const batchText = batch
24
- .map((caption, index) => `LINE#${index + 1}: ${caption.content}`)
25
- .join("\n");
39
+ function timeToMs(timeStr) {
40
+ const [time, ms] = timeStr.split(/[.,]/);
41
+ const [hours, minutes, seconds] = time.split(':').map(Number);
42
+ return (hours * 3600 + minutes * 60 + seconds) * 1000 + parseInt(ms);
43
+ }
26
44
 
27
- const translatedText = await callPathway("translate_subtitle_helper", {
28
- ...args,
29
- text: batchText,
30
- async: false,
31
- });
45
+ function msToTimestamp(ms, format) {
46
+ const date = new Date(ms);
47
+ const timestamp = date.toISOString().slice(11, 23);
48
+ return format === 'srt' ? timestamp.replace('.', ',') : timestamp;
49
+ }
32
50
 
33
- // Remove LINE# and LINE() labels
34
- const translatedLines = translatedText.split("\n");
35
- translatedLines.forEach((line, i) => {
36
- translatedLines[i] = line.replace(/^LINE#\d+:\s*/, "").trim();
37
- });
38
- //make sure translatedLines.length===batch.length
39
- if (translatedLines.length < batch.length) {
40
- const emptyLines = Array(batch.length - translatedLines.length).fill("-");
41
- translatedLines.push(...emptyLines);
42
- } else if (translatedLines.length > batch.length) {
43
- //first remove the empty lines
44
- translatedLines.splice(0, translatedLines.length, ...translatedLines.filter(line => line.trim() !== ""));
45
-
46
- if(translatedLines.length>batch.length) {
47
- //merge extra lines to end
48
- const lastLine = translatedLines[batch.length - 1];
49
- const mergedLines = translatedLines.slice(batch.length);
50
- mergedLines.unshift(lastLine);
51
- translatedLines.splice(batch.length - 1, translatedLines.length - batch.length + 1, mergedLines.join(" "));
52
- }else {
53
- const emptyLines = Array(batch.length - translatedLines.length).fill("-");
54
- translatedLines.push(...emptyLines);
51
+ function parseSubtitles(content, format) {
52
+ const blocks = content.split(/\n\s*\n/).filter(block => block.trim());
53
+ const captions = [];
54
+
55
+ for (const block of blocks) {
56
+ const lines = block.split('\n');
57
+ if (lines.length < 2) continue;
58
+
59
+ let index, timelineIndex;
60
+ if (format === 'srt') {
61
+ // SRT format: numeric index required
62
+ if (!/^\d+$/.test(lines[0])) continue;
63
+ index = parseInt(lines[0]);
64
+ timelineIndex = 1;
65
+ } else {
66
+ // VTT format: optional identifier
67
+ timelineIndex = /^\d{2}:\d{2}/.test(lines[0]) ? 0 : 1;
68
+ index = timelineIndex === 0 ? captions.length + 1 : lines[0];
55
69
  }
70
+
71
+ const timeMatch = lines[timelineIndex].match(/^(\d{2}:\d{2}:\d{2}[.,]\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}[.,]\d{3})/);
72
+ if (!timeMatch) continue;
73
+
74
+ const startTime = timeMatch[1].replace(',', '.');
75
+ const endTime = timeMatch[2].replace(',', '.');
76
+ const content = lines.slice(timelineIndex + 1).join('\n');
77
+
78
+ captions.push({
79
+ type: "caption",
80
+ index: typeof index === 'number' ? index : captions.length + 1,
81
+ identifier: typeof index === 'string' ? index : null,
82
+ start: timeToMs(startTime),
83
+ end: timeToMs(endTime),
84
+ duration: timeToMs(endTime) - timeToMs(startTime),
85
+ content: content,
86
+ text: content
87
+ });
56
88
  }
89
+
90
+ return captions;
91
+ }
57
92
 
93
+ function splitIntoOverlappingChunks(captions, chunkSize = 20, overlap = 3) {
94
+ const chunks = [];
95
+ for (let i = 0; i < captions.length; i += (chunkSize - overlap)) {
96
+ const end = Math.min(i + chunkSize, captions.length);
97
+ const chunk = captions.slice(i, end);
98
+ chunks.push({
99
+ captions: chunk,
100
+ startIndex: i,
101
+ endIndex: end - 1,
102
+ isOverlap: i > 0 || end < captions.length
103
+ });
104
+ }
105
+ return chunks;
106
+ }
58
107
 
59
- // Handle last empty line
60
- if (translatedLines[translatedLines.length - 1].trim() === "") {
61
- let lastNonEmptyIndex = translatedLines.length - 2;
62
- while (lastNonEmptyIndex >= 0 && translatedLines[lastNonEmptyIndex].trim() === "") {
63
- lastNonEmptyIndex--;
64
- }
65
- if (lastNonEmptyIndex >= 0) {
66
- translatedLines[translatedLines.length - 1] = translatedLines[lastNonEmptyIndex];
67
- translatedLines[lastNonEmptyIndex] = "";
68
- }
69
- }
70
-
108
+ function selectBestTranslation(translations, startIndex, endIndex) {
109
+ // If we only have one translation for this caption, use it
110
+ if (translations.length === 1) return translations[0];
71
111
 
72
- return batch.map((caption, i) => ({
73
- ...caption,
74
- content: translatedLines[i].replace(/^LINE\(\d+\):\s*/, "").trim(),
75
- text: translatedLines[i].replace(/^LINE\(\d+\):\s*/, "").trim(),
76
- }));
112
+ // For multiple translations, prefer the one from the middle of its chunk
113
+ // This helps avoid edge effects in translation
114
+ return translations.reduce((best, current) => {
115
+ const currentDistance = Math.min(
116
+ Math.abs(current.chunkStart - startIndex),
117
+ Math.abs(current.chunkEnd - endIndex)
118
+ );
119
+ const bestDistance = Math.min(
120
+ Math.abs(best.chunkStart - startIndex),
121
+ Math.abs(best.chunkEnd - endIndex)
122
+ );
123
+ return currentDistance < bestDistance ? current : best;
124
+ });
77
125
  }
78
126
 
79
- async function myResolver(args, requestId) {
80
- try {
81
- const { text, format } = args;
82
- const captions = subsrt.parse(preprocessStr(text), {
83
- format: format,
84
- verbose: true,
85
- eol: "\n",
86
- });
87
- const maxLineCount = 100;
88
- const maxWordCount = 300;
89
- let translatedCaptions = [];
90
- let currentBatch = [];
91
- let currentWordCount = 0;
92
-
93
- const totalCount = captions.length;
94
- let completedCount = 0;
95
-
96
- const sendProgress = () => {
97
- if (completedCount >= totalCount) return;
98
- if(!requestId) {
99
- logger.warn(`No requestId found for progress update`);
100
- return;
101
- }
127
+ function validateFinalOutput(result, originalText, format) {
128
+ // Basic structure validation
129
+ if (!result || !result.trim()) {
130
+ logger.error("Empty or whitespace-only result");
131
+ return false;
132
+ }
133
+
134
+ // Check for VTT header if needed
135
+ if (format === 'vtt' && !result.startsWith('WEBVTT\n\n')) {
136
+ logger.error("Missing WEBVTT header");
137
+ return false;
138
+ }
139
+
140
+ // Check for timestamp format
141
+ const timestampPattern = format === 'srt'
142
+ ? /\d{2}:\d{2}:\d{2},\d{3}\s*-->\s*\d{2}:\d{2}:\d{2},\d{3}/
143
+ : /\d{2}:\d{2}:\d{2}\.\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}\.\d{3}/;
144
+
145
+ const hasTimestamps = timestampPattern.test(result);
146
+ if (!hasTimestamps) {
147
+ logger.error(`No valid ${format.toUpperCase()} timestamps found in result`);
148
+ return false;
149
+ }
102
150
 
103
- const progress = completedCount / totalCount;
104
- logger.info(`Progress for ${requestId}: ${progress}`);
151
+ // Check overall length ratio
152
+ if (result.length < originalText.length * 0.5) {
153
+ logger.error(`Result length (${result.length}) is less than 50% of original length (${originalText.length})`);
154
+ return false;
155
+ }
105
156
 
106
- publishRequestProgress({
107
- requestId,
108
- progress,
109
- data: null,
110
- });
111
- };
112
-
113
- for (let i = 0; i < captions.length; i++) {
114
- const caption = captions[i];
115
- const captionWordCount = caption.content.split(/\s+/).length;
116
- if (
117
- (currentWordCount + captionWordCount > maxWordCount ||
118
- currentBatch.length >= maxLineCount) &&
119
- currentBatch.length > 0
120
- ) {
121
- completedCount=i;
122
- sendProgress();
123
- const translatedBatch = await processBatch(
124
- currentBatch,
125
- args,
126
- );
127
- translatedCaptions = translatedCaptions.concat(translatedBatch);
128
- currentBatch = [];
129
- currentWordCount = 0;
157
+ // Validate subtitle block structure
158
+ const blocks = result.split(/\n\s*\n/).filter(block => block.trim());
159
+
160
+ // Skip WEBVTT header for VTT format
161
+ const startIndex = format === 'vtt' && blocks[0].trim() === 'WEBVTT' ? 1 : 0;
162
+
163
+ for (let i = startIndex; i < blocks.length; i++) {
164
+ const block = blocks[i];
165
+ const lines = block.trim().split('\n');
166
+
167
+ if (lines.length < 2) {
168
+ logger.error(`Block ${i + 1} has insufficient lines (${lines.length}):\n${block}`);
169
+ return false;
170
+ }
171
+
172
+ // Find the timestamp line
173
+ let timestampLineIndex = -1;
174
+ for (let j = 0; j < lines.length; j++) {
175
+ if (timestampPattern.test(lines[j])) {
176
+ timestampLineIndex = j;
177
+ break;
130
178
  }
131
- currentBatch.push(caption);
132
- currentWordCount += captionWordCount;
133
179
  }
134
-
135
- if (currentBatch.length > 0) {
136
- const translatedBatch = await processBatch(
137
- currentBatch,
138
- args,
139
- );
140
- translatedCaptions = translatedCaptions.concat(translatedBatch);
180
+
181
+ if (timestampLineIndex === -1) {
182
+ logger.error(`Block ${i + 1} has no valid timestamp line:\n${block}`);
183
+ return false;
141
184
  }
185
+
186
+ // Check that we have content after the timestamp
187
+ if (timestampLineIndex === lines.length - 1) {
188
+ logger.error(`Block ${i + 1} has no content after timestamp:\n${block}`);
189
+ return false;
190
+ }
191
+
192
+ // Log the content for inspection
193
+ logger.debug(`Block ${i + 1} content:\n${lines.slice(timestampLineIndex + 1).join('\n')}`);
194
+ }
142
195
 
143
- return (
144
- subsrt
145
- .build(translatedCaptions, {
146
- format: format === "vtt" ? "vtt" : "srt",
147
- eol: "\n",
148
- })
149
- .trim() + "\n"
150
- );
151
- } catch (e) {
152
- logger.warn(
153
- `${e} - could be that there are no subtitles, so attempting block translation.`
154
- );
196
+ return true;
197
+ }
198
+
199
+ async function translateChunk(chunk, args, maxRetries = 3) {
200
+ const format = args.format || 'srt';
201
+ const chunkText = chunk.captions
202
+ .map(c => {
203
+ const startTime = msToTimestamp(c.start, format);
204
+ const endTime = msToTimestamp(c.end, format);
205
+ const index = format === 'srt' || !c.identifier ? c.index : c.identifier;
206
+ return `${index}\n${startTime} --> ${endTime}\n${c.content}`;
207
+ })
208
+ .join('\n\n');
209
+
210
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
155
211
  try {
156
- return await callPathway("translate_gpt4_omni", {...args, async: false});
212
+ const translated = await callPathway("translate_subtitle_helper", {
213
+ ...args,
214
+ text: chunkText,
215
+ async: false,
216
+ });
217
+
218
+ // Basic validation - just check for SUBTITLES tags and some content
219
+ const match = translated.match(/<SUBTITLES>([\s\S]*)<\/SUBTITLES>/);
220
+ if (!match || !match[1].trim()) {
221
+ logger.warn(`Attempt ${attempt + 1}: Invalid translation format`);
222
+ continue;
223
+ }
224
+
225
+ const content = match[1].trim();
226
+ const blocks = content.split(/\n\s*\n/);
227
+
228
+ // Check if any blocks are empty or invalid
229
+ let hasEmptyBlocks = false;
230
+ const processedBlocks = chunk.captions.map((caption, index) => {
231
+ const block = blocks[index];
232
+ if (!block) {
233
+ logger.warn(`Attempt ${attempt + 1}: Empty block for caption ${caption.index}`);
234
+ hasEmptyBlocks = true;
235
+ return null;
236
+ }
237
+
238
+ const lines = block.split('\n');
239
+ if (lines.length < 3) {
240
+ logger.warn(`Attempt ${attempt + 1}: Invalid block structure for caption ${caption.index}`);
241
+ hasEmptyBlocks = true;
242
+ return null;
243
+ }
244
+
245
+ const content = lines.slice(2).join('\n').trim();
246
+ if (!content) {
247
+ logger.warn(`Attempt ${attempt + 1}: Empty content for caption ${caption.index}`);
248
+ hasEmptyBlocks = true;
249
+ return null;
250
+ }
251
+
252
+ return {
253
+ ...caption,
254
+ content: content,
255
+ text: content,
256
+ chunkStart: chunk.startIndex,
257
+ chunkEnd: chunk.endIndex
258
+ };
259
+ });
260
+
261
+ // If no empty blocks, return the processed blocks
262
+ if (!hasEmptyBlocks) {
263
+ return processedBlocks;
264
+ }
265
+
266
+ // If this was the last attempt and we still have empty blocks,
267
+ // return what we have but keep original content for empty blocks
268
+ if (attempt === maxRetries - 1) {
269
+ logger.warn(`Failed to get valid translations for all blocks after ${maxRetries} attempts`);
270
+ return chunk.captions.map((caption, index) => {
271
+ return processedBlocks[index] || {
272
+ ...caption,
273
+ chunkStart: chunk.startIndex,
274
+ chunkEnd: chunk.endIndex
275
+ };
276
+ });
277
+ }
278
+
279
+ // Otherwise, try again
280
+ logger.info(`Retrying chunk due to empty blocks (attempt ${attempt + 1}/${maxRetries})`);
281
+
157
282
  } catch (e) {
158
- logger.error(`An error occurred in subtitle translation: ${e}`);
159
- return "";
283
+ logger.error(`Error translating chunk ${chunk.startIndex}-${chunk.endIndex} (attempt ${attempt + 1}): ${e}`);
284
+ if (attempt === maxRetries - 1) throw e;
160
285
  }
161
286
  }
287
+
288
+ throw new Error(`Failed to translate chunk ${chunk.startIndex}-${chunk.endIndex} after ${maxRetries} attempts`);
162
289
  }
163
290
 
164
291
  export default {
@@ -173,9 +300,73 @@ export default {
173
300
  model: "oai-gpt4o",
174
301
  enableDuplicateRequests: false,
175
302
  timeout: 3600,
176
- executePathway: async (executePathwayArgs) => {
177
- const { args } = executePathwayArgs;
178
- const requestId = executePathwayArgs?.resolver?.requestId;
179
- return await myResolver(args, requestId);
303
+ executePathway: async ({args}) => {
304
+ try {
305
+ const { text, format = 'srt' } = args;
306
+ const preprocessedText = preprocessStr(text, format);
307
+ const captions = parseSubtitles(preprocessedText, format);
308
+
309
+ if (!captions || captions.length === 0) {
310
+ throw new Error("No captions found in input");
311
+ }
312
+
313
+ // Split into overlapping chunks
314
+ const chunks = splitIntoOverlappingChunks(captions);
315
+ logger.info(`Split subtitles into ${chunks.length} overlapping chunks`);
316
+
317
+ // Translate all chunks in parallel
318
+ const chunkPromises = chunks.map(chunk => translateChunk(chunk, args));
319
+ const translatedChunks = await Promise.all(chunkPromises);
320
+
321
+ // Create a map of caption index to all its translations
322
+ const translationMap = new Map();
323
+ translatedChunks.flat().forEach(caption => {
324
+ if (!translationMap.has(caption.index)) {
325
+ translationMap.set(caption.index, []);
326
+ }
327
+ translationMap.get(caption.index).push(caption);
328
+ });
329
+
330
+ // Select best translation for each caption
331
+ const finalCaptions = captions.map(caption => {
332
+ const translations = translationMap.get(caption.index) || [caption];
333
+ return selectBestTranslation(translations, caption.index, caption.index);
334
+ });
335
+
336
+ // Format the output
337
+ let result = finalCaptions
338
+ .map(caption => {
339
+ const startTime = msToTimestamp(caption.start, format);
340
+ const endTime = msToTimestamp(caption.end, format);
341
+ // Only include index/identifier if it was in the original
342
+ const hasIdentifier = caption.identifier !== null || format === 'srt';
343
+ const index = format === 'srt' || !caption.identifier ? caption.index : caption.identifier;
344
+ return hasIdentifier ?
345
+ `${index}\n${startTime} --> ${endTime}\n${caption.content}` :
346
+ `${startTime} --> ${endTime}\n${caption.content}`;
347
+ })
348
+ .join('\n\n')
349
+ .trim();
350
+
351
+ // Add final newline only if input had one
352
+ if (text.endsWith('\n')) {
353
+ result += '\n';
354
+ }
355
+
356
+ // Add WEBVTT header for VTT format
357
+ if (format === 'vtt') {
358
+ result = 'WEBVTT\n\n' + result;
359
+ }
360
+
361
+ // Validate final output
362
+ if (!validateFinalOutput(result, text, format)) {
363
+ throw new Error("Final subtitle reconstruction failed validation");
364
+ }
365
+
366
+ return result;
367
+ } catch (e) {
368
+ logger.error(`Subtitle translation failed: ${e}`);
369
+ throw e;
370
+ }
180
371
  },
181
372
  };
@@ -7,25 +7,13 @@ export default {
7
7
  {
8
8
  role: "system",
9
9
  content:
10
- `Expert translator: Convert ALL text to {{to}}. Unbreakable rules:
11
-
12
- 1. Translate EVERY SINGLE LINE. Zero exceptions.
13
- 2. Output MUST have EXACTLY the same line count as input.
14
- 3. One input line = One output line. Always.
15
- 4. Only translations. Nothing extra.
16
- 5. Non-translatable stays unchanged.
17
- 6. Keep all formatting and characters.
18
- 7. Prefix: "LINE#lineNumber:".
19
- 8. Untranslatable: Copy as-is with prefix.
20
- 9. Internal checks: Verify line count and content after each line.
21
- 10. Final verification: Recount, check numbering, confirm content, cross-check with input.
22
-
23
- Translate ALL lines. Constant vigilance. Exhaustive final cross-check.`
10
+ `You are an expert subtitle translator. You will be given a block of subtitles and asked to translate them into {{to}}.
11
+ You must maintain the original format (caption numbers and timestamps) exactly and make the content fit as naturally as possible.
12
+ Output only the translated subtitles in a <SUBTITLES> tag with no other text or commentary.`
24
13
  },
25
14
  {
26
15
  role: "user",
27
- // content: `"PreviousLines":\n{{{prevLine}}}\n\n"CurrentLines":\n{{{text}}}\n"NextLines":\n{{{nextLine}}}\n\n`,
28
- content: `{{{text}}}`,
16
+ content: `<SUBTITLES>\n{{{text}}}\n</SUBTITLES>`,
29
17
  },
30
18
  ],
31
19
  }),
@@ -81,17 +81,6 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
81
81
  throw new Error(this.jsonBuffer);
82
82
  }
83
83
 
84
- if (isValidJSON(this.jsonBuffer)) {
85
- const parsedData = JSON.parse(this.jsonBuffer);
86
- if (parsedData.progress !== undefined) {
87
- publishRequestProgress({
88
- requestId: this.requestId,
89
- progress: parsedData.progress,
90
- info: this.jsonBuffer
91
- });
92
- }
93
- }
94
-
95
84
  onData(this.jsonBuffer);
96
85
  this.jsonBuffer = '';
97
86
  this.jsonDepth = 0;
@@ -118,11 +107,34 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
118
107
  let finalJson = '';
119
108
  this.handleStream(response.data,
120
109
  (data) => {
121
- publishRequestProgress({
122
- requestId: this.requestId,
123
- info: data
124
- });
110
+ let sent = false;
111
+ if (isValidJSON(data)) {
112
+ const parsedData = JSON.parse(data);
113
+ if (parsedData.progress !== undefined) {
114
+ let timeInfo = '';
115
+ if (parsedData.estimated_time_remaining && parsedData.elapsed_time) {
116
+ const minutes = Math.ceil(parsedData.estimated_time_remaining / 60);
117
+ timeInfo = minutes <= 2
118
+ ? `Should be done soon (${parsedData.elapsed_time} elapsed)`
119
+ : `Estimated ${minutes} minutes remaining`;
120
+ }
121
+
122
+ publishRequestProgress({
123
+ requestId: this.requestId,
124
+ progress: parsedData.progress,
125
+ info: timeInfo
126
+ });
127
+ sent = true;
128
+ }
129
+ }
130
+ if (!sent) {
131
+ publishRequestProgress({
132
+ requestId: this.requestId,
133
+ info: data
134
+ });
135
+ }
125
136
  logger.debug('Data:', data);
137
+
126
138
  // Extract JSON content if message contains targetLocales
127
139
  const jsonMatch = data.match(/{[\s\S]*"targetLocales"[\s\S]*}/);
128
140
  if (jsonMatch) {
@@ -2,9 +2,7 @@ import OpenAIVisionPlugin from "./openAiVisionPlugin.js";
2
2
  import logger from "../../lib/logger.js";
3
3
  import axios from 'axios';
4
4
 
5
- const allowedMIMETypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
6
-
7
- async function convertContentItem(item, maxImageSize) {
5
+ async function convertContentItem(item, maxImageSize, plugin) {
8
6
  let imageUrl = "";
9
7
 
10
8
  try {
@@ -26,6 +24,12 @@ async function convertContentItem(item, maxImageSize) {
26
24
  }
27
25
 
28
26
  try {
27
+ // First validate the image URL
28
+ if (!await plugin.validateImageUrl(imageUrl)) {
29
+ return null;
30
+ }
31
+
32
+ // Then fetch and convert to base64 if needed
29
33
  const urlData = imageUrl.startsWith("data:") ? imageUrl : await fetchImageAsDataURL(imageUrl);
30
34
  if (!urlData) { return null; }
31
35
 
@@ -69,25 +73,14 @@ async function convertContentItem(item, maxImageSize) {
69
73
  // Fetch image and convert to base 64 data URL
70
74
  async function fetchImageAsDataURL(imageUrl) {
71
75
  try {
72
- // First check headers
73
- const headResponse = await axios.head(imageUrl, {
74
- timeout: 30000, // 30 second timeout
75
- maxRedirects: 5
76
- });
77
-
78
- const contentType = headResponse.headers['content-type'];
79
- if (!contentType || !allowedMIMETypes.includes(contentType)) {
80
- logger.warn(`Unsupported image type: ${contentType} - skipping image content.`);
81
- return null;
82
- }
83
-
84
- // Then get the actual image data
76
+ // Get the actual image data
85
77
  const dataResponse = await axios.get(imageUrl, {
86
78
  timeout: 30000,
87
79
  responseType: 'arraybuffer',
88
80
  maxRedirects: 5
89
81
  });
90
82
 
83
+ const contentType = dataResponse.headers['content-type'];
91
84
  const base64Image = Buffer.from(dataResponse.data).toString('base64');
92
85
  return `data:${contentType};base64,${base64Image}`;
93
86
  }
@@ -161,7 +154,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
161
154
  const claude3Messages = await Promise.all(
162
155
  finalMessages.map(async (message) => {
163
156
  const contentArray = Array.isArray(message.content) ? message.content : [message.content];
164
- const claude3Content = await Promise.all(contentArray.map(item => convertContentItem(item, this.getModelMaxImageSize())));
157
+ const claude3Content = await Promise.all(contentArray.map(item => convertContentItem(item, this.getModelMaxImageSize(), this)));
165
158
  return {
166
159
  role: message.role,
167
160
  content: claude3Content.filter(Boolean),