@juspay/neurolink 7.35.0 → 7.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/adapters/providerImageAdapter.d.ts +56 -0
- package/dist/adapters/providerImageAdapter.js +257 -0
- package/dist/cli/commands/config.d.ts +20 -20
- package/dist/cli/factories/commandFactory.d.ts +1 -0
- package/dist/cli/factories/commandFactory.js +26 -3
- package/dist/core/baseProvider.js +99 -45
- package/dist/core/types.d.ts +3 -0
- package/dist/lib/adapters/providerImageAdapter.d.ts +56 -0
- package/dist/lib/adapters/providerImageAdapter.js +257 -0
- package/dist/lib/core/baseProvider.js +99 -45
- package/dist/lib/core/types.d.ts +3 -0
- package/dist/lib/neurolink.js +8 -3
- package/dist/lib/types/content.d.ts +78 -0
- package/dist/lib/types/content.js +5 -0
- package/dist/lib/types/conversation.d.ts +19 -0
- package/dist/lib/types/generateTypes.d.ts +4 -1
- package/dist/lib/types/streamTypes.d.ts +6 -3
- package/dist/lib/utils/imageProcessor.d.ts +84 -0
- package/dist/lib/utils/imageProcessor.js +362 -0
- package/dist/lib/utils/messageBuilder.d.ts +8 -1
- package/dist/lib/utils/messageBuilder.js +279 -0
- package/dist/neurolink.js +8 -3
- package/dist/types/content.d.ts +78 -0
- package/dist/types/content.js +5 -0
- package/dist/types/conversation.d.ts +19 -0
- package/dist/types/generateTypes.d.ts +4 -1
- package/dist/types/streamTypes.d.ts +6 -3
- package/dist/utils/imageProcessor.d.ts +84 -0
- package/dist/utils/imageProcessor.js +362 -0
- package/dist/utils/messageBuilder.d.ts +8 -1
- package/dist/utils/messageBuilder.js +279 -0
- package/package.json +1 -1
@@ -0,0 +1,362 @@
|
|
1
|
+
/**
|
2
|
+
* Image processing utilities for multimodal support
|
3
|
+
* Handles format conversion for different AI providers
|
4
|
+
*/
|
5
|
+
import { logger } from "./logger.js";
|
6
|
+
/** MIME type assumed whenever the real image type cannot be determined. */
const FALLBACK_IMAGE_MIME_TYPE = "image/jpeg";
/**
 * File extension -> MIME type table.
 * A Map (not an object literal) so that "extensions" such as "constructor"
 * or "toString" can never resolve to members of Object.prototype.
 */
const IMAGE_MIME_TYPE_BY_EXTENSION = new Map([
    ["jpg", "image/jpeg"],
    ["jpeg", "image/jpeg"],
    ["png", "image/png"],
    ["gif", "image/gif"],
    ["webp", "image/webp"],
    ["bmp", "image/bmp"],
    ["tiff", "image/tiff"],
    ["tif", "image/tiff"],
]);
/** MIME types accepted by ImageProcessor.validateImageFormat. */
const SUPPORTED_IMAGE_MIME_TYPES = new Set([
    "image/jpeg",
    "image/png",
    "image/gif",
    "image/webp",
    "image/bmp",
    "image/tiff",
]);
/**
 * Split a `data:` URI into its MIME type and payload.
 * Tolerates extra header parameters (e.g. `;charset=utf-8;base64`).
 * @param {string} uri - String that starts with "data:".
 * @returns {{ mimeType: string | null, payload: string }} `mimeType` is null
 *   when the header does not name one; `payload` is "" when there is no comma.
 */
function splitImageDataUri(uri) {
    const commaIndex = uri.indexOf(",");
    const header = commaIndex === -1
        ? uri.slice("data:".length)
        : uri.slice("data:".length, commaIndex);
    const mimeType = header.split(";")[0].trim();
    return {
        mimeType: mimeType || null,
        payload: commaIndex === -1 ? "" : uri.slice(commaIndex + 1),
    };
}
/**
 * Decoded byte length of a base64 string; a leading data-URI header is not
 * counted as payload.
 * @param {string} value - Raw base64 or a data URI.
 * @returns {number}
 */
function base64ByteLength(value) {
    const payload = value.startsWith("data:")
        ? splitImageDataUri(value).payload || value
        : value;
    return Buffer.byteLength(payload, "base64");
}
/**
 * Base64-encode binary image data. Non-Buffer binary inputs (Uint8Array,
 * ArrayBuffer, byte arrays) are wrapped first, because their own toString()
 * ignores the encoding argument.
 * @param {Buffer | Uint8Array | ArrayBuffer | number[]} bytes
 * @returns {string}
 */
function imageBytesToBase64(bytes) {
    return Buffer.isBuffer(bytes)
        ? bytes.toString("base64")
        : Buffer.from(bytes).toString("base64");
}
/**
 * Identify an image format from its leading magic bytes.
 * @param {Buffer | Uint8Array} bytes
 * @returns {string | null} MIME type, or null when no signature matches.
 */
function sniffImageMimeType(bytes) {
    const hasSignature = (signature, offset = 0) => bytes.length >= offset + signature.length &&
        signature.every((byte, i) => bytes[offset + i] === byte);
    // PNG: 89 50 4E 47
    if (hasSignature([0x89, 0x50, 0x4e, 0x47])) {
        return "image/png";
    }
    // JPEG: FF D8 FF
    if (hasSignature([0xff, 0xd8, 0xff])) {
        return "image/jpeg";
    }
    // GIF: "GIF8"
    if (hasSignature([0x47, 0x49, 0x46, 0x38])) {
        return "image/gif";
    }
    // WebP: "RIFF" at 0 and "WEBP" at 8
    if (hasSignature([0x52, 0x49, 0x46, 0x46]) &&
        hasSignature([0x57, 0x45, 0x42, 0x50], 8)) {
        return "image/webp";
    }
    // TIFF: "II*\0" (little endian) or "MM\0*" (big endian)
    if (hasSignature([0x49, 0x49, 0x2a, 0x00]) ||
        hasSignature([0x4d, 0x4d, 0x00, 0x2a])) {
        return "image/tiff";
    }
    // BMP: "BM"
    if (hasSignature([0x42, 0x4d])) {
        return "image/bmp";
    }
    return null;
}
/**
 * Normalise any supported image input to bare base64 plus a MIME type.
 * Shared by the Google and Anthropic formatters and the generic fallback.
 * @param {Buffer | string} image - Binary data, raw base64, or a data URI.
 * @returns {{ mimeType: string, data: string }}
 */
function toBareBase64Image(image) {
    if (typeof image !== "string") {
        return {
            mimeType: ImageProcessor.detectImageType(image),
            data: imageBytesToBase64(image),
        };
    }
    if (!image.startsWith("data:")) {
        return { mimeType: FALLBACK_IMAGE_MIME_TYPE, data: image };
    }
    const { mimeType, payload } = splitImageDataUri(image);
    return {
        mimeType: mimeType || FALLBACK_IMAGE_MIME_TYPE,
        // An empty payload falls back to the untouched input.
        data: payload || image,
    };
}
/**
 * Read the frame size from a JPEG by walking its marker segments until a
 * start-of-frame (SOF) marker is found.
 * @param {Buffer} buffer - Data beginning with the JPEG SOI marker (FF D8).
 * @returns {{ width: number, height: number } | null}
 */
function readJpegDimensions(buffer) {
    const readUInt16 = (at) => (buffer[at] << 8) | buffer[at + 1];
    let offset = 2; // skip SOI
    while (offset + 4 <= buffer.length) {
        if (buffer[offset] !== 0xff) {
            return null; // not positioned on a marker: corrupt or unsupported
        }
        const marker = buffer[offset + 1];
        if (marker === 0xff) {
            offset += 1; // fill byte before the real marker
            continue;
        }
        // Stand-alone markers carry no length field (TEM, RSTn, SOI).
        if (marker === 0x01 || (marker >= 0xd0 && marker <= 0xd8)) {
            offset += 2;
            continue;
        }
        // EOI or start-of-scan before any frame header: give up.
        if (marker === 0xd9 || marker === 0xda) {
            return null;
        }
        const segmentLength = readUInt16(offset + 2);
        if (segmentLength < 2) {
            return null;
        }
        // SOF0..SOF15, excluding DHT (C4), JPG (C8) and DAC (CC).
        const isStartOfFrame = marker >= 0xc0 &&
            marker <= 0xcf &&
            marker !== 0xc4 &&
            marker !== 0xc8 &&
            marker !== 0xcc;
        if (isStartOfFrame) {
            if (offset + 9 > buffer.length) {
                return null;
            }
            // Layout: marker(2) length(2) precision(1) height(2) width(2)
            return { width: readUInt16(offset + 7), height: readUInt16(offset + 5) };
        }
        offset += 2 + segmentLength;
    }
    return null;
}
/**
 * Image processor class for handling provider-specific image formatting.
 * Every method is static; inputs are either binary data (Buffer) or strings
 * (URL, data URI, raw base64, or - for type detection only - a file name).
 */
export class ImageProcessor {
    /**
     * Process image for OpenAI (requires a URL or data URI).
     * @param {Buffer | string} image - http(s) URL, data URI, raw base64, or binary data.
     * @returns {string} URL / data URI unchanged; otherwise a new data URI.
     *   Binary data is labelled with the type detected from its magic bytes;
     *   raw base64 strings are labelled image/jpeg.
     * @throws {Error} When the input cannot be encoded.
     */
    static processImageForOpenAI(image) {
        try {
            if (typeof image === "string") {
                // Require the "://" so base64 that merely starts with "http" is
                // not mistaken for a URL.
                if (/^https?:\/\//i.test(image) || image.startsWith("data:")) {
                    return image;
                }
                return `data:${FALLBACK_IMAGE_MIME_TYPE};base64,${image}`;
            }
            const mimeType = ImageProcessor.detectImageType(image);
            return `data:${mimeType};base64,${imageBytesToBase64(image)}`;
        }
        catch (error) {
            logger.error("Failed to process image for OpenAI:", error);
            throw new Error(`Image processing failed for OpenAI: ${error instanceof Error ? error.message : "Unknown error"}`, { cause: error });
        }
    }
    /**
     * Process image for Google AI (requires base64 without data URI prefix).
     * @param {Buffer | string} image - Data URI, raw base64, or binary data.
     * @returns {{ mimeType: string, data: string }}
     * @throws {Error} When the input cannot be encoded.
     */
    static processImageForGoogle(image) {
        try {
            const { mimeType, data } = toBareBase64Image(image);
            return {
                mimeType,
                data, // Google wants base64 WITHOUT data URI prefix
            };
        }
        catch (error) {
            logger.error("Failed to process image for Google AI:", error);
            throw new Error(`Image processing failed for Google AI: ${error instanceof Error ? error.message : "Unknown error"}`, { cause: error });
        }
    }
    /**
     * Process image for Anthropic (requires base64 without data URI prefix).
     * @param {Buffer | string} image - Data URI, raw base64, or binary data.
     * @returns {{ mediaType: string, data: string }}
     * @throws {Error} When the input cannot be encoded.
     */
    static processImageForAnthropic(image) {
        try {
            const { mimeType, data } = toBareBase64Image(image);
            return {
                mediaType: mimeType,
                data, // Anthropic wants base64 WITHOUT data URI prefix
            };
        }
        catch (error) {
            logger.error("Failed to process image for Anthropic:", error);
            throw new Error(`Image processing failed for Anthropic: ${error instanceof Error ? error.message : "Unknown error"}`, { cause: error });
        }
    }
    /**
     * Process image for Vertex AI (model-specific routing).
     * Claude models get the Anthropic shape; everything else (including a
     * missing model name) gets the Google shape.
     * @param {Buffer | string} image
     * @param {string} [model] - Model identifier, matched case-insensitively.
     * @returns {{ mimeType: string, data: string } | { mediaType: string, data: string }}
     * @throws {Error} When the input cannot be encoded.
     */
    static processImageForVertex(image, model) {
        try {
            const modelName = typeof model === "string" ? model.toLowerCase() : "";
            // "gemini" keeps priority over "claude", as in the original routing.
            if (modelName.includes("claude") && !modelName.includes("gemini")) {
                return ImageProcessor.processImageForAnthropic(image);
            }
            return ImageProcessor.processImageForGoogle(image);
        }
        catch (error) {
            logger.error("Failed to process image for Vertex AI:", error);
            throw new Error(`Image processing failed for Vertex AI: ${error instanceof Error ? error.message : "Unknown error"}`, { cause: error });
        }
    }
    /**
     * Detect image type from a data URI, a file name / URL, or binary data.
     * @param {Buffer | string} input
     * @returns {string} MIME type; "image/jpeg" when nothing can be determined.
     */
    static detectImageType(input) {
        try {
            if (typeof input === "string") {
                if (input.startsWith("data:")) {
                    return splitImageDataUri(input).mimeType || FALLBACK_IMAGE_MIME_TYPE;
                }
                // Ignore any query string / fragment so "a.png?w=200" is still a PNG.
                const path = input.split(/[?#]/, 1)[0];
                const candidate = path.slice(path.lastIndexOf(".") + 1);
                // Known extensions are at most 4 characters; skip lower-casing
                // what is probably a large base64 payload.
                const mimeType = candidate.length <= 4
                    ? IMAGE_MIME_TYPE_BY_EXTENSION.get(candidate.toLowerCase())
                    : undefined;
                return mimeType || FALLBACK_IMAGE_MIME_TYPE;
            }
            return sniffImageMimeType(input) || FALLBACK_IMAGE_MIME_TYPE;
        }
        catch (error) {
            logger.warn("Failed to detect image type, using default:", error);
            return FALLBACK_IMAGE_MIME_TYPE;
        }
    }
    /**
     * Validate image size (default 10MB limit).
     * @param {Buffer | string} data - Binary data, raw base64, or a data URI
     *   (only the payload of a data URI is measured).
     * @param {number} [maxSize] - Maximum size in bytes.
     * @returns {boolean} True when the decoded size is within the limit;
     *   false when it is not or the size cannot be determined.
     */
    static validateImageSize(data, maxSize = 10 * 1024 * 1024) {
        try {
            const size = typeof data === "string" ? base64ByteLength(data) : data.length;
            return size <= maxSize;
        }
        catch (error) {
            logger.warn("Failed to validate image size:", error);
            return false;
        }
    }
    /**
     * Validate image format.
     * @param {string} mediaType - MIME type, compared case-insensitively.
     * @returns {boolean} False for unsupported types and non-string input.
     */
    static validateImageFormat(mediaType) {
        return (typeof mediaType === "string" &&
            SUPPORTED_IMAGE_MIME_TYPES.has(mediaType.trim().toLowerCase()));
    }
    /**
     * Get image dimensions from a Buffer (PNG and JPEG only).
     * @param {Buffer} buffer
     * @returns {{ width: number, height: number } | null} Null for other
     *   formats or when the header cannot be parsed.
     */
    static getImageDimensions(buffer) {
        try {
            // PNG: 8-byte signature, then the IHDR chunk with width at byte 16
            // and height at byte 20.
            if (buffer.length >= 24 &&
                buffer.subarray(0, 8).toString("hex") === "89504e470d0a1a0a") {
                return {
                    width: buffer.readUInt32BE(16),
                    height: buffer.readUInt32BE(20),
                };
            }
            if (buffer.length >= 4 && buffer[0] === 0xff && buffer[1] === 0xd8) {
                return readJpegDimensions(buffer);
            }
            return null;
        }
        catch (error) {
            logger.warn("Failed to extract image dimensions:", error);
            return null;
        }
    }
    /**
     * Convert image to ProcessedImage format.
     * @param {Buffer | string} image
     * @param {string} provider - "openai", "google-ai", "google", "anthropic",
     *   "vertex" (case-insensitive); anything else yields bare base64.
     * @param {string} [model] - Only consulted for the "vertex" provider.
     * @returns {{ data: string, mediaType: string, size: number, format: string }}
     *   `format` is "data_uri" for OpenAI and "base64" otherwise. For string
     *   input `size` is the decoded length of the base64 payload, so it is not
     *   meaningful when the string is a URL.
     * @throws {Error} When processing fails.
     */
    static processImage(image, provider, model) {
        try {
            const mediaType = ImageProcessor.detectImageType(image);
            const size = typeof image === "string" ? base64ByteLength(image) : image.length;
            let data;
            let format = "base64";
            switch (provider.toLowerCase()) {
                case "openai":
                    data = ImageProcessor.processImageForOpenAI(image);
                    format = "data_uri";
                    break;
                case "google-ai":
                case "google":
                    data = ImageProcessor.processImageForGoogle(image).data;
                    break;
                case "anthropic":
                    data = ImageProcessor.processImageForAnthropic(image).data;
                    break;
                case "vertex":
                    data = ImageProcessor.processImageForVertex(image, model || "").data;
                    break;
                default:
                    // Unknown provider: default to bare base64.
                    data = toBareBase64Image(image).data;
            }
            return {
                data,
                mediaType,
                size,
                format,
            };
        }
        catch (error) {
            logger.error(`Failed to process image for ${provider}:`, error);
            throw new Error(`Image processing failed: ${error instanceof Error ? error.message : "Unknown error"}`, { cause: error });
        }
    }
}
|
308
|
+
/**
 * Utility functions for image handling.
 * Pure helpers with no dependency on the rest of the module.
 */
export const imageUtils = {
    /**
     * Check if a value is a base64 data URI.
     * @param {unknown} str
     * @returns {boolean} True for strings that start with "data:" and contain "base64,".
     */
    isDataUri: (str) => {
        return (typeof str === "string" &&
            str.startsWith("data:") &&
            str.includes("base64,"));
    },
    /**
     * Check if a string is a valid http(s) URL.
     * @param {string} str
     * @returns {boolean} False for unparseable input and for other schemes.
     */
    isUrl: (str) => {
        try {
            // Parsed only for validation; throws on malformed input.
            new URL(str);
            return str.startsWith("http://") || str.startsWith("https://");
        }
        catch {
            return false;
        }
    },
    /**
     * Check if a string is canonical, non-empty base64.
     * @param {string} str
     * @returns {boolean} True only when decoding and re-encoding reproduces
     *   the input exactly; empty strings and non-strings are rejected.
     */
    isBase64: (str) => {
        if (typeof str !== "string" || str.length === 0) {
            return false;
        }
        try {
            return btoa(atob(str)) === str;
        }
        catch {
            return false;
        }
    },
    /**
     * Extract file extension from filename or URL.
     * Query strings, fragments and directory components are ignored, so
     * "https://host/img/cat.PNG?w=200" yields "png".
     * @param {string} filename
     * @returns {string | null} Lower-cased extension without the dot, or null
     *   when there is none.
     */
    getFileExtension: (filename) => {
        if (typeof filename !== "string") {
            return null;
        }
        const withoutQuery = filename.split(/[?#]/, 1)[0];
        const lastSegment = withoutQuery.split(/[\\/]/).pop();
        const dotIndex = lastSegment.lastIndexOf(".");
        if (dotIndex === -1 || dotIndex === lastSegment.length - 1) {
            return null;
        }
        return lastSegment.slice(dotIndex + 1).toLowerCase();
    },
    /**
     * Convert file size to human readable format (binary units, max 2 decimals).
     * @param {number} bytes
     * @returns {string} e.g. "1.5 KB"; "0 Bytes" for zero, negative or
     *   non-numeric input. Values beyond the largest unit stay in that unit.
     */
    formatFileSize: (bytes) => {
        const units = ["Bytes", "KB", "MB", "GB", "TB", "PB"];
        let value = Number(bytes);
        if (!Number.isFinite(value) || value <= 0) {
            return "0 Bytes";
        }
        // Repeated division by 1024 is exact in floating point, unlike a
        // log-based unit index, and can never step outside the units table.
        let unitIndex = 0;
        while (value >= 1024 && unitIndex < units.length - 1) {
            value /= 1024;
            unitIndex += 1;
        }
        return `${parseFloat(value.toFixed(2))} ${units[unitIndex]}`;
    },
};
|
@@ -1,13 +1,20 @@
|
|
1
1
|
/**
|
2
2
|
* Message Builder Utility
|
3
3
|
* Centralized logic for building message arrays from TextGenerationOptions
|
4
|
+
* Enhanced with multimodal support for images
|
4
5
|
*/
|
5
|
-
import type { ChatMessage } from "../types/conversation.js";
|
6
|
+
import type { ChatMessage, MultimodalChatMessage } from "../types/conversation.js";
|
6
7
|
import type { TextGenerationOptions } from "../types/index.js";
|
7
8
|
import type { StreamOptions } from "../types/streamTypes.js";
|
9
|
+
import type { GenerateOptions } from "../types/generateTypes.js";
|
8
10
|
/**
|
9
11
|
* Build a properly formatted message array for AI providers
|
10
12
|
* Combines system prompt, conversation history, and current user prompt
|
11
13
|
* Supports both TextGenerationOptions and StreamOptions
|
12
14
|
*/
|
13
15
|
export declare function buildMessagesArray(options: TextGenerationOptions | StreamOptions): ChatMessage[];
|
16
|
+
/**
|
17
|
+
* Build multimodal message array with image support
|
18
|
+
* Detects when images are present and routes through provider adapter
|
19
|
+
*/
|
20
|
+
export declare function buildMultimodalMessagesArray(options: GenerateOptions, provider: string, model: string): Promise<MultimodalChatMessage[]>;
|
@@ -1,8 +1,13 @@
|
|
1
1
|
/**
|
2
2
|
* Message Builder Utility
|
3
3
|
* Centralized logic for building message arrays from TextGenerationOptions
|
4
|
+
* Enhanced with multimodal support for images
|
4
5
|
*/
|
5
6
|
import { CONVERSATION_INSTRUCTIONS } from "../config/conversationMemory.js";
|
7
|
+
import { ProviderImageAdapter, MultimodalLogger, } from "../adapters/providerImageAdapter.js";
|
8
|
+
import { logger } from "./logger.js";
|
9
|
+
import { request } from "undici";
|
10
|
+
import { readFileSync, existsSync } from "fs";
|
6
11
|
/**
|
7
12
|
* Build a properly formatted message array for AI providers
|
8
13
|
* Combines system prompt, conversation history, and current user prompt
|
@@ -46,3 +51,277 @@ export function buildMessagesArray(options) {
|
|
46
51
|
}
|
47
52
|
return messages;
|
48
53
|
}
|
54
|
+
/**
 * Assemble the message list for a request that may carry images.
 *
 * Without images this defers to buildMessagesArray and returns shallow copies
 * of its messages. With images it first checks that the provider/model pair
 * supports vision, then emits (in order) an optional system message, the
 * conversation history, and one user message whose content is either plain
 * text or a multimodal content array.
 *
 * @param options  Generation options (`input`, `systemPrompt`, `conversationHistory`).
 * @param provider Provider name, forwarded to the vision check and converters.
 * @param model    Model name, forwarded to the vision check and converters.
 * @returns Promise resolving to the message array.
 * @throws Error when the provider/model pair has no vision support, or when
 *   content conversion fails (logged, then rethrown unchanged).
 */
export async function buildMultimodalMessagesArray(options, provider, model) {
    const hasImages = (options.input.images && options.input.images.length > 0) ||
        (options.input.content &&
            options.input.content.some((part) => part.type === "image"));
    // Text-only request: reuse the standard builder.
    if (!hasImages) {
        return buildMessagesArray(options).map((message) => Object.assign({}, message, { content: message.content }));
    }
    if (!ProviderImageAdapter.supportsVision(provider, model)) {
        throw new Error(`Provider ${provider} with model ${model} does not support vision processing. Supported providers: ${ProviderImageAdapter.getVisionProviders().join(", ")}`);
    }
    const hasConversationHistory = options.conversationHistory && options.conversationHistory.length > 0;
    // System prompt, extended with conversation instructions when history exists.
    let systemPrompt = options.systemPrompt?.trim() || "";
    if (hasConversationHistory) {
        systemPrompt = `${systemPrompt.trim()}${CONVERSATION_INSTRUCTIONS}`;
    }
    const messages = [];
    const trimmedSystemPrompt = systemPrompt.trim();
    if (trimmedSystemPrompt) {
        messages.push({ role: "system", content: trimmedSystemPrompt });
    }
    // Prior turns are carried over as role/content pairs only.
    if (hasConversationHistory && options.conversationHistory) {
        for (const turn of options.conversationHistory) {
            messages.push({ role: turn.role, content: turn.content });
        }
    }
    try {
        const { input } = options;
        let userContent;
        if (input.content && input.content.length > 0) {
            // Structured content takes precedence over the simple images list.
            userContent = await convertContentToProviderFormat(input.content, provider, model);
        }
        else if (input.images && input.images.length > 0) {
            userContent = await convertSimpleImagesToProviderFormat(input.text, input.images, provider, model);
        }
        else {
            userContent = input.text;
        }
        // A string and a content array are both valid message content, so a
        // single push covers the text-only and multimodal cases.
        messages.push({ role: "user", content: userContent });
        return messages;
    }
    catch (error) {
        MultimodalLogger.logError("MULTIMODAL_BUILD", error, {
            provider,
            model,
            hasImages,
            imageCount: options.input.images?.length || 0,
        });
        throw error;
    }
}
|
141
|
+
/**
 * Turn a structured content list into provider-ready user content.
 *
 * Uses the first "text" entry as the prompt and the `data` of every "image"
 * entry as the image list.
 *
 * @param content  Content entries, each with a `type` of "text" or "image".
 * @param provider Provider name, forwarded to the image converter.
 * @param _model   Model name, forwarded to the image converter.
 * @returns The text alone when there are no images; otherwise the result of
 *   convertSimpleImagesToProviderFormat.
 * @throws Error when no text entry is present.
 */
async function convertContentToProviderFormat(content, provider, _model) {
    const isOfType = (wanted) => (entry) => entry.type === wanted;
    const firstText = content.find(isOfType("text"));
    const imageEntries = content.filter(isOfType("image"));
    if (!firstText) {
        throw new Error("Multimodal content must include at least one text element");
    }
    if (imageEntries.length === 0) {
        return firstText.text;
    }
    // Each entry's `data` is a Buffer or a string.
    const imagePayloads = imageEntries.map((entry) => entry.data);
    const converted = await convertSimpleImagesToProviderFormat(firstText.text, imagePayloads, provider, _model);
    return converted;
}
|
157
|
+
/**
 * Check if a value is an internet (http/https) URL.
 * The scheme is matched case-insensitively, so "HTTPS://..." is recognised
 * rather than being mistaken for a local file path.
 * @param {unknown} input
 * @returns {boolean} False for non-strings and for any other scheme.
 */
function isInternetUrl(input) {
    return typeof input === "string" && /^https?:\/\//i.test(input);
}
|
163
|
+
/**
 * Download image from URL and convert to base64 data URI.
 *
 * Only HTTP 200 responses with an `image/*` content type are accepted, and
 * the body is limited to 10MB. The limit is enforced from the declared
 * Content-Length when present and again while streaming, so an oversized
 * body is never buffered in full.
 *
 * @param {string} url - http(s) URL of the image.
 * @returns {Promise<string>} `data:<mime>;base64,<payload>`, where `<mime>` is
 *   the bare media type (header parameters such as `; charset=...` removed).
 * @throws {Error} On network failure, non-200 status, non-image content type
 *   or an oversized body. The original error is attached as `cause`.
 */
async function downloadImageFromUrl(url) {
    const maxSize = 10 * 1024 * 1024; // 10MB
    try {
        const response = await request(url, {
            method: "GET",
            headersTimeout: 10000, // 10 second timeout for headers
            bodyTimeout: 30000, // 30 second timeout for body
            maxRedirections: 5,
        });
        // undici expects every response body to be consumed; release it
        // (best effort) before rejecting a response.
        const discardBody = async () => {
            try {
                if (typeof response.body?.dump === "function") {
                    await response.body.dump();
                }
                else {
                    response.body?.resume?.();
                }
            }
            catch {
                // Nothing useful to do: the response is being rejected anyway.
            }
        };
        if (response.statusCode !== 200) {
            await discardBody();
            throw new Error(`HTTP ${response.statusCode}: Failed to download image from ${url}`);
        }
        // Header values may be a string or an array of strings and may carry
        // parameters; keep only the bare media type so the data URI is well formed.
        const rawContentType = response.headers["content-type"];
        const contentTypeHeader = Array.isArray(rawContentType)
            ? rawContentType[0]
            : rawContentType;
        const contentType = String(contentTypeHeader || "image/jpeg")
            .split(";")[0]
            .trim()
            .toLowerCase();
        if (!contentType.startsWith("image/")) {
            await discardBody();
            throw new Error(`URL does not point to an image. Content-Type: ${contentType}`);
        }
        // Reject early when the server declares an oversized body.
        const declaredLength = Number(response.headers["content-length"]);
        if (Number.isFinite(declaredLength) && declaredLength > maxSize) {
            await discardBody();
            throw new Error(`Image too large: ${declaredLength} bytes (max: ${maxSize} bytes)`);
        }
        // Stream the body, aborting as soon as the limit is crossed. Leaving
        // the loop by throwing destroys the stream.
        const chunks = [];
        let receivedBytes = 0;
        for await (const chunk of response.body) {
            receivedBytes += chunk.length;
            if (receivedBytes > maxSize) {
                throw new Error(`Image too large: ${receivedBytes} bytes (max: ${maxSize} bytes)`);
            }
            chunks.push(chunk);
        }
        const base64 = Buffer.concat(chunks).toString("base64");
        return `data:${contentType};base64,${base64}`;
    }
    catch (error) {
        MultimodalLogger.logError("URL_DOWNLOAD_FAILED", error, { url });
        throw new Error(`Failed to download image from ${url}: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
}
|
204
|
+
/** Extension -> MIME type table for images read from local file paths. */
const LOCAL_IMAGE_MIME_TYPES = new Map([
    ["png", "image/png"],
    ["gif", "image/gif"],
    ["webp", "image/webp"],
    ["bmp", "image/bmp"],
    ["tiff", "image/tiff"],
    ["tif", "image/tiff"],
]);
/** Placeholder for a URL image whose download failed and is therefore skipped. */
const SKIPPED_IMAGE = Symbol("skipped-image");
/**
 * Read a local image file and encode it as a base64 data URI.
 * The MIME type comes from the file extension (image/jpeg when unknown).
 * @param {string} filePath
 * @param {number} index - Position of the image in the input, for logging.
 * @returns {{ image: string, mimeType: string }}
 * @throws {Error} When the file is missing or cannot be read.
 */
function readImageFileAsDataUri(filePath, index) {
    try {
        if (!existsSync(filePath)) {
            throw new Error(`Image file not found: ${filePath}`);
        }
        const base64 = readFileSync(filePath).toString("base64");
        const extension = filePath.toLowerCase().split(".").pop();
        const mimeType = LOCAL_IMAGE_MIME_TYPES.get(extension) || "image/jpeg";
        return { image: `data:${mimeType};base64,${base64}`, mimeType };
    }
    catch (error) {
        MultimodalLogger.logError("FILE_PATH_CONVERSION", error, {
            index,
            filePath,
        });
        throw new Error(`Failed to convert file path to base64: ${filePath}. ${error}`);
    }
}
/**
 * Build one `{ type: "image", image, mimeType }` content part.
 * @param {Buffer | string} image - Buffer, data URI, or local file path.
 * @param {number} index - Position of the image in the input, for logging.
 * @returns {{ type: "image", image: string, mimeType: string }}
 */
function toImageContentPart(image, index) {
    if (typeof image !== "string") {
        // Buffer - convert to base64 data URI with the default mime type.
        const mimeType = "image/jpeg";
        return {
            type: "image",
            image: `data:${mimeType};base64,${image.toString("base64")}`,
            mimeType,
        };
    }
    if (image.startsWith("data:")) {
        // Only the header is inspected, so large payloads are not scanned and
        // extra header parameters do not hide the media type.
        const match = /^data:([^;,]+)[;,]/.exec(image);
        return {
            type: "image",
            image, // Keep as data URI for Vercel AI SDK
            mimeType: match ? match[1].trim() : "image/jpeg",
        };
    }
    // Anything else is treated as a local file path.
    return { type: "image", ...readImageFileAsDataUri(image, index) };
}
/**
 * Convert simple images format to Vercel AI SDK format with smart auto-detection
 * - URLs: Downloaded (in parallel) and converted to base64 data URIs
 * - Local files: Read and converted to base64 data URIs
 * - Buffers/Data URIs: Encoded / passed through
 *
 * Images keep the order in which the caller supplied them. A URL that cannot
 * be downloaded is logged and skipped; any other failure aborts the conversion.
 *
 * @param {string} text - Prompt text; becomes the first content part.
 * @param {Array<Buffer | string>} images
 * @param {string} provider - Used for error logging only.
 * @param {string} _model - Unused.
 * @returns {Promise<Array<object>>} `[{ type: "text", text }, { type: "image", image, mimeType }, ...]`
 * @throws {Error} When a file path or Buffer cannot be converted.
 */
async function convertSimpleImagesToProviderFormat(text, images, provider, _model) {
    // Resolve URLs in place so every image keeps its original position.
    const resolvedImages = await Promise.all(images.map(async (image) => {
        if (typeof image !== "string" || !isInternetUrl(image)) {
            return image;
        }
        try {
            return await downloadImageFromUrl(image);
        }
        catch (error) {
            MultimodalLogger.logError("URL_DOWNLOAD_FAILED_SKIPPING", error, { url: image });
            // Continue processing other images even if one URL fails
            logger.warn(`Failed to download image from ${image}, skipping: ${error instanceof Error ? error.message : String(error)}`);
            return SKIPPED_IMAGE;
        }
    }));
    const content = [{ type: "text", text }];
    resolvedImages.forEach((image, index) => {
        if (image === SKIPPED_IMAGE) {
            return;
        }
        try {
            // Vercel AI SDK expects { type: 'image', image, mimeType }; mimeType
            // is included for Vertex AI compatibility.
            content.push(toImageContentPart(image, index));
        }
        catch (error) {
            MultimodalLogger.logError("ADD_IMAGE_TO_CONTENT", error, {
                index,
                provider,
            });
            throw error;
        }
    });
    return content;
}
|