@juspay/neurolink 7.46.0 → 7.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/adapters/providerImageAdapter.js +12 -0
- package/dist/core/constants.js +1 -1
- package/dist/lib/adapters/providerImageAdapter.js +12 -0
- package/dist/lib/core/constants.js +1 -1
- package/dist/lib/providers/azureOpenai.js +36 -3
- package/dist/lib/providers/googleAiStudio.js +37 -3
- package/dist/lib/providers/googleVertex.js +37 -3
- package/dist/lib/utils/imageProcessor.d.ts +44 -0
- package/dist/lib/utils/imageProcessor.js +159 -8
- package/dist/lib/utils/messageBuilder.d.ts +4 -6
- package/dist/lib/utils/messageBuilder.js +145 -1
- package/dist/providers/azureOpenai.js +36 -3
- package/dist/providers/googleAiStudio.js +37 -3
- package/dist/providers/googleVertex.js +37 -3
- package/dist/utils/imageProcessor.d.ts +44 -0
- package/dist/utils/imageProcessor.js +159 -8
- package/dist/utils/messageBuilder.d.ts +4 -6
- package/dist/utils/messageBuilder.js +145 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED

@@ -1,3 +1,9 @@
+## [7.47.0](https://github.com/juspay/neurolink/compare/v7.46.0...v7.47.0) (2025-09-25)
+
+### Features
+
+- **(chat):** Implement multimodal UI and extend SDK support ([12a2f59](https://github.com/juspay/neurolink/commit/12a2f59c4826e82ab1feb1347d08980682748ad2))
+
 ## [7.46.0](https://github.com/juspay/neurolink/compare/v7.45.0...v7.46.0) (2025-09-24)
 
 ### Features
package/dist/adapters/providerImageAdapter.js
CHANGED

@@ -34,6 +34,14 @@ const VISION_CAPABILITIES = {
         "claude-3-sonnet",
         "claude-3-haiku",
     ],
+    azure: [
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-4-turbo",
+        "gpt-4-vision-preview",
+        "gpt-4.1",
+        "gpt-4",
+    ],
     vertex: [
         // Gemini models on Vertex AI
         "gemini-2.5-pro",
@@ -78,6 +86,10 @@ export class ProviderImageAdapter {
             case "openai":
                 adaptedPayload = this.formatForOpenAI(text, images);
                 break;
+            case "azure":
+            case "azure-openai":
+                adaptedPayload = this.formatForOpenAI(text, images);
+                break;
             case "google-ai":
             case "google":
                 adaptedPayload = this.formatForGoogleAI(text, images);
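Taken together, these two hunks register Azure deployments in the vision-capability table and route them through the existing OpenAI image formatter. A minimal sketch of how the capability check behaves after this change — the import path is an assumption based on the file list above, and `supportsVision` is the static helper used later in `buildMultimodalMessagesArray`:

```js
// Hypothetical usage sketch; the import path is inferred from the package layout above.
import { ProviderImageAdapter } from "@juspay/neurolink/dist/adapters/providerImageAdapter.js";

// "azure" is now a key in VISION_CAPABILITIES, so vision-capable models should pass:
console.log(ProviderImageAdapter.supportsVision("azure", "gpt-4o"));       // expected: true
console.log(ProviderImageAdapter.supportsVision("azure", "gpt-35-turbo")); // expected: false (not in the list)
```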
package/dist/lib/adapters/providerImageAdapter.js
CHANGED

The changes are identical to package/dist/adapters/providerImageAdapter.js shown above.
package/dist/lib/providers/azureOpenai.js
CHANGED

@@ -4,7 +4,7 @@ import { BaseProvider } from "../core/baseProvider.js";
 import { APIVersions } from "../types/providers.js";
 import { validateApiKey, createAzureAPIKeyConfig, createAzureEndpointConfig, } from "../utils/providerConfig.js";
 import { logger } from "../utils/logger.js";
-import { buildMessagesArray } from "../utils/messageBuilder.js";
+import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
 export class AzureOpenAIProvider extends BaseProvider {
@@ -109,8 +109,41 @@ export class AzureOpenAIProvider extends BaseProvider {
                 })),
             });
         }
-        // Build message array from options
-        const messages = buildMessagesArray(options);
+        // Build message array from options with multimodal support
+        const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+        let messages;
+        if (hasMultimodalInput) {
+            logger.debug(`Azure OpenAI: Detected multimodal input, using multimodal message builder`, {
+                hasImages: !!options.input?.images?.length,
+                imageCount: options.input?.images?.length || 0,
+                hasContent: !!options.input?.content?.length,
+                contentCount: options.input?.content?.length || 0,
+            });
+            // Create multimodal options for buildMultimodalMessagesArray
+            const multimodalOptions = {
+                input: {
+                    text: options.input?.text || "",
+                    images: options.input?.images,
+                    content: options.input?.content,
+                },
+                systemPrompt: options.systemPrompt,
+                conversationHistory: options.conversationMessages,
+                provider: this.providerName,
+                model: this.modelName,
+                temperature: options.temperature,
+                maxTokens: options.maxTokens,
+                enableAnalytics: options.enableAnalytics,
+                enableEvaluation: options.enableEvaluation,
+                context: options.context,
+            };
+            const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+            // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+            messages = convertToCoreMessages(mm);
+        }
+        else {
+            logger.debug(`Azure OpenAI: Text-only input, using standard message builder`);
+            messages = buildMessagesArray(options);
+        }
         const model = await this.getAISDKModelWithMiddleware(options);
         const stream = await streamText({
             model,
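The same detection-and-branch block is added to the Google AI Studio and Google Vertex providers below; only the log prefix differs. A sketch of the options shape that flips `hasMultimodalInput` to true and sends a request down the multimodal path — field names are taken from the diff, while the surrounding call site is illustrative:

```js
// Illustrative options object; field names come from the diff above.
const options = {
    input: {
        text: "What is shown in this image?",
        // Any non-empty images (or content) array triggers the multimodal branch.
        images: ["data:image/png;base64,iVBORw0KGgo..."],
    },
    systemPrompt: "You are a concise visual assistant.",
    temperature: 0.2,
    maxTokens: 512,
};
// hasMultimodalInput === true, so messages are built via
// buildMultimodalMessagesArray(...) and then convertToCoreMessages(...).
```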
package/dist/lib/providers/googleAiStudio.js
CHANGED

@@ -7,8 +7,9 @@ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
 import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
 import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
-import { buildMessagesArray } from "../utils/messageBuilder.js";
+import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
 // Google AI Live API types now imported from ../types/providerSpecific.js
+// Import proper types for multimodal message handling
 // Create Google GenAI client
 async function createGoogleGenAIClient(apiKey) {
     const mod = await import("@google/genai");
@@ -90,8 +91,41 @@ export class GoogleAIStudioProvider extends BaseProvider {
         // Get tools consistently with generate method
         const shouldUseTools = !options.disableTools && this.supportsTools();
         const tools = shouldUseTools ? await this.getAllTools() : {};
-        // Build message array from options
-        const messages = buildMessagesArray(options);
+        // Build message array from options with multimodal support
+        const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+        let messages;
+        if (hasMultimodalInput) {
+            logger.debug(`Google AI Studio: Detected multimodal input, using multimodal message builder`, {
+                hasImages: !!options.input?.images?.length,
+                imageCount: options.input?.images?.length || 0,
+                hasContent: !!options.input?.content?.length,
+                contentCount: options.input?.content?.length || 0,
+            });
+            // Create multimodal options for buildMultimodalMessagesArray
+            const multimodalOptions = {
+                input: {
+                    text: options.input?.text || "",
+                    images: options.input?.images,
+                    content: options.input?.content,
+                },
+                systemPrompt: options.systemPrompt,
+                conversationHistory: options.conversationMessages,
+                provider: this.providerName,
+                model: this.modelName,
+                temperature: options.temperature,
+                maxTokens: options.maxTokens,
+                enableAnalytics: options.enableAnalytics,
+                enableEvaluation: options.enableEvaluation,
+                context: options.context,
+            };
+            const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+            // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+            messages = convertToCoreMessages(mm);
+        }
+        else {
+            logger.debug(`Google AI Studio: Text-only input, using standard message builder`);
+            messages = buildMessagesArray(options);
+        }
         const result = await streamText({
             model,
             messages: messages,
package/dist/lib/providers/googleVertex.js
CHANGED

@@ -11,8 +11,9 @@ import fs from "fs";
 import path from "path";
 import os from "os";
 import dns from "dns";
-import { buildMessagesArray } from "../utils/messageBuilder.js";
+import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
+// Import proper types for multimodal message handling
 // Enhanced Anthropic support with direct imports
 // Using the dual provider architecture from Vercel AI SDK
 const hasAnthropicSupport = () => {
@@ -594,8 +595,41 @@ export class GoogleVertexProvider extends BaseProvider {
         try {
             // Validate stream options
             this.validateStreamOptionsOnly(options);
-            // Build message array from options
-            const messages = buildMessagesArray(options);
+            // Build message array from options with multimodal support
+            const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+            let messages;
+            if (hasMultimodalInput) {
+                logger.debug(`${functionTag}: Detected multimodal input, using multimodal message builder`, {
+                    hasImages: !!options.input?.images?.length,
+                    imageCount: options.input?.images?.length || 0,
+                    hasContent: !!options.input?.content?.length,
+                    contentCount: options.input?.content?.length || 0,
+                });
+                // Create multimodal options for buildMultimodalMessagesArray
+                const multimodalOptions = {
+                    input: {
+                        text: options.input?.text || "",
+                        images: options.input?.images,
+                        content: options.input?.content,
+                    },
+                    systemPrompt: options.systemPrompt,
+                    conversationHistory: options.conversationMessages,
+                    provider: this.providerName,
+                    model: this.modelName,
+                    temperature: options.temperature,
+                    maxTokens: options.maxTokens,
+                    enableAnalytics: options.enableAnalytics,
+                    enableEvaluation: options.enableEvaluation,
+                    context: options.context,
+                };
+                const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+                // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+                messages = convertToCoreMessages(mm);
+            }
+            else {
+                logger.debug(`${functionTag}: Text-only input, using standard message builder`);
+                messages = buildMessagesArray(options);
+            }
             const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
             // Get all available tools (direct + MCP + external) for streaming
             const shouldUseTools = !options.disableTools && this.supportsTools();
package/dist/lib/utils/imageProcessor.d.ts
CHANGED

@@ -81,4 +81,48 @@ export declare const imageUtils: {
      * Convert file size to human readable format
      */
     formatFileSize: (bytes: number) => string;
+    /**
+     * Convert Buffer to base64 string
+     */
+    bufferToBase64: (buffer: Buffer) => string;
+    /**
+     * Convert base64 string to Buffer
+     */
+    base64ToBuffer: (base64: string) => Buffer;
+    /**
+     * Convert file path to base64 data URI
+     */
+    fileToBase64DataUri: (filePath: string, maxBytes?: number) => Promise<string>;
+    /**
+     * Convert URL to base64 data URI by downloading the image
+     */
+    urlToBase64DataUri: (url: string, { timeoutMs, maxBytes }?: {
+        timeoutMs?: number | undefined;
+        maxBytes?: number | undefined;
+    }) => Promise<string>;
+    /**
+     * Extract base64 data from data URI
+     */
+    extractBase64FromDataUri: (dataUri: string) => string;
+    /**
+     * Extract MIME type from data URI
+     */
+    extractMimeTypeFromDataUri: (dataUri: string) => string;
+    /**
+     * Create data URI from base64 and MIME type
+     */
+    createDataUri: (base64: string, mimeType?: string) => string;
+    /**
+     * Validate base64 string format
+     */
+    isValidBase64: (str: string) => boolean;
+    /**
+     * Get base64 string size in bytes
+     */
+    getBase64Size: (base64: string) => number;
+    /**
+     * Compress base64 image by reducing quality (basic implementation)
+     * Note: This is a placeholder - for production use, consider using sharp or similar
+     */
+    compressBase64: (base64: string, _quality?: number) => string;
 };
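The declarations above define the new `imageUtils` surface. A short round-trip sketch, under the assumption that `imageUtils` is importable from the compiled module at this path:

```js
// Hypothetical usage of the new helpers; the import path is an assumption.
import { imageUtils } from "@juspay/neurolink/dist/utils/imageProcessor.js";

const dataUri = await imageUtils.fileToBase64DataUri("./photo.png"); // 10 MB default cap
const mime = imageUtils.extractMimeTypeFromDataUri(dataUri);         // e.g. "image/png"
const raw = imageUtils.extractBase64FromDataUri(dataUri);
console.log(mime, imageUtils.getBase64Size(raw), imageUtils.isValidBase64(dataUri));

// createDataUri is the inverse of the two extractors:
const rebuilt = imageUtils.createDataUri(raw, mime); // equals dataUri
```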
package/dist/lib/utils/imageProcessor.js
CHANGED

@@ -151,6 +151,8 @@ export class ImageProcessor {
             bmp: "image/bmp",
             tiff: "image/tiff",
             tif: "image/tiff",
+            svg: "image/svg+xml",
+            avif: "image/avif",
         };
         return imageTypes[extension || ""] || "image/jpeg";
     }
@@ -183,6 +185,21 @@ export class ImageProcessor {
                 return "image/webp";
             }
         }
+        // SVG: check for "<svg" or "<?xml" at start (text-based)
+        if (input.length >= 4) {
+            const start = input.subarray(0, 4).toString();
+            if (start === "<svg" || start === "<?xm") {
+                return "image/svg+xml";
+            }
+        }
+        // AVIF: check for "ftypavif" signature at bytes 4-11
+        if (input.length >= 12) {
+            const ftyp = input.subarray(4, 8).toString();
+            const brand = input.subarray(8, 12).toString();
+            if (ftyp === "ftyp" && brand === "avif") {
+                return "image/avif";
+            }
+        }
     }
     return "image/jpeg"; // Default fallback
 }
@@ -217,6 +234,8 @@ export class ImageProcessor {
             "image/webp",
             "image/bmp",
             "image/tiff",
+            "image/svg+xml",
+            "image/avif",
         ];
         return supportedFormats.includes(mediaType.toLowerCase());
     }
@@ -332,14 +351,7 @@ export const imageUtils = {
     /**
     * Check if a string is base64 encoded
    */
-    isBase64: (str) => {
-        try {
-            return btoa(atob(str)) === str;
-        }
-        catch {
-            return false;
-        }
-    },
+    isBase64: (str) => imageUtils.isValidBase64(str),
     /**
     * Extract file extension from filename or URL
    */
@@ -359,4 +371,143 @@ export const imageUtils = {
         const i = Math.floor(Math.log(bytes) / Math.log(k));
         return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + " " + sizes[i];
     },
+    /**
+     * Convert Buffer to base64 string
+     */
+    bufferToBase64: (buffer) => {
+        return buffer.toString("base64");
+    },
+    /**
+     * Convert base64 string to Buffer
+     */
+    base64ToBuffer: (base64) => {
+        // Remove data URI prefix if present
+        const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
+        return Buffer.from(cleanBase64, "base64");
+    },
+    /**
+     * Convert file path to base64 data URI
+     */
+    fileToBase64DataUri: async (filePath, maxBytes = 10 * 1024 * 1024) => {
+        try {
+            const fs = await import("fs/promises");
+            // File existence and type validation
+            const stat = await fs.stat(filePath);
+            if (!stat.isFile()) {
+                throw new Error("Not a file");
+            }
+            // Size check before reading - prevent memory exhaustion
+            if (stat.size > maxBytes) {
+                throw new Error(`File too large: ${stat.size} bytes (max: ${maxBytes} bytes)`);
+            }
+            const buffer = await fs.readFile(filePath);
+            // Enhanced MIME detection: try buffer content first, fallback to filename
+            const mimeType = ImageProcessor.detectImageType(buffer) ||
+                ImageProcessor.detectImageType(filePath);
+            const base64 = buffer.toString("base64");
+            return `data:${mimeType};base64,${base64}`;
+        }
+        catch (error) {
+            throw new Error(`Failed to convert file to base64: ${error instanceof Error ? error.message : "Unknown error"}`);
+        }
+    },
+    /**
+     * Convert URL to base64 data URI by downloading the image
+     */
+    urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024 } = {}) => {
+        try {
+            // Basic protocol whitelist
+            if (!/^https?:\/\//i.test(url)) {
+                throw new Error("Unsupported protocol");
+            }
+            const controller = new AbortController();
+            const t = setTimeout(() => controller.abort(), timeoutMs);
+            try {
+                const response = await fetch(url, { signal: controller.signal });
+                if (!response.ok) {
+                    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+                }
+                const contentType = response.headers.get("content-type") || "";
+                if (!/^image\//i.test(contentType)) {
+                    throw new Error(`Unsupported content-type: ${contentType || "unknown"}`);
+                }
+                const len = Number(response.headers.get("content-length") || 0);
+                if (len && len > maxBytes) {
+                    throw new Error(`Content too large: ${len} bytes`);
+                }
+                const buffer = await response.arrayBuffer();
+                if (buffer.byteLength > maxBytes) {
+                    throw new Error(`Downloaded content too large: ${buffer.byteLength} bytes`);
+                }
+                const base64 = Buffer.from(buffer).toString("base64");
+                return `data:${contentType || "image/jpeg"};base64,${base64}`;
+            }
+            finally {
+                clearTimeout(t);
+            }
+        }
+        catch (error) {
+            throw new Error(`Failed to download and convert URL to base64: ${error instanceof Error ? error.message : "Unknown error"}`);
+        }
+    },
+    /**
+     * Extract base64 data from data URI
+     */
+    extractBase64FromDataUri: (dataUri) => {
+        if (!dataUri.includes(",")) {
+            return dataUri; // Already just base64
+        }
+        return dataUri.split(",")[1];
+    },
+    /**
+     * Extract MIME type from data URI
+     */
+    extractMimeTypeFromDataUri: (dataUri) => {
+        const match = dataUri.match(/^data:([^;]+);base64,/);
+        return match ? match[1] : "image/jpeg";
+    },
+    /**
+     * Create data URI from base64 and MIME type
+     */
+    createDataUri: (base64, mimeType = "image/jpeg") => {
+        // Remove data URI prefix if already present
+        const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
+        return `data:${mimeType};base64,${cleanBase64}`;
+    },
+    /**
+     * Validate base64 string format
+     */
+    isValidBase64: (str) => {
+        try {
+            // Remove data URI prefix if present
+            const cleanBase64 = str.includes(",") ? str.split(",")[1] : str;
+            // Check if it's valid base64
+            const decoded = Buffer.from(cleanBase64, "base64");
+            const reencoded = decoded.toString("base64");
+            // Remove padding for comparison (base64 can have different padding)
+            const normalizeBase64 = (b64) => b64.replace(/=+$/, "");
+            return normalizeBase64(cleanBase64) === normalizeBase64(reencoded);
+        }
+        catch {
+            return false;
+        }
+    },
+    /**
+     * Get base64 string size in bytes
+     */
+    getBase64Size: (base64) => {
+        // Remove data URI prefix if present
+        const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
+        return Buffer.byteLength(cleanBase64, "base64");
+    },
+    /**
+     * Compress base64 image by reducing quality (basic implementation)
+     * Note: This is a placeholder - for production use, consider using sharp or similar
+     */
+    compressBase64: (base64, _quality = 0.8) => {
+        // This is a basic implementation that just returns the original
+        // In a real implementation, you'd use an image processing library
+        logger.warn("Base64 compression not implemented - returning original");
+        return base64;
+    },
 };
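The new signature checks rely on the ISO-BMFF layout for AVIF (a `ftyp` box whose major brand is `avif` at bytes 4-11) and on SVG's text-based prefix. A sketch of feeding synthetic headers to the detector — the import path is an assumption, though `detectImageType` is called statically elsewhere in this diff:

```js
// Hypothetical check; the import path is an assumption.
import { ImageProcessor } from "@juspay/neurolink/dist/utils/imageProcessor.js";

// First 12 bytes of an AVIF file: 4-byte box size, "ftyp", then brand "avif".
const avifHeader = Buffer.concat([
    Buffer.from([0x00, 0x00, 0x00, 0x20]), // box size (value irrelevant to the check)
    Buffer.from("ftypavif"),
]);
console.log(ImageProcessor.detectImageType(avifHeader));                        // "image/avif"
console.log(ImageProcessor.detectImageType(Buffer.from('<svg xmlns="...">'))); // "image/svg+xml"
```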
package/dist/lib/utils/messageBuilder.d.ts
CHANGED

@@ -7,13 +7,12 @@ import type { MultimodalChatMessage } from "../types/conversation.js";
 import type { TextGenerationOptions } from "../types/index.js";
 import type { StreamOptions } from "../types/streamTypes.js";
 import type { GenerateOptions } from "../types/generateTypes.js";
+import type { CoreMessage } from "ai";
 /**
- *
+ * Type-safe conversion from MultimodalChatMessage[] to CoreMessage[]
+ * Filters out invalid content and ensures strict CoreMessage contract compliance
  */
-
-    role: "user" | "assistant" | "system";
-    content: string;
-};
+export declare function convertToCoreMessages(messages: MultimodalChatMessage[]): CoreMessage[];
 /**
  * Build a properly formatted message array for AI providers
  * Combines system prompt, conversation history, and current user prompt
@@ -25,4 +24,3 @@ export declare function buildMessagesArray(options: TextGenerationOptions | Stre
  * Detects when images are present and routes through provider adapter
  */
 export declare function buildMultimodalMessagesArray(options: GenerateOptions, provider: string, model: string): Promise<MultimodalChatMessage[]>;
-export {};
package/dist/lib/utils/messageBuilder.js
CHANGED

@@ -8,6 +8,147 @@ import { ProviderImageAdapter, MultimodalLogger, } from "../adapters/providerImageAdapter.js";
 import { logger } from "./logger.js";
 import { request } from "undici";
 import { readFileSync, existsSync } from "fs";
+/**
+ * Type guard for validating message roles
+ */
+function isValidRole(role) {
+    return (typeof role === "string" &&
+        (role === "user" || role === "assistant" || role === "system"));
+}
+/**
+ * Type guard for validating content items
+ */
+function isValidContentItem(item) {
+    if (!item || typeof item !== "object") {
+        return false;
+    }
+    const contentItem = item;
+    if (contentItem.type === "text") {
+        return typeof contentItem.text === "string";
+    }
+    if (contentItem.type === "image") {
+        return (typeof contentItem.image === "string" &&
+            (contentItem.mimeType === undefined ||
+                typeof contentItem.mimeType === "string"));
+    }
+    return false;
+}
+/**
+ * Safely convert content item to AI SDK content format
+ */
+function convertContentItem(item) {
+    if (!isValidContentItem(item)) {
+        return null;
+    }
+    const contentItem = item;
+    if (contentItem.type === "text" && typeof contentItem.text === "string") {
+        return { type: "text", text: contentItem.text };
+    }
+    if (contentItem.type === "image" && typeof contentItem.image === "string") {
+        return {
+            type: "image",
+            image: contentItem.image,
+            ...(contentItem.mimeType && { mimeType: contentItem.mimeType }),
+        };
+    }
+    return null;
+}
+/**
+ * Type-safe conversion from MultimodalChatMessage[] to CoreMessage[]
+ * Filters out invalid content and ensures strict CoreMessage contract compliance
+ */
+export function convertToCoreMessages(messages) {
+    return messages
+        .map((msg) => {
+        // Validate role
+        if (!isValidRole(msg.role)) {
+            logger.warn("Invalid message role found, skipping", { role: msg.role });
+            return null;
+        }
+        // Handle string content
+        if (typeof msg.content === "string") {
+            // Create properly typed discriminated union messages
+            if (msg.role === "system") {
+                return {
+                    role: "system",
+                    content: msg.content,
+                };
+            }
+            else if (msg.role === "user") {
+                return {
+                    role: "user",
+                    content: msg.content,
+                };
+            }
+            else if (msg.role === "assistant") {
+                return {
+                    role: "assistant",
+                    content: msg.content,
+                };
+            }
+        }
+        // Handle array content (multimodal) - only user messages support full multimodal content
+        if (Array.isArray(msg.content)) {
+            const validContent = msg.content
+                .map(convertContentItem)
+                .filter((item) => item !== null);
+            // If no valid content items, skip the message
+            if (validContent.length === 0) {
+                logger.warn("No valid content items found in multimodal message, skipping");
+                return null;
+            }
+            if (msg.role === "user") {
+                // User messages support both text and image content
+                return {
+                    role: "user",
+                    content: validContent,
+                };
+            }
+            else if (msg.role === "assistant") {
+                // Assistant messages only support text content, filter out images
+                const textOnlyContent = validContent.filter((item) => item.type === "text");
+                if (textOnlyContent.length === 0) {
+                    // If no text content, convert to empty string
+                    return {
+                        role: "assistant",
+                        content: "",
+                    };
+                }
+                else if (textOnlyContent.length === 1) {
+                    // Single text item, use string content
+                    return {
+                        role: "assistant",
+                        content: textOnlyContent[0].text,
+                    };
+                }
+                else {
+                    // Multiple text items, concatenate them
+                    const combinedText = textOnlyContent
+                        .map((item) => item.text)
+                        .join(" ");
+                    return {
+                        role: "assistant",
+                        content: combinedText,
+                    };
+                }
+            }
+            else {
+                // System messages cannot have multimodal content, convert to text
+                const textContent = validContent.find((item) => item.type === "text")?.text || "";
+                return {
+                    role: "system",
+                    content: textContent,
+                };
+            }
+        }
+        // Invalid content type
+        logger.warn("Invalid message content type found, skipping", {
+            contentType: typeof msg.content,
+        });
+        return null;
+    })
+        .filter((msg) => msg !== null);
+}
 /**
  * Convert ChatMessage to CoreMessage for AI SDK compatibility
  */
@@ -84,7 +225,10 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
     // If no images, use standard message building and convert to MultimodalChatMessage[]
     if (!hasImages) {
         const standardMessages = buildMessagesArray(options);
-        return standardMessages.map((msg) => ({
+        return standardMessages.map((msg) => ({
+            role: msg.role,
+            content: typeof msg.content === "string" ? msg.content : msg.content,
+        }));
     }
     // Validate provider supports vision
     if (!ProviderImageAdapter.supportsVision(provider, model)) {
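The conversion rules above can be summarized with a small example: string-content messages pass through as-is, user messages keep mixed text/image parts, assistant messages are flattened to text, and invalid roles are dropped. A behavior sketch, with an illustrative import path:

```js
// Behavior sketch for convertToCoreMessages, following the implementation above.
import { convertToCoreMessages } from "@juspay/neurolink/dist/utils/messageBuilder.js";

const core = convertToCoreMessages([
    { role: "system", content: "Be concise." },
    {
        role: "user",
        content: [
            { type: "text", text: "Describe this image." },
            { type: "image", image: "data:image/jpeg;base64,/9j/4AAQ...", mimeType: "image/jpeg" },
        ],
    },
    { role: "assistant", content: [{ type: "text", text: "A cat." }] },
    { role: "tool", content: "ignored" }, // invalid role: logged and dropped
]);
// core[1].content keeps both parts; core[2].content becomes the string "A cat.";
// the "tool" message is filtered out entirely.
```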
package/dist/providers/azureOpenai.js
package/dist/providers/googleAiStudio.js
package/dist/providers/googleVertex.js
package/dist/utils/imageProcessor.d.ts
package/dist/utils/imageProcessor.js
package/dist/utils/messageBuilder.d.ts
package/dist/utils/messageBuilder.js
CHANGED

The changes to these files are identical to the corresponding files under package/dist/lib/ shown above.
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@juspay/neurolink",
-  "version": "7.46.0",
+  "version": "7.47.0",
   "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
   "author": {
     "name": "Juspay Technologies",