@juspay/neurolink 7.46.0 → 7.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+## [7.47.0](https://github.com/juspay/neurolink/compare/v7.46.0...v7.47.0) (2025-09-25)
+
+
+### Features
+
+- **(chat):** Implement multimodal UI and extend SDK support ([12a2f59](https://github.com/juspay/neurolink/commit/12a2f59c4826e82ab1feb1347d08980682748ad2))
+
 ## [7.46.0](https://github.com/juspay/neurolink/compare/v7.45.0...v7.46.0) (2025-09-24)
 
 ### Features
@@ -34,6 +34,14 @@ const VISION_CAPABILITIES = {
     "claude-3-sonnet",
     "claude-3-haiku",
   ],
+  azure: [
+    "gpt-4o",
+    "gpt-4o-mini",
+    "gpt-4-turbo",
+    "gpt-4-vision-preview",
+    "gpt-4.1",
+    "gpt-4",
+  ],
   vertex: [
     // Gemini models on Vertex AI
    "gemini-2.5-pro",
@@ -78,6 +86,10 @@ export class ProviderImageAdapter {
             case "openai":
                 adaptedPayload = this.formatForOpenAI(text, images);
                 break;
+            case "azure":
+            case "azure-openai":
+                adaptedPayload = this.formatForOpenAI(text, images);
+                break;
             case "google-ai":
             case "google":
                adaptedPayload = this.formatForGoogleAI(text, images);
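Azure reuses the OpenAI payload formatter, which is reasonable since Azure OpenAI deployments accept the same chat-completions content-part shape. `formatForOpenAI`'s body is not shown in this diff, so the output below is a hedged sketch based on OpenAI's documented multimodal message format, not the package's actual code:

```ts
// Assumed output shape: OpenAI-style content parts for one user message.
function formatForOpenAI(text: string, images: string[]) {
  return [
    {
      role: "user" as const,
      content: [
        { type: "text" as const, text },
        ...images.map((url) => ({ type: "image_url" as const, image_url: { url } })),
      ],
    },
  ];
}
```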
@@ -51,7 +51,7 @@ export const PROVIDER_MAX_TOKENS = {
         default: 64000,
     },
     azure: {
-        default: 64000,
+        default: 32000,
     },
     ollama: {
        default: 64000,
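Azure's default token ceiling is halved from 64000 to 32000, presumably to match the output limits of real Azure OpenAI deployments. A small sketch of how a caller might clamp a requested budget against this table (the helper is a hypothetical illustration, not part of the package API):

```ts
const PROVIDER_MAX_TOKENS: Record<string, { default: number }> = {
  azure: { default: 32000 },
  ollama: { default: 64000 },
};

// Hypothetical helper: never exceed the provider's ceiling.
function clampMaxTokens(provider: string, requested?: number): number {
  const ceiling = PROVIDER_MAX_TOKENS[provider]?.default ?? 64000;
  return Math.min(requested ?? ceiling, ceiling);
}

console.log(clampMaxTokens("azure", 50000)); // 32000
```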
@@ -34,6 +34,14 @@ const VISION_CAPABILITIES = {
34
34
  "claude-3-sonnet",
35
35
  "claude-3-haiku",
36
36
  ],
37
+ azure: [
38
+ "gpt-4o",
39
+ "gpt-4o-mini",
40
+ "gpt-4-turbo",
41
+ "gpt-4-vision-preview",
42
+ "gpt-4.1",
43
+ "gpt-4",
44
+ ],
37
45
  vertex: [
38
46
  // Gemini models on Vertex AI
39
47
  "gemini-2.5-pro",
@@ -78,6 +86,10 @@ export class ProviderImageAdapter {
78
86
  case "openai":
79
87
  adaptedPayload = this.formatForOpenAI(text, images);
80
88
  break;
89
+ case "azure":
90
+ case "azure-openai":
91
+ adaptedPayload = this.formatForOpenAI(text, images);
92
+ break;
81
93
  case "google-ai":
82
94
  case "google":
83
95
  adaptedPayload = this.formatForGoogleAI(text, images);
@@ -51,7 +51,7 @@ export const PROVIDER_MAX_TOKENS = {
51
51
  default: 64000,
52
52
  },
53
53
  azure: {
54
- default: 64000,
54
+ default: 32000,
55
55
  },
56
56
  ollama: {
57
57
  default: 64000,
@@ -4,7 +4,7 @@ import { BaseProvider } from "../core/baseProvider.js";
 import { APIVersions } from "../types/providers.js";
 import { validateApiKey, createAzureAPIKeyConfig, createAzureEndpointConfig, } from "../utils/providerConfig.js";
 import { logger } from "../utils/logger.js";
-import { buildMessagesArray } from "../utils/messageBuilder.js";
+import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
 export class AzureOpenAIProvider extends BaseProvider {
@@ -109,8 +109,41 @@ export class AzureOpenAIProvider extends BaseProvider {
                 })),
             });
         }
-        // Build message array from options
-        const messages = buildMessagesArray(options);
+        // Build message array from options with multimodal support
+        const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+        let messages;
+        if (hasMultimodalInput) {
+            logger.debug(`Azure OpenAI: Detected multimodal input, using multimodal message builder`, {
+                hasImages: !!options.input?.images?.length,
+                imageCount: options.input?.images?.length || 0,
+                hasContent: !!options.input?.content?.length,
+                contentCount: options.input?.content?.length || 0,
+            });
+            // Create multimodal options for buildMultimodalMessagesArray
+            const multimodalOptions = {
+                input: {
+                    text: options.input?.text || "",
+                    images: options.input?.images,
+                    content: options.input?.content,
+                },
+                systemPrompt: options.systemPrompt,
+                conversationHistory: options.conversationMessages,
+                provider: this.providerName,
+                model: this.modelName,
+                temperature: options.temperature,
+                maxTokens: options.maxTokens,
+                enableAnalytics: options.enableAnalytics,
+                enableEvaluation: options.enableEvaluation,
+                context: options.context,
+            };
+            const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+            // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+            messages = convertToCoreMessages(mm);
+        }
+        else {
+            logger.debug(`Azure OpenAI: Text-only input, using standard message builder`);
+            messages = buildMessagesArray(options);
+        }
         const model = await this.getAISDKModelWithMiddleware(options);
         const stream = await streamText({
            model,
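With this branch in place, the Azure streaming path accepts the same multimodal input shape as the other providers. A hedged consumer-side sketch, inferred from the option fields the provider reads (`input.text`, `input.images`, `systemPrompt`); the `NeuroLink` entry point and result shape follow the package README, but exact field support may differ:

```ts
import { NeuroLink } from "@juspay/neurolink";

const neurolink = new NeuroLink();
const result = await neurolink.generate({
  provider: "azure", // now routed through the OpenAI-style image formatter
  input: {
    text: "Describe this diagram.",
    // Data URIs, e.g. produced by imageUtils.fileToBase64DataUri (assumption).
    images: ["data:image/png;base64,iVBORw0KG..."],
  },
  maxTokens: 4096, // well under azure's new 32000 default ceiling
});
console.log(result.content);
```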
@@ -7,8 +7,9 @@ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
 import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
 import { DEFAULT_MAX_STEPS } from "../core/constants.js";
 import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
-import { buildMessagesArray } from "../utils/messageBuilder.js";
+import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
 // Google AI Live API types now imported from ../types/providerSpecific.js
+// Import proper types for multimodal message handling
 // Create Google GenAI client
 async function createGoogleGenAIClient(apiKey) {
     const mod = await import("@google/genai");
@@ -90,8 +91,41 @@ export class GoogleAIStudioProvider extends BaseProvider {
         // Get tools consistently with generate method
         const shouldUseTools = !options.disableTools && this.supportsTools();
         const tools = shouldUseTools ? await this.getAllTools() : {};
-        // Build message array from options
-        const messages = buildMessagesArray(options);
+        // Build message array from options with multimodal support
+        const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+        let messages;
+        if (hasMultimodalInput) {
+            logger.debug(`Google AI Studio: Detected multimodal input, using multimodal message builder`, {
+                hasImages: !!options.input?.images?.length,
+                imageCount: options.input?.images?.length || 0,
+                hasContent: !!options.input?.content?.length,
+                contentCount: options.input?.content?.length || 0,
+            });
+            // Create multimodal options for buildMultimodalMessagesArray
+            const multimodalOptions = {
+                input: {
+                    text: options.input?.text || "",
+                    images: options.input?.images,
+                    content: options.input?.content,
+                },
+                systemPrompt: options.systemPrompt,
+                conversationHistory: options.conversationMessages,
+                provider: this.providerName,
+                model: this.modelName,
+                temperature: options.temperature,
+                maxTokens: options.maxTokens,
+                enableAnalytics: options.enableAnalytics,
+                enableEvaluation: options.enableEvaluation,
+                context: options.context,
+            };
+            const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+            // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+            messages = convertToCoreMessages(mm);
+        }
+        else {
+            logger.debug(`Google AI Studio: Text-only input, using standard message builder`);
+            messages = buildMessagesArray(options);
+        }
         const result = await streamText({
             model,
            messages: messages,
@@ -11,8 +11,9 @@ import fs from "fs";
 import path from "path";
 import os from "os";
 import dns from "dns";
-import { buildMessagesArray } from "../utils/messageBuilder.js";
+import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
+// Import proper types for multimodal message handling
 // Enhanced Anthropic support with direct imports
 // Using the dual provider architecture from Vercel AI SDK
 const hasAnthropicSupport = () => {
@@ -594,8 +595,41 @@ export class GoogleVertexProvider extends BaseProvider {
         try {
             // Validate stream options
             this.validateStreamOptionsOnly(options);
-            // Build message array from options
-            const messages = buildMessagesArray(options);
+            // Build message array from options with multimodal support
+            const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
+            let messages;
+            if (hasMultimodalInput) {
+                logger.debug(`${functionTag}: Detected multimodal input, using multimodal message builder`, {
+                    hasImages: !!options.input?.images?.length,
+                    imageCount: options.input?.images?.length || 0,
+                    hasContent: !!options.input?.content?.length,
+                    contentCount: options.input?.content?.length || 0,
+                });
+                // Create multimodal options for buildMultimodalMessagesArray
+                const multimodalOptions = {
+                    input: {
+                        text: options.input?.text || "",
+                        images: options.input?.images,
+                        content: options.input?.content,
+                    },
+                    systemPrompt: options.systemPrompt,
+                    conversationHistory: options.conversationMessages,
+                    provider: this.providerName,
+                    model: this.modelName,
+                    temperature: options.temperature,
+                    maxTokens: options.maxTokens,
+                    enableAnalytics: options.enableAnalytics,
+                    enableEvaluation: options.enableEvaluation,
+                    context: options.context,
+                };
+                const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+                // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
+                messages = convertToCoreMessages(mm);
+            }
+            else {
+                logger.debug(`${functionTag}: Text-only input, using standard message builder`);
+                messages = buildMessagesArray(options);
+            }
             const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
             // Get all available tools (direct + MCP + external) for streaming
            const shouldUseTools = !options.disableTools && this.supportsTools();
@@ -81,4 +81,48 @@ export declare const imageUtils: {
      * Convert file size to human readable format
      */
     formatFileSize: (bytes: number) => string;
+    /**
+     * Convert Buffer to base64 string
+     */
+    bufferToBase64: (buffer: Buffer) => string;
+    /**
+     * Convert base64 string to Buffer
+     */
+    base64ToBuffer: (base64: string) => Buffer;
+    /**
+     * Convert file path to base64 data URI
+     */
+    fileToBase64DataUri: (filePath: string, maxBytes?: number) => Promise<string>;
+    /**
+     * Convert URL to base64 data URI by downloading the image
+     */
+    urlToBase64DataUri: (url: string, { timeoutMs, maxBytes }?: {
+        timeoutMs?: number | undefined;
+        maxBytes?: number | undefined;
+    }) => Promise<string>;
+    /**
+     * Extract base64 data from data URI
+     */
+    extractBase64FromDataUri: (dataUri: string) => string;
+    /**
+     * Extract MIME type from data URI
+     */
+    extractMimeTypeFromDataUri: (dataUri: string) => string;
+    /**
+     * Create data URI from base64 and MIME type
+     */
+    createDataUri: (base64: string, mimeType?: string) => string;
+    /**
+     * Validate base64 string format
+     */
+    isValidBase64: (str: string) => boolean;
+    /**
+     * Get base64 string size in bytes
+     */
+    getBase64Size: (base64: string) => number;
+    /**
+     * Compress base64 image by reducing quality (basic implementation)
+     * Note: This is a placeholder - for production use, consider using sharp or similar
+     */
+    compressBase64: (base64: string, _quality?: number) => string;
 };
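These helpers compose naturally. A short usage sketch against the declared signatures (the root import path for `imageUtils` is an assumption, and the file and URL are placeholders):

```ts
import { imageUtils } from "@juspay/neurolink";

// Local file -> data URI, capped at the 10 MB default.
const fromFile = await imageUtils.fileToBase64DataUri("./photo.jpg");

// Remote image -> data URI, with explicit limits.
const fromUrl = await imageUtils.urlToBase64DataUri("https://example.com/logo.png", {
  timeoutMs: 5000,
  maxBytes: 2 * 1024 * 1024,
});

console.log(imageUtils.extractMimeTypeFromDataUri(fromFile)); // e.g. "image/jpeg"
console.log(imageUtils.getBase64Size(fromUrl)); // decoded size in bytes
```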
@@ -151,6 +151,8 @@ export class ImageProcessor {
             bmp: "image/bmp",
             tiff: "image/tiff",
             tif: "image/tiff",
+            svg: "image/svg+xml",
+            avif: "image/avif",
         };
         return imageTypes[extension || ""] || "image/jpeg";
     }
@@ -183,6 +185,21 @@ export class ImageProcessor {
                 return "image/webp";
             }
         }
+        // SVG: check for "<svg" or "<?xml" at start (text-based)
+        if (input.length >= 4) {
+            const start = input.subarray(0, 4).toString();
+            if (start === "<svg" || start === "<?xm") {
+                return "image/svg+xml";
+            }
+        }
+        // AVIF: check for "ftypavif" signature at bytes 4-11
+        if (input.length >= 12) {
+            const ftyp = input.subarray(4, 8).toString();
+            const brand = input.subarray(8, 12).toString();
+            if (ftyp === "ftyp" && brand === "avif") {
+                return "image/avif";
+            }
+        }
     }
     return "image/jpeg"; // Default fallback
 }
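The AVIF check reads the ISO-BMFF `ftyp` box: bytes 4-7 spell "ftyp" and bytes 8-11 carry the major brand. A quick way to see the signature on a real file (the path is a placeholder):

```ts
import { readFileSync } from "fs";

const buf = readFileSync("./sample.avif");
// For AVIF the first 12 bytes look like: <box size: 4 bytes> "ftyp" "avif"
console.log(buf.subarray(4, 8).toString()); // "ftyp"
console.log(buf.subarray(8, 12).toString()); // "avif"
```

Note the SVG probe only matches `<svg` or `<?xml` at byte 0, so leading whitespace or a BOM would fall through to the `image/jpeg` default.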
@@ -217,6 +234,8 @@ export class ImageProcessor {
             "image/webp",
             "image/bmp",
             "image/tiff",
+            "image/svg+xml",
+            "image/avif",
         ];
         return supportedFormats.includes(mediaType.toLowerCase());
     }
@@ -332,14 +351,7 @@ export const imageUtils = {
     /**
      * Check if a string is base64 encoded
      */
-    isBase64: (str) => {
-        try {
-            return btoa(atob(str)) === str;
-        }
-        catch {
-            return false;
-        }
-    },
+    isBase64: (str) => imageUtils.isValidBase64(str),
     /**
      * Extract file extension from filename or URL
      */
@@ -359,4 +371,143 @@ export const imageUtils = {
         const i = Math.floor(Math.log(bytes) / Math.log(k));
         return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + " " + sizes[i];
     },
+    /**
+     * Convert Buffer to base64 string
+     */
+    bufferToBase64: (buffer) => {
+        return buffer.toString("base64");
+    },
+    /**
+     * Convert base64 string to Buffer
+     */
+    base64ToBuffer: (base64) => {
+        // Remove data URI prefix if present
+        const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
+        return Buffer.from(cleanBase64, "base64");
+    },
+    /**
+     * Convert file path to base64 data URI
+     */
+    fileToBase64DataUri: async (filePath, maxBytes = 10 * 1024 * 1024) => {
+        try {
+            const fs = await import("fs/promises");
+            // File existence and type validation
+            const stat = await fs.stat(filePath);
+            if (!stat.isFile()) {
+                throw new Error("Not a file");
+            }
+            // Size check before reading - prevent memory exhaustion
+            if (stat.size > maxBytes) {
+                throw new Error(`File too large: ${stat.size} bytes (max: ${maxBytes} bytes)`);
+            }
+            const buffer = await fs.readFile(filePath);
+            // Enhanced MIME detection: try buffer content first, fallback to filename
+            const mimeType = ImageProcessor.detectImageType(buffer) ||
+                ImageProcessor.detectImageType(filePath);
+            const base64 = buffer.toString("base64");
+            return `data:${mimeType};base64,${base64}`;
+        }
+        catch (error) {
+            throw new Error(`Failed to convert file to base64: ${error instanceof Error ? error.message : "Unknown error"}`);
+        }
+    },
+    /**
+     * Convert URL to base64 data URI by downloading the image
+     */
+    urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024 } = {}) => {
+        try {
+            // Basic protocol whitelist
+            if (!/^https?:\/\//i.test(url)) {
+                throw new Error("Unsupported protocol");
+            }
+            const controller = new AbortController();
+            const t = setTimeout(() => controller.abort(), timeoutMs);
+            try {
+                const response = await fetch(url, { signal: controller.signal });
+                if (!response.ok) {
+                    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+                }
+                const contentType = response.headers.get("content-type") || "";
+                if (!/^image\//i.test(contentType)) {
+                    throw new Error(`Unsupported content-type: ${contentType || "unknown"}`);
+                }
+                const len = Number(response.headers.get("content-length") || 0);
+                if (len && len > maxBytes) {
+                    throw new Error(`Content too large: ${len} bytes`);
+                }
+                const buffer = await response.arrayBuffer();
+                if (buffer.byteLength > maxBytes) {
+                    throw new Error(`Downloaded content too large: ${buffer.byteLength} bytes`);
+                }
+                const base64 = Buffer.from(buffer).toString("base64");
+                return `data:${contentType || "image/jpeg"};base64,${base64}`;
+            }
+            finally {
+                clearTimeout(t);
+            }
+        }
+        catch (error) {
+            throw new Error(`Failed to download and convert URL to base64: ${error instanceof Error ? error.message : "Unknown error"}`);
+        }
+    },
+    /**
+     * Extract base64 data from data URI
+     */
+    extractBase64FromDataUri: (dataUri) => {
+        if (!dataUri.includes(",")) {
+            return dataUri; // Already just base64
+        }
+        return dataUri.split(",")[1];
+    },
+    /**
+     * Extract MIME type from data URI
+     */
+    extractMimeTypeFromDataUri: (dataUri) => {
+        const match = dataUri.match(/^data:([^;]+);base64,/);
+        return match ? match[1] : "image/jpeg";
+    },
+    /**
+     * Create data URI from base64 and MIME type
+     */
+    createDataUri: (base64, mimeType = "image/jpeg") => {
+        // Remove data URI prefix if already present
+        const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
+        return `data:${mimeType};base64,${cleanBase64}`;
+    },
+    /**
+     * Validate base64 string format
+     */
+    isValidBase64: (str) => {
+        try {
+            // Remove data URI prefix if present
+            const cleanBase64 = str.includes(",") ? str.split(",")[1] : str;
+            // Check if it's valid base64
+            const decoded = Buffer.from(cleanBase64, "base64");
+            const reencoded = decoded.toString("base64");
+            // Remove padding for comparison (base64 can have different padding)
+            const normalizeBase64 = (b64) => b64.replace(/=+$/, "");
+            return normalizeBase64(cleanBase64) === normalizeBase64(reencoded);
+        }
+        catch {
+            return false;
+        }
+    },
+    /**
+     * Get base64 string size in bytes
+     */
+    getBase64Size: (base64) => {
+        // Remove data URI prefix if present
+        const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
+        return Buffer.byteLength(cleanBase64, "base64");
+    },
+    /**
+     * Compress base64 image by reducing quality (basic implementation)
+     * Note: This is a placeholder - for production use, consider using sharp or similar
+     */
+    compressBase64: (base64, _quality = 0.8) => {
+        // This is a basic implementation that just returns the original
+        // In a real implementation, you'd use an image processing library
+        logger.warn("Base64 compression not implemented - returning original");
+        return base64;
+    },
 };
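`isValidBase64` round-trips the input through `Buffer` and compares with trailing `=` stripped, because the same bytes can be encoded with or without padding. A quick illustration of why that normalization is needed:

```ts
// Unpadded input decodes fine, but Buffer re-encodes with padding.
const decoded = Buffer.from("aGVsbG8", "base64"); // "hello"
console.log(decoded.toString("base64")); // "aGVsbG8=" - padding restored
// Stripping "=" before comparing accepts both spellings as valid.
console.log("aGVsbG8=".replace(/=+$/, "") === "aGVsbG8"); // true
```

This also replaces the old `isBase64`, whose `btoa(atob(str))` round-trip rejected unpadded input, any data-URI-prefixed string, and non-Latin-1 payloads.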
@@ -7,13 +7,12 @@ import type { MultimodalChatMessage } from "../types/conversation.js";
 import type { TextGenerationOptions } from "../types/index.js";
 import type { StreamOptions } from "../types/streamTypes.js";
 import type { GenerateOptions } from "../types/generateTypes.js";
+import type { CoreMessage } from "ai";
 /**
- * Core message type compatible with AI SDK
+ * Type-safe conversion from MultimodalChatMessage[] to CoreMessage[]
+ * Filters out invalid content and ensures strict CoreMessage contract compliance
  */
-type CoreMessage = {
-    role: "user" | "assistant" | "system";
-    content: string;
-};
+export declare function convertToCoreMessages(messages: MultimodalChatMessage[]): CoreMessage[];
 /**
  * Build a properly formatted message array for AI providers
  * Combines system prompt, conversation history, and current user prompt
@@ -25,4 +24,3 @@ export declare function buildMessagesArray(options: TextGenerationOptions | Stre
  * Detects when images are present and routes through provider adapter
  */
 export declare function buildMultimodalMessagesArray(options: GenerateOptions, provider: string, model: string): Promise<MultimodalChatMessage[]>;
-export {};
@@ -8,6 +8,147 @@ import { ProviderImageAdapter, MultimodalLogger, } from "../adapters/providerIma
 import { logger } from "./logger.js";
 import { request } from "undici";
 import { readFileSync, existsSync } from "fs";
+/**
+ * Type guard for validating message roles
+ */
+function isValidRole(role) {
+    return (typeof role === "string" &&
+        (role === "user" || role === "assistant" || role === "system"));
+}
+/**
+ * Type guard for validating content items
+ */
+function isValidContentItem(item) {
+    if (!item || typeof item !== "object") {
+        return false;
+    }
+    const contentItem = item;
+    if (contentItem.type === "text") {
+        return typeof contentItem.text === "string";
+    }
+    if (contentItem.type === "image") {
+        return (typeof contentItem.image === "string" &&
+            (contentItem.mimeType === undefined ||
+                typeof contentItem.mimeType === "string"));
+    }
+    return false;
+}
+/**
+ * Safely convert content item to AI SDK content format
+ */
+function convertContentItem(item) {
+    if (!isValidContentItem(item)) {
+        return null;
+    }
+    const contentItem = item;
+    if (contentItem.type === "text" && typeof contentItem.text === "string") {
+        return { type: "text", text: contentItem.text };
+    }
+    if (contentItem.type === "image" && typeof contentItem.image === "string") {
+        return {
+            type: "image",
+            image: contentItem.image,
+            ...(contentItem.mimeType && { mimeType: contentItem.mimeType }),
+        };
+    }
+    return null;
+}
+/**
+ * Type-safe conversion from MultimodalChatMessage[] to CoreMessage[]
+ * Filters out invalid content and ensures strict CoreMessage contract compliance
+ */
+export function convertToCoreMessages(messages) {
+    return messages
+        .map((msg) => {
+        // Validate role
+        if (!isValidRole(msg.role)) {
+            logger.warn("Invalid message role found, skipping", { role: msg.role });
+            return null;
+        }
+        // Handle string content
+        if (typeof msg.content === "string") {
+            // Create properly typed discriminated union messages
+            if (msg.role === "system") {
+                return {
+                    role: "system",
+                    content: msg.content,
+                };
+            }
+            else if (msg.role === "user") {
+                return {
+                    role: "user",
+                    content: msg.content,
+                };
+            }
+            else if (msg.role === "assistant") {
+                return {
+                    role: "assistant",
+                    content: msg.content,
+                };
+            }
+        }
+        // Handle array content (multimodal) - only user messages support full multimodal content
+        if (Array.isArray(msg.content)) {
+            const validContent = msg.content
+                .map(convertContentItem)
+                .filter((item) => item !== null);
+            // If no valid content items, skip the message
+            if (validContent.length === 0) {
+                logger.warn("No valid content items found in multimodal message, skipping");
+                return null;
+            }
+            if (msg.role === "user") {
+                // User messages support both text and image content
+                return {
+                    role: "user",
+                    content: validContent,
+                };
+            }
+            else if (msg.role === "assistant") {
+                // Assistant messages only support text content, filter out images
+                const textOnlyContent = validContent.filter((item) => item.type === "text");
+                if (textOnlyContent.length === 0) {
+                    // If no text content, convert to empty string
+                    return {
+                        role: "assistant",
+                        content: "",
+                    };
+                }
+                else if (textOnlyContent.length === 1) {
+                    // Single text item, use string content
+                    return {
+                        role: "assistant",
+                        content: textOnlyContent[0].text,
+                    };
+                }
+                else {
+                    // Multiple text items, concatenate them
+                    const combinedText = textOnlyContent
+                        .map((item) => item.text)
+                        .join(" ");
+                    return {
+                        role: "assistant",
+                        content: combinedText,
+                    };
+                }
+            }
+            else {
+                // System messages cannot have multimodal content, convert to text
+                const textContent = validContent.find((item) => item.type === "text")?.text || "";
+                return {
+                    role: "system",
+                    content: textContent,
+                };
+            }
+        }
+        // Invalid content type
+        logger.warn("Invalid message content type found, skipping", {
+            contentType: typeof msg.content,
+        });
+        return null;
+    })
+        .filter((msg) => msg !== null);
+}
 /**
  * Convert ChatMessage to CoreMessage for AI SDK compatibility
  */
@@ -84,7 +225,10 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
     // If no images, use standard message building and convert to MultimodalChatMessage[]
     if (!hasImages) {
         const standardMessages = buildMessagesArray(options);
-        return standardMessages.map((msg) => ({ ...msg, content: msg.content }));
+        return standardMessages.map((msg) => ({
+            role: msg.role,
+            content: typeof msg.content === "string" ? msg.content : msg.content,
+        }));
     }
     // Validate provider supports vision
     if (!ProviderImageAdapter.supportsVision(provider, model)) {
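A small sketch of `convertToCoreMessages` in action, using the content-item shape the type guards accept (`type: "text" | "image"`, string image payload, optional `mimeType`); the function is internal to the package, so it is shown here without an import path:

```ts
const core = convertToCoreMessages([
  { role: "system", content: "You are concise." },
  {
    role: "user",
    content: [
      { type: "text", text: "What does this chart show?" },
      { type: "image", image: "data:image/png;base64,iVBORw0KG...", mimeType: "image/png" },
    ],
  },
  // Assistant arrays are flattened to text; image parts are dropped.
  { role: "assistant", content: [{ type: "text", text: "A rising trend." }] },
]);
// System and user messages pass through; the assistant message becomes
// { role: "assistant", content: "A rising trend." }.
```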
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@juspay/neurolink",
-  "version": "7.46.0",
+  "version": "7.47.0",
   "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
   "author": {
     "name": "Juspay Technologies",