@juspay/neurolink 7.46.0 → 7.47.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## [7.47.1](https://github.com/juspay/neurolink/compare/v7.47.0...v7.47.1) (2025-09-26)
2
+
3
+ ### Bug Fixes
4
+
5
+ - **(tools):** Unregistered tools getting called ([45fd67a](https://github.com/juspay/neurolink/commit/45fd67af418b5e458ce6a261a7891234a8d489b8))
6
+
7
+ ## [7.47.0](https://github.com/juspay/neurolink/compare/v7.46.0...v7.47.0) (2025-09-25)
8
+
9
+ ### Features
10
+
11
+ - **(chat):** Implement multimodal UI and extend SDK support ([12a2f59](https://github.com/juspay/neurolink/commit/12a2f59c4826e82ab1feb1347d08980682748ad2))
12
+
1
13
  ## [7.46.0](https://github.com/juspay/neurolink/compare/v7.45.0...v7.46.0) (2025-09-24)
2
14
 
3
15
  ### Features
@@ -34,6 +34,14 @@ const VISION_CAPABILITIES = {
34
34
  "claude-3-sonnet",
35
35
  "claude-3-haiku",
36
36
  ],
37
+ azure: [
38
+ "gpt-4o",
39
+ "gpt-4o-mini",
40
+ "gpt-4-turbo",
41
+ "gpt-4-vision-preview",
42
+ "gpt-4.1",
43
+ "gpt-4",
44
+ ],
37
45
  vertex: [
38
46
  // Gemini models on Vertex AI
39
47
  "gemini-2.5-pro",
@@ -78,6 +86,10 @@ export class ProviderImageAdapter {
78
86
  case "openai":
79
87
  adaptedPayload = this.formatForOpenAI(text, images);
80
88
  break;
89
+ case "azure":
90
+ case "azure-openai":
91
+ adaptedPayload = this.formatForOpenAI(text, images);
92
+ break;
81
93
  case "google-ai":
82
94
  case "google":
83
95
  adaptedPayload = this.formatForGoogleAI(text, images);
@@ -51,7 +51,7 @@ export const PROVIDER_MAX_TOKENS = {
51
51
  default: 64000,
52
52
  },
53
53
  azure: {
54
- default: 64000,
54
+ default: 32000,
55
55
  },
56
56
  ollama: {
57
57
  default: 64000,
@@ -87,7 +87,7 @@ export class ProviderRegistry {
87
87
  // Register Amazon SageMaker provider
88
88
  ProviderFactory.registerProvider(AIProviderName.SAGEMAKER, async (modelName, _providerName, _sdk, region) => {
89
89
  const { AmazonSageMakerProvider } = await import("../providers/amazonSagemaker.js");
90
- return new AmazonSageMakerProvider(modelName, region);
90
+ return new AmazonSageMakerProvider(modelName, undefined, region);
91
91
  }, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
92
92
  logger.debug("All providers registered successfully");
93
93
  this.registered = true;
@@ -34,6 +34,14 @@ const VISION_CAPABILITIES = {
34
34
  "claude-3-sonnet",
35
35
  "claude-3-haiku",
36
36
  ],
37
+ azure: [
38
+ "gpt-4o",
39
+ "gpt-4o-mini",
40
+ "gpt-4-turbo",
41
+ "gpt-4-vision-preview",
42
+ "gpt-4.1",
43
+ "gpt-4",
44
+ ],
37
45
  vertex: [
38
46
  // Gemini models on Vertex AI
39
47
  "gemini-2.5-pro",
@@ -78,6 +86,10 @@ export class ProviderImageAdapter {
78
86
  case "openai":
79
87
  adaptedPayload = this.formatForOpenAI(text, images);
80
88
  break;
89
+ case "azure":
90
+ case "azure-openai":
91
+ adaptedPayload = this.formatForOpenAI(text, images);
92
+ break;
81
93
  case "google-ai":
82
94
  case "google":
83
95
  adaptedPayload = this.formatForGoogleAI(text, images);
@@ -51,7 +51,7 @@ export const PROVIDER_MAX_TOKENS = {
51
51
  default: 64000,
52
52
  },
53
53
  azure: {
54
- default: 64000,
54
+ default: 32000,
55
55
  },
56
56
  ollama: {
57
57
  default: 64000,
@@ -87,7 +87,7 @@ export class ProviderRegistry {
87
87
  // Register Amazon SageMaker provider
88
88
  ProviderFactory.registerProvider(AIProviderName.SAGEMAKER, async (modelName, _providerName, _sdk, region) => {
89
89
  const { AmazonSageMakerProvider } = await import("../providers/amazonSagemaker.js");
90
- return new AmazonSageMakerProvider(modelName, region);
90
+ return new AmazonSageMakerProvider(modelName, undefined, region);
91
91
  }, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
92
92
  logger.debug("All providers registered successfully");
93
93
  this.registered = true;
@@ -6,6 +6,7 @@
6
6
  * Uses real MCP infrastructure for tool discovery and execution.
7
7
  */
8
8
  import type { TextGenerationOptions, TextGenerationResult } from "./types/index.js";
9
+ import { MCPToolRegistry } from "./mcp/toolRegistry.js";
9
10
  import type { GenerateOptions, GenerateResult } from "./types/generateTypes.js";
10
11
  import type { StreamOptions, StreamResult } from "./types/streamTypes.js";
11
12
  import type { MCPServerInfo, MCPExecutableTool } from "./types/mcpTypes.js";
@@ -46,6 +47,7 @@ export interface MCPStatus {
46
47
  export declare class NeuroLink {
47
48
  private mcpInitialized;
48
49
  private emitter;
50
+ private toolRegistry;
49
51
  private autoDiscoveredServerInfos;
50
52
  private externalServerManager;
51
53
  private toolCache;
@@ -99,6 +101,7 @@ export declare class NeuroLink {
99
101
  * @param config.hitl.dangerousActions - Keywords that trigger confirmation (default: ['delete', 'remove', 'drop'])
100
102
  * @param config.hitl.timeout - Confirmation timeout in milliseconds (default: 30000)
101
103
  * @param config.hitl.allowArgumentModification - Allow users to modify tool parameters (default: true)
104
+ * @param config.toolRegistry - Optional tool registry instance for advanced use cases (default: new MCPToolRegistry())
102
105
  *
103
106
  * @example
104
107
  * ```typescript
@@ -139,6 +142,7 @@ export declare class NeuroLink {
139
142
  conversationMemory?: Partial<ConversationMemoryConfig>;
140
143
  enableOrchestration?: boolean;
141
144
  hitl?: HITLConfig;
145
+ toolRegistry?: MCPToolRegistry;
142
146
  });
143
147
  /**
144
148
  * Initialize provider registry with security settings
@@ -18,7 +18,7 @@ import { mcpLogger } from "./utils/logger.js";
18
18
  import { SYSTEM_LIMITS } from "./core/constants.js";
19
19
  import { NANOSECOND_TO_MS_DIVISOR, TOOL_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, PROVIDER_TIMEOUTS, PERFORMANCE_THRESHOLDS, } from "./constants/index.js";
20
20
  import pLimit from "p-limit";
21
- import { toolRegistry } from "./mcp/toolRegistry.js";
21
+ import { MCPToolRegistry } from "./mcp/toolRegistry.js";
22
22
  import { logger } from "./utils/logger.js";
23
23
  import { getBestProvider } from "./utils/providerUtils.js";
24
24
  import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -45,6 +45,7 @@ import { isZodSchema } from "./utils/schemaConversion.js";
45
45
  export class NeuroLink {
46
46
  mcpInitialized = false;
47
47
  emitter = new EventEmitter();
48
+ toolRegistry;
48
49
  autoDiscoveredServerInfos = [];
49
50
  // External MCP server management
50
51
  externalServerManager;
@@ -140,6 +141,7 @@ export class NeuroLink {
140
141
  * @param config.hitl.dangerousActions - Keywords that trigger confirmation (default: ['delete', 'remove', 'drop'])
141
142
  * @param config.hitl.timeout - Confirmation timeout in milliseconds (default: 30000)
142
143
  * @param config.hitl.allowArgumentModification - Allow users to modify tool parameters (default: true)
144
+ * @param config.toolRegistry - Optional tool registry instance for advanced use cases (default: new MCPToolRegistry())
143
145
  *
144
146
  * @example
145
147
  * ```typescript
@@ -177,6 +179,7 @@ export class NeuroLink {
177
179
  * @throws {Error} When HITL configuration is invalid (if enabled)
178
180
  */
179
181
  constructor(config) {
182
+ this.toolRegistry = config?.toolRegistry || new MCPToolRegistry();
180
183
  // Initialize orchestration setting
181
184
  this.enableOrchestration = config?.enableOrchestration ?? false;
182
185
  // Read tool cache duration from environment variables, with a default
@@ -278,7 +281,7 @@ export class NeuroLink {
278
281
  // Initialize HITL manager
279
282
  this.hitlManager = new HITLManager(config.hitl);
280
283
  // Inject HITL manager into tool registry
281
- toolRegistry.setHITLManager(this.hitlManager);
284
+ this.toolRegistry.setHITLManager(this.hitlManager);
282
285
  // Inject HITL manager into external server manager
283
286
  this.externalServerManager.setHITLManager(this.hitlManager);
284
287
  // Set up HITL event forwarding to main emitter
@@ -627,7 +630,7 @@ export class NeuroLink {
627
630
  mcpLogger.debug("Direct tools server are disabled via environment variable.");
628
631
  }
629
632
  else {
630
- await toolRegistry.registerServer("neurolink-direct", directToolsServer);
633
+ await this.toolRegistry.registerServer("neurolink-direct", directToolsServer);
631
634
  mcpLogger.debug("[NeuroLink] Direct tools server registered successfully", {
632
635
  serverId: "neurolink-direct",
633
636
  });
@@ -1371,7 +1374,7 @@ export class NeuroLink {
1371
1374
  mcpInitialized: this.mcpInitialized,
1372
1375
  mcpComponents: {
1373
1376
  hasExternalServerManager: !!this.externalServerManager,
1374
- hasToolRegistry: !!toolRegistry,
1377
+ hasToolRegistry: !!this.toolRegistry,
1375
1378
  hasProviderRegistry: !!AIProviderFactory,
1376
1379
  },
1377
1380
  fallbackReason: "MCP_NOT_INITIALIZED",
@@ -2403,7 +2406,7 @@ export class NeuroLink {
2403
2406
  // SMART DEFAULTS: Use utility to eliminate boilerplate creation
2404
2407
  const mcpServerInfo = createCustomToolServerInfo(name, convertedTool);
2405
2408
  // Register with toolRegistry using MCPServerInfo directly
2406
- toolRegistry.registerServer(mcpServerInfo);
2409
+ this.toolRegistry.registerServer(mcpServerInfo);
2407
2410
  // Emit tool registration success event
2408
2411
  this.emitter.emit("tools-register:end", {
2409
2412
  toolName: name,
@@ -2475,7 +2478,7 @@ export class NeuroLink {
2475
2478
  unregisterTool(name) {
2476
2479
  this.invalidateToolCache(); // Invalidate cache when a tool is unregistered
2477
2480
  const serverId = `custom-tool-${name}`;
2478
- const removed = toolRegistry.unregisterServer(serverId);
2481
+ const removed = this.toolRegistry.unregisterServer(serverId);
2479
2482
  if (removed) {
2480
2483
  logger.info(`Unregistered custom tool: ${name}`);
2481
2484
  }
@@ -2487,7 +2490,7 @@ export class NeuroLink {
2487
2490
  */
2488
2491
  getCustomTools() {
2489
2492
  // Get tools from toolRegistry with smart category detection
2490
- const customTools = toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true }));
2493
+ const customTools = this.toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true }));
2491
2494
  const toolMap = new Map();
2492
2495
  for (const tool of customTools) {
2493
2496
  const effectiveSchema = tool.inputSchema || tool.parameters;
@@ -2545,7 +2548,7 @@ export class NeuroLink {
2545
2548
  hasShopId: !!executionContext.shopId,
2546
2549
  sessionId: executionContext.sessionId,
2547
2550
  });
2548
- return await toolRegistry.executeTool(tool.name, params, executionContext);
2551
+ return await this.toolRegistry.executeTool(tool.name, params, executionContext);
2549
2552
  },
2550
2553
  });
2551
2554
  }
@@ -2566,7 +2569,7 @@ export class NeuroLink {
2566
2569
  serverInfo.tools = [];
2567
2570
  }
2568
2571
  // ZERO CONVERSIONS: Pass MCPServerInfo directly to toolRegistry
2569
- await toolRegistry.registerServer(serverInfo);
2572
+ await this.toolRegistry.registerServer(serverInfo);
2570
2573
  mcpLogger.info(`[NeuroLink] Successfully registered in-memory server: ${serverId}`, {
2571
2574
  category: serverInfo.metadata?.category,
2572
2575
  provider: serverInfo.metadata?.provider,
@@ -2584,7 +2587,7 @@ export class NeuroLink {
2584
2587
  */
2585
2588
  getInMemoryServers() {
2586
2589
  // Get in-memory servers from toolRegistry
2587
- const serverInfos = toolRegistry.getBuiltInServerInfos();
2590
+ const serverInfos = this.toolRegistry.getBuiltInServerInfos();
2588
2591
  const serverMap = new Map();
2589
2592
  for (const serverInfo of serverInfos) {
2590
2593
  if (detectCategory({
@@ -2603,7 +2606,7 @@ export class NeuroLink {
2603
2606
  */
2604
2607
  getInMemoryServerInfos() {
2605
2608
  // Get in-memory servers from centralized tool registry
2606
- const allServers = toolRegistry.getBuiltInServerInfos();
2609
+ const allServers = this.toolRegistry.getBuiltInServerInfos();
2607
2610
  return allServers.filter((server) => detectCategory({
2608
2611
  existingCategory: server.metadata?.category,
2609
2612
  serverId: server.id,
@@ -2855,7 +2858,7 @@ export class NeuroLink {
2855
2858
  storedContextKeys: Object.keys(storedContext),
2856
2859
  finalContextKeys: Object.keys(context),
2857
2860
  });
2858
- const result = (await toolRegistry.executeTool(toolName, params, context));
2861
+ const result = (await this.toolRegistry.executeTool(toolName, params, context));
2859
2862
  // ADD: Check if result indicates a failure and emit error event
2860
2863
  if (result &&
2861
2864
  typeof result === "object" &&
@@ -2905,9 +2908,9 @@ export class NeuroLink {
2905
2908
  getAllToolsHrTimeStart: getAllToolsHrTimeStart.toString(),
2906
2909
  // 🔧 Tool registry state
2907
2910
  toolRegistryState: {
2908
- hasToolRegistry: !!toolRegistry,
2911
+ hasToolRegistry: !!this.toolRegistry,
2909
2912
  toolRegistrySize: 0, // Not accessible as size property
2910
- toolRegistryType: toolRegistry?.constructor?.name || "NOT_SET",
2913
+ toolRegistryType: this.toolRegistry?.constructor?.name || "NOT_SET",
2911
2914
  hasExternalServerManager: !!this.externalServerManager,
2912
2915
  externalServerManagerType: this.externalServerManager?.constructor?.name || "NOT_SET",
2913
2916
  },
@@ -2926,7 +2929,7 @@ export class NeuroLink {
2926
2929
  // Optimized: Collect all tools with minimal object creation
2927
2930
  const allTools = new Map();
2928
2931
  // 1. Add MCP server tools (built-in direct tools)
2929
- const mcpToolsRaw = await toolRegistry.listTools();
2932
+ const mcpToolsRaw = await this.toolRegistry.listTools();
2930
2933
  for (const tool of mcpToolsRaw) {
2931
2934
  if (!allTools.has(tool.name)) {
2932
2935
  const optimizedTool = optimizeToolForCollection(tool, {
@@ -2936,7 +2939,7 @@ export class NeuroLink {
2936
2939
  }
2937
2940
  }
2938
2941
  // 2. Add custom tools from this NeuroLink instance
2939
- const customToolsRaw = toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true }));
2942
+ const customToolsRaw = this.toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true }));
2940
2943
  for (const tool of customToolsRaw) {
2941
2944
  if (!allTools.has(tool.name)) {
2942
2945
  const optimizedTool = optimizeToolForCollection(tool, {
@@ -2952,7 +2955,7 @@ export class NeuroLink {
2952
2955
  }
2953
2956
  }
2954
2957
  // 3. Add tools from in-memory MCP servers
2955
- const inMemoryToolsRaw = toolRegistry.getToolsByCategory("in-memory");
2958
+ const inMemoryToolsRaw = this.toolRegistry.getToolsByCategory("in-memory");
2956
2959
  for (const tool of inMemoryToolsRaw) {
2957
2960
  if (!allTools.has(tool.name)) {
2958
2961
  const optimizedTool = optimizeToolForCollection(tool, {
@@ -3231,13 +3234,13 @@ export class NeuroLink {
3231
3234
  // Initialize MCP if not already initialized (loads external servers from config)
3232
3235
  await this.initializeMCP();
3233
3236
  // Get built-in tools
3234
- const allTools = await toolRegistry.listTools();
3237
+ const allTools = await this.toolRegistry.listTools();
3235
3238
  // Get external MCP server statistics
3236
3239
  const externalStats = this.externalServerManager.getStatistics();
3237
3240
  // DIRECT RETURNS - ZERO conversion
3238
3241
  const externalMCPServers = this.externalServerManager.listServers();
3239
3242
  const inMemoryServerInfos = this.getInMemoryServerInfos();
3240
- const builtInServerInfos = toolRegistry.getBuiltInServerInfos();
3243
+ const builtInServerInfos = this.toolRegistry.getBuiltInServerInfos();
3241
3244
  const autoDiscoveredServerInfos = this.getAutoDiscoveredServerInfos();
3242
3245
  // Calculate totals
3243
3246
  const totalServers = externalMCPServers.length +
@@ -3255,7 +3258,7 @@ export class NeuroLink {
3255
3258
  autoDiscoveredCount: autoDiscoveredServerInfos.length,
3256
3259
  totalTools,
3257
3260
  autoDiscoveredServers: autoDiscoveredServerInfos,
3258
- customToolsCount: toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true })).length,
3261
+ customToolsCount: this.toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true })).length,
3259
3262
  inMemoryServersCount: inMemoryServerInfos.length,
3260
3263
  externalMCPServersCount: externalMCPServers.length,
3261
3264
  externalMCPConnectedCount: externalStats.connectedServers,
@@ -3271,7 +3274,7 @@ export class NeuroLink {
3271
3274
  autoDiscoveredCount: 0,
3272
3275
  totalTools: 0,
3273
3276
  autoDiscoveredServers: [],
3274
- customToolsCount: toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true })).length,
3277
+ customToolsCount: this.toolRegistry.getToolsByCategory(detectCategory({ isCustomTool: true })).length,
3275
3278
  inMemoryServersCount: 0,
3276
3279
  externalMCPServersCount: 0,
3277
3280
  externalMCPConnectedCount: 0,
@@ -3290,7 +3293,7 @@ export class NeuroLink {
3290
3293
  return [
3291
3294
  ...this.externalServerManager.listServers(), // Direct return
3292
3295
  ...this.getInMemoryServerInfos(), // Direct return
3293
- ...toolRegistry.getBuiltInServerInfos(), // Direct return
3296
+ ...this.toolRegistry.getBuiltInServerInfos(), // Direct return
3294
3297
  ...this.getAutoDiscoveredServerInfos(), // Direct return
3295
3298
  ];
3296
3299
  }
@@ -3303,7 +3306,7 @@ export class NeuroLink {
3303
3306
  try {
3304
3307
  // Test built-in tools
3305
3308
  if (serverId === "neurolink-direct") {
3306
- const tools = await toolRegistry.listTools();
3309
+ const tools = await this.toolRegistry.listTools();
3307
3310
  return tools.length > 0;
3308
3311
  }
3309
3312
  // Test in-memory servers
@@ -3480,7 +3483,7 @@ export class NeuroLink {
3480
3483
  const tools = {};
3481
3484
  let healthyCount = 0;
3482
3485
  // Get all tool names from toolRegistry
3483
- const allTools = await toolRegistry.listTools();
3486
+ const allTools = await this.toolRegistry.listTools();
3484
3487
  const allToolNames = new Set(allTools.map((tool) => tool.name));
3485
3488
  for (const toolName of allToolNames) {
3486
3489
  const metrics = this.toolExecutionMetrics.get(toolName);
@@ -3908,7 +3911,7 @@ export class NeuroLink {
3908
3911
  try {
3909
3912
  const externalTools = this.externalServerManager.getServerTools(serverId);
3910
3913
  for (const tool of externalTools) {
3911
- toolRegistry.removeTool(tool.name);
3914
+ this.toolRegistry.removeTool(tool.name);
3912
3915
  mcpLogger.debug(`[NeuroLink] Unregistered external MCP tool from main registry: ${tool.name}`);
3913
3916
  }
3914
3917
  }
@@ -3921,7 +3924,7 @@ export class NeuroLink {
3921
3924
  */
3922
3925
  unregisterExternalMCPToolFromRegistry(toolName) {
3923
3926
  try {
3924
- toolRegistry.removeTool(toolName);
3927
+ this.toolRegistry.removeTool(toolName);
3925
3928
  mcpLogger.debug(`[NeuroLink] Unregistered external MCP tool from main registry: ${toolName}`);
3926
3929
  }
3927
3930
  catch (error) {
@@ -3979,7 +3982,7 @@ export class NeuroLink {
3979
3982
  try {
3980
3983
  const externalTools = this.externalServerManager.getAllTools();
3981
3984
  for (const tool of externalTools) {
3982
- toolRegistry.removeTool(tool.name);
3985
+ this.toolRegistry.removeTool(tool.name);
3983
3986
  }
3984
3987
  mcpLogger.debug(`[NeuroLink] Unregistered ${externalTools.length} external MCP tools from main registry`);
3985
3988
  }
@@ -4,7 +4,7 @@ import { BaseProvider } from "../core/baseProvider.js";
4
4
  import { APIVersions } from "../types/providers.js";
5
5
  import { validateApiKey, createAzureAPIKeyConfig, createAzureEndpointConfig, } from "../utils/providerConfig.js";
6
6
  import { logger } from "../utils/logger.js";
7
- import { buildMessagesArray } from "../utils/messageBuilder.js";
7
+ import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
8
8
  import { createProxyFetch } from "../proxy/proxyFetch.js";
9
9
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
10
10
  export class AzureOpenAIProvider extends BaseProvider {
@@ -109,8 +109,41 @@ export class AzureOpenAIProvider extends BaseProvider {
109
109
  })),
110
110
  });
111
111
  }
112
- // Build message array from options
113
- const messages = buildMessagesArray(options);
112
+ // Build message array from options with multimodal support
113
+ const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
114
+ let messages;
115
+ if (hasMultimodalInput) {
116
+ logger.debug(`Azure OpenAI: Detected multimodal input, using multimodal message builder`, {
117
+ hasImages: !!options.input?.images?.length,
118
+ imageCount: options.input?.images?.length || 0,
119
+ hasContent: !!options.input?.content?.length,
120
+ contentCount: options.input?.content?.length || 0,
121
+ });
122
+ // Create multimodal options for buildMultimodalMessagesArray
123
+ const multimodalOptions = {
124
+ input: {
125
+ text: options.input?.text || "",
126
+ images: options.input?.images,
127
+ content: options.input?.content,
128
+ },
129
+ systemPrompt: options.systemPrompt,
130
+ conversationHistory: options.conversationMessages,
131
+ provider: this.providerName,
132
+ model: this.modelName,
133
+ temperature: options.temperature,
134
+ maxTokens: options.maxTokens,
135
+ enableAnalytics: options.enableAnalytics,
136
+ enableEvaluation: options.enableEvaluation,
137
+ context: options.context,
138
+ };
139
+ const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
140
+ // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
141
+ messages = convertToCoreMessages(mm);
142
+ }
143
+ else {
144
+ logger.debug(`Azure OpenAI: Text-only input, using standard message builder`);
145
+ messages = buildMessagesArray(options);
146
+ }
114
147
  const model = await this.getAISDKModelWithMiddleware(options);
115
148
  const stream = await streamText({
116
149
  model,
@@ -7,8 +7,9 @@ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
7
7
  import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
8
8
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
9
9
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
10
- import { buildMessagesArray } from "../utils/messageBuilder.js";
10
+ import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
11
11
  // Google AI Live API types now imported from ../types/providerSpecific.js
12
+ // Multimodal message handling uses buildMultimodalMessagesArray and convertToCoreMessages, imported above from ../utils/messageBuilder.js
12
13
  // Create Google GenAI client
13
14
  async function createGoogleGenAIClient(apiKey) {
14
15
  const mod = await import("@google/genai");
@@ -90,8 +91,41 @@ export class GoogleAIStudioProvider extends BaseProvider {
90
91
  // Get tools consistently with generate method
91
92
  const shouldUseTools = !options.disableTools && this.supportsTools();
92
93
  const tools = shouldUseTools ? await this.getAllTools() : {};
93
- // Build message array from options
94
- const messages = buildMessagesArray(options);
94
+ // Build message array from options with multimodal support
95
+ const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
96
+ let messages;
97
+ if (hasMultimodalInput) {
98
+ logger.debug(`Google AI Studio: Detected multimodal input, using multimodal message builder`, {
99
+ hasImages: !!options.input?.images?.length,
100
+ imageCount: options.input?.images?.length || 0,
101
+ hasContent: !!options.input?.content?.length,
102
+ contentCount: options.input?.content?.length || 0,
103
+ });
104
+ // Create multimodal options for buildMultimodalMessagesArray
105
+ const multimodalOptions = {
106
+ input: {
107
+ text: options.input?.text || "",
108
+ images: options.input?.images,
109
+ content: options.input?.content,
110
+ },
111
+ systemPrompt: options.systemPrompt,
112
+ conversationHistory: options.conversationMessages,
113
+ provider: this.providerName,
114
+ model: this.modelName,
115
+ temperature: options.temperature,
116
+ maxTokens: options.maxTokens,
117
+ enableAnalytics: options.enableAnalytics,
118
+ enableEvaluation: options.enableEvaluation,
119
+ context: options.context,
120
+ };
121
+ const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
122
+ // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
123
+ messages = convertToCoreMessages(mm);
124
+ }
125
+ else {
126
+ logger.debug(`Google AI Studio: Text-only input, using standard message builder`);
127
+ messages = buildMessagesArray(options);
128
+ }
95
129
  const result = await streamText({
96
130
  model,
97
131
  messages: messages,
@@ -11,8 +11,9 @@ import fs from "fs";
11
11
  import path from "path";
12
12
  import os from "os";
13
13
  import dns from "dns";
14
- import { buildMessagesArray } from "../utils/messageBuilder.js";
14
+ import { buildMessagesArray, buildMultimodalMessagesArray, convertToCoreMessages, } from "../utils/messageBuilder.js";
15
15
  import { createProxyFetch } from "../proxy/proxyFetch.js";
16
+ // Multimodal message handling uses buildMultimodalMessagesArray and convertToCoreMessages, imported above from ../utils/messageBuilder.js
16
17
  // Enhanced Anthropic support with direct imports
17
18
  // Using the dual provider architecture from Vercel AI SDK
18
19
  const hasAnthropicSupport = () => {
@@ -594,8 +595,41 @@ export class GoogleVertexProvider extends BaseProvider {
594
595
  try {
595
596
  // Validate stream options
596
597
  this.validateStreamOptionsOnly(options);
597
- // Build message array from options
598
- const messages = buildMessagesArray(options);
598
+ // Build message array from options with multimodal support
599
+ const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length);
600
+ let messages;
601
+ if (hasMultimodalInput) {
602
+ logger.debug(`${functionTag}: Detected multimodal input, using multimodal message builder`, {
603
+ hasImages: !!options.input?.images?.length,
604
+ imageCount: options.input?.images?.length || 0,
605
+ hasContent: !!options.input?.content?.length,
606
+ contentCount: options.input?.content?.length || 0,
607
+ });
608
+ // Create multimodal options for buildMultimodalMessagesArray
609
+ const multimodalOptions = {
610
+ input: {
611
+ text: options.input?.text || "",
612
+ images: options.input?.images,
613
+ content: options.input?.content,
614
+ },
615
+ systemPrompt: options.systemPrompt,
616
+ conversationHistory: options.conversationMessages,
617
+ provider: this.providerName,
618
+ model: this.modelName,
619
+ temperature: options.temperature,
620
+ maxTokens: options.maxTokens,
621
+ enableAnalytics: options.enableAnalytics,
622
+ enableEvaluation: options.enableEvaluation,
623
+ context: options.context,
624
+ };
625
+ const mm = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
626
+ // Convert multimodal messages to Vercel AI SDK format (CoreMessage[])
627
+ messages = convertToCoreMessages(mm);
628
+ }
629
+ else {
630
+ logger.debug(`${functionTag}: Text-only input, using standard message builder`);
631
+ messages = buildMessagesArray(options);
632
+ }
599
633
  const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
600
634
  // Get all available tools (direct + MCP + external) for streaming
601
635
  const shouldUseTools = !options.disableTools && this.supportsTools();
@@ -81,4 +81,48 @@ export declare const imageUtils: {
81
81
  * Convert file size to human readable format
82
82
  */
83
83
  formatFileSize: (bytes: number) => string;
84
+ /**
85
+ * Convert Buffer to base64 string
86
+ */
87
+ bufferToBase64: (buffer: Buffer) => string;
88
+ /**
89
+ * Convert base64 string to Buffer
90
+ */
91
+ base64ToBuffer: (base64: string) => Buffer;
92
+ /**
93
+ * Convert file path to base64 data URI
94
+ */
95
+ fileToBase64DataUri: (filePath: string, maxBytes?: number) => Promise<string>;
96
+ /**
97
+ * Convert URL to base64 data URI by downloading the image
98
+ */
99
+ urlToBase64DataUri: (url: string, { timeoutMs, maxBytes }?: {
100
+ timeoutMs?: number | undefined;
101
+ maxBytes?: number | undefined;
102
+ }) => Promise<string>;
103
+ /**
104
+ * Extract base64 data from data URI
105
+ */
106
+ extractBase64FromDataUri: (dataUri: string) => string;
107
+ /**
108
+ * Extract MIME type from data URI
109
+ */
110
+ extractMimeTypeFromDataUri: (dataUri: string) => string;
111
+ /**
112
+ * Create data URI from base64 and MIME type
113
+ */
114
+ createDataUri: (base64: string, mimeType?: string) => string;
115
+ /**
116
+ * Validate base64 string format
117
+ */
118
+ isValidBase64: (str: string) => boolean;
119
+ /**
120
+ * Get base64 string size in bytes
121
+ */
122
+ getBase64Size: (base64: string) => number;
123
+ /**
124
+ * Compress base64 image by reducing quality (basic implementation)
125
+ * Note: This is a placeholder - for production use, consider using sharp or similar
126
+ */
127
+ compressBase64: (base64: string, _quality?: number) => string;
84
128
  };