@juspay/neurolink 7.35.0 → 7.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
+ import { generateText } from "ai";
  import { MiddlewareFactory } from "../middleware/factory.js";
  import { logger } from "../utils/logger.js";
  import { DEFAULT_MAX_STEPS, STEP_LIMITS } from "../core/constants.js";
@@ -5,12 +6,11 @@ import { directAgentTools } from "../agent/directTools.js";
  import { getSafeMaxTokens } from "../utils/tokenLimits.js";
  import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
  import { shouldDisableBuiltinTools } from "../utils/toolUtils.js";
- import { buildMessagesArray } from "../utils/messageBuilder.js";
+ import { buildMessagesArray, buildMultimodalMessagesArray, } from "../utils/messageBuilder.js";
  import { getKeysAsString, getKeyCount } from "../utils/transformationUtils.js";
  import { validateStreamOptions as validateStreamOpts, validateTextGenerationOptions, ValidationError, createValidationSummary, } from "../utils/parameterValidation.js";
  import { recordProviderPerformanceFromMetrics, getPerformanceOptimizedProvider, } from "./evaluationProviders.js";
  import { modelConfig } from "./modelConfiguration.js";
- // Provider types moved to ../types/providers.js
  /**
  * Abstract base class for all AI providers
  * Tools are integrated as first-class citizens - always available by default
@@ -166,7 +166,7 @@ export class BaseProvider {
  try {
  // Import streamText dynamically to avoid circular dependencies
  // Using streamText instead of generateText for unified implementation
- const { streamText } = await import("ai");
+ // const { streamText } = await import("ai");
  // Get ALL available tools (direct + MCP + external from options)
  const shouldUseTools = !options.disableTools && this.supportsTools();
  const baseTools = shouldUseTools ? await this.getAllTools() : {};
@@ -211,42 +211,86 @@ export class BaseProvider {
  });
  const model = await this.getAISDKModelWithMiddleware(options);
  // Build proper message array with conversation history
- const messages = buildMessagesArray(options);
- // Use streamText and accumulate results instead of generateText
- const streamResult = await streamText({
+ // Check if this is a multimodal request (images or content present)
+ let messages;
+ // Type guard to check if options has multimodal input
+ const hasMultimodalInput = (opts) => {
+ const input = opts.input;
+ const hasImages = !!input?.images?.length;
+ const hasContent = !!input?.content?.length;
+ return hasImages || hasContent;
+ };
+ if (hasMultimodalInput(options)) {
+ if (process.env.NEUROLINK_DEBUG === "true") {
+ logger.info("🖼️ [MULTIMODAL-REQUEST] Detected multimodal input, using multimodal message builder");
+ }
+ // This is a multimodal request - use multimodal message builder
+ // Convert TextGenerationOptions to GenerateOptions format for multimodal processing
+ const input = options.input;
+ const multimodalOptions = {
+ input: {
+ text: options.prompt || options.input?.text || "",
+ images: input?.images,
+ content: input?.content,
+ },
+ provider: options.provider,
+ model: options.model,
+ temperature: options.temperature,
+ maxTokens: options.maxTokens,
+ systemPrompt: options.systemPrompt,
+ enableAnalytics: options.enableAnalytics,
+ enableEvaluation: options.enableEvaluation,
+ context: options.context,
+ };
+ messages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
+ }
+ else {
+ if (process.env.NEUROLINK_DEBUG === "true") {
+ logger.info("📝 [TEXT-ONLY-REQUEST] No multimodal input detected, using standard message builder");
+ }
+ // Standard text-only request
+ messages = buildMessagesArray(options);
+ }
+ // Convert messages to Vercel AI SDK format
+ const aiSDKMessages = messages.map((msg) => {
+ if (typeof msg.content === "string") {
+ // Simple text content
+ return {
+ role: msg.role,
+ content: msg.content,
+ };
+ }
+ else {
+ // Multimodal content array - convert to Vercel AI SDK format
+ // The Vercel AI SDK expects content to be in a specific format
+ return {
+ role: msg.role,
+ content: msg.content.map((item) => {
+ if (item.type === "text") {
+ return { type: "text", text: item.text || "" };
+ }
+ else if (item.type === "image") {
+ return { type: "image", image: item.image || "" };
+ }
+ return item;
+ }),
+ };
+ }
+ });
+ const generateResult = await generateText({
  model,
- messages: messages,
+ messages: aiSDKMessages,
  tools,
  maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
  toolChoice: shouldUseTools ? "auto" : "none",
  temperature: options.temperature,
  maxTokens: options.maxTokens, // No default limit - unlimited unless specified
  });
- // Accumulate the streamed content
- let accumulatedContent = "";
- // Wait for the stream to complete and accumulate content
- try {
- for await (const chunk of streamResult.textStream) {
- accumulatedContent += chunk;
- }
- }
- catch (streamError) {
- logger.error(`Error reading text stream for ${this.providerName}:`, streamError);
- throw streamError;
- }
- // Get the final result - this should include usage, toolCalls, etc.
- const usage = await streamResult.usage;
- const toolCalls = await streamResult.toolCalls;
- const toolResults = await streamResult.toolResults;
  const responseTime = Date.now() - startTime;
- // Create a result object compatible with generateText format
- const result = {
- text: accumulatedContent,
- usage: usage,
- toolCalls: toolCalls,
- toolResults: toolResults,
- steps: streamResult.steps, // Include steps for tool execution tracking
- };
+ // Extract properties from generateResult
+ const usage = generateResult.usage;
+ const toolCalls = generateResult.toolCalls;
+ const toolResults = generateResult.toolResults;
  try {
  const actualCost = await this.calculateActualCost(usage || { promptTokens: 0, completionTokens: 0, totalTokens: 0 });
  recordProviderPerformanceFromMetrics(this.providerName, {
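For context, the conversion in this hunk targets the message shape the Vercel AI SDK's generateText accepts. A minimal sketch of a multimodal call in that format (not taken from this package; the provider package and model are illustrative choices):

import { generateText } from "ai";
import { openai } from "@ai-sdk/openai"; // illustrative provider choice

// One user message whose content mixes a text part and an image part,
// matching the { type: "text" } / { type: "image" } items produced above.
const { text, usage } = await generateText({
  model: openai("gpt-4o"),
  messages: [
    {
      role: "user",
      content: [
        { type: "text", text: "What is in this image?" },
        { type: "image", image: "data:image/png;base64,iVBORw0KGgo..." },
      ],
    },
  ],
});
console.log(text, usage);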
@@ -273,14 +317,14 @@ export class BaseProvider {
  // First check direct tool calls (fallback)
  if (toolCalls && toolCalls.length > 0) {
  toolsUsed.push(...toolCalls.map((tc) => {
- return tc.toolName || "unknown";
+ return tc.toolName || tc.name || "unknown";
  }));
  }
  // Then check steps for tool calls (primary source for multi-step)
- if (result.steps &&
- Array.isArray(result.steps)) {
- for (const step of result.steps ||
- []) {
+ if (generateResult.steps &&
+ Array.isArray(generateResult.steps)) {
+ for (const step of generateResult
+ .steps || []) {
  if (step?.toolCalls && Array.isArray(step.toolCalls)) {
  toolsUsed.push(...step.toolCalls.map((tc) => {
  return tc.toolName || tc.name || "unknown";
@@ -295,10 +339,10 @@ export class BaseProvider {
  // Create a map of tool calls to their arguments for matching with results
  const toolCallArgsMap = new Map();
  // Extract tool executions from AI SDK result steps
- if (result.steps &&
- Array.isArray(result.steps)) {
- for (const step of result.steps ||
- []) {
+ if (generateResult.steps &&
+ Array.isArray(generateResult.steps)) {
+ for (const step of generateResult
+ .steps || []) {
  // First, collect tool calls and their arguments
  if (step?.toolCalls && Array.isArray(step.toolCalls)) {
  for (const toolCall of step.toolCalls) {
@@ -359,11 +403,11 @@ export class BaseProvider {
  }
  // Format the result with tool executions included
  const enhancedResult = {
- content: result.text,
+ content: generateResult.text,
  usage: {
- input: result.usage?.promptTokens || 0,
- output: result.usage?.completionTokens || 0,
- total: result.usage?.totalTokens || 0,
+ input: generateResult.usage?.promptTokens || 0,
+ output: generateResult.usage?.completionTokens || 0,
+ total: generateResult.usage?.totalTokens || 0,
  },
  provider: this.providerName,
  model: this.modelName,
@@ -943,13 +987,23 @@ export class BaseProvider {
  const providerName = optionsOrPrompt.provider || this.providerName;
  // Apply safe maxTokens based on provider and model
  const safeMaxTokens = getSafeMaxTokens(providerName, modelName, optionsOrPrompt.maxTokens);
- return {
+ // CRITICAL FIX: Preserve the entire input object for multimodal support
+ // This ensures images and content arrays are not lost during normalization
+ const normalizedOptions = {
  ...optionsOrPrompt,
  prompt,
  provider: providerName,
  model: modelName,
  maxTokens: safeMaxTokens,
  };
+ // Ensure input object is preserved if it exists (for multimodal support)
+ if (optionsOrPrompt.input) {
+ normalizedOptions.input = {
+ ...optionsOrPrompt.input,
+ text: prompt, // Ensure text is consistent
+ };
+ }
+ return normalizedOptions;
  }
  normalizeStreamOptions(optionsOrPrompt) {
  if (typeof optionsOrPrompt === "string") {
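A rough sketch of the shape the normalization above now produces; the names follow the hunk, the values are illustrative rather than a test from the package:

import { readFileSync } from "node:fs";

const imageBuffer = readFileSync("./chart.png");
// A caller-supplied options object with multimodal input...
const callerOptions = {
  prompt: "Summarize this chart",
  input: { text: "Summarize this chart", images: [imageBuffer] },
  provider: "openai",
};
// ...normalizes to an object that still carries the full input object,
// with input.text kept in sync with prompt:
// {
//   ...callerOptions,
//   prompt: "Summarize this chart",
//   provider: "openai",
//   model: "<resolved model>",
//   maxTokens: "<safe limit>",
//   input: { text: "Summarize this chart", images: [imageBuffer] },
// }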
@@ -175,11 +175,14 @@ export interface StreamingOptions {
  }
  /**
  * Text generation options interface
+ * Extended to support multimodal content with zero breaking changes
  */
  export interface TextGenerationOptions {
  prompt?: string;
  input?: {
  text: string;
+ images?: Array<Buffer | string>;
+ content?: Array<import("../types/content.js").TextContent | import("../types/content.js").ImageContent>;
  };
  provider?: AIProviderName;
  model?: string;
@@ -854,7 +854,7 @@ export class NeuroLink {
  // Continue with warning rather than throwing - graceful degradation
  }
  }
- // Convert to TextGenerationOptions using factory utilities
+ // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
  const baseOptions = {
  prompt: options.input.text,
  provider: options.provider,
@@ -868,6 +868,7 @@ export class NeuroLink {
  context: options.context,
  evaluationDomain: options.evaluationDomain,
  toolUsageContext: options.toolUsageContext,
+ input: options.input, // This includes text, images, and content arrays
  };
  // Apply factory enhancement using centralized utilities
  const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
@@ -1664,7 +1665,9 @@ export class NeuroLink {
  const processedStream = (async function* (self) {
  try {
  for await (const chunk of mcpStream) {
- if (chunk && "content" in chunk && typeof chunk.content === "string") {
+ if (chunk &&
+ "content" in chunk &&
+ typeof chunk.content === "string") {
  accumulatedContent += chunk.content;
  // Emit chunk event for compatibility
  self.emitter.emit("response:chunk", chunk.content);
@@ -1941,7 +1944,9 @@ export class NeuroLink {
  const fallbackProcessedStream = (async function* (self) {
  try {
  for await (const chunk of fallbackStreamResult.stream) {
- if (chunk && "content" in chunk && typeof chunk.content === "string") {
+ if (chunk &&
+ "content" in chunk &&
+ typeof chunk.content === "string") {
  fallbackAccumulatedContent += chunk.content;
  // Emit chunk event
  self.emitter.emit("response:chunk", chunk.content);
@@ -0,0 +1,78 @@
+ /**
+ * Content type definitions for multimodal support
+ * Supports text and image content with provider-specific formatting
+ */
+ /**
+ * Text content type for multimodal messages
+ */
+ export interface TextContent {
+ type: "text";
+ text: string;
+ }
+ /**
+ * Image content type for multimodal messages
+ */
+ export interface ImageContent {
+ type: "image";
+ data: Buffer | string;
+ mediaType?: "image/jpeg" | "image/png" | "image/gif" | "image/webp" | "image/bmp" | "image/tiff";
+ metadata?: {
+ description?: string;
+ quality?: "low" | "high" | "auto";
+ dimensions?: {
+ width: number;
+ height: number;
+ };
+ filename?: string;
+ };
+ }
+ /**
+ * Union type for all content types
+ */
+ export type Content = TextContent | ImageContent;
+ /**
+ * Vision capability information for providers
+ */
+ export interface VisionCapability {
+ provider: string;
+ supportedModels: string[];
+ maxImageSize?: number;
+ supportedFormats: string[];
+ maxImagesPerRequest?: number;
+ }
+ /**
+ * Provider-specific image format requirements
+ */
+ export interface ProviderImageFormat {
+ provider: string;
+ format: "data_uri" | "base64" | "inline_data" | "source";
+ requiresPrefix?: boolean;
+ mimeTypeField?: string;
+ dataField?: string;
+ }
+ /**
+ * Image processing result
+ */
+ export interface ProcessedImage {
+ data: string;
+ mediaType: string;
+ size: number;
+ format: "data_uri" | "base64" | "inline_data" | "source";
+ }
+ /**
+ * Multimodal message structure for provider adapters
+ */
+ export interface MultimodalMessage {
+ role: "user" | "assistant" | "system";
+ content: Content[];
+ }
+ /**
+ * Provider-specific multimodal payload
+ */
+ export interface ProviderMultimodalPayload {
+ provider: string;
+ model: string;
+ messages?: MultimodalMessage[];
+ contents?: unknown[];
+ [key: string]: unknown;
+ }
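A short sketch of how these new content types compose; the import path and file names are assumptions based on the declaration file above, and the values are illustrative:

import type { TextContent, ImageContent, MultimodalMessage } from "../types/content.js"; // path assumed
import { readFileSync } from "node:fs";

const caption: TextContent = { type: "text", text: "Receipt from last week" };
const photo: ImageContent = {
  type: "image",
  data: readFileSync("./receipt.jpg"), // Buffer accepted alongside base64 / data-URI strings
  mediaType: "image/jpeg",
  metadata: { quality: "high", filename: "receipt.jpg" },
};
const message: MultimodalMessage = { role: "user", content: [caption, photo] };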
@@ -0,0 +1,5 @@
+ /**
+ * Content type definitions for multimodal support
+ * Supports text and image content with provider-specific formatting
+ */
+ export {};
@@ -66,6 +66,25 @@ export interface ChatMessage {
  /** Content of the message */
  content: string;
  }
+ /**
+ * Content format for multimodal messages (used internally)
+ */
+ export interface MessageContent {
+ type: string;
+ text?: string;
+ image?: string;
+ mimeType?: string;
+ [key: string]: unknown;
+ }
+ /**
+ * Extended chat message for multimodal support (internal use)
+ */
+ export interface MultimodalChatMessage {
+ /** Role of the message sender */
+ role: "user" | "assistant" | "system";
+ /** Content of the message - can be text or multimodal content array */
+ content: string | MessageContent[];
+ }
  /**
  * Events emitted by conversation memory system
  */
@@ -6,13 +6,16 @@ import type { EvaluationData } from "./evaluation.js";
  import type { ChatMessage, ConversationMemoryConfig } from "./conversation.js";
  import type { MiddlewareFactoryOptions } from "./middlewareTypes.js";
  import type { JsonValue } from "./common.js";
+ import type { TextContent, ImageContent } from "./content.js";
  /**
  * Generate function options type - Primary method for content generation
- * Future-ready for multi-modal capabilities while maintaining text focus
+ * Supports multimodal content while maintaining backward compatibility
  */
  export type GenerateOptions = {
  input: {
  text: string;
+ images?: Array<Buffer | string>;
+ content?: Array<TextContent | ImageContent>;
  };
  output?: {
  format?: "text" | "structured" | "json";
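A hedged usage sketch of the extended GenerateOptions at the public API surface; the generate() entry point, result shape, and provider/model values are assumptions, not taken from this diff:

import { NeuroLink } from "@juspay/neurolink";
import { readFileSync } from "node:fs";

const neurolink = new NeuroLink();
const result = await neurolink.generate({
  input: {
    text: "Describe this diagram.",
    images: [readFileSync("./diagram.png")], // new optional images?: Array<Buffer | string>
  },
  provider: "openai",
  model: "gpt-4o",
});
console.log(result.content); // result field name assumed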
@@ -5,6 +5,7 @@ import type { AnalyticsData, TokenUsage } from "./analytics.js";
  import type { EvaluationData } from "./evaluation.js";
  import type { UnknownRecord, JsonValue } from "./common.js";
  import type { ChatMessage } from "./conversation.js";
+ import type { TextContent, ImageContent } from "./content.js";
  import type { MiddlewareFactoryOptions } from "./middlewareTypes.js";
  /**
  * Progress tracking and metadata for streaming operations
@@ -118,10 +119,12 @@ export interface AudioChunk {
  channels: number;
  encoding: PCMEncoding;
  }
- export type StreamOptions = {
+ export interface StreamOptions {
  input: {
- text?: string;
+ text: string;
  audio?: AudioInputSpec;
+ images?: Array<Buffer | string>;
+ content?: Array<TextContent | ImageContent>;
  };
  output?: {
  format?: "text" | "structured" | "json";
@@ -166,7 +169,7 @@ export type StreamOptions = {
  };
  conversationMessages?: ChatMessage[];
  middleware?: MiddlewareFactoryOptions;
- };
+ }
  /**
  * Stream function result type - Primary output format for streaming
  * Future-ready for multi-modal outputs while maintaining text focus
@@ -0,0 +1,84 @@
+ /**
+ * Image processing utilities for multimodal support
+ * Handles format conversion for different AI providers
+ */
+ import type { ProcessedImage } from "../types/content.js";
+ /**
+ * Image processor class for handling provider-specific image formatting
+ */
+ export declare class ImageProcessor {
+ /**
+ * Process image for OpenAI (requires data URI format)
+ */
+ static processImageForOpenAI(image: Buffer | string): string;
+ /**
+ * Process image for Google AI (requires base64 without data URI prefix)
+ */
+ static processImageForGoogle(image: Buffer | string): {
+ mimeType: string;
+ data: string;
+ };
+ /**
+ * Process image for Anthropic (requires base64 without data URI prefix)
+ */
+ static processImageForAnthropic(image: Buffer | string): {
+ mediaType: string;
+ data: string;
+ };
+ /**
+ * Process image for Vertex AI (model-specific routing)
+ */
+ static processImageForVertex(image: Buffer | string, model: string): {
+ mimeType?: string;
+ mediaType?: string;
+ data: string;
+ };
+ /**
+ * Detect image type from filename or data
+ */
+ static detectImageType(input: string | Buffer): string;
+ /**
+ * Validate image size (default 10MB limit)
+ */
+ static validateImageSize(data: Buffer | string, maxSize?: number): boolean;
+ /**
+ * Validate image format
+ */
+ static validateImageFormat(mediaType: string): boolean;
+ /**
+ * Get image dimensions from Buffer (basic implementation)
+ */
+ static getImageDimensions(buffer: Buffer): {
+ width: number;
+ height: number;
+ } | null;
+ /**
+ * Convert image to ProcessedImage format
+ */
+ static processImage(image: Buffer | string, provider: string, model?: string): ProcessedImage;
+ }
+ /**
+ * Utility functions for image handling
+ */
+ export declare const imageUtils: {
+ /**
+ * Check if a string is a valid data URI
+ */
+ isDataUri: (str: string) => boolean;
+ /**
+ * Check if a string is a valid URL
+ */
+ isUrl: (str: string) => boolean;
+ /**
+ * Check if a string is base64 encoded
+ */
+ isBase64: (str: string) => boolean;
+ /**
+ * Extract file extension from filename or URL
+ */
+ getFileExtension: (filename: string) => string | null;
+ /**
+ * Convert file size to human readable format
+ */
+ formatFileSize: (bytes: number) => string;
+ };
+ };